OpenMS
TriqlerFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Timo Sachsenberg $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
13 #include <OpenMS/FORMAT/TextFile.h>
14 
15 #include <map>
16 #include <utility>
17 #include <unordered_map>
18 #include <set>
19 #include <vector>
20 
21 namespace OpenMS
22 {
24  using IndProtGrps = std::vector<IndProtGrp>; // List of IndProtGrp, i.e. ProteinIdentification::ProteinGroup objects (each bundles multiple, e.g. indistinguishable, proteins)
25 
31  class OPENMS_DLLAPI TriqlerFile // File adapter for Triqler files.
32  {
33  public:
35  TriqlerFile() = default; // Default constructor.
37  ~TriqlerFile() = default; // Destructor.
38 
40  void storeLFQ(const String& filename, // Stores a label-free experiment.
41  const ConsensusMap &consensus_map,
42  const ExperimentalDesign& design,
43  const StringList& reannotate_filenames,
44  const String& condition);
45 
46  private:
49 
50  static const String na_string_;
51  static const char delim_ = ','; // presumably the column separator of the written file (TODO confirm in the implementation)
52  static const char accdelim_ = ';'; // presumably separates several accessions inside one field (TODO confirm in the implementation)
53  static const char quote_ = '"'; // presumably the quoting character for fields (TODO confirm in the implementation)
54 
55  /*
56  * @brief Struct that aggregates intermediate information from ConsensusFeature and ConsensusMap,
57  * such as filenames, intensities, retention times, labels and features (for further processing).
58  */
60  {
61  std::vector< std::vector< String > > consensus_feature_filenames; //< Filenames of ConsensusFeature
62  std::vector< std::vector< Intensity > > consensus_feature_intensities; //< Intensities of ConsensusFeature
63  std::vector< std::vector< Coordinate > > consensus_feature_retention_times; //< Retention times of ConsensusFeature
64  std::vector< std::vector< unsigned > > consensus_feature_labels; //< Labels of ConsensusFeature
65  std::vector<BaseFeature> features; //< Features of ConsensusMap
66  };
67 
68  /*
69  * @brief Aggregates information from ConsensusFeature and ConsensusMap,
70  * such as filenames, intensities, retention times, labels and features.
71  * Stores them in AggregatedConsensusInfo for later processing.
72  */
74  const std::vector<String>& spectra_paths);
75 
76  /*
77  * @brief Internal function to check whether the condition exists in the experimental design
78  */
79  static void checkConditionLFQ_(const ExperimentalDesign::SampleSection& sampleSection, const String& condition);
80 
81  /*
82  * In OpenMS, a run is split into multiple fractions.
83  */
84  static void assembleRunMap_(
85  std::map< std::pair< String, unsigned>, unsigned> &run_map,
86  const ExperimentalDesign &design);
87 
88  /*
89  * @brief Checks two vectors for the same content
90  */
91  static bool checkUnorderedContent_(const std::vector< String> &first, const std::vector< String > &second);
92 
93  OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const // Accumulates every element of `intensities` into `result` and returns it.
94  { // NOTE: `result` is declared on listing line 95, which this rendering does not show.
96  for (const OpenMS::Peak2D::IntensityType &intensity : intensities)
97  {
98  result += intensity; // the input is a std::set, so equal intensity values contribute only once
99  }
100  return result;
101  }
102 
103  OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const // Returns sumIntensity_(intensities) divided by the number of elements.
104  {
105  return sumIntensity_(intensities) / intensities.size(); // NOTE(review): no guard for an empty set; size() is then 0 and this divides by zero. Confirm callers never pass an empty set.
106  }
107 
109  {
110  public :
112  const String& run,
113  const String& condition,
114  const String& precursor_charge,
115  const String& search_score,
116  const String& intensity,
117  const String& sequence,
118  const String& accession
119  ): run_(run), // each constructor argument is copied into the member of the same name
120  condition_(condition),
121  precursor_charge_(precursor_charge),
122  search_score_(search_score),
123  intensity_(intensity),
124  sequence_(sequence),
125  accession_(accession)
126  {}
127 
128  TriqlerLine_(TriqlerLine_&& m) = default;
129 
130  TriqlerLine_(const TriqlerLine_& m) = default;
131 
133  String toString() const; // Returns the line as a String; the formatting is defined in the implementation.
134 
135  friend bool operator<(const TriqlerLine_ &l, // Ordering needed to keep lines in a std::set (see MapSequenceToLines_).
136  const TriqlerLine_ &r)
137  {
138  return std::tie(l.accession_, l.run_, l.condition_, l.precursor_charge_, l.intensity_, l.sequence_) < // lexicographic compare in this field order; all fields are String, so values such as intensity compare as text
139  std::tie(r.accession_, r.run_, r.condition_, r.precursor_charge_, r.intensity_, r.sequence_); // NOTE(review): search_score_ is not part of the key, so two lines differing only in score are equivalent for a std::set. Confirm this is intended.
140  }
141 
142  private:
150  };
151 
152  using MapSequenceToLines_ = std::map<String, std::set<TriqlerLine_>>; // Maps a String key (a peptide sequence, going by the alias and parameter names) to its ordered set of lines.
153  /*
154  * @brief Constructs the lines and adds them to the TextFile
155  * @param peptideseq_quantifyable Has to be a set (only) for deterministic ordered output
156  */
157  void constructFile_(TextFile& csv_out,
158  const std::set<String>& peptideseq_quantifyable,
159  const MapSequenceToLines_& peptideseq_to_line) const;
160 
161  /*
162  * @brief Constructs the mapping from accession to indistinguishable protein group
163  */
164  static std::unordered_map<OpenMS::String, const IndProtGrp* > getAccessionToGroupMap_(const IndProtGrps& ind_prots);
165 
166 
167  /*
168  * @brief Based on the evidence accession set in a PeptideHit, checks if it is unique and therefore quantifiable
169  * in a group context.
170  *
171  */
173  const std::set<String>& accs,
174  const std::unordered_map<String, const IndProtGrp*>& accession_to_group) const;
175 
176  };
177 } // namespace OpenMS
A container for consensus elements.
Definition: ConsensusMap.h:66
Definition: ExperimentalDesign.h:241
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:219
float IntensityType
Intensity type.
Definition: Peak2D.h:36
double CoordinateType
Coordinate type (of the position)
Definition: Peak2D.h:38
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:105
A more convenient string class.
Definition: String.h:34
This class provides some basic file handling methods for text files.
Definition: TextFile.h:21
Definition: TriqlerFile.h:109
String intensity_
Definition: TriqlerFile.h:147
String search_score_
Definition: TriqlerFile.h:146
String toString() const
as string
String condition_
Definition: TriqlerFile.h:144
String sequence_
Definition: TriqlerFile.h:148
TriqlerLine_(TriqlerLine_ &&m)=default
friend bool operator<(const TriqlerLine_ &l, const TriqlerLine_ &r)
Definition: TriqlerFile.h:135
TriqlerLine_(const TriqlerLine_ &m)=default
String accession_
Definition: TriqlerFile.h:149
TriqlerLine_(const String &run, const String &condition, const String &precursor_charge, const String &search_score, const String &intensity, const String &sequence, const String &accession)
Definition: TriqlerFile.h:111
String precursor_charge_
Definition: TriqlerFile.h:145
String run_
Definition: TriqlerFile.h:143
File adapter for Triqler files.
Definition: TriqlerFile.h:32
~TriqlerFile()=default
Destructor.
TriqlerFile()=default
Default constructor.
OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: TriqlerFile.h:93
std::vector< std::vector< unsigned > > consensus_feature_labels
Definition: TriqlerFile.h:64
static bool checkUnorderedContent_(const std::vector< String > &first, const std::vector< String > &second)
void constructFile_(TextFile &csv_out, const std::set< String > &peptideseq_quantifyable, const MapSequenceToLines_ &peptideseq_to_line) const
static void checkConditionLFQ_(const ExperimentalDesign::SampleSection &sampleSection, const String &condition)
OpenMS::Peak2D::CoordinateType Coordinate
Definition: TriqlerFile.h:48
std::map< String, std::set< TriqlerLine_ > > MapSequenceToLines_
Definition: TriqlerFile.h:152
static const String na_string_
Definition: TriqlerFile.h:50
std::vector< std::vector< Intensity > > consensus_feature_intensities
Definition: TriqlerFile.h:62
std::vector< BaseFeature > features
Definition: TriqlerFile.h:65
std::vector< std::vector< String > > consensus_feature_filenames
Definition: TriqlerFile.h:61
static std::unordered_map< OpenMS::String, const IndProtGrp * > getAccessionToGroupMap_(const IndProtGrps &ind_prots)
TriqlerFile::AggregatedConsensusInfo aggregateInfo_(const ConsensusMap &consensus_map, const std::vector< String > &spectra_paths)
bool isQuantifyable_(const std::set< String > &accs, const std::unordered_map< String, const IndProtGrp * > &accession_to_group) const
std::vector< std::vector< Coordinate > > consensus_feature_retention_times
Definition: TriqlerFile.h:63
static void assembleRunMap_(std::map< std::pair< String, unsigned >, unsigned > &run_map, const ExperimentalDesign &design)
OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: TriqlerFile.h:103
OpenMS::Peak2D::IntensityType Intensity
Definition: TriqlerFile.h:47
void storeLFQ(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const String &condition)
store label free experiment
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:44
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
OpenMS::ProteinIdentification::ProteinGroup IndProtGrp
Definition: MSstatsFile.h:23
std::vector< IndProtGrp > IndProtGrps
Definition: MSstatsFile.h:24