OpenMS  2.4.0
FeatureFinderIdentificationAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H
36 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H
37 
45 
46 #include <vector>
47 #include <fstream>
48 #include <map>
49 
50 namespace OpenMS
51 {
52  class IsotopeDistribution;
53 
55  public DefaultParamHandler
56 {
57 public:
60 
/// Main entry point of the algorithm (declaration only; the definition is not part of this header).
/// All four identification vectors are taken by value, so the callee works on its own copies
/// (callers can std::move them in to avoid the copy). `features` is a non-const reference.
/// NOTE(review): presumably `peptides`/`proteins` are the "internal" IDs and the `_ext` pair the
/// "external" IDs (cf. n_internal_peps_ / n_external_peps_), and `features` receives the result
/// -- confirm against the implementation.
64  void run(
65  std::vector<PeptideIdentification> peptides,
66  std::vector<ProteinIdentification> proteins,
67  std::vector<PeptideIdentification> peptides_ext,
68  std::vector<ProteinIdentification> proteins_ext,
69  FeatureMap& features
70  );
71 
/// Declaration only; `features` is passed by non-const reference and may be modified.
/// NOTE(review): presumably runs the later processing stages on an already existing set of
/// candidate features -- confirm against the implementation.
72  void runOnCandidates(FeatureMap& features);
73 
/// Mutable and read-only access to ms_data_ (the input LC-MS data).
/// The non-const overload returns a reference to the member itself, so callers modify it in place.
74  PeakMap& getMSData() { return ms_data_; }
75  const PeakMap& getMSData() const { return ms_data_; }
76 
/// Mutable and read-only access to chrom_data_ (the accumulated chromatograms, XICs).
/// The non-const overload returns a reference to the member itself, so callers modify it in place.
77  PeakMap& getChromatograms() { return chrom_data_; }
78  const PeakMap& getChromatograms() const { return chrom_data_; }
79 
/// Mutable and read-only access to prog_log_ (a ProgressLogger member; its declaration on
/// line 213 of the original header is missing from this listing, see the index below).
80  ProgressLogger& getProgressLogger() { return prog_log_; }
81  const ProgressLogger& getProgressLogger() const { return prog_log_; }
82 
/// Mutable and read-only access to library_ (the accumulated assays for peptides).
/// The non-const overload returns a reference to the member itself, so callers modify it in place.
83  TargetedExperiment& getLibrary() { return library_; }
84  const TargetedExperiment& getLibrary() const { return library_; }
85 
86 protected:
89 
/// mapping: RT (not necessarily unique) -> pointer to peptide
91  typedef std::multimap<double, PeptideIdentification*> RTMap;
/// mapping: charge -> internal/external: (RT -> pointer to peptide)
93  typedef std::map<Int, std::pair<RTMap, RTMap> > ChargeMap;
/// mapping: sequence -> charge -> internal/external ID information
95  typedef std::map<AASequence, ChargeMap> PeptideMap;
/// mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
97  typedef std::map<String, std::pair<RTMap, RTMap> > PeptideRefRTMap;
98 
100 
101  Size n_internal_peps_; ///< number of internal peptides
102  Size n_external_peps_; ///< number of external peptides
103 
104  double rt_window_; ///< RT window width
105  double mz_window_; ///< m/z window width
106  bool mz_window_ppm_; ///< m/z window width is given in PPM (not Da)?
107 
108  double mapping_tolerance_; ///< RT tolerance for mapping IDs to features
109 
110  double isotope_pmin_; ///< min. isotope probability for peptide assay
111  Size n_isotopes_; ///< number of isotopes for peptide assay
112 
113  double rt_quantile_;
114 
115  double peak_width_;
118 
120 
121  // SVM related parameters
// (lines 122-125 of the original header are missing from this listing; per the index below they
// declare svm_min_prob_, svm_predictor_names_, svm_xval_out_ and svm_quality_cutoff)
126  Size svm_n_parts_; ///< number of partitions for SVM cross-validation
127  Size svm_n_samples_; ///< number of samples for SVM training
128 
129  // output file (before filtering)
131 
133 
/// Overrides a virtual member function inherited from a base class (the class derives from
/// DefaultParamHandler).
/// NOTE(review): presumably re-reads the current parameter values into the cached members
/// declared above -- confirm against the implementation.
134  void updateMembers_() override;
135 
/// region in RT in which a peptide elutes
137  struct RTRegion
138  {
139  double start, end; ///< bounds of the region (presumably RT values -- confirm)
140  ChargeMap ids; ///< internal/external peptide IDs (per charge) in this region
141  };
142 
/// predicate for filtering features by overall quality:
/// returns true for a feature whose overall quality compares equal to 0.0.
/// NOTE(review): the comparison is an exact floating-point equality and the call operator is
/// non-const. The struct's header line (line 144 of the original header) is missing from this listing.
145  {
146  bool operator()(const Feature& feature)
147  {
148  return feature.getOverallQuality() == 0.0;
149  }
150  } feature_filter_quality_;
151 
/// predicate for filtering features by assigned peptides:
/// returns true for a feature that has no peptide identifications attached.
/// NOTE(review): the call operator is non-const. The struct's header line (line 153 of the
/// original header) is missing from this listing.
154  {
155  bool operator()(const Feature& feature)
156  {
157  return feature.getPeptideIdentifications().empty();
158  }
159  } feature_filter_peptides_;
160 
/// comparison functor for (unassigned) peptide IDs:
/// orders by the sequence string of the first hit, then by the charge of the first hit, then by RT.
/// Only getHits()[0] is consulted and it is accessed without a bounds check, so both IDs must
/// carry at least one hit.
/// NOTE(review): this listing omits lines 162 (struct header) and 164 of the original header; per
/// the index below, line 164 reads
/// `bool operator()(const PeptideIdentification &p1, const PeptideIdentification &p2)`.
163  {
165  const PeptideIdentification& p2)
166  {
167  const String& seq1 = p1.getHits()[0].getSequence().toString();
168  const String& seq2 = p2.getHits()[0].getSequence().toString();
169  if (seq1 == seq2)
170  {
// same sequence: fall back to the charge of the first hit
171  Int charge1 = p1.getHits()[0].getCharge();
172  Int charge2 = p2.getHits()[0].getCharge();
173  if (charge1 == charge2)
174  {
// same sequence and charge: order by RT
175  return p1.getRT() < p2.getRT();
176  }
177  return charge1 < charge2;
178  }
179  return seq1 < seq2;
180  }
181  } peptide_compare_;
182 
/// comparison functor for features:
/// orders by the "PeptideRef" meta value, then by RT.
/// NOTE(review): the call operator is non-const. The struct's header line (line 184 of the
/// original header) is missing from this listing.
185  {
186  bool operator()(const Feature& f1, const Feature& f2)
187  {
// getMetaValue() returns a DataValue (see index below); each const String& therefore binds to a
// converted temporary that stays alive until the end of this function
188  const String& ref1 = f1.getMetaValue("PeptideRef");
189  const String& ref2 = f2.getMetaValue("PeptideRef");
190  if (ref1 == ref2)
191  {
// same peptide reference: order by RT
192  return f1.getRT() < f2.getRT();
193  }
194  return ref1 < ref2;
195  }
196  } feature_compare_;
197 
198  PeakMap ms_data_; ///< input LC-MS data
199  PeakMap chrom_data_; ///< accumulated chromatograms (XICs)
200  TargetedExperiment library_; ///< accumulated assays for peptides
201 
/// SVM probability -> number of pos./neg. features (for FDR calculation)
203  std::map<double, std::pair<Size, Size> > svm_probs_internal_;
/// SVM probabilities for "external" features (for FDR calculation)
205  std::multiset<double> svm_probs_external_;
206  Size n_internal_features_; ///< internal feature counter (for FDR calculation)
207  Size n_external_features_; ///< external feature counter (for FDR calculation)
209  TransformationDescription trafo_external_; ///< transform. to external RT scale
210  std::map<String, double> isotope_probs_; ///< isotope probabilities of transitions
211  MRMFeatureFinderScoring feat_finder_; ///< OpenSWATH feature finder
212 
214 
/// Declaration only (definition is outside this header); non-const member, inputs are read-only.
/// NOTE(review): presumably generates the assay transitions for one peptide ion (`peptide_id`,
/// precursor `mz`, `charge`) from the isotope distribution `iso_dist` -- confirm.
216  void generateTransitions_(const String& peptide_id, double mz, Int charge,
217  const IsotopeDistribution& iso_dist);
218 
/// Declaration only; const member that receives `peptide` by non-const reference (in/out).
/// NOTE(review): presumably stores the retention time `rt` in `peptide` -- confirm.
219  void addPeptideRT_(TargetedExperiment::Peptide& peptide, double rt) const;
220 
/// Declaration only; const member, both arguments are non-const references.
/// NOTE(review): presumably derives the elution regions for the IDs in `peptide_data` and writes
/// them to `rt_regions` -- confirm.
222  void getRTRegions_(ChargeMap& peptide_data, std::vector<RTRegion>& rt_regions) const;
223 
/// Declaration only; non-const member, all three arguments are non-const references.
/// `feat_ids` maps a Size key to pointers to peptide IDs.
/// NOTE(review): presumably the key is a feature index and this is the per-assay finishing step of
/// annotateFeatures_() -- confirm.
224  void annotateFeaturesFinalizeAssay_(
225  FeatureMap& features,
226  std::map<Size, std::vector<PeptideIdentification*> >& feat_ids,
227  RTMap& rt_internal);
228 
/// Declaration only; non-const member, both arguments are non-const references.
/// NOTE(review): presumably annotates `features` with the peptide IDs referenced in `ref_rt_map`
/// (cf. mapping_tolerance_) -- confirm.
230  void annotateFeatures_(FeatureMap& features, PeptideRefRTMap& ref_rt_map);
231 
/// Declaration only; `feature` is passed by non-const reference and may be modified.
/// NOTE(review): presumably makes sure `feature` has convex hulls -- confirm.
232  void ensureConvexHulls_(Feature& feature);
233 
/// Declaration only; `features` is passed by non-const reference and may be modified.
/// NOTE(review): presumably `with_external_ids` selects the extra processing needed when external
/// IDs were supplied -- confirm.
234  void postProcess_(FeatureMap& features, bool with_external_ids);
235 
/// Declaration only; const member taking `features` by const reference (read-only).
/// NOTE(review): presumably reports summary statistics about `features` -- confirm.
237  void statistics_(const FeatureMap& features) const;
238 
/// Declaration only; non-const member, both arguments are non-const references.
/// NOTE(review): presumably builds the assays in library_ from `peptide_map` and fills
/// `ref_rt_map` -- confirm.
239  void createAssayLibrary_(PeptideMap& peptide_map, PeptideRefRTMap& ref_rt_map);
240 
/// Declaration only; const member. `peptide` and `peptide_map` are non-const references;
/// `external` defaults to false.
/// NOTE(review): presumably inserts `peptide` into `peptide_map`, on the internal or external side
/// depending on `external` -- confirm.
241  void addPeptideToMap_(PeptideIdentification& peptide,
242  PeptideMap& peptide_map,
243  bool external = false) const;
244 
/// Declaration only; const member. `note` defaults to the empty string.
/// NOTE(review): presumably validates the numbers of positive (`n_pos`) and negative (`n_neg`)
/// observations; how a failure is reported is not visible here -- confirm.
245  void checkNumObservations_(Size n_pos, Size n_neg, const String& note = "") const;
246 
/// Declaration only; `valid_obs` is read-only, `training_labels` is a non-const reference.
/// NOTE(review): presumably selects training observations from `valid_obs` and writes their labels
/// to `training_labels`; the meaning of the (Size, bool) pair is not visible here -- confirm.
247  void getUnbiasedSample_(const std::multimap<double, std::pair<Size, bool> >& valid_obs,
248  std::map<Size, Int>& training_labels);
249 
/// Declaration only; `training_labels` is a non-const reference (in/out).
/// NOTE(review): presumably draws a random subset of training observations -- confirm.
250  void getRandomSample_(std::map<Size, Int>& training_labels);
251 
/// Declaration only; `features` is passed by non-const reference and may be modified.
/// NOTE(review): presumably runs the SVM classification configured by the svm_* members -- confirm.
252  void classifyFeatures_(FeatureMap& features);
253 
/// Declaration only; `best_feature` is a non-const reference, the two doubles are passed by value.
/// NOTE(review): presumably the per-assay finishing step of filterFeatures_(), comparing
/// `best_quality` against `quality_cutoff` -- confirm.
254  void filterFeaturesFinalizeAssay_(Feature& best_feature, double best_quality,
255  const double quality_cutoff);
256 
/// Declaration only; `features` is passed by non-const reference and may be modified.
/// NOTE(review): presumably `classified` states whether classifyFeatures_() was run first -- confirm.
257  void filterFeatures_(FeatureMap& features, bool classified);
258 
/// Declaration only; `features` is passed by non-const reference and may be modified.
/// NOTE(review): presumably uses svm_probs_internal_ / svm_probs_external_ and the feature
/// counters, which are documented as "for FDR calculation" -- confirm.
259  void calculateFDR_(FeatureMap& features);
260 
261  };
262 
263 } // namespace OpenMS
264 
265 #endif
266 
QualityType getOverallQuality() const
Non-mutable access to the overall quality.
TargetedExperiment & getLibrary()
Definition: FeatureFinderIdentificationAlgorithm.h:83
double svm_quality_cutoff
Definition: FeatureFinderIdentificationAlgorithm.h:125
std::multimap< double, PeptideIdentification * > RTMap
mapping: RT (not necessarily unique) -> pointer to peptide
Definition: FeatureFinderIdentificationAlgorithm.h:91
A more convenient string class.
Definition: String.h:57
bool operator()(const PeptideIdentification &p1, const PeptideIdentification &p2)
Definition: FeatureFinderIdentificationAlgorithm.h:164
TransformationDescription trafo_external_
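transformation to the external RT scale (legacy note, apparently a leaked commented-out declaration of another member: `TransformationDescription trafo_; // RT transformation (to range 0-1)`)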
Definition: FeatureFinderIdentificationAlgorithm.h:209
double mapping_tolerance_
Definition: FeatureFinderIdentificationAlgorithm.h:108
ChargeMap ids
Definition: FeatureFinderIdentificationAlgorithm.h:140
Size n_internal_features_
Definition: FeatureFinderIdentificationAlgorithm.h:206
bool operator()(const Feature &f1, const Feature &f2)
Definition: FeatureFinderIdentificationAlgorithm.h:186
double start
Definition: FeatureFinderIdentificationAlgorithm.h:139
Helper struct for a collection of mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:109
A container for features.
Definition: FeatureMap.h:93
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
Definition: FeatureFinderIdentificationAlgorithm.h:54
The MRMFeatureFinder finds and scores peaks of transitions that co-elute.
Definition: MRMFeatureFinderScoring.h:93
const PeakMap & getChromatograms() const
Definition: FeatureFinderIdentificationAlgorithm.h:78
Definition: IsotopeDistribution.h:72
PeakMap chrom_data_
Definition: FeatureFinderIdentificationAlgorithm.h:199
FeatureFinderAlgorithmPickedHelperStructs::MassTraces MassTraces
Definition: FeatureFinderIdentificationAlgorithm.h:88
StringList svm_predictor_names_
Definition: FeatureFinderIdentificationAlgorithm.h:123
const std::vector< PeptideIdentification > & getPeptideIdentifications() const
returns a const reference to the PeptideIdentification vector
region in RT in which a peptide elutes:
Definition: FeatureFinderIdentificationAlgorithm.h:137
double min_peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:116
Size n_internal_peps_
Definition: FeatureFinderIdentificationAlgorithm.h:101
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:155
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Size n_isotopes_
Definition: FeatureFinderIdentificationAlgorithm.h:111
String elution_model_
Definition: FeatureFinderIdentificationAlgorithm.h:119
Size n_external_features_
Definition: FeatureFinderIdentificationAlgorithm.h:207
predicate for filtering features by assigned peptides:
Definition: FeatureFinderIdentificationAlgorithm.h:153
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string (or DataValue::EMPTY if not found)
bool mz_window_ppm_
Definition: FeatureFinderIdentificationAlgorithm.h:106
PeakMap ms_data_
Definition: FeatureFinderIdentificationAlgorithm.h:198
std::map< AASequence, ChargeMap > PeptideMap
mapping: sequence -> charge -> internal/external ID information
Definition: FeatureFinderIdentificationAlgorithm.h:95
double rt_quantile_
Definition: FeatureFinderIdentificationAlgorithm.h:113
String svm_xval_out_
Definition: FeatureFinderIdentificationAlgorithm.h:124
const PeakMap & getMSData() const
Definition: FeatureFinderIdentificationAlgorithm.h:75
double isotope_pmin_
Definition: FeatureFinderIdentificationAlgorithm.h:110
double rt_window_
Definition: FeatureFinderIdentificationAlgorithm.h:104
double getRT() const
returns the RT of the MS2 spectrum where the identification occurred
PeakMap & getMSData()
Definition: FeatureFinderIdentificationAlgorithm.h:74
std::map< double, std::pair< Size, Size > > svm_probs_internal_
SVM probability -> number of pos./neg. features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:203
MRMFeatureFinderScoring feat_finder_
Definition: FeatureFinderIdentificationAlgorithm.h:211
ProgressLogger prog_log_
Definition: FeatureFinderIdentificationAlgorithm.h:213
Size svm_n_parts_
Definition: FeatureFinderIdentificationAlgorithm.h:126
const TargetedExperiment & getLibrary() const
Definition: FeatureFinderIdentificationAlgorithm.h:84
std::map< String, double > isotope_probs_
Definition: FeatureFinderIdentificationAlgorithm.h:210
const ProgressLogger & getProgressLogger() const
Definition: FeatureFinderIdentificationAlgorithm.h:81
predicate for filtering features by overall quality:
Definition: FeatureFinderIdentificationAlgorithm.h:144
An LC-MS feature.
Definition: Feature.h:70
double peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:115
String candidates_out_
Definition: FeatureFinderIdentificationAlgorithm.h:130
ProgressLogger & getProgressLogger()
Definition: FeatureFinderIdentificationAlgorithm.h:80
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
TargetedExperiment library_
Definition: FeatureFinderIdentificationAlgorithm.h:200
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:208
PeakMap & getChromatograms()
Definition: FeatureFinderIdentificationAlgorithm.h:77
PeptideMap peptide_map_
Definition: FeatureFinderIdentificationAlgorithm.h:99
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
FeatureFinderAlgorithmPickedHelperStructs::MassTrace MassTrace
Definition: FeatureFinderIdentificationAlgorithm.h:87
comparison functor for (unassigned) peptide IDs
Definition: FeatureFinderIdentificationAlgorithm.h:162
comparison functor for features
Definition: FeatureFinderIdentificationAlgorithm.h:184
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:146
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:64
Size n_external_peps_
Definition: FeatureFinderIdentificationAlgorithm.h:102
double signal_to_noise_
Definition: FeatureFinderIdentificationAlgorithm.h:117
double svm_min_prob_
Definition: FeatureFinderIdentificationAlgorithm.h:122
Size svm_n_samples_
Definition: FeatureFinderIdentificationAlgorithm.h:127
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:60
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
std::map< Int, std::pair< RTMap, RTMap > > ChargeMap
mapping: charge -> internal/external: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:93
std::map< String, std::pair< RTMap, RTMap > > PeptideRefRTMap
mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:97
int Int
Signed integer type.
Definition: Types.h:102
double mz_window_
Definition: FeatureFinderIdentificationAlgorithm.h:105
std::multiset< double > svm_probs_external_
SVM probabilities for "external" features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:205
Size debug_level_
Definition: FeatureFinderIdentificationAlgorithm.h:132
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
Helper struct for mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:79
Represents a peptide (amino acid sequence)
Definition: TargetedExperimentHelper.h:451