OpenMS  2.4.0
IDMapper.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Marc Sturm, Hendrik Weisser, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
41 
43 
45 
47 
48 #include <algorithm>
49 #include <limits>
50 
51 namespace OpenMS
52 {
66  class OPENMS_DLLAPI IDMapper :
67  public DefaultParamHandler
68  {
69 public:
70  enum Measure {MEASURE_PPM = 0, MEASURE_DA};
71 
73  IDMapper();
74 
76  IDMapper(const IDMapper& cp);
77 
79  IDMapper& operator=(const IDMapper& rhs);
80 
96  void annotate(PeakMap& map, const std::vector<PeptideIdentification>& peptide_ids, const std::vector<ProteinIdentification>& protein_ids, const bool clear_ids = false, const bool mapMS1 = false)
97  {
98  checkHits_(peptide_ids);
99 
100  if (clear_ids)
101  { // start with empty IDs
102  std::vector<PeptideIdentification> empty_ids;
103  for (PeakMap::iterator it = map.begin(); it != map.end(); ++it)
104  {
105  it->setPeptideIdentifications(empty_ids);
106  }
107  std::vector<ProteinIdentification> empty_prot_ids;
108  map.setProteinIdentifications(empty_prot_ids);
109  }
110 
111  if (peptide_ids.empty()) return;
112 
113  // append protein identifications
114  map.getProteinIdentifications().insert(map.getProteinIdentifications().end(), protein_ids.begin(), protein_ids.end());
115 
116  // store mapping of scan RT to index
117  std::multimap<double, Size> experiment_precursors;
118  for (Size i = 0; i < map.size(); i++)
119  {
120  experiment_precursors.insert(std::make_pair(map[i].getRT(), i));
121  }
122 
123  // store mapping of identification RT to index (ignore empty hits)
124  std::multimap<double, Size> identifications_precursors;
125  for (Size i = 0; i < peptide_ids.size(); ++i)
126  {
127  if (!peptide_ids[i].empty())
128  {
129  identifications_precursors.insert(std::make_pair(peptide_ids[i].getRT(), i));
130  }
131  }
132  // note that mappings are sorted by key via multimap (we rely on that down below)
133 
134  // remember which peptides were mapped (for stats later)
135  std::set<Size> peptides_mapped;
136 
137  // calculate the actual mapping
138  std::multimap<double, Size>::const_iterator experiment_iterator = experiment_precursors.begin();
139  std::multimap<double, Size>::const_iterator identifications_iterator = identifications_precursors.begin();
140  // to achieve O(n) complexity we now move along the spectra
141  // and for each spectrum we look at the peptide id's with the allowed RT range
142  // once we finish a spectrum, we simply move back in the peptide id window a little to get from the
143  // right end of the old interval to the left end of the new interval
144  while (experiment_iterator != experiment_precursors.end())
145  {
146  // maybe we hit end() of IDs during the last scan .. go back to a real value
147  if (identifications_iterator == identifications_precursors.end())
148  {
149  --identifications_iterator; // this is valid, since we have at least one peptide ID
150  }
151 
152  // go to left border of RT interval
153  while (identifications_iterator != identifications_precursors.begin() &&
154  (experiment_iterator->first - identifications_iterator->first) < rt_tolerance_) // do NOT use fabs() here, since we want the LEFT border
155  {
156  --identifications_iterator;
157  }
158  // ... we might have stepped too far left
159  if (identifications_iterator != identifications_precursors.end() && ((experiment_iterator->first - identifications_iterator->first) > rt_tolerance_))
160  {
161  ++identifications_iterator; // get into interval again (we can potentially be at end() afterwards)
162  }
163 
164  if (identifications_iterator == identifications_precursors.end())
165  { // no more ID's, so we don't have any chance of matching the next spectra
166  break; // ... do NOT put this block below, since hitting the end of ID's for one spec, still allows to match stuff in the next (when going to left border)
167  }
168 
169  // run through RT interval
170  while (identifications_iterator != identifications_precursors.end() &&
171  (identifications_iterator->first - experiment_iterator->first) < rt_tolerance_) // fabs() not required here, since are definitely within left border, and wait until exceeding the right
172  {
173  if (mapMS1 ||
174  // testing whether the m/z fits
175  ((!map[experiment_iterator->second].getPrecursors().empty()) &&
176  isMatch_(0, peptide_ids[identifications_iterator->second].getMZ(), map[experiment_iterator->second].getPrecursors()[0].getMZ())))
177  {
178  map[experiment_iterator->second].getPeptideIdentifications().push_back(peptide_ids[identifications_iterator->second]);
179  peptides_mapped.insert(identifications_iterator->second);
180  }
181  ++identifications_iterator;
182  }
183  // we are the right border now (or likely even beyond)
184  ++experiment_iterator;
185  }
186 
187  // some statistics output
188  LOG_INFO << "Peptides assigned to a precursor: " << peptides_mapped.size() << "\n"
189  << " Unassigned peptides: " << peptide_ids.size() - peptides_mapped.size() << "\n"
190  << " Unmapped (empty) peptides: " << peptide_ids.size() - identifications_precursors.size() << std::endl;
191 
192  }
193 
210  void annotate(PeakMap& map, FeatureMap fmap, const bool clear_ids = false, const bool mapMS1 = false)
211  {
212  const std::vector<ProteinIdentification>& protein_ids = fmap.getProteinIdentifications();
213  std::vector<PeptideIdentification> peptide_ids;
214 
215  for (FeatureMap::const_iterator it = fmap.begin(); it != fmap.end(); ++it)
216  {
217  const std::vector<PeptideIdentification>& pi = it->getPeptideIdentifications();
218  for (std::vector<PeptideIdentification>::const_iterator itp = pi.begin(); itp != pi.end(); ++itp)
219  {
220  peptide_ids.push_back(*itp);
221  // if pepID has no m/z or RT, use the values of the feature
222  if (!itp->hasMZ()) peptide_ids.back().setMZ(it->getMZ());
223  if (!itp->hasRT()) peptide_ids.back().setRT(it->getRT());
224  }
225 
226  }
227  annotate(map, peptide_ids, protein_ids, clear_ids, mapMS1);
228  }
229 
249  void annotate(FeatureMap& map, const std::vector<PeptideIdentification>& ids, const std::vector<ProteinIdentification>& protein_ids, bool use_centroid_rt = false, bool use_centroid_mz = false, const PeakMap& spectra = PeakMap());
250 
267  void annotate(ConsensusMap& map, const std::vector<PeptideIdentification>& ids,
268  const std::vector<ProteinIdentification>& protein_ids,
269  bool measure_from_subelements = false,
270  bool annotate_ids_with_subelements = false,
271  const PeakMap& spectra = PeakMap());
272 
273 
278  {
279  std::vector<Size> no_precursors;
280  std::vector<Size> identified;
281  std::vector<Size> unidentified;
282  };
283 
300  const std::vector<PeptideIdentification>& ids,
301  double mz_tol = 0.001,
302  double rt_tol = 0.001)
303  {
305  for (Size spectrum_index = 0; spectrum_index < spectra.size(); ++spectrum_index)
306  {
307  const MSSpectrum& spectrum = spectra[spectrum_index];
308  if (!spectrum.getPrecursors().empty())
309  {
310  bool identified(false);
311  const std::vector<Precursor>& precursors = spectrum.getPrecursors();
312 
313  // check if precursor has been identified
314  for (Size i_p = 0; i_p < precursors.size(); ++i_p)
315  {
316  // check by precursor mass and spectrum RT
317  double mz_p = precursors[i_p].getMZ();
318  double rt_s = spectrum.getRT();
319 
320  for (Size i_id = 0; i_id != ids.size(); ++i_id)
321  {
322  const PeptideIdentification& pid = ids[i_id];
323 
324  // do not count empty ids as identification of a spectrum
325  if (pid.getHits().empty()) continue;
326 
327  double mz_id = pid.getMZ();
328  double rt_id = pid.getRT();
329 
330  if ( fabs(mz_id - mz_p) < mz_tol && fabs(rt_s - rt_id) < rt_tol )
331  {
332  identified = true;
333  break;
334  }
335  }
336  }
337  if (!identified)
338  {
339  ret.unidentified.push_back(spectrum_index);
340  }
341  else
342  {
343  ret.identified.push_back(spectrum_index);
344  }
345  }
346  else
347  {
348  ret.no_precursors.push_back(spectrum_index);
349  }
350  }
351  return ret;
352  }
353 
354 
355 protected:
356  void updateMembers_() override;
357 
366 
370  double getAbsoluteMZTolerance_(const double mz) const;
371 
373  bool isMatch_(const double rt_distance, const double mz_theoretical, const double mz_observed) const;
374 
376  void checkHits_(const std::vector<PeptideIdentification>& ids) const;
377 
381  void getIDDetails_(const PeptideIdentification& id, double& rt_pep, DoubleList& mz_values, IntList& charges, bool use_avg_mass = false) const;
382 
384  void increaseBoundingBox_(DBoundingBox<2>& box);
385 
388  bool checkMassType_(const std::vector<DataProcessing>& processing) const;
389 
390  };
391 
392 } // namespace OpenMS
393 
double rt_tolerance_
Allowed RT deviation.
Definition: IDMapper.h:359
void setProteinIdentifications(const std::vector< ProteinIdentification > &protein_identifications)
sets the protein ProteinIdentification vector
double mz_tolerance_
Allowed m/z deviation.
Definition: IDMapper.h:361
#define LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:454
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:65
void annotate(PeakMap &map, FeatureMap fmap, const bool clear_ids=false, const bool mapMS1=false)
Mapping method for peak maps.
Definition: IDMapper.h:210
std::vector< Size > no_precursors
Definition: IDMapper.h:279
Result of a partitioning by identification state with mapPrecursorsToIdentifications().
Definition: IDMapper.h:277
A container for features.
Definition: FeatureMap.h:93
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
Annotates an MSExperiment, FeatureMap or ConsensusMap with peptide identifications.
Definition: IDMapper.h:66
std::vector< Size > unidentified
Definition: IDMapper.h:281
Measure
Definition: IDMapper.h:70
Iterator begin()
Definition: MSExperiment.h:157
A container for consensus elements.
Definition: ConsensusMap.h:75
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:58
Size size() const
Definition: MSExperiment.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
double getMZ() const
returns the MZ of the MS2 spectrum
Iterator end()
Definition: MSExperiment.h:167
Base::iterator iterator
Definition: MSExperiment.h:124
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
void annotate(PeakMap &map, const std::vector< PeptideIdentification > &peptide_ids, const std::vector< ProteinIdentification > &protein_ids, const bool clear_ids=false, const bool mapMS1=false)
Mapping method for peak maps.
Definition: IDMapper.h:96
double getRT() const
returns the RT of the MS2 spectrum where the identification occurred
MSExperiment PeakMap
Two-dimensional map of raw data points or peaks.
Definition: StandardTypes.h:61
bool ignore_charge_
Ignore charge states during matching?
Definition: IDMapper.h:365
bool empty() const
Definition: MSExperiment.h:137
std::vector< Size > identified
Definition: IDMapper.h:280
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
static SpectraIdentificationState mapPrecursorsToIdentifications(const PeakMap &spectra, const std::vector< PeptideIdentification > &ids, double mz_tol=0.001, double rt_tol=0.001)
Mapping of peptide identifications to spectra. This helper function partitions all spectra into those ...
Definition: IDMapper.h:299
Measure measure_
Measure used for m/z.
Definition: IDMapper.h:363
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
double getRT() const
const std::vector< ProteinIdentification > & getProteinIdentifications() const
non-mutable access to the protein identifications
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62