Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MRMTransitionGroupPicker.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_OPENSWATH_MRMTRANSITIONGROUPPICKER_H
36 #define OPENMS_ANALYSIS_OPENSWATH_MRMTRANSITIONGROUPPICKER_H
37 
43 
46 
49 
50 // Cross-correlation
53 
54 #include <numeric>
55 
56 //#define DEBUG_TRANSITIONGROUPPICKER
57 
58 namespace OpenMS
59 {
60 
79  class OPENMS_DLLAPI MRMTransitionGroupPicker :
80  public DefaultParamHandler
81  {
82 
83 public:
84 
88 
92 
112  template <typename SpectrumT, typename TransitionT>
114  {
115  OPENMS_PRECONDITION(transition_group.isInternallyConsistent(), "Consistent state required")
116  OPENMS_PRECONDITION(transition_group.chromatogramIdsMatch(), "Chromatogram native IDs need to match keys in transition group")
117 
118  std::vector<MSChromatogram > picked_chroms_;
119  PeakPickerMRM picker;
120  picker.setParameters(param_.copy("PeakPickerMRM:", true));
121 
122  // Pick fragment ion chromatograms
123  for (Size k = 0; k < transition_group.getChromatograms().size(); k++)
124  {
125  MSChromatogram& chromatogram = transition_group.getChromatograms()[k];
126  String native_id = chromatogram.getNativeID();
127 
128  // only pick detecting transitions (skip all others)
129  if (transition_group.getTransitions().size() > 0 &&
130  transition_group.hasTransition(native_id) &&
131  !transition_group.getTransition(native_id).isDetectingTransition() )
132  {
133  continue;
134  }
135 
136  if (!chromatogram.isSorted())
137  {
138  chromatogram.sortByPosition();
139  }
140 
141  MSChromatogram picked_chrom;
142  picker.pickChromatogram(chromatogram, picked_chrom);
143  picked_chrom.sortByIntensity(); // we could do without that
144  picked_chroms_.push_back(picked_chrom);
145  }
146 
147  // Pick precursor chromatograms
148  if (use_precursors_)
149  {
150  for (Size k = 0; k < transition_group.getPrecursorChromatograms().size(); k++)
151  {
152  SpectrumT picked_chrom;
153  SpectrumT& chromatogram = transition_group.getPrecursorChromatograms()[k];
154  String native_id = chromatogram.getNativeID();
155 
156  picker.pickChromatogram(chromatogram, picked_chrom);
157  picked_chrom.sortByIntensity(); // we could do without that
158  picked_chroms_.push_back(picked_chrom);
159  }
160  }
161 
162  // Find features (peak groups) in this group of transitions.
163  // While there are still peaks left, one will be picked and used to create
164  // a feature. Whenever we run out of peaks, we will get -1 back as index
165  // and terminate.
166  int chr_idx, peak_idx, cnt = 0;
167  std::vector<MRMFeature> features;
168  while (true)
169  {
170  chr_idx = -1; peak_idx = -1;
171  findLargestPeak(picked_chroms_, chr_idx, peak_idx);
172  if (chr_idx == -1 && peak_idx == -1) break;
173 
174  // Compute a feature from the individual chromatograms and add non-zero features
175  MRMFeature mrm_feature = createMRMFeature(transition_group, picked_chroms_, chr_idx, peak_idx);
176  if (mrm_feature.getIntensity() > 0)
177  {
178  features.push_back(mrm_feature);
179  }
180 
181  cnt++;
182  if ((stop_after_feature_ > 0 && cnt > stop_after_feature_) &&
183  mrm_feature.getIntensity() / (double)mrm_feature.getMetaValue("total_xic") < stop_after_intensity_ratio_)
184  {
185  break;
186  }
187  }
188 
189  // Check for completely overlapping features
190  for (Size i = 0; i < features.size(); i++)
191  {
192  MRMFeature& mrm_feature = features[i];
193  bool skip = false;
194  for (Size j = 0; j < i; j++)
195  {
196  if ((double)mrm_feature.getMetaValue("leftWidth") >= (double)features[j].getMetaValue("leftWidth") &&
197  (double)mrm_feature.getMetaValue("rightWidth") <= (double)features[j].getMetaValue("rightWidth") )
198  { skip = true; }
199  }
200  if (mrm_feature.getIntensity() > 0 && !skip)
201  {
202  transition_group.addFeature(mrm_feature);
203  }
204  }
205 
206  }
207 
209  template <typename SpectrumT, typename TransitionT>
211  std::vector<SpectrumT>& picked_chroms, const int chr_idx, const int peak_idx)
212  {
213  OPENMS_PRECONDITION(transition_group.isInternallyConsistent(), "Consistent state required")
214  OPENMS_PRECONDITION(transition_group.chromatogramIdsMatch(), "Chromatogram native IDs need to match keys in transition group")
215 
216  MRMFeature mrmFeature;
217  mrmFeature.setIntensity(0.0);
218  double best_left = picked_chroms[chr_idx].getFloatDataArrays()[1][peak_idx];
219  double best_right = picked_chroms[chr_idx].getFloatDataArrays()[2][peak_idx];
220  double peak_apex = picked_chroms[chr_idx][peak_idx].getRT();
221  LOG_DEBUG << "**** Creating MRMFeature for peak " << chr_idx << " " << peak_idx << " " <<
222  picked_chroms[chr_idx][peak_idx] << " with borders " << best_left << " " <<
223  best_right << " (" << best_right - best_left << ")" << std::endl;
224 
225  if (recalculate_peaks_)
226  {
227  // This may change best_left / best_right
228  recalculatePeakBorders_(picked_chroms, best_left, best_right, recalculate_peaks_max_z_);
229  if (peak_apex < best_left || peak_apex > best_right)
230  {
231  // apex fell out of range, lets correct it
232  peak_apex = (best_left + best_right) / 2.0;
233  }
234  }
235  picked_chroms[chr_idx][peak_idx].setIntensity(0.0);
236 
237  // Remove other, overlapping, picked peaks (in this and other
238  // chromatograms) and then ensure that at least one peak is set to zero
239  // (the currently best peak).
240  remove_overlapping_features(picked_chroms, best_left, best_right);
241 
242  // Check for minimal peak width -> return empty feature (Intensity zero)
243  if (min_peak_width_ > 0.0 && std::fabs(best_right - best_left) < min_peak_width_)
244  {
245  return mrmFeature;
246  }
247 
248  if (compute_peak_quality_)
249  {
250  String outlier = "none";
251  double qual = computeQuality_(transition_group, picked_chroms, chr_idx, best_left, best_right, outlier);
252  if (qual < min_qual_)
253  {
254  return mrmFeature;
255  }
256  mrmFeature.setMetaValue("potentialOutlier", outlier);
257  mrmFeature.setMetaValue("initialPeakQuality", qual);
258  mrmFeature.setOverallQuality(qual);
259  }
260 
261  // Prepare linear resampling of all the chromatograms, here creating the
262  // empty master_peak_container with the same RT (m/z) values as the reference
263  // chromatogram.
264  SpectrumT master_peak_container;
265  const SpectrumT& ref_chromatogram = selectChromHelper_(transition_group, picked_chroms[chr_idx].getNativeID());
266  prepareMasterContainer_(ref_chromatogram, master_peak_container, best_left, best_right);
267 
268  // Iterate over initial transitions / chromatograms (note that we may
269  // have a different number of picked chromatograms than total transitions
270  // as not all are detecting transitions).
271  double total_intensity = 0; double total_peak_apices = 0; double total_xic = 0;
272  for (Size k = 0; k < transition_group.getTransitions().size(); k++)
273  {
274  const SpectrumT& chromatogram = selectChromHelper_(transition_group, transition_group.getTransitions()[k].getNativeID());
275  if (transition_group.getTransitions()[k].isDetectingTransition())
276  {
277  for (typename SpectrumT::const_iterator it = chromatogram.begin(); it != chromatogram.end(); it++)
278  {
279  total_xic += it->getIntensity();
280  }
281  }
282 
283  // resample the current chromatogram
284  const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram, master_peak_container, best_left, best_right);
285  // const SpectrumT& used_chromatogram = chromatogram; // instead of resampling
286 
287  Feature f;
288  double quality = 0;
289  f.setQuality(0, quality);
290  f.setOverallQuality(quality);
291 
292  ConvexHull2D::PointArrayType hull_points;
293  double intensity_sum(0.0), rt_sum(0.0);
294  double peak_apex_int = -1;
295  double peak_apex_dist = std::fabs(used_chromatogram.begin()->getMZ() - peak_apex);
296  // FEATURE : use RTBegin / MZBegin -> for this we need to know whether the template param is a real chromatogram or a spectrum!
297  for (typename SpectrumT::const_iterator it = used_chromatogram.begin(); it != used_chromatogram.end(); it++)
298  {
299  if (it->getMZ() > best_left && it->getMZ() < best_right)
300  {
301  DPosition<2> p;
302  p[0] = it->getMZ();
303  p[1] = it->getIntensity();
304  hull_points.push_back(p);
305  if (std::fabs(it->getMZ() - peak_apex) <= peak_apex_dist)
306  {
307  peak_apex_int = p[1];
308  peak_apex_dist = std::fabs(it->getMZ() - peak_apex);
309  }
310  rt_sum += it->getMZ();
311  intensity_sum += it->getIntensity();
312  }
313  }
314 
315  double background(0), avg_noise_level(0);
316  if (background_subtraction_ != "none")
317  {
318  if (background_subtraction_ == "smoothed")
319  {
320  throw Exception::NotImplemented(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
321  /*
322  * Currently we do not have access to the smoothed chromatogram any more
323  if (smoothed_chroms_.size() <= k)
324  {
325  std::cerr << "Tried to calculate background estimation without any smoothed chromatograms" << std::endl;
326  background = 0;
327  }
328  else
329  {
330  background = calculateBgEstimation_(smoothed_chroms_[k], best_left, best_right);
331  }
332  */
333  }
334  else if (background_subtraction_ == "original")
335  {
336  calculateBgEstimation_(used_chromatogram, best_left, best_right, background, avg_noise_level);
337  }
338  intensity_sum -= background;
339  peak_apex_int -= avg_noise_level;
340  if (intensity_sum < 0) {intensity_sum = 0;}
341  if (peak_apex_int < 0) {peak_apex_int = 0;}
342  }
343 
344  f.setRT(picked_chroms[chr_idx][peak_idx].getMZ());
345  f.setIntensity(intensity_sum);
346  ConvexHull2D hull;
347  hull.setHullPoints(hull_points);
348  f.getConvexHulls().push_back(hull);
349  if (chromatogram.metaValueExists("product_mz"))
350  {
351  f.setMetaValue("MZ", chromatogram.getMetaValue("product_mz"));
352  f.setMZ(chromatogram.getMetaValue("product_mz"));
353  }
354  else
355  {
356  LOG_WARN << "Please set meta value 'product_mz' on chromatogram to populate feature m/z value" << std::endl;
357  }
358  f.setMetaValue("native_id", chromatogram.getNativeID());
359  f.setMetaValue("peak_apex_int", peak_apex_int);
360  if (background_subtraction_ != "none")
361  {
362  f.setMetaValue("area_background_level", background);
363  f.setMetaValue("noise_background_level", avg_noise_level);
364  }
365 
366  if (transition_group.getTransitions()[k].isDetectingTransition())
367  {
368  total_intensity += intensity_sum;
369  total_peak_apices += peak_apex_int;
370  }
371  mrmFeature.addFeature(f, chromatogram.getNativeID()); //map index and feature
372  }
373 
374  // Also pick the precursor chromatogram(s); note total_xic is not
375  // extracted here, only for fragment traces
376  for (Size k = 0; k < transition_group.getPrecursorChromatograms().size(); k++)
377  {
378  const SpectrumT& chromatogram = transition_group.getPrecursorChromatograms()[k];
379  // resample the current chromatogram
380  const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram, master_peak_container, best_left, best_right);
381 
382  Feature f;
383  double quality = 0;
384  f.setQuality(0, quality);
385  f.setOverallQuality(quality);
386 
387  ConvexHull2D::PointArrayType hull_points;
388  double intensity_sum(0.0), rt_sum(0.0);
389  double peak_apex_int = -1;
390  double peak_apex_dist = std::fabs(used_chromatogram.begin()->getMZ() - peak_apex);
391  // FEATURE : use RTBegin / MZBegin -> for this we need to know whether the template param is a real chromatogram or a spectrum!
392  for (typename SpectrumT::const_iterator it = used_chromatogram.begin(); it != used_chromatogram.end(); it++)
393  {
394  if (it->getMZ() > best_left && it->getMZ() < best_right)
395  {
396  DPosition<2> p;
397  p[0] = it->getMZ();
398  p[1] = it->getIntensity();
399  hull_points.push_back(p);
400  if (std::fabs(it->getMZ() - peak_apex) <= peak_apex_dist)
401  {
402  peak_apex_int = p[1];
403  peak_apex_dist = std::fabs(it->getMZ() - peak_apex);
404  }
405  rt_sum += it->getMZ();
406  intensity_sum += it->getIntensity();
407  }
408  }
409 
410  if (chromatogram.metaValueExists("precursor_mz"))
411  {
412  f.setMZ(chromatogram.getMetaValue("precursor_mz"));
413  mrmFeature.setMZ(chromatogram.getMetaValue("precursor_mz"));
414  }
415 
416  f.setRT(picked_chroms[chr_idx][peak_idx].getMZ());
417  f.setIntensity(intensity_sum);
418  ConvexHull2D hull;
419  hull.setHullPoints(hull_points);
420  f.getConvexHulls().push_back(hull);
421  f.setMetaValue("native_id", chromatogram.getNativeID());
422  f.setMetaValue("peak_apex_int", peak_apex_int);
423 
424  if (use_precursors_ && transition_group.getTransitions().empty())
425  {
426  total_intensity += intensity_sum;
427  }
428 
429  mrmFeature.addPrecursorFeature(f, chromatogram.getNativeID());
430  }
431 
432  mrmFeature.setRT(peak_apex);
433  mrmFeature.setIntensity(total_intensity);
434  mrmFeature.setMetaValue("PeptideRef", transition_group.getTransitionGroupID());
435  mrmFeature.setMetaValue("leftWidth", best_left);
436  mrmFeature.setMetaValue("rightWidth", best_right);
437  mrmFeature.setMetaValue("total_xic", total_xic);
438  mrmFeature.setMetaValue("peak_apices_sum", total_peak_apices);
439 
440  mrmFeature.ensureUniqueId();
441  return mrmFeature;
442  }
443 
444  // maybe private, but we have tests
445 
457  template <typename SpectrumT>
458  void remove_overlapping_features(std::vector<SpectrumT>& picked_chroms, double best_left, double best_right)
459  {
460  // delete all seeds that lie within the current seed
461  //std::cout << "Removing features for peak between " << best_left << " " << best_right << std::endl;
462  for (Size k = 0; k < picked_chroms.size(); k++)
463  {
464  for (Size i = 0; i < picked_chroms[k].size(); i++)
465  {
466  if (picked_chroms[k][i].getMZ() >= best_left && picked_chroms[k][i].getMZ() <= best_right)
467  {
468  //std::cout << "For Chrom " << k << " removing peak " << picked_chroms[k][i].getMZ() << " l/r : " << picked_chroms[k].getFloatDataArrays()[1][i] << " " <<
469  // picked_chroms[k].getFloatDataArrays()[2][i] << " with int " << picked_chroms[k][i].getIntensity() <<std::endl;
470  picked_chroms[k][i].setIntensity(0.0);
471  }
472  }
473  }
474 
475  // delete all seeds that overlap within the current seed
476  for (Size k = 0; k < picked_chroms.size(); k++)
477  {
478  for (Size i = 0; i < picked_chroms[k].size(); i++)
479  {
480  if (picked_chroms[k][i].getIntensity() <= 0.0) {continue; }
481 
482  double left = picked_chroms[k].getFloatDataArrays()[1][i];
483  double right = picked_chroms[k].getFloatDataArrays()[2][i];
484  if ((left > best_left && left < best_right)
485  || (right > best_left && right < best_right))
486  {
487  //std::cout << "= For Chrom " << k << " removing contained peak " << picked_chroms[k][i].getMZ() << " l/r : " << picked_chroms[k].getFloatDataArrays()[1][i] << " " <<
488  // picked_chroms[k].getFloatDataArrays()[2][i] << " with int " << picked_chroms[k][i].getIntensity() <<std::endl;
489  picked_chroms[k][i].setIntensity(0.0);
490  }
491  }
492  }
493  }
494 
496  void findLargestPeak(std::vector<MSChromatogram >& picked_chroms, int& chr_idx, int& peak_idx);
497 
498 protected:
499 
501  void updateMembers_();
502 
504  MRMTransitionGroupPicker& operator=(const MRMTransitionGroupPicker& rhs);
505 
509  template <typename SpectrumT, typename TransitionT>
510  const SpectrumT& selectChromHelper_(MRMTransitionGroup<SpectrumT, TransitionT>& transition_group, String native_id)
511  {
512  if (transition_group.hasChromatogram(native_id))
513  {
514  return transition_group.getChromatogram(native_id);
515  }
516  else if (transition_group.hasPrecursorChromatogram(native_id))
517  {
518  return transition_group.getPrecursorChromatogram(native_id);
519  }
520  else
521  {
522  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Did not find chromatogram for id '" + native_id + "'.");
523  }
524  }
525 
542  template <typename SpectrumT, typename TransitionT>
544  std::vector<SpectrumT>& picked_chroms, const int chr_idx,
545  const double best_left, const double best_right, String& outlier)
546  {
547 
548  // Resample all chromatograms around the current estimated peak and
549  // collect the raw intensities. For resampling, use a bit more on either
550  // side to correctly identify shoulders etc.
551  double resample_boundary = resample_boundary_; // sample 15 seconds more on each side
552  SpectrumT master_peak_container;
553  const SpectrumT& ref_chromatogram = selectChromHelper_(transition_group, picked_chroms[chr_idx].getNativeID());
554  prepareMasterContainer_(ref_chromatogram, master_peak_container, best_left - resample_boundary, best_right + resample_boundary);
555  std::vector<std::vector<double> > all_ints;
556  for (Size k = 0; k < picked_chroms.size(); k++)
557  {
558  const SpectrumT chromatogram = selectChromHelper_(transition_group, picked_chroms[k].getNativeID());
559  const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram,
560  master_peak_container, best_left - resample_boundary, best_right + resample_boundary);
561 
562  std::vector<double> int_here;
563  for (Size i = 0; i < used_chromatogram.size(); i++)
564  {
565  int_here.push_back(used_chromatogram[i].getIntensity());
566  }
567  all_ints.push_back(int_here);
568  }
569 
570  // Compute the cross-correlation for the collected intensities
571  std::vector<double> mean_shapes;
572  std::vector<double> mean_coel;
573  for (Size k = 0; k < all_ints.size(); k++)
574  {
575  std::vector<double> shapes;
576  std::vector<double> coel;
577  for (Size i = 0; i < all_ints.size(); i++)
578  {
579  if (i == k) {continue;}
580  std::map<int, double> res = OpenSwath::Scoring::normalizedCrossCorrelation(
581  all_ints[k], all_ints[i], boost::numeric_cast<int>(all_ints[i].size()), 1);
582 
583  // the first value is the x-axis (retention time) and should be an int -> it show the lag between the two
584  double res_coelution = std::abs(OpenSwath::Scoring::xcorrArrayGetMaxPeak(res)->first);
585  double res_shape = std::abs(OpenSwath::Scoring::xcorrArrayGetMaxPeak(res)->second);
586 
587  shapes.push_back(res_shape);
588  coel.push_back(res_coelution);
589  }
590 
591  // We have computed the cross-correlation of chromatogram k against
592  // all others. Use the mean of these computations as the value for k.
594  msc = std::for_each(shapes.begin(), shapes.end(), msc);
595  double shapes_mean = msc.mean();
596  msc = std::for_each(coel.begin(), coel.end(), msc);
597  double coel_mean = msc.mean();
598 
599  // mean shape scores below 0.5-0.6 should be a real sign of trouble ... !
600  // mean coel scores above 3.5 should be a real sign of trouble ... !
601  mean_shapes.push_back(shapes_mean);
602  mean_coel.push_back(coel_mean);
603  }
604 
605  // find the chromatogram with the minimal shape score and the maximal
606  // coelution score -> if it is the same chromatogram, the chance is
607  // pretty good that it is different from the others...
608  int min_index_shape = std::distance(mean_shapes.begin(), std::min_element(mean_shapes.begin(), mean_shapes.end()));
609  int max_index_coel = std::distance(mean_coel.begin(), std::max_element(mean_coel.begin(), mean_coel.end()));
610 
611  // Look at the picked peaks that are within the current left/right borders
612  int missing_peaks = 0;
613  int multiple_peaks = 0;
614 
615  // collect all seeds that lie within the current seed
616  std::vector<double> left_borders;
617  std::vector<double> right_borders;
618  for (Size k = 0; k < picked_chroms.size(); k++)
619  {
620  double l_tmp;
621  double r_tmp;
622  double max_int = -1;
623 
624  int pfound = 0;
625  l_tmp = -1;
626  r_tmp = -1;
627  for (Size i = 0; i < picked_chroms[k].size(); i++)
628  {
629  if (picked_chroms[k][i].getMZ() >= best_left && picked_chroms[k][i].getMZ() <= best_right)
630  {
631  pfound++;
632  if (picked_chroms[k][i].getIntensity() > max_int)
633  {
634  max_int = picked_chroms[k][i].getIntensity() > max_int;
635  l_tmp = picked_chroms[k].getFloatDataArrays()[1][i];
636  r_tmp = picked_chroms[k].getFloatDataArrays()[2][i];
637  }
638  }
639  }
640 
641  if (l_tmp > 0.0) left_borders.push_back(l_tmp);
642  if (r_tmp > 0.0) right_borders.push_back(r_tmp);
643 
644  if (pfound == 0) missing_peaks++;
645  if (pfound > 1) multiple_peaks++;
646  }
647 
648  // Check how many chromatograms had exactly one peak picked between our
649  // current left/right borders -> this would be a sign of consistency.
650  LOG_DEBUG << " Overall found missing : " << missing_peaks << " and multiple : " << multiple_peaks << std::endl;
651 
653 
654  // Is there one transitions that is very different from the rest (e.g.
655  // the same element has a bad shape and a bad coelution score) -> potential outlier
656  if (min_index_shape == max_index_coel)
657  {
658  LOG_DEBUG << " element " << min_index_shape << " is a candidate for removal ... " << std::endl;
659  outlier = String(picked_chroms[min_index_shape].getNativeID());
660  }
661  else
662  {
663  outlier = "none";
664  }
665 
666  // For the final score (larger is better), consider these scores:
667  // - missing_peaks (the more peaks are missing, the worse)
668  // - multiple_peaks
669  // - mean of the shapes (1 is very good, 0 is bad)
670  // - mean of the co-elution scores (0 is good, 1 is ok, above 1 is pretty bad)
671  double shape_score = std::accumulate(mean_shapes.begin(), mean_shapes.end(), 0.0) / mean_shapes.size();
672  double coel_score = std::accumulate(mean_coel.begin(), mean_coel.end(), 0.0) / mean_coel.size();
673  coel_score = (coel_score - 1.0) / 2.0;
674 
675  double score = shape_score - coel_score - 1.0 * missing_peaks / picked_chroms.size();
676 
677  LOG_DEBUG << " computed score " << score << " (from " << shape_score <<
678  " - " << coel_score << " - " << 1.0 * missing_peaks / picked_chroms.size() << ")" << std::endl;
679 
680  return score;
681  }
682 
692  template <typename SpectrumT>
693  void recalculatePeakBorders_(std::vector<SpectrumT>& picked_chroms, double& best_left, double& best_right, double max_z)
694  {
695  // 1. Collect all seeds that lie within the current seed
696  // - Per chromatogram only the most intense one counts, otherwise very
697  // - low intense peaks can contribute disproportionally to the voting
698  // - procedure.
699  std::vector<double> left_borders;
700  std::vector<double> right_borders;
701  for (Size k = 0; k < picked_chroms.size(); k++)
702  {
703  double max_int = -1;
704  double left = -1;
705  double right = -1;
706  for (Size i = 0; i < picked_chroms[k].size(); i++)
707  {
708  if (picked_chroms[k][i].getMZ() >= best_left && picked_chroms[k][i].getMZ() <= best_right)
709  {
710  if (picked_chroms[k].getFloatDataArrays()[0][i] > max_int)
711  {
712  max_int = picked_chroms[k].getFloatDataArrays()[0][i];
713  left = picked_chroms[k].getFloatDataArrays()[1][i];
714  right = picked_chroms[k].getFloatDataArrays()[2][i];
715  }
716  }
717  }
718  if (max_int > -1 )
719  {
720  left_borders.push_back(left);
721  right_borders.push_back(right);
722  LOG_DEBUG << " * " << k << " left boundary " << left_borders.back() << " with int " << max_int << std::endl;
723  LOG_DEBUG << " * " << k << " right boundary " << right_borders.back() << " with int " << max_int << std::endl;
724  }
725  }
726 
727  // Return for empty peak list
728  if (right_borders.empty())
729  {
730  return;
731  }
732 
733  // FEATURE IDEA: instead of Z-score use modified Z-score for small data sets
734  // http://d-scholarship.pitt.edu/7948/1/Seo.pdf
735  // http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
736  // 1. calculate median
737  // 2. MAD = calculate difference to median for each value -> take median of that
738  // 3. Mi = 0.6745*(xi - median) / MAD
739 
740  // 2. Calculate mean and standard deviation
741  // If the coefficient of variation is too large for one border, we use a
742  // "pseudo-median" instead of the border of the most intense peak.
743  double mean, stdev;
744 
745  // Right borders
746  mean = std::accumulate(right_borders.begin(), right_borders.end(), 0.0) / (double) right_borders.size();
747  stdev = std::sqrt(std::inner_product(right_borders.begin(), right_borders.end(), right_borders.begin(), 0.0)
748  / right_borders.size() - mean * mean);
749  std::sort(right_borders.begin(), right_borders.end());
750 
751  LOG_DEBUG << " - Recalculating right peak boundaries " << mean << " mean / best "
752  << best_right << " std " << stdev << " : " << std::fabs(best_right - mean) / stdev
753  << " coefficient of variation" << std::endl;
754 
755  // Compare right borders of best transition with the mean
756  if (std::fabs(best_right - mean) / stdev > max_z)
757  {
758  best_right = right_borders[right_borders.size() / 2]; // pseudo median
759  LOG_DEBUG << " - Setting right boundary to " << best_right << std::endl;
760  }
761 
762  // Left borders
763  mean = std::accumulate(left_borders.begin(), left_borders.end(), 0.0) / (double) left_borders.size();
764  stdev = std::sqrt(std::inner_product(left_borders.begin(), left_borders.end(), left_borders.begin(), 0.0)
765  / left_borders.size() - mean * mean);
766  std::sort(left_borders.begin(), left_borders.end());
767 
768  LOG_DEBUG << " - Recalculating left peak boundaries " << mean << " mean / best "
769  << best_left << " std " << stdev << " : " << std::fabs(best_left - mean) / stdev
770  << " coefficient of variation" << std::endl;
771 
772  // Compare left borders of best transition with the mean
773  if (std::fabs(best_left - mean) / stdev > max_z)
774  {
775  best_left = left_borders[left_borders.size() / 2]; // pseudo median
776  LOG_DEBUG << " - Setting left boundary to " << best_left << std::endl;
777  }
778 
779  }
780 
782 
783 
798  template <typename SpectrumT>
799  void prepareMasterContainer_(const SpectrumT& ref_chromatogram,
800  SpectrumT& master_peak_container, double left_boundary, double right_boundary)
801  {
802  OPENMS_PRECONDITION(master_peak_container.empty(), "Master peak container must be empty")
803 
804  // get the start / end point of this chromatogram => then add one more
805  // point beyond the two boundaries to make the resampling accurate also
806  // at the edge.
807  typename SpectrumT::const_iterator begin = ref_chromatogram.begin();
808  while (begin != ref_chromatogram.end() && begin->getMZ() < left_boundary) {begin++; }
809  if (begin != ref_chromatogram.begin()) {begin--; }
810 
811  typename SpectrumT::const_iterator end = begin;
812  while (end != ref_chromatogram.end() && end->getMZ() < right_boundary) {end++; }
813  if (end != ref_chromatogram.end()) {end++; }
814 
815  // resize the master container and set the m/z values to the ones of the master container
816  master_peak_container.resize(distance(begin, end)); // initialize to zero
817  typename SpectrumT::iterator it = master_peak_container.begin();
818  for (typename SpectrumT::const_iterator chrom_it = begin; chrom_it != end; chrom_it++, it++)
819  {
820  it->setMZ(chrom_it->getMZ());
821  }
822  }
823 
834  template <typename SpectrumT>
835  SpectrumT resampleChromatogram_(const SpectrumT& chromatogram,
836  const SpectrumT& master_peak_container, double left_boundary, double right_boundary)
837  {
838  // get the start / end point of this chromatogram => then add one more
839  // point beyond the two boundaries to make the resampling accurate also
840  // at the edge.
841  typename SpectrumT::const_iterator begin = chromatogram.begin();
842  while (begin != chromatogram.end() && begin->getMZ() < left_boundary) {begin++;}
843  if (begin != chromatogram.begin()) {begin--;}
844 
845  typename SpectrumT::const_iterator end = begin;
846  while (end != chromatogram.end() && end->getMZ() < right_boundary) {end++;}
847  if (end != chromatogram.end()) {end++;}
848 
849  SpectrumT resampled_peak_container = master_peak_container; // copy the master container, which contains the RT values
850  LinearResamplerAlign lresampler;
851  lresampler.raster(begin, end, resampled_peak_container.begin(), resampled_peak_container.end());
852 
853  return resampled_peak_container;
854  }
855 
857 
864  void calculateBgEstimation_(const MSChromatogram& chromatogram,
865  double best_left, double best_right, double & background, double & avg_noise_level);
866 
867  // Members
872  double min_qual_;
873 
879  };
880 }
881 
882 #endif // OPENMS_ANALYSIS_OPENSWATH_MRMTRANSITIONGROUPPICKER_H
883 
const double k
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
bool hasTransition(String key) const
Definition: MRMTransitionGroup.h:159
double min_qual_
Definition: MRMTransitionGroupPicker.h:872
String background_subtraction_
Definition: MRMTransitionGroupPicker.h:868
double recalculate_peaks_max_z_
Definition: MRMTransitionGroupPicker.h:877
A more convenient string class.
Definition: String.h:57
bool compute_peak_quality_
Definition: MRMTransitionGroupPicker.h:871
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:203
double mean() const
Definition: StatsHelpers.h:208
The representation of a chromatogram.
Definition: MSChromatogram.h:55
OPENSWATHALGO_DLLAPI XCorrArrayType normalizedCrossCorrelation(std::vector< double > &data1, std::vector< double > &data2, int maxdelay, int lag)
const std::vector< TransitionType > & getTransitions() const
Definition: MRMTransitionGroup.h:143
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition: openms/include/OpenMS/CONCEPT/Macros.h:107
std::vector< PointType > PointArrayType
Definition: ConvexHull2D.h:77
bool hasPrecursorChromatogram(String key) const
Definition: MRMTransitionGroup.h:237
const std::vector< ChromatogramType > & getPrecursorChromatograms() const
Definition: MRMTransitionGroup.h:209
The PeakPickerMRM finds peaks a single chromatogram.
Definition: PeakPickerMRM.h:63
int stop_after_feature_
Definition: MRMTransitionGroupPicker.h:874
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
const std::vector< ConvexHull2D > & getConvexHulls() const
Non-mutable access to the convex hulls.
IntensityType getIntensity() const
Definition: Peak2D.h:167
void setParameters(const Param &param)
Sets the parameters.
A 2-dimensional hull representation in [counter]clockwise direction - depending on axis labelling...
Definition: ConvexHull2D.h:73
ChromatogramType & getChromatogram(String key)
Definition: MRMTransitionGroup.h:198
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:459
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:135
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string (or DataValue::EMPTY if not found)
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:173
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
void sortByIntensity(bool reverse=false)
Lexicographically sorts the peaks by their intensity.
double min_peak_width_
Definition: MRMTransitionGroupPicker.h:876
double resample_boundary_
Definition: MRMTransitionGroupPicker.h:878
const SpectrumT & selectChromHelper_(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group, String native_id)
Select matching precursor or fragment ion chromatogram.
Definition: MRMTransitionGroupPicker.h:510
bool isInternallyConsistent() const
Check whether internal state is consistent, e.g. same number of chromatograms and transitions are pre...
Definition: MRMTransitionGroup.h:274
bool hasChromatogram(String key) const
Definition: MRMTransitionGroup.h:193
const std::vector< ChromatogramType > & getChromatograms() const
Definition: MRMTransitionGroup.h:175
A method or algorithm argument contains illegal values.
Definition: Exception.h:649
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:215
The MRMTransitionGroupPicker finds peaks in chromatograms that belong to the same precursors...
Definition: MRMTransitionGroupPicker.h:79
bool recalculate_peaks_
Definition: MRMTransitionGroupPicker.h:869
void setQuality(Size index, QualityType q)
Set the quality in dimension c.
void raster(SpecT &container)
Applies the resampling algorithm to a container (MSSpectrum or MSChromatogram).
Definition: LinearResamplerAlign.h:81
Linear Resampling of raw data with alignment.
Definition: LinearResamplerAlign.h:58
void prepareMasterContainer_(const SpectrumT &ref_chromatogram, SpectrumT &master_peak_container, double left_boundary, double right_boundary)
Create an empty master peak container that has the correct mz / RT values set.
Definition: MRMTransitionGroupPicker.h:799
void setHullPoints(const PointArrayType &points)
accessor for the outer(!) points (no checking is performed if this is actually a convex hull) ...
const String & getTransitionGroupID() const
Definition: MRMTransitionGroup.h:131
double computeQuality_(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group, std::vector< SpectrumT > &picked_chroms, const int chr_idx, const double best_left, const double best_right, String &outlier)
Compute transition group quality (higher score is better)
Definition: MRMTransitionGroupPicker.h:543
An LC-MS feature.
Definition: Feature.h:70
ChromatogramType & getPrecursorChromatogram(String key)
Definition: MRMTransitionGroup.h:242
functor to compute the mean and stddev of sequence using the std::foreach algorithm ...
Definition: StatsHelpers.h:170
bool isSorted() const
Checks if all peaks are sorted with respect to ascending RT.
void addFeature(MRMFeature &feature)
Definition: MRMTransitionGroup.h:263
void setOverallQuality(QualityType q)
Set the overall quality.
SpectrumT resampleChromatogram_(const SpectrumT &chromatogram, const SpectrumT &master_peak_container, double left_boundary, double right_boundary)
Resample a container at the positions indicated by the master peak container.
Definition: MRMTransitionGroupPicker.h:835
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software. ...
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
void sortByPosition()
Lexicographically sorts the peaks by their position.
void pickTransitionGroup(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group)
Pick a group of chromatograms belonging to the same peptide.
Definition: MRMTransitionGroupPicker.h:113
bool chromatogramIdsMatch()
Ensure that chromatogram native ids match their keys in the map.
Definition: MRMTransitionGroup.h:283
bool use_precursors_
Definition: MRMTransitionGroupPicker.h:870
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void remove_overlapping_features(std::vector< SpectrumT > &picked_chroms, double best_left, double best_right)
Remove overlapping features.
Definition: MRMTransitionGroupPicker.h:458
MRMFeature createMRMFeature(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group, std::vector< SpectrumT > &picked_chroms, const int chr_idx, const int peak_idx)
Create feature from a vector of chromatograms and a specified peak.
Definition: MRMTransitionGroupPicker.h:210
OPENSWATHALGO_DLLAPI XCorrArrayType::iterator xcorrArrayGetMaxPeak(XCorrArrayType &array)
Find best peak in an cross-correlation (highest apex)
double stop_after_intensity_ratio_
Definition: MRMTransitionGroupPicker.h:875
A multi-chromatogram MRM feature.
Definition: MRMFeature.h:50
void recalculatePeakBorders_(std::vector< SpectrumT > &picked_chroms, double &best_left, double &best_right, double max_z)
Recalculate the borders of the peak.
Definition: MRMTransitionGroupPicker.h:693
Not implemented exception.
Definition: Exception.h:437
const TransitionType & getTransition(String key)
Definition: MRMTransitionGroup.h:164

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:02 using doxygen 1.8.13