OpenMS  2.8.0
PeakPickerIterative.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
41 
42 // #define DEBUG_PEAK_PICKING
43 
44 namespace OpenMS
45 {
46 
52  {
53  int index;
55 
57  double leftWidth;
58  double rightWidth;
59  float mz;
60  };
61 
62  bool sort_peaks_by_intensity(const PeakCandidate& a, const PeakCandidate& b); // prototype
64  {
66  }
67 
97  class OPENMS_DLLAPI PeakPickerIterative :
98  public DefaultParamHandler,
99  public ProgressLogger
100  {
101 
102 private:
104  double peak_width_;
108  double sn_win_len_;
110 
111 public:
112 
115  DefaultParamHandler("PeakPickerIterative"),
117  {
118  defaults_.setValue("signal_to_noise_", 1.0, "Signal to noise value, each peak is required to be above this value (turn off by setting it to 0.0)");
119  defaults_.setValue("peak_width", 0.0, "Expected peak width half width in Dalton - peaks will be extended until this half width is reached (even if the intensitity is increasing). In conjunction with check_width_internally it will also be used to remove peaks whose spacing is larger than this value.");
120 
121 
122  defaults_.setValue("spacing_difference", 1.5, "Difference between peaks in multiples of the minimal difference to continue. The higher this value is set, the further apart peaks are allowed to be to still extend a peak. E.g. if the value is set to 1.5 and in a current peak the minimal spacing between peaks is 10 mDa, then only peaks at most 15 mDa apart will be added to the peak.", {"advanced"});
123  defaults_.setValue("sn_bin_count_", 30, "Bin count for the Signal to Noise estimation.", {"advanced"});
124  defaults_.setValue("nr_iterations_", 5, "Nr of iterations to perform (how many times the peaks are re-centered).", {"advanced"});
125  defaults_.setMinInt("nr_iterations_", 1);
126  defaults_.setValue("sn_win_len_", 20.0, "Window length for the Signal to Noise estimation.", {"advanced"});
127 
128  defaults_.setValue("check_width_internally", "false", "Delete peaks where the spacing is larger than the peak width (should be set to true to avoid artefacts)", {"advanced"});
129  defaults_.setValidStrings("check_width_internally", {"true","false"});
130 
131  defaults_.setValue("ms1_only", "false", "Only do MS1");
132  defaults_.setValidStrings("ms1_only", {"true","false"});
133  defaults_.setValue("clear_meta_data", "false", "Delete meta data about peak width");
134  defaults_.setValidStrings("clear_meta_data", {"true","false"});
135 
136  // write defaults into Param object param_
137  defaultsToParam_();
138  }
139 
140  void updateMembers_() override
141  {
142  signal_to_noise_ = (double)param_.getValue("signal_to_noise_");
143  peak_width_ = (double)param_.getValue("peak_width");
144  spacing_difference_ = (double)param_.getValue("spacing_difference");
145  sn_bin_count_ = (double)param_.getValue("sn_bin_count_");
146  nr_iterations_ = (double)param_.getValue("nr_iterations_");
147  sn_win_len_ = (double)param_.getValue("sn_win_len_");
148 
149  check_width_internally_ = param_.getValue("check_width_internally").toBool();
150  }
151 
153  ~PeakPickerIterative() override {}
154 
155 private:
156 
157  /*
158  * This will re-center the peaks by using the seeds (ordered by intensity) to
159  * find raw signals that may belong to this peak. Then the peak is centered
160  * using a weighted average.
161  * Signals are added to the peak as long as they are still inside the
162  * peak_width or as long as the signal intensity keeps falling. Also the
163  * distance to the previous signal and the whether the signal is below the
164  * noise level is taken into account.
165  * This function implements a single iteration of this algorithm.
166  *
167  */
168  void pickRecenterPeaks_(const MSSpectrum& input,
169  std::vector<PeakCandidate>& PeakCandidates,
171  {
172  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
173  {
174  int i = PeakCandidates[peak_it].index;
175  double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity();
176  double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity();
177  double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity();
178 
179  // MZ spacing sanity checks
180  double left_to_central = std::fabs(central_peak_mz - left_neighbor_mz);
181  double central_to_right = std::fabs(right_neighbor_mz - central_peak_mz);
182  double min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;
183  double est_peak_width = peak_width_;
184 
185  if (check_width_internally_ && (left_to_central > est_peak_width || central_to_right > est_peak_width))
186  {
187  // something has gone wrong, the points are further away than the peak width -> delete this peak
188  PeakCandidates[peak_it].integrated_intensity = -1;
189  PeakCandidates[peak_it].leftWidth = -1;
190  PeakCandidates[peak_it].rightWidth = -1;
191  PeakCandidates[peak_it].mz = -1;
192  continue;
193  }
194 
195  std::map<double, double> peak_raw_data;
196 
197  peak_raw_data[central_peak_mz] = central_peak_int;
198  peak_raw_data[left_neighbor_mz] = left_neighbor_int;
199  peak_raw_data[right_neighbor_mz] = right_neighbor_int;
200 
201  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
202  // COPY - from PeakPickerHiRes
203  // peak core found, now extend it to the left
204  Size k = 2;
205  while ((i - k + 1) > 0
206  && std::fabs(input[i - k].getMZ() - peak_raw_data.begin()->first) < spacing_difference_ * min_spacing
207  && (input[i - k].getIntensity() < peak_raw_data.begin()->second
208  || std::fabs(input[i - k].getMZ() - central_peak_mz) < est_peak_width)
209  )
210  {
211  if (signal_to_noise_ > 0.0)
212  {
213  if (snt.getSignalToNoise(i - k) < signal_to_noise_)
214  {
215  break;
216  }
217  }
218  peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
219  ++k;
220  }
221  double leftborder = input[i - k + 1].getMZ();
222 
223  // to the right
224  k = 2;
225  while ((i + k) < input.size()
226  && std::fabs(input[i + k].getMZ() - peak_raw_data.rbegin()->first) < spacing_difference_ * min_spacing
227  && (input[i + k].getIntensity() < peak_raw_data.rbegin()->second
228  || std::fabs(input[i + k].getMZ() - central_peak_mz) < est_peak_width)
229  )
230  {
231  if (signal_to_noise_ > 0.0)
232  {
233  if (snt.getSignalToNoise(i + k) < signal_to_noise_)
234  {
235  break;
236  }
237  }
238 
239  peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
240  ++k;
241  }
242  // END COPY - from PeakPickerHiRes
243  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
244 
245  double rightborder = input[i + k - 1].getMZ();
246 
247  double weighted_mz = 0;
248  double integrated_intensity = 0;
249  for (std::map<double, double>::const_iterator map_it = peak_raw_data.begin(); map_it != peak_raw_data.end(); ++map_it)
250  {
251  weighted_mz += map_it->first * map_it->second;
252  integrated_intensity += map_it->second;
253  }
254  weighted_mz /= integrated_intensity;
255 
256  // store the data
257  PeakCandidates[peak_it].integrated_intensity = integrated_intensity;
258  PeakCandidates[peak_it].leftWidth = leftborder;
259  PeakCandidates[peak_it].rightWidth = rightborder;
260  PeakCandidates[peak_it].mz = weighted_mz;
261 
262  // find the closest raw signal peak to where we just put our peak and store it
263  double min_diff = std::fabs(weighted_mz - input[i].getMZ());
264  int min_i = i;
265 
266  // Search to the left
267  for (int m = 1; i - m > 0 && leftborder < input[i - m].getMZ(); m++)
268  {
269  if (std::fabs(weighted_mz - input[i - m].getMZ()) < min_diff)
270  {
271  min_diff = std::fabs(weighted_mz - input[i - m].getMZ());
272  min_i = i - m;
273  }
274  }
275  // Search to the right
276  for (int m = 1; i - m > 0 && rightborder > input[i + m].getMZ(); m++)
277  {
278  if (std::fabs(weighted_mz - input[i + m].getMZ()) < min_diff)
279  {
280  min_diff = std::fabs(weighted_mz - input[i + m].getMZ());
281  min_i = i + m;
282  }
283  }
284  PeakCandidates[peak_it].index = min_i;
285  }
286  }
287 
288 public:
289 
290  /*
291  This will pick one single spectrum. The PeakPickerHiRes is used to
292  generate seeds, these seeds are then used to re-center the mass and
293  compute peak width and integrated intensity of the peak.
294 
295  Finally, other peaks that would fall within the primary peak are
296  discarded
297 
298  The output are the remaining peaks.
299  */
300  void pick(const MSSpectrum& input, MSSpectrum& output)
301  {
302  // don't pick a spectrum with less than 3 data points
303  if (input.size() < 3) return;
304 
305  // copy the spectrum meta data
306  copySpectrumMeta(input, output);
308  output.getFloatDataArrays().clear();
309 
310  std::vector<PeakCandidate> PeakCandidates;
311  MSSpectrum picked_spectrum;
312 
313  // Use the PeakPickerHiRes to find candidates ...
315  Param pepi_param = OpenMS::PeakPickerHiRes().getDefaults();
316  pepi_param.setValue("signal_to_noise", signal_to_noise_);
317  pepi_param.setValue("spacing_difference", spacing_difference_);
318  pp.setParameters(pepi_param);
319  pp.pick(input, picked_spectrum);
320 
321  // after picking peaks, we store the closest index of the raw spectrum and the picked intensity
322  std::vector<PeakCandidate> newPeakCandidates_;
323  Size j = 0;
324  OPENMS_LOG_DEBUG << "Candidates " << picked_spectrum.size() << std::endl;
325  for (Size k = 0; k < input.size() && j < picked_spectrum.size(); k++)
326  {
327  if (input[k].getMZ() > picked_spectrum[j].getMZ())
328  {
329  OPENMS_LOG_DEBUG << "got a value " << k << " @ " << input[k] << std::endl;
330  PeakCandidate pc = { /*.index=*/ static_cast<int>(k), /*.intensity=*/ picked_spectrum[j].getIntensity(), -1, -1, -1, -1};
331  newPeakCandidates_.push_back(pc);
332  j++;
333  }
334  }
335 
336  PeakCandidates = newPeakCandidates_;
337  std::sort(PeakCandidates.begin(), PeakCandidates.end(), sort_peaks_by_intensity);
338 
339  // signal-to-noise estimation
341  if (signal_to_noise_ > 0.0)
342  {
343  Param snt_parameters = snt.getParameters();
344  snt_parameters.setValue("win_len", sn_win_len_);
345  snt_parameters.setValue("bin_count", sn_bin_count_);
346  snt.setParameters(snt_parameters);
347  snt.init(input);
348  }
349 
350  // The peak candidates are re-centered and the width is computed for each peak
351  for (int i = 0; i < nr_iterations_; i++)
352  {
353  pickRecenterPeaks_(input, PeakCandidates, snt);
354  }
355 
356  output.getFloatDataArrays().resize(3);
357  output.getFloatDataArrays()[0].setName("IntegratedIntensity");
358  output.getFloatDataArrays()[1].setName("leftWidth");
359  output.getFloatDataArrays()[2].setName("rightWidth");
360 
361  // Go through all candidates and exclude all lower-intensity candidates
362  // that are within the borders of another peak
363  OPENMS_LOG_DEBUG << "Will now merge candidates" << std::endl;
364  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
365  {
366  if (PeakCandidates[peak_it].leftWidth < 0) continue;
367 
368  //Remove all peak candidates that are enclosed by this peak
369  for (Size m = peak_it + 1; m < PeakCandidates.size(); m++)
370  {
371  if (PeakCandidates[m].mz >= PeakCandidates[peak_it].leftWidth && PeakCandidates[m].mz <= PeakCandidates[peak_it].rightWidth)
372  {
373  OPENMS_LOG_DEBUG << "Remove peak " << m << " : " << PeakCandidates[m].mz << " " <<
374  PeakCandidates[m].peak_apex_intensity << " (too close to " << PeakCandidates[peak_it].mz <<
375  " " << PeakCandidates[peak_it].peak_apex_intensity << ")" << std::endl;
376  PeakCandidates[m].leftWidth = PeakCandidates[m].rightWidth = -1;
377  }
378  }
379 
380  Peak1D peak;
381  peak.setMZ(PeakCandidates[peak_it].mz);
382  peak.setIntensity(PeakCandidates[peak_it].integrated_intensity);
383  output.push_back(peak);
384 
385  OPENMS_LOG_DEBUG << "Push peak " << peak_it << " " << peak << std::endl;
386  output.getFloatDataArrays()[0].push_back(PeakCandidates[peak_it].integrated_intensity);
387  output.getFloatDataArrays()[1].push_back(PeakCandidates[peak_it].leftWidth);
388  output.getFloatDataArrays()[2].push_back(PeakCandidates[peak_it].rightWidth);
389  }
390 
391  OPENMS_LOG_DEBUG << "Found seeds: " << PeakCandidates.size() << " / Found peaks: " << output.size() << std::endl;
392  output.sortByPosition();
393  }
394 
395  void pickExperiment(const PeakMap& input, PeakMap& output)
396  {
397  // make sure that output is clear
398  output.clear(true);
399 
400  // copy experimental settings
401  static_cast<ExperimentalSettings&>(output) = input;
402 
403  // resize output with respect to input
404  output.resize(input.size());
405 
406  bool ms1_only = param_.getValue("ms1_only").toBool();
407  bool clear_meta_data = param_.getValue("clear_meta_data").toBool();
408 
409  Size progress = 0;
410  startProgress(0, input.size(), "picking peaks");
411  for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx)
412  {
413  if (ms1_only && (input[scan_idx].getMSLevel() != 1))
414  {
415  output[scan_idx] = input[scan_idx];
416  }
417  else
418  {
419  pick(input[scan_idx], output[scan_idx]);
420  if (clear_meta_data) {output[scan_idx].getFloatDataArrays().clear();}
421  }
422  setProgress(progress++);
423  }
424  endProgress();
425  }
426 
427  };
428 
429 }
430 
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:470
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
const Param & getParameters() const
Non-mutable access to the parameters.
void setParameters(const Param &param)
Sets the parameters.
const Param & getDefaults() const
Non-mutable access to the default parameters.
Description of the experimental settings.
Definition: ExperimentalSettings.h:62
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:73
void resize(Size s)
Definition: MSExperiment.h:125
Size size() const
Definition: MSExperiment.h:120
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
void sortByPosition()
Lexicographically sorts the peaks by their position.
const FloatDataArrays & getFloatDataArrays() const
Returns a const reference to the float meta data arrays.
Management and storage of parameters / INI files.
Definition: Param.h:70
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:104
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:113
This class implements a fast peak-picking algorithm best suited for high resolution MS data (FT-ICR-M...
Definition: PeakPickerHiRes.h:76
void pick(const MSSpectrum &input, MSSpectrum &output) const
Applies the peak-picking algorithm to a single spectrum (MSSpectrum). The resulting picked peaks are ...
This class implements a peak-picking algorithm for high-resolution MS data (specifically designed for...
Definition: PeakPickerIterative.h:100
void pickExperiment(const PeakMap &input, PeakMap &output)
Definition: PeakPickerIterative.h:395
int nr_iterations_
Definition: PeakPickerIterative.h:107
double sn_win_len_
Definition: PeakPickerIterative.h:108
PeakPickerIterative()
Constructor.
Definition: PeakPickerIterative.h:114
~PeakPickerIterative() override
Destructor.
Definition: PeakPickerIterative.h:153
double spacing_difference_
Definition: PeakPickerIterative.h:105
bool check_width_internally_
Definition: PeakPickerIterative.h:109
double signal_to_noise_
Definition: PeakPickerIterative.h:103
int sn_bin_count_
Definition: PeakPickerIterative.h:106
double peak_width_
Definition: PeakPickerIterative.h:104
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: PeakPickerIterative.h:140
void pick(const MSSpectrum &input, MSSpectrum &output)
Definition: PeakPickerIterative.h:300
void pickRecenterPeaks_(const MSSpectrum &input, std::vector< PeakCandidate > &PeakCandidates, SignalToNoiseEstimatorMedian< MSSpectrum > &snt) const
Definition: PeakPickerIterative.h:168
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:83
virtual void init(const Container &c)
Set the start and endpoint of the raw data interval, for which signal to noise ratios will be estimat...
Definition: SignalToNoiseEstimator.h:101
virtual double getSignalToNoise(const Size index) const
Definition: SignalToNoiseEstimator.h:109
void setType(SpectrumType type)
sets the spectrum type
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:73
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
const double k
Definition: Constants.h:153
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
double leftWidth
Definition: PeakPickerIterative.h:57
bool sort_peaks_by_intensity(const PeakCandidate &a, const PeakCandidate &b)
Definition: PeakPickerIterative.h:63
double peak_apex_intensity
Definition: PeakPickerIterative.h:54
int index
Definition: PeakPickerIterative.h:53
double integrated_intensity
Definition: PeakPickerIterative.h:56
void copySpectrumMeta(const MSSpectrum &input, MSSpectrum &output, bool clear_spectrum=true)
Copies only the meta data contained in the input spectrum to the output spectrum.
float mz
Definition: PeakPickerIterative.h:59
double rightWidth
Definition: PeakPickerIterative.h:58
A small structure to hold peak candidates.
Definition: PeakPickerIterative.h:52