Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
PeakPickerIterative.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_TRANSFORMATIONS_RAW2PEAK_PEAKPICKERITERATIVE_H
36 #define OPENMS_TRANSFORMATIONS_RAW2PEAK_PEAKPICKERITERATIVE_H
37 
41 
42 // #define DEBUG_PEAK_PICKING
43 
44 namespace OpenMS
45 {
46 
52  {
53  int index;
55 
57  double leftWidth;
58  double rightWidth;
59  float mz;
60  };
61 
62  bool sort_peaks_by_intensity(const PeakCandidate& a, const PeakCandidate& b); // prototype
64  {
66  }
67 
97  class OPENMS_DLLAPI PeakPickerIterative :
98  public DefaultParamHandler,
99  public ProgressLogger
100  {
101 
102 private:
104  double peak_width_;
108  double sn_win_len_;
110 
111 public:
112 
115  DefaultParamHandler("PeakPickerIterative"),
117  {
118  defaults_.setValue("signal_to_noise_", 1.0, "Signal to noise value, each peak is required to be above this value (turn off by setting it to 0.0)");
119  defaults_.setValue("peak_width", 0.0, "Expected peak width half width in Dalton - peaks will be extended until this half width is reached (even if the intensitity is increasing). In conjunction with check_width_internally it will also be used to remove peaks whose spacing is larger than this value.");
120 
121 
122  defaults_.setValue("spacing_difference", 1.5, "Difference between peaks in multiples of the minimal difference to continue. The higher this value is set, the further apart peaks are allowed to be to still extend a peak. E.g. if the value is set to 1.5 and in a current peak the minimal spacing between peaks is 10 mDa, then only peaks at most 15 mDa apart will be added to the peak.", ListUtils::create<String>("advanced"));
123  defaults_.setValue("sn_bin_count_", 30, "Bin count for the Signal to Noise estimation.", ListUtils::create<String>("advanced"));
124  defaults_.setValue("nr_iterations_", 5, "Nr of iterations to perform (how many times the peaks are re-centered).", ListUtils::create<String>("advanced"));
125  defaults_.setMinInt("nr_iterations_", 1);
126  defaults_.setValue("sn_win_len_", 20.0, "Window length for the Signal to Noise estimation.", ListUtils::create<String>("advanced"));
127 
128  defaults_.setValue("check_width_internally", "false", "Delete peaks where the spacing is larger than the peak width (should be set to true to avoid artefacts)", ListUtils::create<String>("advanced"));
129  defaults_.setValidStrings("check_width_internally", ListUtils::create<String>("true,false"));
130 
131  defaults_.setValue("ms1_only", "false", "Only do MS1");
132  defaults_.setValidStrings("ms1_only", ListUtils::create<String>("true,false"));
133  defaults_.setValue("clear_meta_data", "false", "Delete meta data about peak width");
134  defaults_.setValidStrings("clear_meta_data", ListUtils::create<String>("true,false"));
135 
136  // write defaults into Param object param_
137  defaultsToParam_();
138  }
139 
141  {
142  signal_to_noise_ = (double)param_.getValue("signal_to_noise_");
143  peak_width_ = (double)param_.getValue("peak_width");
144  spacing_difference_ = (double)param_.getValue("spacing_difference");
145  sn_bin_count_ = (double)param_.getValue("sn_bin_count_");
146  nr_iterations_ = (double)param_.getValue("nr_iterations_");
147  sn_win_len_ = (double)param_.getValue("sn_win_len_");
148 
149  check_width_internally_ = param_.getValue("check_width_internally").toBool();
150  }
151 
154 
155 private:
156 
157  /*
158  * This will re-center the peaks by using the seeds (ordered by intensity) to
159  * find raw signals that may belong to this peak. Then the peak is centered
160  * using a weighted average.
161  * Signals are added to the peak as long as they are still inside the
162  * peak_width or as long as the signal intensity keeps falling. Also the
163  * distance to the previous signal and the whether the signal is below the
164  * noise level is taken into account.
165  * This function implements a single iteration of this algorithm.
166  *
167  */
168  void pickRecenterPeaks_(const MSSpectrum& input,
169  std::vector<PeakCandidate>& PeakCandidates,
171  {
172  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
173  {
174  int i = PeakCandidates[peak_it].index;
175  double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity();
176  double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity();
177  double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity();
178 
179  // MZ spacing sanity checks
180  double left_to_central = std::fabs(central_peak_mz - left_neighbor_mz);
181  double central_to_right = std::fabs(right_neighbor_mz - central_peak_mz);
182  double min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;
183  double est_peak_width = peak_width_;
184 
185  if (check_width_internally_ && (left_to_central > est_peak_width || central_to_right > est_peak_width))
186  {
187  // something has gone wrong, the points are further away than the peak width -> delete this peak
188  PeakCandidates[peak_it].integrated_intensity = -1;
189  PeakCandidates[peak_it].leftWidth = -1;
190  PeakCandidates[peak_it].rightWidth = -1;
191  PeakCandidates[peak_it].mz = -1;
192  continue;
193  }
194 
195  std::map<double, double> peak_raw_data;
196 
197  peak_raw_data[central_peak_mz] = central_peak_int;
198  peak_raw_data[left_neighbor_mz] = left_neighbor_int;
199  peak_raw_data[right_neighbor_mz] = right_neighbor_int;
200 
201  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
202  // COPY - from PeakPickerHiRes
203  // peak core found, now extend it to the left
204  Size k = 2;
205  while ((i - k + 1) > 0
206  && std::fabs(input[i - k].getMZ() - peak_raw_data.begin()->first) < spacing_difference_ * min_spacing
207  && (input[i - k].getIntensity() < peak_raw_data.begin()->second
208  || std::fabs(input[i - k].getMZ() - central_peak_mz) < est_peak_width)
209  )
210  {
211  if (signal_to_noise_ > 0.0)
212  {
213  if (snt.getSignalToNoise(input[i - k]) < signal_to_noise_)
214  {
215  break;
216  }
217  }
218  peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
219  ++k;
220  }
221  double leftborder = input[i - k + 1].getMZ();
222 
223  // to the right
224  k = 2;
225  while ((i + k) < input.size()
226  && std::fabs(input[i + k].getMZ() - peak_raw_data.rbegin()->first) < spacing_difference_ * min_spacing
227  && (input[i + k].getIntensity() < peak_raw_data.rbegin()->second
228  || std::fabs(input[i + k].getMZ() - central_peak_mz) < est_peak_width)
229  )
230  {
231  if (signal_to_noise_ > 0.0)
232  {
233  if (snt.getSignalToNoise(input[i + k]) < signal_to_noise_)
234  {
235  break;
236  }
237  }
238 
239  peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
240  ++k;
241  }
242  // END COPY - from PeakPickerHiRes
243  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
244 
245  double rightborder = input[i + k - 1].getMZ();
246 
247  double weighted_mz = 0;
248  double integrated_intensity = 0;
249  for (std::map<double, double>::const_iterator map_it = peak_raw_data.begin(); map_it != peak_raw_data.end(); ++map_it)
250  {
251  weighted_mz += map_it->first * map_it->second;
252  integrated_intensity += map_it->second;
253  }
254  weighted_mz /= integrated_intensity;
255 
256  // store the data
257  PeakCandidates[peak_it].integrated_intensity = integrated_intensity;
258  PeakCandidates[peak_it].leftWidth = leftborder;
259  PeakCandidates[peak_it].rightWidth = rightborder;
260  PeakCandidates[peak_it].mz = weighted_mz;
261 
262  // find the closest raw signal peak to where we just put our peak and store it
263  double min_diff = std::fabs(weighted_mz - input[i].getMZ());
264  int min_i = i;
265 
266  // Search to the left
267  for (int m = 1; i - m > 0 && leftborder < input[i - m].getMZ(); m++)
268  {
269  if (std::fabs(weighted_mz - input[i - m].getMZ()) < min_diff)
270  {
271  min_diff = std::fabs(weighted_mz - input[i - m].getMZ());
272  min_i = i - m;
273  }
274  }
275  // Search to the right
276  for (int m = 1; i - m > 0 && rightborder > input[i + m].getMZ(); m++)
277  {
278  if (std::fabs(weighted_mz - input[i + m].getMZ()) < min_diff)
279  {
280  min_diff = std::fabs(weighted_mz - input[i + m].getMZ());
281  min_i = i + m;
282  }
283  }
284  PeakCandidates[peak_it].index = min_i;
285  }
286  }
287 
288 public:
289 
290  /*
291  * This will pick one single spectrum. The PeakPickerHiRes is used to
292  * generate seeds, these seeds are then used to re-center the mass and
293  * compute peak width and integrated intensity of the peak.
294  *
295  * Finally, other peaks that would fall within the primary peak are
296  * discarded
297  *
298  * The output are the remaining peaks.
299  */
300  void pick(const MSSpectrum& input, MSSpectrum& output)
301  {
302  // don't pick a spectrum with less than 3 data points
303  if (input.size() < 3) return;
304 
305  // copy meta data of the input spectrum
306  output.clear(true);
307  output.SpectrumSettings::operator=(input);
308  output.MetaInfoInterface::operator=(input);
309  output.setRT(input.getRT());
310  output.setMSLevel(input.getMSLevel());
311  output.setName(input.getName());
313  output.getFloatDataArrays().clear();
314 
315  std::vector<PeakCandidate> PeakCandidates;
316  MSSpectrum picked_spectrum;
317 
318  // Use the PeakPickerHiRes to find candidates ...
320  Param pepi_param = OpenMS::PeakPickerHiRes().getDefaults();
321  pepi_param.setValue("signal_to_noise", signal_to_noise_);
322  pepi_param.setValue("spacing_difference", spacing_difference_);
323  pp.setParameters(pepi_param);
324  pp.pick(input, picked_spectrum);
325 
326  // after picking peaks, we store the closest index of the raw spectrum and the picked intensity
327  std::vector<PeakCandidate> newPeakCandidates_;
328  Size j = 0;
329  LOG_DEBUG << "Candidates " << picked_spectrum.size() << std::endl;
330  for (Size k = 0; k < input.size() && j < picked_spectrum.size(); k++)
331  {
332  if (input[k].getMZ() > picked_spectrum[j].getMZ())
333  {
334  LOG_DEBUG << "got a value " << k << " @ " << input[k] << std::endl;
335  PeakCandidate pc = { /*.index=*/ static_cast<int>(k), /*.intensity=*/ picked_spectrum[j].getIntensity(), -1, -1, -1, -1};
336  newPeakCandidates_.push_back(pc);
337  j++;
338  }
339  }
340 
341  PeakCandidates = newPeakCandidates_;
342  std::sort(PeakCandidates.begin(), PeakCandidates.end(), sort_peaks_by_intensity);
343 
344  // signal-to-noise estimation
346  if (signal_to_noise_ > 0.0)
347  {
348  Param snt_parameters = snt.getParameters();
349  snt_parameters.setValue("win_len", sn_win_len_);
350  snt_parameters.setValue("bin_count", sn_bin_count_);
351  snt.setParameters(snt_parameters);
352  snt.init(input);
353  }
354 
355  // The peak candidates are re-centered and the width is computed for each peak
356  for (int i = 0; i < nr_iterations_; i++)
357  {
358  pickRecenterPeaks_(input, PeakCandidates, snt);
359  }
360 
361  output.getFloatDataArrays().resize(3);
362  output.getFloatDataArrays()[0].setName("IntegratedIntensity");
363  output.getFloatDataArrays()[1].setName("leftWidth");
364  output.getFloatDataArrays()[2].setName("rightWidth");
365 
366  // Go through all candidates and exclude all lower-intensity candidates
367  // that are within the borders of another peak
368  LOG_DEBUG << "Will now merge candidates" << std::endl;
369  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
370  {
371  if (PeakCandidates[peak_it].leftWidth < 0) continue;
372 
373  //Remove all peak candidates that are enclosed by this peak
374  for (Size m = peak_it + 1; m < PeakCandidates.size(); m++)
375  {
376  if (PeakCandidates[m].mz >= PeakCandidates[peak_it].leftWidth && PeakCandidates[m].mz <= PeakCandidates[peak_it].rightWidth)
377  {
378  LOG_DEBUG << "Remove peak " << m << " : " << PeakCandidates[m].mz << " " <<
379  PeakCandidates[m].peak_apex_intensity << " (too close to " << PeakCandidates[peak_it].mz <<
380  " " << PeakCandidates[peak_it].peak_apex_intensity << ")" << std::endl;
381  PeakCandidates[m].leftWidth = PeakCandidates[m].rightWidth = -1;
382  }
383  }
384 
385  Peak1D peak;
386  peak.setMZ(PeakCandidates[peak_it].mz);
387  peak.setIntensity(PeakCandidates[peak_it].integrated_intensity);
388  output.push_back(peak);
389 
390  LOG_DEBUG << "Push peak " << peak_it << " " << peak << std::endl;
391  output.getFloatDataArrays()[0].push_back(PeakCandidates[peak_it].integrated_intensity);
392  output.getFloatDataArrays()[1].push_back(PeakCandidates[peak_it].leftWidth);
393  output.getFloatDataArrays()[2].push_back(PeakCandidates[peak_it].rightWidth);
394  }
395 
396  LOG_DEBUG << "Found seeds: " << PeakCandidates.size() << " / Found peaks: " << output.size() << std::endl;
397  output.sortByPosition();
398  }
399 
400  void pickExperiment(const PeakMap& input, PeakMap& output)
401  {
402  // make sure that output is clear
403  output.clear(true);
404 
405  // copy experimental settings
406  static_cast<ExperimentalSettings&>(output) = input;
407 
408  // resize output with respect to input
409  output.resize(input.size());
410 
411  bool ms1_only = param_.getValue("ms1_only").toBool();
412  bool clear_meta_data = param_.getValue("clear_meta_data").toBool();
413 
414  Size progress = 0;
415  startProgress(0, input.size(), "picking peaks");
416  for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx)
417  {
418  if (ms1_only && (input[scan_idx].getMSLevel() != 1))
419  {
420  output[scan_idx] = input[scan_idx];
421  }
422  else
423  {
424  pick(input[scan_idx], output[scan_idx]);
425  if (clear_meta_data) {output[scan_idx].getFloatDataArrays().clear();}
426  }
427  setProgress(progress++);
428  }
429  endProgress();
430  }
431 
432  };
433 
434 }
435 
436 #endif
This class implements a peak-picking algorithm for high-resolution MS data (specifically designed for...
Definition: PeakPickerIterative.h:97
~PeakPickerIterative()
Destructor.
Definition: PeakPickerIterative.h:153
const double k
virtual double getSignalToNoise(const PeakIterator &data_point)
Definition: SignalToNoiseEstimator.h:129
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
double spacing_difference_
Definition: PeakPickerIterative.h:105
void sortByPosition()
Lexicographically sorts the peaks by their position.
Peak data (also called centroided data or stick data)
Definition: SpectrumSettings.h:74
double signal_to_noise_
Definition: PeakPickerIterative.h:103
const String & getName() const
Returns the name.
double leftWidth
Definition: PeakPickerIterative.h:57
int index
Definition: PeakPickerIterative.h:53
void setName(const String &name)
Sets the name.
void resize(Size s)
Definition: MSExperiment.h:137
Size size() const
Definition: MSExperiment.h:132
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:120
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:111
void pick(const MSSpectrum &input, MSSpectrum &output)
Definition: PeakPickerIterative.h:300
void setParameters(const Param &param)
Sets the parameters.
float mz
Definition: PeakPickerIterative.h:59
int nr_iterations_
Definition: PeakPickerIterative.h:107
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:459
const FloatDataArrays & getFloatDataArrays() const
Returns a const reference to the float meta data arrays.
const Param & getDefaults() const
Non-mutable access to the default parameters.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
double integrated_intensity
Definition: PeakPickerIterative.h:56
double peak_width_
Definition: PeakPickerIterative.h:104
int sn_bin_count_
Definition: PeakPickerIterative.h:106
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:55
void setMSLevel(UInt ms_level)
Sets the MS level.
void pickRecenterPeaks_(const MSSpectrum &input, std::vector< PeakCandidate > &PeakCandidates, SignalToNoiseEstimatorMedian< MSSpectrum > &snt)
Definition: PeakPickerIterative.h:168
void updateMembers_()
This method is used to update extra member variables at the end of the setParameters() method...
Definition: PeakPickerIterative.h:140
void pickExperiment(const PeakMap &input, PeakMap &output)
Definition: PeakPickerIterative.h:400
void pick(const MSSpectrum &input, MSSpectrum &output) const
Applies the peak-picking algorithm to a single spectrum (MSSpectrum). The resulting picked peaks are ...
Definition: PeakPickerHiRes.h:102
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition: Param.h:75
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:81
bool sort_peaks_by_intensity(const PeakCandidate &a, const PeakCandidate &b)
Definition: PeakPickerIterative.h:63
PeakPickerIterative()
Constructor.
Definition: PeakPickerIterative.h:114
void setType(SpectrumType type)
sets the spectrum type
double rightWidth
Definition: PeakPickerIterative.h:58
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
double sn_win_len_
Definition: PeakPickerIterative.h:108
A small structure to hold peak candidates.
Definition: PeakPickerIterative.h:51
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSExperiment.h:929
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
double peak_apex_intensity
Definition: PeakPickerIterative.h:54
UInt getMSLevel() const
Returns the MS level.
bool check_width_internally_
Definition: PeakPickerIterative.h:109
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
double getRT() const
This class implements a fast peak-picking algorithm best suited for high resolution MS data (FT-ICR-M...
Definition: PeakPickerHiRes.h:76
Description of the experimental settings.
Definition: ExperimentalSettings.h:59

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:03 using doxygen 1.8.13