OpenMS  2.5.0
SignalToNoiseEstimatorMeanIterative.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 //
35 
36 #pragma once
37 
41 #include <vector>
42 
43 namespace OpenMS
44 {
68  template <typename Container = MSSpectrum>
70  public SignalToNoiseEstimator<Container>
71  {
72 
73 public:
74 
77 
84 
87 
89 
90 
93  {
94  //set the name for DefaultParamHandler error messages
95  this->setName("SignalToNoiseEstimatorMeanIterative");
96 
97  defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
98  " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
99  " All intensities EQUAL/ABOVE 'max_intensity' will not be added to the histogram." \
100  " If you choose 'max_intensity' too small, the noise estimate might be too small as well." \
101  " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime).", ListUtils::create<String>("advanced"));
102  defaults_.setMinInt("max_intensity", -1);
103 
104  defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", ListUtils::create<String>("advanced"));
105  defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
106  defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
107 
108 
109  defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", ListUtils::create<String>("advanced"));
110  defaults_.setMinInt("auto_max_percentile", 0);
111  defaults_.setMaxInt("auto_max_percentile", 100);
112 
113  defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", ListUtils::create<String>("advanced"));
114  defaults_.setMinInt("auto_mode", -1);
115  defaults_.setMaxInt("auto_mode", 1);
116 
117  defaults_.setValue("win_len", 200.0, "window length in Thomson");
118  defaults_.setMinFloat("win_len", 1.0);
119 
120  defaults_.setValue("bin_count", 30, "number of bins for intensity values");
121  defaults_.setMinInt("bin_count", 3);
122 
123  defaults_.setValue("stdev_mp", 3.0, "multiplier for stdev", ListUtils::create<String>("advanced"));
124  defaults_.setMinFloat("stdev_mp", 0.01);
125  defaults_.setMaxFloat("stdev_mp", 999.0);
126 
127  defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
128  defaults_.setMinInt("min_required_elements", 1);
129 
130  defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", ListUtils::create<String>("advanced"));
131 
133  }
134 
137  SignalToNoiseEstimator<Container>(source)
138  {
139  updateMembers_();
140  }
141 
147  {
148  if (&source == this) return *this;
149 
151  updateMembers_();
152  return *this;
153  }
154 
156 
157 
160  {}
161 
162 
163 protected:
164 
165 
171  void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_) override
172  {
173  // reset counter for sparse windows
174  double sparse_window_percent = 0;
175 
176  // reset the results
177  stn_estimates_.clear();
178 
179  // maximal range of histogram needs to be calculated first
180  if (auto_mode_ == AUTOMAXBYSTDEV)
181  {
182  // use MEAN+auto_max_intensity_*STDEV as threshold
183  GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
184  max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
185  }
186  else if (auto_mode_ == AUTOMAXBYPERCENT)
187  {
188  // get value at "auto_max_percentile_"th percentile
189  // we use a histogram approach here as well.
190  if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
191  {
193  throw Exception::InvalidValue(__FILE__,
194  __LINE__,
195  OPENMS_PRETTY_FUNCTION,
196  "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
197  s);
198  }
199 
200  std::vector<int> histogram_auto(100, 0);
201 
202  // find maximum of current scan
203  int size = 0;
204  typename PeakType::IntensityType maxInt = 0;
205  PeakIterator run = scan_first_;
206  while (run != scan_last_)
207  {
208  maxInt = std::max(maxInt, (*run).getIntensity());
209  ++size;
210  ++run;
211  }
212 
213  double bin_size = maxInt / 100;
214 
215  // fill histogram
216  run = scan_first_;
217  while (run != scan_last_)
218  {
219  ++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)];
220  ++run;
221  }
222 
223  // add up element counts in histogram until ?th percentile is reached
224  int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
225  int elements_seen = 0;
226  int i = -1;
227  run = scan_first_;
228 
229  while (run != scan_last_ && elements_seen < elements_below_percentile)
230  {
231  ++i;
232  elements_seen += histogram_auto[i];
233  ++run;
234  }
235 
236  max_intensity_ = (((double)i) + 0.5) * bin_size;
237  }
238  else //if (auto_mode_ == MANUAL)
239  {
240  if (max_intensity_ <= 0)
241  {
243  throw Exception::InvalidValue(__FILE__,
244  __LINE__,
245  OPENMS_PRETTY_FUNCTION,
246  "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
247  s);
248  }
249  }
250 
251  if (max_intensity_ < 0)
252  {
253  std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
254  return;
255  }
256 
257  PeakIterator window_pos_center = scan_first_;
258  PeakIterator window_pos_borderleft = scan_first_;
259  PeakIterator window_pos_borderright = scan_first_;
260 
261  double window_half_size = win_len_ / 2;
262  double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
263 
264  std::vector<int> histogram(bin_count_, 0);
265  std::vector<double> bin_value(bin_count_, 0);
266  // calculate average intensity that is represented by a bin
267  for (int bin = 0; bin < bin_count_; bin++)
268  {
269  histogram[bin] = 0;
270  bin_value[bin] = (bin + 0.5) * bin_size;
271  }
272  // index of last valid bin during iteration
273  int hist_rightmost_bin;
274  // bin in which a datapoint would fall
275  int to_bin;
276  // mean & stdev of the histogram
277  double hist_mean;
278  double hist_stdev;
279 
280  // tracks elements in current window, which may vary because of unevenly spaced data
281  int elements_in_window = 0;
282  int window_count = 0;
283 
284  double noise; // noise value of a datapoint
285 
286  // determine how many elements we need to estimate (for progress estimation)
287  int windows_overall = 0;
288  PeakIterator run = scan_first_;
289  while (run != scan_last_)
290  {
291  ++windows_overall;
292  ++run;
293  }
294  SignalToNoiseEstimator<Container>::startProgress(0, windows_overall, "noise estimation of data");
295 
296  // MAIN LOOP
297  while (window_pos_center != scan_last_)
298  {
299  // erase all elements from histogram that will leave the window on the LEFT side
300  while ((*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size)
301  {
302  //std::cout << "S: " << (*window_pos_borderleft).getMZ() << " " << ( (*window_pos_center).getMZ() - window_half_size ) << "\n";
303  to_bin = (int) ((std::max((*window_pos_borderleft).getIntensity(), 0.0f)) / bin_size);
304  if (to_bin < bin_count_)
305  {
306  --histogram[to_bin];
307  --elements_in_window;
308  }
309  ++window_pos_borderleft;
310  }
311 
312  //std::printf("S1: %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size);
313 
314 
315  // add all elements to histogram that will enter the window on the RIGHT side
316  while ((window_pos_borderright != scan_last_)
317  && ((*window_pos_borderright).getMZ() < (*window_pos_center).getMZ() + window_half_size))
318  {
319  //std::printf("Sb: %E %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size, (*window_pos_borderright).getMZ() - ((*window_pos_center).getMZ() + window_half_size));
320 
321  to_bin = (int) ((std::max((*window_pos_borderright).getIntensity(), 0.0f)) / bin_size);
322  if (to_bin < bin_count_)
323  {
324  ++histogram[to_bin];
325  ++elements_in_window;
326  }
327  ++window_pos_borderright;
328  }
329 
330  if (elements_in_window < min_required_elements_)
331  {
332  noise = noise_for_empty_window_;
333  ++sparse_window_percent;
334  }
335  else
336  {
337 
338  hist_rightmost_bin = bin_count_;
339 
340  // do iteration on histogram and find threshold
341  for (int i = 0; i < 3; ++i)
342  {
343  // mean
344  hist_mean = 0;
345  for (int bin = 0; bin < hist_rightmost_bin; ++bin)
346  {
347  //std::cout << "V: " << bin << " " << hist_mean << " " << histogram[bin] << " " << elements_in_window << " " << bin_value[bin] << "\n";
348  // immediate division is numerically more stable
349  hist_mean += histogram[bin] / (double) elements_in_window * bin_value[bin];
350  }
351  //hist_mean = hist_mean / elements_in_window;
352 
353  // stdev
354  hist_stdev = 0;
355  for (int bin = 0; bin < hist_rightmost_bin; ++bin)
356  {
357  double tmp(bin_value[bin] - hist_mean);
358  hist_stdev += histogram[bin] / (double) elements_in_window * tmp * tmp;
359  }
360  hist_stdev = std::sqrt(hist_stdev);
361 
362  //determine new threshold (i.e. the rightmost bin we consider)
363  int estimate = (int) ((hist_mean + hist_stdev * stdev_ - 1) / bin_size + 1);
364  //std::cout << "E: " << hist_mean << " " << hist_stdev << " " << stdev_ << " " << bin_size<< " " << estimate << "\n";
365  hist_rightmost_bin = std::min(estimate, bin_count_);
366  }
367 
368  // just avoid division by 0
369  noise = std::max(1.0, hist_mean);
370  }
371 
372  // store result
373  stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
374 
375 
376 
377  // advance the window center by one datapoint
378  ++window_pos_center;
379  ++window_count;
380  // update progress
382 
383  } // end while
384 
386 
387  sparse_window_percent = sparse_window_percent * 100 / window_count;
388  // warn if percentage of sparse windows is above 20%
389  if (sparse_window_percent > 20)
390  {
391  std::cerr << "WARNING in SignalToNoiseEstimatorMeanIterative: "
392  << sparse_window_percent
393  << "% of all windows were sparse. You should consider increasing 'win_len' or increasing 'min_required_elements'"
394  << " You should also check the MaximalIntensity value (or the parameters for its heuristic estimation)"
395  << " If it is too low, then too many high intensity peaks will be discarded, which leads to a sparse window!"
396  << std::endl;
397  }
398 
399  return;
400 
401  } // end of shiftWindow_
402 
404  void updateMembers_() override
405  {
406  max_intensity_ = (double)param_.getValue("max_intensity");
407  auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
408  auto_max_percentile_ = param_.getValue("auto_max_percentile");
409  auto_mode_ = param_.getValue("auto_mode");
410  win_len_ = (double)param_.getValue("win_len");
411  bin_count_ = param_.getValue("bin_count");
412  stdev_ = (double)param_.getValue("stdev_mp");
413  min_required_elements_ = param_.getValue("min_required_elements");
414  noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
415  is_result_valid_ = false;
416  }
417 
427  double win_len_;
431  double stdev_;
437 
438 
439 
440 
441  };
442 
443 } // namespace OpenMS
444 
OpenMS::SignalToNoiseEstimatorMeanIterative::auto_max_percentile_
double auto_max_percentile_
parameter for initial automatic estimation of "max_intensity_": a percentile
Definition: SignalToNoiseEstimatorMeanIterative.h:423
OpenMS::SignalToNoiseEstimator::is_result_valid_
bool is_result_valid_
flag: set to true if SignalToNoise estimates are calculated and none of the params were changed....
Definition: SignalToNoiseEstimator.h:214
OpenMS::ProgressLogger::setProgress
void setProgress(SignedSize value) const
Sets the current progress.
OpenMS::Param::setMinFloat
void setMinFloat(const String &key, double min)
Sets the minimum value for the floating point or floating point list parameter key.
double
OpenMS::SignalToNoiseEstimatorMeanIterative::min_required_elements_
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition: SignalToNoiseEstimatorMeanIterative.h:433
OpenMS::SignalToNoiseEstimatorMeanIterative::PeakIterator
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition: SignalToNoiseEstimatorMeanIterative.h:85
OpenMS::Exception::InvalidValue
Invalid value exception.
Definition: Exception.h:335
OpenMS::SignalToNoiseEstimator::stn_estimates_
std::map< PeakType, double, typename PeakType::PositionLess > stn_estimates_
stores the noise estimate for each peak
Definition: SignalToNoiseEstimator.h:207
OpenMS::Param::setValue
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::SignalToNoiseEstimator
This class represents the abstract base class of a signal to noise estimator.
Definition: SignalToNoiseEstimator.h:56
OpenMS::DefaultParamHandler::setName
void setName(const String &name)
Mutable access to the name.
OpenMS::SignalToNoiseEstimatorMeanIterative::auto_max_stdev_Factor_
double auto_max_stdev_Factor_
parameter for initial automatic estimation of "max_intensity_": a stdev multiplier
Definition: SignalToNoiseEstimatorMeanIterative.h:421
OpenMS::Param::setMaxInt
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key.
OpenMS::SignalToNoiseEstimatorMeanIterative::AUTOMAXBYPERCENT
Definition: SignalToNoiseEstimatorMeanIterative.h:76
OpenMS::SignalToNoiseEstimatorMeanIterative::GaussianEstimate
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition: SignalToNoiseEstimatorMeanIterative.h:88
OpenMS::SignalToNoiseEstimator::GaussianEstimate::variance
double variance
variance of estimated Gaussian
Definition: SignalToNoiseEstimator.h:169
OpenMS::Param::getValue
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
OpenMS::ProgressLogger::startProgress
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
OpenMS::SignalToNoiseEstimatorMeanIterative
Estimates the signal/noise (S/N) ratio of each data point in a scan based on an iterative scheme whic...
Definition: SignalToNoiseEstimatorMeanIterative.h:69
OpenMS::SignalToNoiseEstimatorMeanIterative::IntensityThresholdCalculation
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation
Definition: SignalToNoiseEstimatorMeanIterative.h:76
OpenMS::ProgressLogger::endProgress
void endProgress() const
Ends the progress display.
OpenMS::SignalToNoiseEstimatorMeanIterative::max_intensity_
double max_intensity_
maximal intensity considered during binning (values above get discarded)
Definition: SignalToNoiseEstimatorMeanIterative.h:419
ListUtils.h
OpenMS::SignalToNoiseEstimator::PeakType
PeakIterator::value_type PeakType
Definition: SignalToNoiseEstimator.h:65
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::SignalToNoiseEstimatorMeanIterative::PeakType
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition: SignalToNoiseEstimatorMeanIterative.h:86
Exception.h
OpenMS::SignalToNoiseEstimatorMeanIterative::win_len_
double win_len_
range of data points which belong to a window in Thomson
Definition: SignalToNoiseEstimatorMeanIterative.h:427
int
OpenMS::SignalToNoiseEstimator::operator=
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition: SignalToNoiseEstimator.h:91
OpenMS::Param::setMinInt
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key.
OpenMS::Param::setMaxFloat
void setMaxFloat(const String &key, double max)
Sets the maximum value for the floating point or floating point list parameter key.
OpenMS::SignalToNoiseEstimatorMeanIterative::computeSTN_
void computeSTN_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) override
Definition: SignalToNoiseEstimatorMeanIterative.h:171
OpenMS::SignalToNoiseEstimatorMeanIterative::stdev_
double stdev_
multiplier for the stdev of intensities
Definition: SignalToNoiseEstimatorMeanIterative.h:431
OpenMS::SignalToNoiseEstimatorMeanIterative::auto_mode_
int auto_mode_
determines which method shall be used for estimating "max_intensity_". valid are MANUAL=-1,...
Definition: SignalToNoiseEstimatorMeanIterative.h:425
OpenMS::SignalToNoiseEstimatorMeanIterative::AUTOMAXBYSTDEV
Definition: SignalToNoiseEstimatorMeanIterative.h:76
OpenMS::SignalToNoiseEstimatorMeanIterative::bin_count_
int bin_count_
number of bins in the histogram
Definition: SignalToNoiseEstimatorMeanIterative.h:429
OpenMS::SignalToNoiseEstimator::GaussianEstimate::mean
double mean
mean of estimated Gaussian
Definition: SignalToNoiseEstimator.h:168
OpenMS::SignalToNoiseEstimator::estimate_
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean & stdev of intensities of a spectrum
Definition: SignalToNoiseEstimator.h:174
OpenMS::SignalToNoiseEstimatorMeanIterative::noise_for_empty_window_
double noise_for_empty_window_
Definition: SignalToNoiseEstimatorMeanIterative.h:436
float
OpenMS::DefaultParamHandler::defaults_
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!
Definition: DefaultParamHandler.h:156
SignalToNoiseEstimator.h
OpenMS::DefaultParamHandler::defaultsToParam_
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.
OpenMS::SignalToNoiseEstimatorMeanIterative::SignalToNoiseEstimatorMeanIterative
SignalToNoiseEstimatorMeanIterative(const SignalToNoiseEstimatorMeanIterative &source)
Copy Constructor.
Definition: SignalToNoiseEstimatorMeanIterative.h:136
OpenMS::SignalToNoiseEstimatorMeanIterative::updateMembers_
void updateMembers_() override
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed
Definition: SignalToNoiseEstimatorMeanIterative.h:404
OpenMS::SignalToNoiseEstimator::PeakIterator
Container::const_iterator PeakIterator
Definition: SignalToNoiseEstimator.h:64
OpenMS::SignalToNoiseEstimatorMeanIterative::SignalToNoiseEstimatorMeanIterative
SignalToNoiseEstimatorMeanIterative()
default constructor
Definition: SignalToNoiseEstimatorMeanIterative.h:92
OpenMS::SignalToNoiseEstimatorMeanIterative::operator=
SignalToNoiseEstimatorMeanIterative & operator=(const SignalToNoiseEstimatorMeanIterative &source)
Definition: SignalToNoiseEstimatorMeanIterative.h:146
OpenMS::SignalToNoiseEstimatorMeanIterative::MANUAL
Definition: SignalToNoiseEstimatorMeanIterative.h:76
OpenMS::SignalToNoiseEstimator::GaussianEstimate
protected struct to store parameters my, sigma for a Gaussian distribution
Definition: SignalToNoiseEstimator.h:166
OpenMS::DefaultParamHandler::param_
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:149
OpenMS::SignalToNoiseEstimatorMeanIterative::~SignalToNoiseEstimatorMeanIterative
~SignalToNoiseEstimatorMeanIterative() override
Destructor.
Definition: SignalToNoiseEstimatorMeanIterative.h:159