OpenMS  2.8.0
SignalToNoiseEstimatorMeanIterative.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 //
35 
36 #pragma once
37 
41 #include <vector>
42 #include <algorithm> //for std::max_element
43 
44 namespace OpenMS
45 {
69  template <typename Container = MSSpectrum>
71  public SignalToNoiseEstimator<Container>
72  {
73 
74 public:
75 
78 
82 
85 
87 
88 
91  {
92  //set the name for DefaultParamHandler error messages
93  this->setName("SignalToNoiseEstimatorMeanIterative");
94 
95  defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
96  " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
97  " All intensities EQUAL/ABOVE 'max_intensity' will not be added to the histogram." \
98  " If you choose 'max_intensity' too small, the noise estimate might be too small as well." \
99  " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime).", {"advanced"});
100  defaults_.setMinInt("max_intensity", -1);
101 
102  defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", {"advanced"});
103  defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
104  defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
105 
106 
107  defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", {"advanced"});
108  defaults_.setMinInt("auto_max_percentile", 0);
109  defaults_.setMaxInt("auto_max_percentile", 100);
110 
111  defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", {"advanced"});
112  defaults_.setMinInt("auto_mode", -1);
113  defaults_.setMaxInt("auto_mode", 1);
114 
115  defaults_.setValue("win_len", 200.0, "window length in Thomson");
116  defaults_.setMinFloat("win_len", 1.0);
117 
118  defaults_.setValue("bin_count", 30, "number of bins for intensity values");
119  defaults_.setMinInt("bin_count", 3);
120 
121  defaults_.setValue("stdev_mp", 3.0, "multiplier for stdev", {"advanced"});
122  defaults_.setMinFloat("stdev_mp", 0.01);
123  defaults_.setMaxFloat("stdev_mp", 999.0);
124 
125  defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
126  defaults_.setMinInt("min_required_elements", 1);
127 
128  defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", {"advanced"});
129 
131  }
132 
135  SignalToNoiseEstimator<Container>(source)
136  {
137  updateMembers_();
138  }
139 
145  {
146  if (&source == this) return *this;
147 
149  updateMembers_();
150  return *this;
151  }
152 
154 
155 
158  {}
159 
160 
161 protected:
162 
163 
168  void computeSTN_(const Container& c) override
169  {
170  //first element in the scan
171  PeakIterator scan_first_ = c.begin();
172  //last element in the scan
173  PeakIterator scan_last_ = c.end();
174 
175  // reset counter for sparse windows
176  double sparse_window_percent = 0;
177 
178  // reset the results
179  stn_estimates_.clear();
180  stn_estimates_.resize(c.size());
181 
182  // maximal range of histogram needs to be calculated first
183  if (auto_mode_ == AUTOMAXBYSTDEV)
184  {
185  // use MEAN+auto_max_intensity_*STDEV as threshold
186  GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
187  max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
188  }
189  else if (auto_mode_ == AUTOMAXBYPERCENT)
190  {
191  // get value at "auto_max_percentile_"th percentile
192  // we use a histogram approach here as well.
193  if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
194  {
196  throw Exception::InvalidValue(__FILE__,
197  __LINE__,
198  OPENMS_PRETTY_FUNCTION,
199  "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
200  s);
201  }
202 
203  std::vector<int> histogram_auto(100, 0);
204 
205  // find maximum of current scan
206  auto maxIt = std::max_element(c.begin(), c.end() ,[](const PeakType& a, const PeakType& b){ return a.getIntensity() > b.getIntensity();});
207  typename PeakType::IntensityType maxInt = maxIt->getIntensity();
208 
209  double bin_size = maxInt / 100;
210 
211  // fill histogram
212  for(auto& run : c)
213  {
214  ++histogram_auto[(int) (((run).getIntensity() - 1) / bin_size)];
215  }
216 
217  // add up element counts in histogram until ?th percentile is reached
218  int elements_below_percentile = (int) (auto_max_percentile_ * c.size() / 100);
219  int elements_seen = 0;
220  int i = -1;
221  PeakIterator run = scan_first_;
222 
223  while (run != scan_last_ && elements_seen < elements_below_percentile)
224  {
225  ++i;
226  elements_seen += histogram_auto[i];
227  ++run;
228  }
229 
230  max_intensity_ = (((double)i) + 0.5) * bin_size;
231  }
232  else //if (auto_mode_ == MANUAL)
233  {
234  if (max_intensity_ <= 0)
235  {
237  throw Exception::InvalidValue(__FILE__,
238  __LINE__,
239  OPENMS_PRETTY_FUNCTION,
240  "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
241  s);
242  }
243  }
244 
245  if (max_intensity_ < 0)
246  {
247  std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
248  return;
249  }
250 
251  PeakIterator window_pos_center = scan_first_;
252  PeakIterator window_pos_borderleft = scan_first_;
253  PeakIterator window_pos_borderright = scan_first_;
254 
255  double window_half_size = win_len_ / 2;
256  double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
257 
258  std::vector<int> histogram(bin_count_, 0);
259  std::vector<double> bin_value(bin_count_, 0);
260  // calculate average intensity that is represented by a bin
261  for (int bin = 0; bin < bin_count_; bin++)
262  {
263  histogram[bin] = 0;
264  bin_value[bin] = (bin + 0.5) * bin_size;
265  }
266  // index of last valid bin during iteration
267  int hist_rightmost_bin;
268  // bin in which a datapoint would fall
269  int to_bin;
270  // mean & stdev of the histogram
271  double hist_mean;
272  double hist_stdev;
273 
274  // tracks elements in current window, which may vary because of unevenly spaced data
275  int elements_in_window = 0;
276  int window_count = 0;
277 
278  double noise; // noise value of a datapoint
279 
281  SignalToNoiseEstimator<Container>::startProgress(0, c.size(), "noise estimation of data");
282 
283  // MAIN LOOP
284  while (window_pos_center != scan_last_)
285  {
286  // erase all elements from histogram that will leave the window on the LEFT side
287  while ((*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size)
288  {
289  //std::cout << "S: " << (*window_pos_borderleft).getMZ() << " " << ( (*window_pos_center).getMZ() - window_half_size ) << "\n";
290  to_bin = (int) ((std::max((*window_pos_borderleft).getIntensity(), 0.0f)) / bin_size);
291  if (to_bin < bin_count_)
292  {
293  --histogram[to_bin];
294  --elements_in_window;
295  }
296  ++window_pos_borderleft;
297  }
298 
299  //std::printf("S1: %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size);
300 
301 
302  // add all elements to histogram that will enter the window on the RIGHT side
303  while ((window_pos_borderright != scan_last_)
304  && ((*window_pos_borderright).getMZ() < (*window_pos_center).getMZ() + window_half_size))
305  {
306  //std::printf("Sb: %E %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size, (*window_pos_borderright).getMZ() - ((*window_pos_center).getMZ() + window_half_size));
307 
308  to_bin = (int) ((std::max((*window_pos_borderright).getIntensity(), 0.0f)) / bin_size);
309  if (to_bin < bin_count_)
310  {
311  ++histogram[to_bin];
312  ++elements_in_window;
313  }
314  ++window_pos_borderright;
315  }
316 
317  if (elements_in_window < min_required_elements_)
318  {
319  noise = noise_for_empty_window_;
320  ++sparse_window_percent;
321  }
322  else
323  {
324 
325  hist_rightmost_bin = bin_count_;
326 
327  // do iteration on histogram and find threshold
328  for (int i = 0; i < 3; ++i)
329  {
330  // mean
331  hist_mean = 0;
332  for (int bin = 0; bin < hist_rightmost_bin; ++bin)
333  {
334  //std::cout << "V: " << bin << " " << hist_mean << " " << histogram[bin] << " " << elements_in_window << " " << bin_value[bin] << "\n";
335  // immediate division is numerically more stable
336  hist_mean += histogram[bin] / (double) elements_in_window * bin_value[bin];
337  }
338  //hist_mean = hist_mean / elements_in_window;
339 
340  // stdev
341  hist_stdev = 0;
342  for (int bin = 0; bin < hist_rightmost_bin; ++bin)
343  {
344  double tmp(bin_value[bin] - hist_mean);
345  hist_stdev += histogram[bin] / (double) elements_in_window * tmp * tmp;
346  }
347  hist_stdev = std::sqrt(hist_stdev);
348 
349  //determine new threshold (i.e. the rightmost bin we consider)
350  int estimate = (int) ((hist_mean + hist_stdev * stdev_ - 1) / bin_size + 1);
351  //std::cout << "E: " << hist_mean << " " << hist_stdev << " " << stdev_ << " " << bin_size<< " " << estimate << "\n";
352  hist_rightmost_bin = std::min(estimate, bin_count_);
353  }
354 
355  // just avoid division by 0
356  noise = std::max(1.0, hist_mean);
357  }
358 
359  // store result
360  stn_estimates_[window_count] = (*window_pos_center).getIntensity() / noise;
361 
362 
363 
364  // advance the window center by one datapoint
365  ++window_pos_center;
366  ++window_count;
367  // update progress
369 
370  } // end while
371 
373 
374  sparse_window_percent = sparse_window_percent * 100 / window_count;
375  // warn if percentage of sparse windows is above 20%
376  if (sparse_window_percent > 20)
377  {
378  std::cerr << "WARNING in SignalToNoiseEstimatorMeanIterative: "
379  << sparse_window_percent
380  << "% of all windows were sparse. You should consider increasing 'win_len' or increasing 'min_required_elements'"
381  << " You should also check the MaximalIntensity value (or the parameters for its heuristic estimation)"
382  << " If it is too low, then too many high intensity peaks will be discarded, which leads to a sparse window!"
383  << std::endl;
384  }
385 
386  return;
387 
388  } // end of shiftWindow_
389 
391  void updateMembers_() override
392  {
393  max_intensity_ = (double)param_.getValue("max_intensity");
394  auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
395  auto_max_percentile_ = param_.getValue("auto_max_percentile");
396  auto_mode_ = param_.getValue("auto_mode");
397  win_len_ = (double)param_.getValue("win_len");
398  bin_count_ = param_.getValue("bin_count");
399  stdev_ = (double)param_.getValue("stdev_mp");
400  min_required_elements_ = param_.getValue("min_required_elements");
401  noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
402  stn_estimates_.clear();
403  }
404 
414  double win_len_;
418  double stdev_;
424 
425 
426 
427 
428  };
429 
430 } // namespace OpenMS
431 
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:166
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!
Definition: DefaultParamHandler.h:173
void setName(const String &name)
Mutable access to the name.
Invalid value exception.
Definition: Exception.h:329
void setMaxFloat(const std::string &key, double max)
Sets the maximum value for the floating point or floating point list parameter key.
void setMaxInt(const std::string &key, int max)
Sets the maximum value for the integer or integer list parameter key.
const ParamValue & getValue(const std::string &key) const
Returns a value of a parameter.
void setMinInt(const std::string &key, int min)
Sets the minimum value for the integer or integer list parameter key.
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
void setMinFloat(const std::string &key, double min)
Sets the minimum value for the floating point or floating point list parameter key.
float IntensityType
Intensity type.
Definition: Peak2D.h:62
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress() const
Ends the progress display.
Estimates the signal/noise (S/N) ratio of each data point in a scan based on an iterative scheme which repeatedly discards intensities above mean + 'stdev_mp' * stdev of a windowed intensity histogram.
Definition: SignalToNoiseEstimatorMeanIterative.h:72
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition: SignalToNoiseEstimatorMeanIterative.h:83
SignalToNoiseEstimatorMeanIterative()
default constructor
Definition: SignalToNoiseEstimatorMeanIterative.h:90
double win_len_
range of data points which belong to a window in Thomson
Definition: SignalToNoiseEstimatorMeanIterative.h:414
double stdev_
multiplier for the stdev of intensities
Definition: SignalToNoiseEstimatorMeanIterative.h:418
double noise_for_empty_window_
Definition: SignalToNoiseEstimatorMeanIterative.h:423
~SignalToNoiseEstimatorMeanIterative() override
Destructor.
Definition: SignalToNoiseEstimatorMeanIterative.h:157
SignalToNoiseEstimatorMeanIterative(const SignalToNoiseEstimatorMeanIterative &source)
Copy Constructor.
Definition: SignalToNoiseEstimatorMeanIterative.h:134
double max_intensity_
maximal intensity considered during binning (values above get discarded)
Definition: SignalToNoiseEstimatorMeanIterative.h:406
double auto_max_percentile_
parameter for initial automatic estimation of "max_intensity_" percentile or a stdev
Definition: SignalToNoiseEstimatorMeanIterative.h:410
void computeSTN_(const Container &c) override
Definition: SignalToNoiseEstimatorMeanIterative.h:168
void updateMembers_() override
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed
Definition: SignalToNoiseEstimatorMeanIterative.h:391
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition: SignalToNoiseEstimatorMeanIterative.h:420
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition: SignalToNoiseEstimatorMeanIterative.h:84
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition: SignalToNoiseEstimatorMeanIterative.h:86
int auto_mode_
determines which method shall be used for estimating "max_intensity_". Valid values are MANUAL = -1, AUTOMAXBYSTDEV = 0 and AUTOMAXBYPERCENT = 1.
Definition: SignalToNoiseEstimatorMeanIterative.h:412
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation
Definition: SignalToNoiseEstimatorMeanIterative.h:77
@ MANUAL
Definition: SignalToNoiseEstimatorMeanIterative.h:77
@ AUTOMAXBYSTDEV
Definition: SignalToNoiseEstimatorMeanIterative.h:77
@ AUTOMAXBYPERCENT
Definition: SignalToNoiseEstimatorMeanIterative.h:77
SignalToNoiseEstimatorMeanIterative & operator=(const SignalToNoiseEstimatorMeanIterative &source)
Definition: SignalToNoiseEstimatorMeanIterative.h:144
int bin_count_
number of bins in the histogram
Definition: SignalToNoiseEstimatorMeanIterative.h:416
double auto_max_stdev_Factor_
parameter for initial automatic estimation of "max_intensity_": a stdev multiplier
Definition: SignalToNoiseEstimatorMeanIterative.h:408
This class represents the abstract base class of a signal to noise estimator.
Definition: SignalToNoiseEstimator.h:59
double variance
variance of estimated Gaussian
Definition: SignalToNoiseEstimator.h:134
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition: SignalToNoiseEstimator.h:86
PeakIterator::value_type PeakType
Definition: SignalToNoiseEstimator.h:66
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean & stdev of intensities of a spectrum
Definition: SignalToNoiseEstimator.h:139
double mean
mean of estimated Gaussian
Definition: SignalToNoiseEstimator.h:133
std::vector< double > stn_estimates_
stores the signal-to-noise estimate (intensity / noise) for each peak
Definition: SignalToNoiseEstimator.h:172
Container::const_iterator PeakIterator
Definition: SignalToNoiseEstimator.h:65
protected struct to store parameters my, sigma for a Gaussian distribution
Definition: SignalToNoiseEstimator.h:132
A more convenient string class.
Definition: String.h:60
const double c
Definition: Constants.h:209
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47