Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SignalToNoiseEstimatorMeanIterative.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 //
35 
36 #ifndef OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEANITERATIVE_H
37 #define OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEANITERATIVE_H
38 
42 #include <vector>
43 
44 namespace OpenMS
45 {
69  template <typename Container = MSSpectrum>
71  public SignalToNoiseEstimator<Container>
72  {
73 
74 public:
75 
78 
85 
88 
90 
91 
94  {
95  //set the name for DefaultParamHandler error messages
96  this->setName("SignalToNoiseEstimatorMeanIterative");
97 
98  defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
99  " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
100  " All intensities EQUAL/ABOVE 'max_intensity' will not be added to the histogram." \
101  " If you choose 'max_intensity' too small, the noise estimate might be too small as well." \
102  " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime).", ListUtils::create<String>("advanced"));
103  defaults_.setMinInt("max_intensity", -1);
104 
105  defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", ListUtils::create<String>("advanced"));
106  defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
107  defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
108 
109 
110  defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", ListUtils::create<String>("advanced"));
111  defaults_.setMinInt("auto_max_percentile", 0);
112  defaults_.setMaxInt("auto_max_percentile", 100);
113 
114  defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", ListUtils::create<String>("advanced"));
115  defaults_.setMinInt("auto_mode", -1);
116  defaults_.setMaxInt("auto_mode", 1);
117 
118  defaults_.setValue("win_len", 200.0, "window length in Thomson");
119  defaults_.setMinFloat("win_len", 1.0);
120 
121  defaults_.setValue("bin_count", 30, "number of bins for intensity values");
122  defaults_.setMinInt("bin_count", 3);
123 
124  defaults_.setValue("stdev_mp", 3.0, "multiplier for stdev", ListUtils::create<String>("advanced"));
125  defaults_.setMinFloat("stdev_mp", 0.01);
126  defaults_.setMaxFloat("stdev_mp", 999.0);
127 
128  defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
129  defaults_.setMinInt("min_required_elements", 1);
130 
131  defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", ListUtils::create<String>("advanced"));
132 
134  }
135 
138  SignalToNoiseEstimator<Container>(source)
139  {
140  updateMembers_();
141  }
142 
148  {
149  if (&source == this) return *this;
150 
152  updateMembers_();
153  return *this;
154  }
155 
157 
158 
161  {}
162 
163 
164 protected:
165 
166 
172  virtual void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_)
173  {
174  // reset counter for sparse windows
175  double sparse_window_percent = 0;
176 
177  // reset the results
178  stn_estimates_.clear();
179 
180  // maximal range of histogram needs to be calculated first
181  if (auto_mode_ == AUTOMAXBYSTDEV)
182  {
183  // use MEAN+auto_max_intensity_*STDEV as threshold
184  GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
185  max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
186  }
187  else if (auto_mode_ == AUTOMAXBYPERCENT)
188  {
189  // get value at "auto_max_percentile_"th percentile
190  // we use a histogram approach here as well.
191  if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
192  {
194  throw Exception::InvalidValue(__FILE__,
195  __LINE__,
196  OPENMS_PRETTY_FUNCTION,
197  "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
198  s);
199  }
200 
201  std::vector<int> histogram_auto(100, 0);
202 
203  // find maximum of current scan
204  int size = 0;
205  typename PeakType::IntensityType maxInt = 0;
206  PeakIterator run = scan_first_;
207  while (run != scan_last_)
208  {
209  maxInt = std::max(maxInt, (*run).getIntensity());
210  ++size;
211  ++run;
212  }
213 
214  double bin_size = maxInt / 100;
215 
216  // fill histogram
217  run = scan_first_;
218  while (run != scan_last_)
219  {
220  ++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)];
221  ++run;
222  }
223 
224  // add up element counts in histogram until ?th percentile is reached
225  int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
226  int elements_seen = 0;
227  int i = -1;
228  run = scan_first_;
229 
230  while (run != scan_last_ && elements_seen < elements_below_percentile)
231  {
232  ++i;
233  elements_seen += histogram_auto[i];
234  ++run;
235  }
236 
237  max_intensity_ = (((double)i) + 0.5) * bin_size;
238  }
239  else //if (auto_mode_ == MANUAL)
240  {
241  if (max_intensity_ <= 0)
242  {
244  throw Exception::InvalidValue(__FILE__,
245  __LINE__,
246  OPENMS_PRETTY_FUNCTION,
247  "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
248  s);
249  }
250  }
251 
252  if (max_intensity_ < 0)
253  {
254  std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
255  return;
256  }
257 
258  PeakIterator window_pos_center = scan_first_;
259  PeakIterator window_pos_borderleft = scan_first_;
260  PeakIterator window_pos_borderright = scan_first_;
261 
262  double window_half_size = win_len_ / 2;
263  double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
264 
265  std::vector<int> histogram(bin_count_, 0);
266  std::vector<double> bin_value(bin_count_, 0);
267  // calculate average intensity that is represented by a bin
268  for (int bin = 0; bin < bin_count_; bin++)
269  {
270  histogram[bin] = 0;
271  bin_value[bin] = (bin + 0.5) * bin_size;
272  }
273  // index of last valid bin during iteration
274  int hist_rightmost_bin;
275  // bin in which a datapoint would fall
276  int to_bin;
277  // mean & stdev of the histogram
278  double hist_mean;
279  double hist_stdev;
280 
281  // tracks elements in current window, which may vary because of unevenly spaced data
282  int elements_in_window = 0;
283  int window_count = 0;
284 
285  double noise; // noise value of a datapoint
286 
287  // determine how many elements we need to estimate (for progress estimation)
288  int windows_overall = 0;
289  PeakIterator run = scan_first_;
290  while (run != scan_last_)
291  {
292  ++windows_overall;
293  ++run;
294  }
295  SignalToNoiseEstimator<Container>::startProgress(0, windows_overall, "noise estimation of data");
296 
297  // MAIN LOOP
298  while (window_pos_center != scan_last_)
299  {
300  // erase all elements from histogram that will leave the window on the LEFT side
301  while ((*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size)
302  {
303  //std::cout << "S: " << (*window_pos_borderleft).getMZ() << " " << ( (*window_pos_center).getMZ() - window_half_size ) << "\n";
304  to_bin = (int) ((std::max((*window_pos_borderleft).getIntensity(), 0.0f)) / bin_size);
305  if (to_bin < bin_count_)
306  {
307  --histogram[to_bin];
308  --elements_in_window;
309  }
310  ++window_pos_borderleft;
311  }
312 
313  //std::printf("S1: %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size);
314 
315 
316  // add all elements to histogram that will enter the window on the RIGHT side
317  while ((window_pos_borderright != scan_last_)
318  && ((*window_pos_borderright).getMZ() < (*window_pos_center).getMZ() + window_half_size))
319  {
320  //std::printf("Sb: %E %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size, (*window_pos_borderright).getMZ() - ((*window_pos_center).getMZ() + window_half_size));
321 
322  to_bin = (int) ((std::max((*window_pos_borderright).getIntensity(), 0.0f)) / bin_size);
323  if (to_bin < bin_count_)
324  {
325  ++histogram[to_bin];
326  ++elements_in_window;
327  }
328  ++window_pos_borderright;
329  }
330 
331  if (elements_in_window < min_required_elements_)
332  {
333  noise = noise_for_empty_window_;
334  ++sparse_window_percent;
335  }
336  else
337  {
338 
339  hist_rightmost_bin = bin_count_;
340 
341  // do iteration on histogram and find threshold
342  for (int i = 0; i < 3; ++i)
343  {
344  // mean
345  hist_mean = 0;
346  for (int bin = 0; bin < hist_rightmost_bin; ++bin)
347  {
348  //std::cout << "V: " << bin << " " << hist_mean << " " << histogram[bin] << " " << elements_in_window << " " << bin_value[bin] << "\n";
349  // immediate division is numerically more stable
350  hist_mean += histogram[bin] / (double) elements_in_window * bin_value[bin];
351  }
352  //hist_mean = hist_mean / elements_in_window;
353 
354  // stdev
355  hist_stdev = 0;
356  for (int bin = 0; bin < hist_rightmost_bin; ++bin)
357  {
358  double tmp(bin_value[bin] - hist_mean);
359  hist_stdev += histogram[bin] / (double) elements_in_window * tmp * tmp;
360  }
361  hist_stdev = std::sqrt(hist_stdev);
362 
363  //determine new threshold (i.e. the rightmost bin we consider)
364  int estimate = (int) ((hist_mean + hist_stdev * stdev_ - 1) / bin_size + 1);
365  //std::cout << "E: " << hist_mean << " " << hist_stdev << " " << stdev_ << " " << bin_size<< " " << estimate << "\n";
366  hist_rightmost_bin = std::min(estimate, bin_count_);
367  }
368 
369  // just avoid division by 0
370  noise = std::max(1.0, hist_mean);
371  }
372 
373  // store result
374  stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
375 
376 
377 
378  // advance the window center by one datapoint
379  ++window_pos_center;
380  ++window_count;
381  // update progress
383 
384  } // end while
385 
387 
388  sparse_window_percent = sparse_window_percent * 100 / window_count;
389  // warn if percentage of sparse windows is above 20%
390  if (sparse_window_percent > 20)
391  {
392  std::cerr << "WARNING in SignalToNoiseEstimatorMeanIterative: "
393  << sparse_window_percent
394  << "% of all windows were sparse. You should consider increasing 'win_len' or increasing 'min_required_elements'"
395  << " You should also check the MaximalIntensity value (or the parameters for its heuristic estimation)"
396  << " If it is too low, then too many high intensity peaks will be discarded, which leads to a sparse window!"
397  << std::endl;
398  }
399 
400  return;
401 
402  } // end of shiftWindow_
403 
406  {
407  max_intensity_ = (double)param_.getValue("max_intensity");
408  auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
409  auto_max_percentile_ = param_.getValue("auto_max_percentile");
410  auto_mode_ = param_.getValue("auto_mode");
411  win_len_ = (double)param_.getValue("win_len");
412  bin_count_ = param_.getValue("bin_count");
413  stdev_ = (double)param_.getValue("stdev_mp");
414  min_required_elements_ = param_.getValue("min_required_elements");
415  noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
416  is_result_valid_ = false;
417  }
418 
428  double win_len_;
432  double stdev_;
438 
439 
440 
441 
442  };
443 
444 } // namespace OpenMS
445 
446 #endif //OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEANITERATIVE_H
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!...
Definition: DefaultParamHandler.h:157
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
A more convenient string class.
Definition: String.h:57
double auto_max_stdev_Factor_
parameter for initial automatic estimation of "max_intensity_": a stdev multiplier ...
Definition: SignalToNoiseEstimatorMeanIterative.h:422
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:150
void endProgress() const
Ends the progress display.
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition: SignalToNoiseEstimatorMeanIterative.h:434
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition: SignalToNoiseEstimatorMeanIterative.h:89
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition: SignalToNoiseEstimator.h:92
Container::const_iterator PeakIterator
Definition: SignalToNoiseEstimator.h:65
int bin_count_
number of bins in the histogram
Definition: SignalToNoiseEstimatorMeanIterative.h:430
double variance
mean of estimated Gaussian
Definition: SignalToNoiseEstimator.h:170
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
virtual void computeSTN_(const PeakIterator &scan_first_, const PeakIterator &scan_last_)
Definition: SignalToNoiseEstimatorMeanIterative.h:172
double mean
Definition: SignalToNoiseEstimator.h:169
bool is_result_valid_
flag: set to true if SignalToNoise estimates are calculated and none of the params were changed...
Definition: SignalToNoiseEstimator.h:215
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key.
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean & stdev of intensities of a spectrum
Definition: SignalToNoiseEstimator.h:175
Estimates the signal/noise (S/N) ratio of each data point in a scan based on an iterative scheme whic...
Definition: SignalToNoiseEstimatorMeanIterative.h:70
protected struct to store parameters my, sigma for a Gaussian distribution
Definition: SignalToNoiseEstimator.h:167
double auto_max_percentile_
parameter for initial automatic estimation of "max_intensity_" percentile or a stdev ...
Definition: SignalToNoiseEstimatorMeanIterative.h:424
double noise_for_empty_window_
Definition: SignalToNoiseEstimatorMeanIterative.h:437
double win_len_
range of data points which belong to a window in Thomson
Definition: SignalToNoiseEstimatorMeanIterative.h:428
SignalToNoiseEstimatorMeanIterative(const SignalToNoiseEstimatorMeanIterative &source)
Copy Constructor.
Definition: SignalToNoiseEstimatorMeanIterative.h:137
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
void setProgress(SignedSize value) const
Sets the current progress.
void setMaxFloat(const String &key, double max)
Sets the maximum value for the floating point or floating point list parameter key.
virtual ~SignalToNoiseEstimatorMeanIterative()
Destructor.
Definition: SignalToNoiseEstimatorMeanIterative.h:160
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition: SignalToNoiseEstimatorMeanIterative.h:86
PeakIterator::value_type PeakType
Definition: SignalToNoiseEstimator.h:66
SignalToNoiseEstimatorMeanIterative & operator=(const SignalToNoiseEstimatorMeanIterative &source)
Definition: SignalToNoiseEstimatorMeanIterative.h:147
Definition: SignalToNoiseEstimatorMeanIterative.h:77
Invalid value exception.
Definition: Exception.h:336
int auto_mode_
determines which method shall be used for estimating "max_intensity_". valid are MANUAL=-1, AUTOMAXBYSTDEV=0 or AUTOMAXBYPERCENT=1
Definition: SignalToNoiseEstimatorMeanIterative.h:426
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition: SignalToNoiseEstimatorMeanIterative.h:87
Definition: SignalToNoiseEstimatorMeanIterative.h:77
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
This class represents the abstract base class of a signal to noise estimator.
Definition: SignalToNoiseEstimator.h:57
SignalToNoiseEstimatorMeanIterative()
default constructor
Definition: SignalToNoiseEstimatorMeanIterative.h:93
std::map< PeakType, double, typename PeakType::PositionLess > stn_estimates_
stores the noise estimate for each peak
Definition: SignalToNoiseEstimator.h:208
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation ...
Definition: SignalToNoiseEstimatorMeanIterative.h:77
double max_intensity_
maximal intensity considered during binning (values above get discarded)
Definition: SignalToNoiseEstimatorMeanIterative.h:420
void setName(const String &name)
Mutable access to the name.
double stdev_
multiplier for the stdev of intensities
Definition: SignalToNoiseEstimatorMeanIterative.h:432
void setMinFloat(const String &key, double min)
Sets the minimum value for the floating point or floating point list parameter key.
void updateMembers_()
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed ...
Definition: SignalToNoiseEstimatorMeanIterative.h:405
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.
Definition: SignalToNoiseEstimatorMeanIterative.h:77

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:03 using doxygen 1.8.13