Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SignalToNoiseEstimatorMedian.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 //
35 
36 #ifndef OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
37 #define OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
38 
39 
44 #include <vector>
45 
46 namespace OpenMS
47 {
80  template <typename Container = MSSpectrum>
82  public SignalToNoiseEstimator<Container>
83  {
84 
85 public:
86 
89 
96 
99 
101 
104  {
105  //set the name for DefaultParamHandler error messages
106  this->setName("SignalToNoiseEstimatorMedian");
107 
108  defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
109  " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
110  " All intensities EQUAL/ABOVE 'max_intensity' will be added to the LAST histogram bin." \
111  " If you choose 'max_intensity' too small, the noise estimate might be too small as well. " \
112  " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime)." \
113  " In general, the Median-S/N estimator is more robust to a manual max_intensity than the MeanIterative-S/N.", ListUtils::create<String>("advanced"));
114  defaults_.setMinInt("max_intensity", -1);
115 
116  defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", ListUtils::create<String>("advanced"));
117  defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
118  defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
119 
120  defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", ListUtils::create<String>("advanced"));
121  defaults_.setMinInt("auto_max_percentile", 0);
122  defaults_.setMaxInt("auto_max_percentile", 100);
123 
124  defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", ListUtils::create<String>("advanced"));
125  defaults_.setMinInt("auto_mode", -1);
126  defaults_.setMaxInt("auto_mode", 1);
127 
128  defaults_.setValue("win_len", 200.0, "window length in Thomson");
129  defaults_.setMinFloat("win_len", 1.0);
130 
131  defaults_.setValue("bin_count", 30, "number of bins for intensity values");
132  defaults_.setMinInt("bin_count", 3);
133 
134  defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
135  defaults_.setMinInt("min_required_elements", 1);
136 
137  defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", ListUtils::create<String>("advanced"));
138 
139  defaults_.setValue("write_log_messages", "true", "Write out log messages in case of sparse windows or median in rightmost histogram bin");
140  defaults_.setValidStrings("write_log_messages", ListUtils::create<String>("true,false"));
141 
143  }
144 
147  SignalToNoiseEstimator<Container>(source)
148  {
149  updateMembers_();
150  }
151 
157  {
158  if (&source == this) return *this;
159 
161  updateMembers_();
162  return *this;
163  }
164 
166 
169  {}
170 
172  double getSparseWindowPercent() const
173  {
174  return sparse_window_percent_;
175  }
176 
179  {
180  return histogram_oob_percent_;
181  }
182 
183 protected:
184 
185 
192  void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_)
193  {
194  // reset counter for sparse windows
196  // reset counter for histogram overflow
198 
199  // reset the results
200  stn_estimates_.clear();
201 
202  // maximal range of histogram needs to be calculated first
203  if (auto_mode_ == AUTOMAXBYSTDEV)
204  {
205  // use MEAN+auto_max_intensity_*STDEV as threshold
206  GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
207  max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
208  }
209  else if (auto_mode_ == AUTOMAXBYPERCENT)
210  {
211  // get value at "auto_max_percentile_"th percentile
212  // we use a histogram approach here as well.
213  if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
214  {
216  throw Exception::InvalidValue(__FILE__,
217  __LINE__,
218  OPENMS_PRETTY_FUNCTION,
219  "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
220  s);
221  }
222 
223  std::vector<int> histogram_auto(100, 0);
224 
225  // find maximum of current scan
226  int size = 0;
227  typename PeakType::IntensityType maxInt = 0;
228  PeakIterator run = scan_first_;
229  while (run != scan_last_)
230  {
231  maxInt = std::max(maxInt, (*run).getIntensity());
232  ++size;
233  ++run;
234  }
235 
236  double bin_size = maxInt / 100;
237 
238  // fill histogram
239  run = scan_first_;
240  while (run != scan_last_)
241  {
242  ++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)];
243  ++run;
244  }
245 
246  // add up element counts in histogram until ?th percentile is reached
247  int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
248  int elements_seen = 0;
249  int i = -1;
250  run = scan_first_;
251 
252  while (run != scan_last_ && elements_seen < elements_below_percentile)
253  {
254  ++i;
255  elements_seen += histogram_auto[i];
256  ++run;
257  }
258 
259  max_intensity_ = (((double)i) + 0.5) * bin_size;
260  }
261  else //if (auto_mode_ == MANUAL)
262  {
263  if (max_intensity_ <= 0)
264  {
266  throw Exception::InvalidValue(__FILE__,
267  __LINE__,
268  OPENMS_PRETTY_FUNCTION,
269  "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
270  s);
271  }
272  }
273 
274  if (max_intensity_ < 0)
275  {
276  std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
277  return;
278  }
279 
280  PeakIterator window_pos_center = scan_first_;
281  PeakIterator window_pos_borderleft = scan_first_;
282  PeakIterator window_pos_borderright = scan_first_;
283 
284  double window_half_size = win_len_ / 2;
285  double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
286  int bin_count_minus_1 = bin_count_ - 1;
287 
288  std::vector<int> histogram(bin_count_, 0);
289  std::vector<double> bin_value(bin_count_, 0);
290  // calculate average intensity that is represented by a bin
291  for (int bin = 0; bin < bin_count_; bin++)
292  {
293  histogram[bin] = 0;
294  bin_value[bin] = (bin + 0.5) * bin_size;
295  }
296  // bin in which a datapoint would fall
297  int to_bin = 0;
298 
299  // index of bin where the median is located
300  int median_bin = 0;
301  // additive number of elements from left to x in histogram
302  int element_inc_count = 0;
303 
304  // tracks elements in current window, which may vary because of unevenly spaced data
305  int elements_in_window = 0;
306  // number of windows
307  int window_count = 0;
308 
309  // number of elements where we find the median
310  int element_in_window_half = 0;
311 
312  double noise; // noise value of a datapoint
313 
314  // determine how many elements we need to estimate (for progress estimation)
315  int windows_overall = 0;
316  PeakIterator run = scan_first_;
317  while (run != scan_last_)
318  {
319  ++windows_overall;
320  ++run;
321  }
322  SignalToNoiseEstimator<Container>::startProgress(0, windows_overall, "noise estimation of data");
323 
324  // MAIN LOOP
325  while (window_pos_center != scan_last_)
326  {
327 
328  // erase all elements from histogram that will leave the window on the LEFT side
329  while ((*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size)
330  {
331  to_bin = std::max(std::min<int>((int)((*window_pos_borderleft).getIntensity() / bin_size), bin_count_minus_1), 0);
332  --histogram[to_bin];
333  --elements_in_window;
334  ++window_pos_borderleft;
335  }
336 
337  // add all elements to histogram that will enter the window on the RIGHT side
338  while ((window_pos_borderright != scan_last_)
339  && ((*window_pos_borderright).getMZ() <= (*window_pos_center).getMZ() + window_half_size))
340  {
341  //std::cerr << (*window_pos_borderright).getIntensity() << " " << bin_size << " " << bin_count_minus_1 << std::endl;
342  to_bin = std::max(std::min<int>((int)((*window_pos_borderright).getIntensity() / bin_size), bin_count_minus_1), 0);
343  ++histogram[to_bin];
344  ++elements_in_window;
345  ++window_pos_borderright;
346  }
347 
348  if (elements_in_window < min_required_elements_)
349  {
350  noise = noise_for_empty_window_;
352  }
353  else
354  {
355  // find bin i where ceil[elements_in_window/2] <= sum_c(0..i){ histogram[c] }
356  median_bin = -1;
357  element_inc_count = 0;
358  element_in_window_half = (elements_in_window + 1) / 2;
359  while (median_bin < bin_count_minus_1 && element_inc_count < element_in_window_half)
360  {
361  ++median_bin;
362  element_inc_count += histogram[median_bin];
363  }
364 
365  // increase the error count
366  if (median_bin == bin_count_minus_1) {++histogram_oob_percent_; }
367 
368  // just avoid division by 0
369  noise = std::max(1.0, bin_value[median_bin]);
370  }
371 
372  // store result
373  stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
374 
375 
376  // advance the window center by one datapoint
377  ++window_pos_center;
378  ++window_count;
379  // update progress
381 
382  } // end while
383 
385 
386  sparse_window_percent_ = sparse_window_percent_ * 100 / window_count;
387  histogram_oob_percent_ = histogram_oob_percent_ * 100 / window_count;
388 
389  // warn if percentage of sparse windows is above 20%
391  {
392  LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
394  << "% of all windows were sparse. You should consider increasing 'win_len' or decreasing 'min_required_elements'"
395  << std::endl;
396  }
397 
398  // warn if percentage of possibly wrong median estimates is above 1%
400  {
401  LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
403  << "% of all Signal-to-Noise estimates are too high, because the median was found in the rightmost histogram-bin. "
404  << "You should consider increasing 'max_intensity' (and maybe 'bin_count' with it, to keep bin width reasonable)"
405  << std::endl;
406  }
407 
408  } // end of shiftWindow_
409 
412  {
413  max_intensity_ = (double)param_.getValue("max_intensity");
414  auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
415  auto_max_percentile_ = param_.getValue("auto_max_percentile");
416  auto_mode_ = param_.getValue("auto_mode");
417  win_len_ = (double)param_.getValue("win_len");
418  bin_count_ = param_.getValue("bin_count");
419  min_required_elements_ = param_.getValue("min_required_elements");
420  noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
421  write_log_messages_ = (bool)param_.getValue("write_log_messages").toBool();
422  is_result_valid_ = false;
423  }
424 
434  double win_len_;
442 
443  // whether to write out log messages in the case of failure
445 
446  // counter for sparse windows
448  // counter for histogram overflow
450 
451 
452  };
453 
454 } // namespace OpenMS
455 
456 #endif //OPENMS_FILTERING_NOISEESTIMATION_DSIGNALTONOISEESTIMATORMEDIAN_H
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!...
Definition: DefaultParamHandler.h:157
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition: SignalToNoiseEstimatorMedian.h:98
A more convenient string class.
Definition: String.h:57
double auto_max_percentile_
parameter for initial automatic estimation of "max_intensity_" percentile or a stdev ...
Definition: SignalToNoiseEstimatorMedian.h:430
Definition: SignalToNoiseEstimatorMedian.h:88
int auto_mode_
determines which method shall be used for estimating "max_intensity_". valid are MANUAL=-1, AUTOMAXBYSTDEV=0 or AUTOMAXBYPERCENT=1
Definition: SignalToNoiseEstimatorMedian.h:432
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:150
void endProgress() const
Ends the progress display.
void computeSTN_(const PeakIterator &scan_first_, const PeakIterator &scan_last_)
Definition: SignalToNoiseEstimatorMedian.h:192
int bin_count_
number of bins in the histogram
Definition: SignalToNoiseEstimatorMedian.h:436
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition: SignalToNoiseEstimator.h:92
double win_len_
range of data points which belong to a window in Thomson
Definition: SignalToNoiseEstimatorMedian.h:434
double getSparseWindowPercent() const
Returns how many percent of the windows were sparse.
Definition: SignalToNoiseEstimatorMedian.h:172
Container::const_iterator PeakIterator
Definition: SignalToNoiseEstimator.h:65
double max_intensity_
maximal intensity considered during binning (values above get discarded)
Definition: SignalToNoiseEstimatorMedian.h:426
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition: SignalToNoiseEstimatorMedian.h:100
bool toBool() const
Conversion to bool.
SignalToNoiseEstimatorMedian(const SignalToNoiseEstimatorMedian &source)
Copy Constructor.
Definition: SignalToNoiseEstimatorMedian.h:146
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
bool is_result_valid_
flag: set to true if SignalToNoise estimates are calculated and none of the params were changed...
Definition: SignalToNoiseEstimator.h:215
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key.
void updateMembers_()
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed ...
Definition: SignalToNoiseEstimatorMedian.h:411
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean & stdev of intensities of a spectrum
Definition: SignalToNoiseEstimator.h:175
protected struct to store parameters my, sigma for a Gaussian distribution
Definition: SignalToNoiseEstimator.h:167
bool write_log_messages_
Definition: SignalToNoiseEstimatorMedian.h:444
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
void setProgress(SignedSize value) const
Sets the current progress.
double auto_max_stdev_Factor_
parameter for initial automatic estimation of "max_intensity_": a stdev multiplier ...
Definition: SignalToNoiseEstimatorMedian.h:428
void setMaxFloat(const String &key, double max)
Sets the maximum value for the floating point or floating point list parameter key.
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation ...
Definition: SignalToNoiseEstimatorMedian.h:88
PeakIterator::value_type PeakType
Definition: SignalToNoiseEstimator.h:66
double getHistogramRightmostPercent() const
Returns the percentage where the median was found in the rightmost bin.
Definition: SignalToNoiseEstimatorMedian.h:178
double noise_for_empty_window_
Definition: SignalToNoiseEstimatorMedian.h:441
Definition: SignalToNoiseEstimatorMedian.h:88
void setValidStrings(const String &key, const std::vector< String > &strings)
Sets the valid strings for the parameter key.
Invalid value exception.
Definition: Exception.h:336
SignalToNoiseEstimatorMedian & operator=(const SignalToNoiseEstimatorMedian &source)
Definition: SignalToNoiseEstimatorMedian.h:156
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:81
SignalToNoiseEstimatorMedian()
default constructor
Definition: SignalToNoiseEstimatorMedian.h:103
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition: SignalToNoiseEstimatorMedian.h:438
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
This class represents the abstract base class of a signal to noise estimator.
Definition: SignalToNoiseEstimator.h:57
std::map< PeakType, double, typename PeakType::PositionLess > stn_estimates_
stores the noise estimate for each peak
Definition: SignalToNoiseEstimator.h:208
void setName(const String &name)
Mutable access to the name.
double histogram_oob_percent_
Definition: SignalToNoiseEstimatorMedian.h:449
virtual ~SignalToNoiseEstimatorMedian()
Destructor.
Definition: SignalToNoiseEstimatorMedian.h:168
Definition: SignalToNoiseEstimatorMedian.h:88
double sparse_window_percent_
Definition: SignalToNoiseEstimatorMedian.h:447
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition: SignalToNoiseEstimatorMedian.h:97
void setMinFloat(const String &key, double min)
Sets the minimum value for the floating point or floating point list parameter key.
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:03 using doxygen 1.8.13