OpenMS  2.5.0
SignalToNoiseEstimatorMedian.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 //
35 
36 #pragma once
37 
38 
43 #include <vector>
44 
45 namespace OpenMS
46 {
79  template <typename Container = MSSpectrum>
81  public SignalToNoiseEstimator<Container>
82  {
83 
84 public:
85 
88 
95 
98 
100 
103  {
104  //set the name for DefaultParamHandler error messages
105  this->setName("SignalToNoiseEstimatorMedian");
106 
107  defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
108  " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
109  " All intensities EQUAL/ABOVE 'max_intensity' will be added to the LAST histogram bin." \
110  " If you choose 'max_intensity' too small, the noise estimate might be too small as well. " \
111  " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime)." \
112  " In general, the Median-S/N estimator is more robust to a manual max_intensity than the MeanIterative-S/N.", ListUtils::create<String>("advanced"));
113  defaults_.setMinInt("max_intensity", -1);
114 
115  defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", ListUtils::create<String>("advanced"));
116  defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
117  defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
118 
119  defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", ListUtils::create<String>("advanced"));
120  defaults_.setMinInt("auto_max_percentile", 0);
121  defaults_.setMaxInt("auto_max_percentile", 100);
122 
123  defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", ListUtils::create<String>("advanced"));
124  defaults_.setMinInt("auto_mode", -1);
125  defaults_.setMaxInt("auto_mode", 1);
126 
127  defaults_.setValue("win_len", 200.0, "window length in Thomson");
128  defaults_.setMinFloat("win_len", 1.0);
129 
130  defaults_.setValue("bin_count", 30, "number of bins for intensity values");
131  defaults_.setMinInt("bin_count", 3);
132 
133  defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
134  defaults_.setMinInt("min_required_elements", 1);
135 
136  defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", ListUtils::create<String>("advanced"));
137 
138  defaults_.setValue("write_log_messages", "true", "Write out log messages in case of sparse windows or median in rightmost histogram bin");
139  defaults_.setValidStrings("write_log_messages", ListUtils::create<String>("true,false"));
140 
142  }
143 
146  SignalToNoiseEstimator<Container>(source)
147  {
148  updateMembers_();
149  }
150 
156  {
157  if (&source == this) return *this;
158 
160  updateMembers_();
161  return *this;
162  }
163 
165 
168  {}
169 
171  double getSparseWindowPercent() const
172  {
173  return sparse_window_percent_;
174  }
175 
178  {
179  return histogram_oob_percent_;
180  }
181 
182 protected:
183 
184 
191  void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_) override
192  {
193  // reset counter for sparse windows
195  // reset counter for histogram overflow
197 
198  // reset the results
199  stn_estimates_.clear();
200 
201  // maximal range of histogram needs to be calculated first
202  if (auto_mode_ == AUTOMAXBYSTDEV)
203  {
204  // use MEAN+auto_max_intensity_*STDEV as threshold
205  GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
206  max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
207  }
208  else if (auto_mode_ == AUTOMAXBYPERCENT)
209  {
210  // get value at "auto_max_percentile_"th percentile
211  // we use a histogram approach here as well.
212  if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
213  {
215  throw Exception::InvalidValue(__FILE__,
216  __LINE__,
217  OPENMS_PRETTY_FUNCTION,
218  "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
219  s);
220  }
221 
222  std::vector<int> histogram_auto(100, 0);
223 
224  // find maximum of current scan
225  int size = 0;
226  typename PeakType::IntensityType maxInt = 0;
227  PeakIterator run = scan_first_;
228  while (run != scan_last_)
229  {
230  maxInt = std::max(maxInt, (*run).getIntensity());
231  ++size;
232  ++run;
233  }
234 
235  double bin_size = maxInt / 100;
236 
237  // fill histogram
238  run = scan_first_;
239  while (run != scan_last_)
240  {
241  ++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)];
242  ++run;
243  }
244 
245  // add up element counts in histogram until ?th percentile is reached
246  int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
247  int elements_seen = 0;
248  int i = -1;
249  run = scan_first_;
250 
251  while (run != scan_last_ && elements_seen < elements_below_percentile)
252  {
253  ++i;
254  elements_seen += histogram_auto[i];
255  ++run;
256  }
257 
258  max_intensity_ = (((double)i) + 0.5) * bin_size;
259  }
260  else //if (auto_mode_ == MANUAL)
261  {
262  if (max_intensity_ <= 0)
263  {
265  throw Exception::InvalidValue(__FILE__,
266  __LINE__,
267  OPENMS_PRETTY_FUNCTION,
268  "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
269  s);
270  }
271  }
272 
273  if (max_intensity_ < 0)
274  {
275  std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
276  return;
277  }
278 
279  PeakIterator window_pos_center = scan_first_;
280  PeakIterator window_pos_borderleft = scan_first_;
281  PeakIterator window_pos_borderright = scan_first_;
282 
283  double window_half_size = win_len_ / 2;
284  double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
285  int bin_count_minus_1 = bin_count_ - 1;
286 
287  std::vector<int> histogram(bin_count_, 0);
288  std::vector<double> bin_value(bin_count_, 0);
289  // calculate average intensity that is represented by a bin
290  for (int bin = 0; bin < bin_count_; bin++)
291  {
292  histogram[bin] = 0;
293  bin_value[bin] = (bin + 0.5) * bin_size;
294  }
295  // bin in which a datapoint would fall
296  int to_bin = 0;
297 
298  // index of bin where the median is located
299  int median_bin = 0;
300  // additive number of elements from left to x in histogram
301  int element_inc_count = 0;
302 
303  // tracks elements in current window, which may vary because of unevenly spaced data
304  int elements_in_window = 0;
305  // number of windows
306  int window_count = 0;
307 
308  // number of elements where we find the median
309  int element_in_window_half = 0;
310 
311  double noise; // noise value of a datapoint
312 
313  // determine how many elements we need to estimate (for progress estimation)
314  int windows_overall = 0;
315  PeakIterator run = scan_first_;
316  while (run != scan_last_)
317  {
318  ++windows_overall;
319  ++run;
320  }
321  SignalToNoiseEstimator<Container>::startProgress(0, windows_overall, "noise estimation of data");
322 
323  // MAIN LOOP
324  while (window_pos_center != scan_last_)
325  {
326 
327  // erase all elements from histogram that will leave the window on the LEFT side
328  while ((*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size)
329  {
330  to_bin = std::max(std::min<int>((int)((*window_pos_borderleft).getIntensity() / bin_size), bin_count_minus_1), 0);
331  --histogram[to_bin];
332  --elements_in_window;
333  ++window_pos_borderleft;
334  }
335 
336  // add all elements to histogram that will enter the window on the RIGHT side
337  while ((window_pos_borderright != scan_last_)
338  && ((*window_pos_borderright).getMZ() <= (*window_pos_center).getMZ() + window_half_size))
339  {
340  //std::cerr << (*window_pos_borderright).getIntensity() << " " << bin_size << " " << bin_count_minus_1 << std::endl;
341  to_bin = std::max(std::min<int>((int)((*window_pos_borderright).getIntensity() / bin_size), bin_count_minus_1), 0);
342  ++histogram[to_bin];
343  ++elements_in_window;
344  ++window_pos_borderright;
345  }
346 
347  if (elements_in_window < min_required_elements_)
348  {
349  noise = noise_for_empty_window_;
351  }
352  else
353  {
354  // find bin i where ceil[elements_in_window/2] <= sum_c(0..i){ histogram[c] }
355  median_bin = -1;
356  element_inc_count = 0;
357  element_in_window_half = (elements_in_window + 1) / 2;
358  while (median_bin < bin_count_minus_1 && element_inc_count < element_in_window_half)
359  {
360  ++median_bin;
361  element_inc_count += histogram[median_bin];
362  }
363 
364  // increase the error count
365  if (median_bin == bin_count_minus_1) {++histogram_oob_percent_; }
366 
367  // just avoid division by 0
368  noise = std::max(1.0, bin_value[median_bin]);
369  }
370 
371  // store result
372  stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
373 
374 
375  // advance the window center by one datapoint
376  ++window_pos_center;
377  ++window_count;
378  // update progress
380 
381  } // end while
382 
384 
385  sparse_window_percent_ = sparse_window_percent_ * 100 / window_count;
386  histogram_oob_percent_ = histogram_oob_percent_ * 100 / window_count;
387 
388  // warn if percentage of sparse windows is above 20%
390  {
391  OPENMS_LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
393  << "% of all windows were sparse. You should consider increasing 'win_len' or decreasing 'min_required_elements'"
394  << std::endl;
395  }
396 
397  // warn if percentage of possibly wrong median estimates is above 1%
399  {
400  OPENMS_LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
402  << "% of all Signal-to-Noise estimates are too high, because the median was found in the rightmost histogram-bin. "
403  << "You should consider increasing 'max_intensity' (and maybe 'bin_count' with it, to keep bin width reasonable)"
404  << std::endl;
405  }
406 
407  } // end of shiftWindow_
408 
410  void updateMembers_() override
411  {
412  max_intensity_ = (double)param_.getValue("max_intensity");
413  auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
414  auto_max_percentile_ = param_.getValue("auto_max_percentile");
415  auto_mode_ = param_.getValue("auto_mode");
416  win_len_ = (double)param_.getValue("win_len");
417  bin_count_ = param_.getValue("bin_count");
418  min_required_elements_ = param_.getValue("min_required_elements");
419  noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
420  write_log_messages_ = (bool)param_.getValue("write_log_messages").toBool();
421  is_result_valid_ = false;
422  }
423 
433  double win_len_;
441 
442  // whether to write out log messages in the case of failure
444 
445  // counter for sparse windows
447  // counter for histogram overflow
449 
450 
451  };
452 
453 } // namespace OpenMS
454 
LogStream.h
OpenMS::SignalToNoiseEstimator::is_result_valid_
bool is_result_valid_
flag: set to true if SignalToNoise estimates are calculated and none of the params were changed....
Definition: SignalToNoiseEstimator.h:214
OpenMS::SignalToNoiseEstimatorMedian::PeakIterator
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition: SignalToNoiseEstimatorMedian.h:96
OpenMS::SignalToNoiseEstimatorMedian::max_intensity_
double max_intensity_
maximal intensity considered during binning (values above get discarded)
Definition: SignalToNoiseEstimatorMedian.h:425
OpenMS::ProgressLogger::setProgress
void setProgress(SignedSize value) const
Sets the current progress.
OpenMS::SignalToNoiseEstimatorMedian::auto_mode_
int auto_mode_
determines which method shall be used for estimating "max_intensity_". valid are MANUAL=-1,...
Definition: SignalToNoiseEstimatorMedian.h:431
OpenMS::SignalToNoiseEstimatorMedian::min_required_elements_
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition: SignalToNoiseEstimatorMedian.h:437
OpenMS::SignalToNoiseEstimatorMedian< ContainerT >::IntensityThresholdCalculation
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation
Definition: SignalToNoiseEstimatorMedian.h:87
OpenMS::Param::setMinFloat
void setMinFloat(const String &key, double min)
Sets the minimum value for the floating point or floating point list parameter key.
double
OpenMS::SignalToNoiseEstimatorMedian::operator=
SignalToNoiseEstimatorMedian & operator=(const SignalToNoiseEstimatorMedian &source)
Definition: SignalToNoiseEstimatorMedian.h:155
OpenMS::SignalToNoiseEstimatorMedian::auto_max_percentile_
double auto_max_percentile_
parameter for initial automatic estimation of "max_intensity_" percentile or a stdev
Definition: SignalToNoiseEstimatorMedian.h:429
OpenMS::Exception::InvalidValue
Invalid value exception.
Definition: Exception.h:335
OpenMS::SignalToNoiseEstimatorMedian::GaussianEstimate
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition: SignalToNoiseEstimatorMedian.h:99
OpenMS::SignalToNoiseEstimator::stn_estimates_
std::map< PeakType, double, typename PeakType::PositionLess > stn_estimates_
stores the noise estimate for each peak
Definition: SignalToNoiseEstimator.h:207
OpenMS::Param::setValue
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
OpenMS::SignalToNoiseEstimatorMedian::sparse_window_percent_
double sparse_window_percent_
Definition: SignalToNoiseEstimatorMedian.h:446
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::SignalToNoiseEstimator
This class represents the abstract base class of a signal to noise estimator.
Definition: SignalToNoiseEstimator.h:56
OpenMS::SignalToNoiseEstimatorMedian::bin_count_
int bin_count_
number of bins in the histogram
Definition: SignalToNoiseEstimatorMedian.h:435
OpenMS::DefaultParamHandler::setName
void setName(const String &name)
Mutable access to the name.
OpenMS::Param::setValidStrings
void setValidStrings(const String &key, const std::vector< String > &strings)
Sets the valid strings for the parameter key.
OpenMS::Param::setMaxInt
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key.
OpenMS::SignalToNoiseEstimatorMedian::computeSTN_
void computeSTN_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) override
Definition: SignalToNoiseEstimatorMedian.h:191
OpenMS::Param::getValue
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
OpenMS::ProgressLogger::startProgress
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
OpenMS::ProgressLogger::endProgress
void endProgress() const
Ends the progress display.
ListUtils.h
OpenMS::SignalToNoiseEstimatorMedian::AUTOMAXBYPERCENT
Definition: SignalToNoiseEstimatorMedian.h:87
OpenMS::SignalToNoiseEstimator::PeakType
PeakIterator::value_type PeakType
Definition: SignalToNoiseEstimator.h:65
OpenMS::SignalToNoiseEstimatorMedian::auto_max_stdev_Factor_
double auto_max_stdev_Factor_
parameter for initial automatic estimation of "max_intensity_": a stdev multiplier
Definition: SignalToNoiseEstimatorMedian.h:427
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::SignalToNoiseEstimatorMedian::PeakType
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition: SignalToNoiseEstimatorMedian.h:97
OpenMS::DataValue::toBool
bool toBool() const
Conversion to bool.
OpenMS::SignalToNoiseEstimatorMedian::getSparseWindowPercent
double getSparseWindowPercent() const
Returns how many percent of the windows were sparse.
Definition: SignalToNoiseEstimatorMedian.h:171
OpenMS::SignalToNoiseEstimatorMedian::SignalToNoiseEstimatorMedian
SignalToNoiseEstimatorMedian(const SignalToNoiseEstimatorMedian &source)
Copy Constructor.
Definition: SignalToNoiseEstimatorMedian.h:145
Exception.h
OpenMS::SignalToNoiseEstimatorMedian::noise_for_empty_window_
double noise_for_empty_window_
Definition: SignalToNoiseEstimatorMedian.h:440
OpenMS::SignalToNoiseEstimatorMedian::histogram_oob_percent_
double histogram_oob_percent_
Definition: SignalToNoiseEstimatorMedian.h:448
int
OpenMS::SignalToNoiseEstimator::operator=
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition: SignalToNoiseEstimator.h:91
OpenMS::Param::setMinInt
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key.
OpenMS::SignalToNoiseEstimatorMedian
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:80
OpenMS::SignalToNoiseEstimatorMedian::~SignalToNoiseEstimatorMedian
~SignalToNoiseEstimatorMedian() override
Destructor.
Definition: SignalToNoiseEstimatorMedian.h:167
OpenMS::Param::setMaxFloat
void setMaxFloat(const String &key, double max)
Sets the maximum value for the floating point or floating point list parameter key.
OpenMS::SignalToNoiseEstimatorMedian::getHistogramRightmostPercent
double getHistogramRightmostPercent() const
Returns the percentage where the median was found in the rightmost bin.
Definition: SignalToNoiseEstimatorMedian.h:177
OpenMS::SignalToNoiseEstimatorMedian::MANUAL
Definition: SignalToNoiseEstimatorMedian.h:87
OpenMS::SignalToNoiseEstimatorMedian::SignalToNoiseEstimatorMedian
SignalToNoiseEstimatorMedian()
default constructor
Definition: SignalToNoiseEstimatorMedian.h:102
OpenMS::SignalToNoiseEstimatorMedian::write_log_messages_
bool write_log_messages_
Definition: SignalToNoiseEstimatorMedian.h:443
OpenMS::SignalToNoiseEstimator::estimate_
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean & stdev of intensities of a spectrum
Definition: SignalToNoiseEstimator.h:174
float
OpenMS::DefaultParamHandler::defaults_
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!
Definition: DefaultParamHandler.h:156
OpenMS::SignalToNoiseEstimatorMedian::updateMembers_
void updateMembers_() override
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed
Definition: SignalToNoiseEstimatorMedian.h:410
SignalToNoiseEstimator.h
OpenMS::DefaultParamHandler::defaultsToParam_
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.
OpenMS::SignalToNoiseEstimator::PeakIterator
Container::const_iterator PeakIterator
Definition: SignalToNoiseEstimator.h:64
OpenMS::SignalToNoiseEstimatorMedian::AUTOMAXBYSTDEV
Definition: SignalToNoiseEstimatorMedian.h:87
OpenMS::SignalToNoiseEstimator::GaussianEstimate
protected struct to store parameters my, sigma for a Gaussian distribution
Definition: SignalToNoiseEstimator.h:166
OpenMS::SignalToNoiseEstimatorMedian::win_len_
double win_len_
range of data points which belong to a window in Thomson
Definition: SignalToNoiseEstimatorMedian.h:433
OpenMS::DefaultParamHandler::param_
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:149