OpenMS
Loading...
Searching...
No Matches
SignalToNoiseEstimatorMeanIterative.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow $
6// $Authors: $
7// --------------------------------------------------------------------------
8//
9
10#pragma once
11
16#include <vector>
17#include <algorithm> //for std::max_element
18
19namespace OpenMS
20{
44 template <typename Container = MSSpectrum>
46 public SignalToNoiseEstimator<Container>
47 {
48
49public:
50
53
55 using SignalToNoiseEstimator<Container>::defaults_;
56 using SignalToNoiseEstimator<Container>::param_;
57
60
62
63
66 {
67 //set the name for DefaultParamHandler error messages
68 this->setName("SignalToNoiseEstimatorMeanIterative");
69
70 defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
71 " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
72 " All intensities EQUAL/ABOVE 'max_intensity' will not be added to the histogram." \
73 " If you choose 'max_intensity' too small, the noise estimate might be too small as well." \
74 " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime).", {"advanced"});
75 defaults_.setMinInt("max_intensity", -1);
76
77 defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", {"advanced"});
78 defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
79 defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
80
81
82 defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", {"advanced"});
83 defaults_.setMinInt("auto_max_percentile", 0);
84 defaults_.setMaxInt("auto_max_percentile", 100);
85
86 defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", {"advanced"});
87 defaults_.setMinInt("auto_mode", -1);
88 defaults_.setMaxInt("auto_mode", 1);
89
90 defaults_.setValue("win_len", 200.0, "window length in Thomson");
91 defaults_.setMinFloat("win_len", 1.0);
92
93 defaults_.setValue("bin_count", 30, "number of bins for intensity values");
94 defaults_.setMinInt("bin_count", 3);
95
96 defaults_.setValue("stdev_mp", 3.0, "multiplier for stdev", {"advanced"});
97 defaults_.setMinFloat("stdev_mp", 0.01);
98 defaults_.setMaxFloat("stdev_mp", 999.0);
99
100 defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
101 defaults_.setMinInt("min_required_elements", 1);
102
103 defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", {"advanced"});
104
106 }
107
114
120 {
121 if (&source == this) return *this;
122
125 return *this;
126 }
127
129
130
134
135
136protected:
137
138
143 void computeSTN_(const Container& c) override
144 {
145 //first element in the scan
146 PeakIterator scan_first_ = c.begin();
147 //last element in the scan
148 PeakIterator scan_last_ = c.end();
149
150 // reset counter for sparse windows
151 double sparse_window_percent = 0;
152
153 // reset the results
154 stn_estimates_.clear();
155 stn_estimates_.resize(c.size());
156
157 // maximal range of histogram needs to be calculated first
159 {
160 // use MEAN+auto_max_intensity_*STDEV as threshold
161 GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
162 max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
163 }
164 else if (auto_mode_ == AUTOMAXBYPERCENT)
165 {
166 // get value at "auto_max_percentile_"th percentile
167 // we use a histogram approach here as well.
168 if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
169 {
171 throw Exception::InvalidValue(__FILE__,
172 __LINE__,
173 OPENMS_PRETTY_FUNCTION,
174 "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
175 s);
176 }
177
178 std::vector<int> histogram_auto(100, 0);
179
180 // find maximum of current scan
181 auto maxIt = std::max_element(c.begin(), c.end() ,[](const PeakType& a, const PeakType& b){ return a.getIntensity() > b.getIntensity();});
182 typename PeakType::IntensityType maxInt = maxIt->getIntensity();
183
184 double bin_size = maxInt / 100;
185
186 // fill histogram
187 for(auto& run : c)
188 {
189 ++histogram_auto[(int) (((run).getIntensity() - 1) / bin_size)];
190 }
191
192 // add up element counts in histogram until ?th percentile is reached
193 int elements_below_percentile = (int) (auto_max_percentile_ * c.size() / 100);
194 int elements_seen = 0;
195 int i = -1;
196 PeakIterator run = scan_first_;
197
198 while (run != scan_last_ && elements_seen < elements_below_percentile)
199 {
200 ++i;
201 elements_seen += histogram_auto[i];
202 ++run;
203 }
204
205 max_intensity_ = (((double)i) + 0.5) * bin_size;
206 }
207 else //if (auto_mode_ == MANUAL)
208 {
209 if (max_intensity_ <= 0)
210 {
212 throw Exception::InvalidValue(__FILE__,
213 __LINE__,
214 OPENMS_PRETTY_FUNCTION,
215 "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
216 s);
217 }
218 }
219
220 if (max_intensity_ < 0)
221 {
222 OPENMS_LOG_WARN << "SignalToNoiseEstimatorMeanIterative: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
223 return;
224 }
225
226 PeakIterator window_pos_center = scan_first_;
227 PeakIterator window_pos_borderleft = scan_first_;
228 PeakIterator window_pos_borderright = scan_first_;
229
230 double window_half_size = win_len_ / 2;
231 double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
232
233 std::vector<int> histogram(bin_count_, 0);
234 std::vector<double> bin_value(bin_count_, 0);
235 // calculate average intensity that is represented by a bin
236 for (int bin = 0; bin < bin_count_; bin++)
237 {
238 histogram[bin] = 0;
239 bin_value[bin] = (bin + 0.5) * bin_size;
240 }
241 // index of last valid bin during iteration
242 int hist_rightmost_bin;
243 // bin in which a datapoint would fall
244 int to_bin;
245 // mean & stdev of the histogram
246 double hist_mean;
247 double hist_stdev;
248
249 // tracks elements in current window, which may vary because of unevenly spaced data
250 int elements_in_window = 0;
251 int window_count = 0;
252
253 double noise; // noise value of a datapoint
254
256 SignalToNoiseEstimator<Container>::startProgress(0, c.size(), "noise estimation of data");
257
258 // MAIN LOOP
259 while (window_pos_center != scan_last_)
260 {
261 // erase all elements from histogram that will leave the window on the LEFT side
262 while ((*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size)
263 {
264 //std::cout << "S: " << (*window_pos_borderleft).getMZ() << " " << ( (*window_pos_center).getMZ() - window_half_size ) << "\n";
265 to_bin = (int) ((std::max((*window_pos_borderleft).getIntensity(), 0.0f)) / bin_size);
266 if (to_bin < bin_count_)
267 {
268 --histogram[to_bin];
269 --elements_in_window;
270 }
271 ++window_pos_borderleft;
272 }
273
274 //std::printf("S1: %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size);
275
276
277 // add all elements to histogram that will enter the window on the RIGHT side
278 while ((window_pos_borderright != scan_last_)
279 && ((*window_pos_borderright).getMZ() < (*window_pos_center).getMZ() + window_half_size))
280 {
281 //std::printf("Sb: %E %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size, (*window_pos_borderright).getMZ() - ((*window_pos_center).getMZ() + window_half_size));
282
283 to_bin = (int) ((std::max((*window_pos_borderright).getIntensity(), 0.0f)) / bin_size);
284 if (to_bin < bin_count_)
285 {
286 ++histogram[to_bin];
287 ++elements_in_window;
288 }
289 ++window_pos_borderright;
290 }
291
292 if (elements_in_window < min_required_elements_)
293 {
295 ++sparse_window_percent;
296 }
297 else
298 {
299
300 hist_rightmost_bin = bin_count_;
301
302 // do iteration on histogram and find threshold
303 for (int i = 0; i < 3; ++i)
304 {
305 // mean
306 hist_mean = 0;
307 for (int bin = 0; bin < hist_rightmost_bin; ++bin)
308 {
309 //std::cout << "V: " << bin << " " << hist_mean << " " << histogram[bin] << " " << elements_in_window << " " << bin_value[bin] << "\n";
310 // immediate division is numerically more stable
311 hist_mean += histogram[bin] / (double) elements_in_window * bin_value[bin];
312 }
313 //hist_mean = hist_mean / elements_in_window;
314
315 // stdev
316 hist_stdev = 0;
317 for (int bin = 0; bin < hist_rightmost_bin; ++bin)
318 {
319 double tmp(bin_value[bin] - hist_mean);
320 hist_stdev += histogram[bin] / (double) elements_in_window * tmp * tmp;
321 }
322 hist_stdev = std::sqrt(hist_stdev);
323
324 //determine new threshold (i.e. the rightmost bin we consider)
325 int estimate = (int) ((hist_mean + hist_stdev * stdev_ - 1) / bin_size + 1);
326 //std::cout << "E: " << hist_mean << " " << hist_stdev << " " << stdev_ << " " << bin_size<< " " << estimate << "\n";
327 hist_rightmost_bin = std::min(estimate, bin_count_);
328 }
329
330 // just avoid division by 0
331 noise = std::max(1.0, hist_mean);
332 }
333
334 // store result
335 stn_estimates_[window_count] = (*window_pos_center).getIntensity() / noise;
336
337
338
339 // advance the window center by one datapoint
340 ++window_pos_center;
341 ++window_count;
342 // update progress
344
345 } // end while
346
348
349 sparse_window_percent = sparse_window_percent * 100 / window_count;
350 // warn if percentage of sparse windows is above 20%
351 if (sparse_window_percent > 20)
352 {
353 OPENMS_LOG_WARN << "WARNING in SignalToNoiseEstimatorMeanIterative: "
354 << sparse_window_percent
355 << "% of all windows were sparse. You should consider increasing 'win_len' or increasing 'min_required_elements'"
356 << " You should also check the MaximalIntensity value (or the parameters for its heuristic estimation)"
357 << " If it is too low, then too many high intensity peaks will be discarded, which leads to a sparse window!"
358 << std::endl;
359 }
360
361 return;
362
363 } // end of shiftWindow_
364
366 void updateMembers_() override
367 {
368 max_intensity_ = (double)param_.getValue("max_intensity");
369 auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
370 auto_max_percentile_ = param_.getValue("auto_max_percentile");
371 auto_mode_ = param_.getValue("auto_mode");
372 win_len_ = (double)param_.getValue("win_len");
373 bin_count_ = param_.getValue("bin_count");
374 stdev_ = (double)param_.getValue("stdev_mp");
375 min_required_elements_ = param_.getValue("min_required_elements");
376 noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
377 stn_estimates_.clear();
378 }
379
389 double win_len_;
393 double stdev_;
399
400
401
402
403 };
404
405} // namespace OpenMS
406
#define OPENMS_LOG_WARN
Macro for warnings.
Definition LogStream.h:550
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.
Param param_
Container for current parameters.
Definition DefaultParamHandler.h:139
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!
Definition DefaultParamHandler.h:146
void setName(const String &name)
Mutable access to the name.
Invalid value exception.
Definition Exception.h:306
const ParamValue & getValue(const std::string &key) const
Returns a value of a parameter.
void setMaxFloat(const std::string &key, double max)
Sets the maximum value for the floating point or floating point list parameter key.
void setMaxInt(const std::string &key, int max)
Sets the maximum value for the integer or integer list parameter key.
void setMinInt(const std::string &key, int min)
Sets the minimum value for the integer or integer list parameter key.
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
void setMinFloat(const std::string &key, double min)
Sets the minimum value for the floating point or floating point list parameter key.
float IntensityType
Intensity type.
Definition Peak2D.h:37
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress(UInt64 bytes_processed=0) const
Estimates the signal/noise (S/N) ratio of each data point in a scan based on an iterative scheme whic...
Definition SignalToNoiseEstimatorMeanIterative.h:47
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition SignalToNoiseEstimatorMeanIterative.h:58
SignalToNoiseEstimatorMeanIterative()
default constructor
Definition SignalToNoiseEstimatorMeanIterative.h:65
double win_len_
range of data points which belong to a window in Thomson
Definition SignalToNoiseEstimatorMeanIterative.h:389
double stdev_
multiplier for the stdev of intensities
Definition SignalToNoiseEstimatorMeanIterative.h:393
double noise_for_empty_window_
Definition SignalToNoiseEstimatorMeanIterative.h:398
~SignalToNoiseEstimatorMeanIterative() override
Destructor.
Definition SignalToNoiseEstimatorMeanIterative.h:132
SignalToNoiseEstimatorMeanIterative(const SignalToNoiseEstimatorMeanIterative &source)
Copy Constructor.
Definition SignalToNoiseEstimatorMeanIterative.h:109
double max_intensity_
maximal intensity considered during binning (values above get discarded)
Definition SignalToNoiseEstimatorMeanIterative.h:381
double auto_max_percentile_
parameter for initial automatic estimation of "max_intensity_" percentile or a stdev
Definition SignalToNoiseEstimatorMeanIterative.h:385
void computeSTN_(const Container &c) override
Definition SignalToNoiseEstimatorMeanIterative.h:143
void updateMembers_() override
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed
Definition SignalToNoiseEstimatorMeanIterative.h:366
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition SignalToNoiseEstimatorMeanIterative.h:395
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition SignalToNoiseEstimatorMeanIterative.h:59
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition SignalToNoiseEstimatorMeanIterative.h:61
int auto_mode_
determines which method shall be used for estimating "max_intensity_". valid are MANUAL=-1,...
Definition SignalToNoiseEstimatorMeanIterative.h:387
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation
Definition SignalToNoiseEstimatorMeanIterative.h:52
@ MANUAL
Definition SignalToNoiseEstimatorMeanIterative.h:52
@ AUTOMAXBYSTDEV
Definition SignalToNoiseEstimatorMeanIterative.h:52
@ AUTOMAXBYPERCENT
Definition SignalToNoiseEstimatorMeanIterative.h:52
SignalToNoiseEstimatorMeanIterative & operator=(const SignalToNoiseEstimatorMeanIterative &source)
Definition SignalToNoiseEstimatorMeanIterative.h:119
int bin_count_
number of bins in the histogram
Definition SignalToNoiseEstimatorMeanIterative.h:391
double auto_max_stdev_Factor_
parameter for initial automatic estimation of "max_intensity_": a stdev multiplier
Definition SignalToNoiseEstimatorMeanIterative.h:383
This class represents the abstract base class of a signal to noise estimator.
Definition SignalToNoiseEstimator.h:33
double variance
variance of estimated Gaussian
Definition SignalToNoiseEstimator.h:108
PeakIterator::value_type PeakType
Definition SignalToNoiseEstimator.h:40
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition SignalToNoiseEstimator.h:60
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean & stdev of intensities of a spectrum
Definition SignalToNoiseEstimator.h:113
double mean
mean of estimated Gaussian
Definition SignalToNoiseEstimator.h:107
std::vector< double > stn_estimates_
stores the noise estimate for each peak
Definition SignalToNoiseEstimator.h:146
Container::const_iterator PeakIterator
Definition SignalToNoiseEstimator.h:39
protected struct to store parameters my, sigma for a Gaussian distribution
Definition SignalToNoiseEstimator.h:106
A more convenient string class.
Definition String.h:34
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19