OpenMS
GaussFilterAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Eva Lange $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
41 
42 #include <cmath>
43 #include <vector>
44 
45 namespace OpenMS
46 {
69 // #define DEBUG_FILTERING
70 
71  class OPENMS_DLLAPI GaussFilterAlgorithm
72  {
73 public:
76 
79 
// Body of the spectrum overload of filter(): smooths the m/z / intensity data
// of `spectrum` and stores the result back into it.
// Returns the value of the iterator-based filter(), i.e. whether at least one
// smoothed intensity is non-zero.
// NOTE(review): the signature line and the declarations of mz_array and
// intensity_array are not part of this listing (they were dropped by the
// documentation export) — confirm against the original header.
84  {
85  // create new arrays for mz / intensity data and set their size
// both output arrays are sized from the input m/z array
88  mz_array->data.resize(spectrum->getMZArray()->data.size());
89  intensity_array->data.resize(spectrum->getMZArray()->data.size());
90 
91  // apply the filter
// input: [begin, end) of the m/z array plus the start of the intensity array;
// output: the freshly resized mz_array / intensity_array
92  bool ret_val = filter(
93  spectrum->getMZArray()->data.begin(),
94  spectrum->getMZArray()->data.end(),
95  spectrum->getIntensityArray()->data.begin(),
96  mz_array->data.begin(), intensity_array->data.begin()
97  );
98  // set the data of the spectrum to the new mz / int arrays
99  spectrum->setMZArray(mz_array);
100  spectrum->setIntensityArray(intensity_array);
101  return ret_val;
102  }
103 
// Body of the chromatogram overload of filter(): smooths the time / intensity
// data of `chromatogram` and stores the result back into it.
// Returns the value of the iterator-based filter(), i.e. whether at least one
// smoothed intensity is non-zero.
// NOTE(review): the signature line and the declarations of rt_array and
// intensity_array are not part of this listing (they were dropped by the
// documentation export) — confirm against the original header.
108  {
109  // create new arrays for rt / intensity data and set their size
// both output arrays are sized from the input time array
112  rt_array->data.resize(chromatogram->getTimeArray()->data.size());
113  intensity_array->data.resize(chromatogram->getTimeArray()->data.size());
114 
115  // apply the filter
// input: [begin, end) of the time array plus the start of the intensity array;
// output: the freshly resized rt_array / intensity_array
116  bool ret_val = filter(
117  chromatogram->getTimeArray()->data.begin(),
118  chromatogram->getTimeArray()->data.end(),
119  chromatogram->getIntensityArray()->data.begin(),
120  rt_array->data.begin(), intensity_array->data.begin()
121  );
122  // set the data of the chromatogram to the new rt / int arrays
123  chromatogram->setTimeArray(rt_array);
124  chromatogram->setIntensityArray(intensity_array);
125  return ret_val;
126  }
127 
133  template <typename ConstIterT, typename IterT>
134  bool filter(
135  ConstIterT mz_in_start,
136  ConstIterT mz_in_end,
137  ConstIterT int_in_start,
138  IterT mz_out,
139  IterT int_out)
140  {
141  bool found_signal = false;
142 
143  ConstIterT mz_it = mz_in_start;
144  ConstIterT int_it = int_in_start;
145  for (; mz_it != mz_in_end; mz_it++, int_it++)
146  {
147  // if ppm tolerance is used, calculate a reasonable width value for this m/z
148  if (use_ppm_tolerance_)
149  {
150  initialize(Math::ppmToMass(ppm_tolerance_, *mz_it), spacing_, ppm_tolerance_, use_ppm_tolerance_);
151  }
152 
153  double new_int = integrate_(mz_it, int_it, mz_in_start, mz_in_end);
154 
155  // store new intensity and m/z into output iterator
156  *mz_out = *mz_it;
157  *int_out = new_int;
158  ++mz_out;
159  ++int_out;
160 
161  if (fabs(new_int) > 0) found_signal = true;
162  }
163  return found_signal;
164  }
165 
166  void initialize(double gaussian_width, double spacing, double ppm_tolerance, bool use_ppm_tolerance);
167 
168 protected:
169 
171  std::vector<double> coeffs_;
173  double sigma_;
175  double spacing_;
176 
177  // tolerance in ppm
180 
182  template <typename InputPeakIterator>
183  double integrate_(InputPeakIterator x /* mz */, InputPeakIterator y /* int */, InputPeakIterator first, InputPeakIterator last)
184  {
185  double v = 0.;
186  // norm the gaussian kernel area to one
187  double norm = 0.;
188  Size middle = coeffs_.size();
189 
190  double start_pos = (( (*x) - (middle * spacing_)) > (*first)) ? ((*x) - (middle * spacing_)) : (*first);
191  double end_pos = (( (*x) + (middle * spacing_)) < (*(last - 1))) ? ((*x) + (middle * spacing_)) : (*(last - 1));
192 
193  InputPeakIterator help_x = x;
194  InputPeakIterator help_y = y;
195 #ifdef DEBUG_FILTERING
196 
197  std::cout << "integrate from middle to start_pos " << *help_x << " until " << start_pos << std::endl;
198 #endif
199 
200  //integrate from middle to start_pos
201  while ((help_x != first) && (*(help_x - 1) > start_pos))
202  {
203  // search for the corresponding datapoint of help in the gaussian (take the left most adjacent point)
204  double distance_in_gaussian = fabs(*x - *help_x);
205  Size left_position = (Size)floor(distance_in_gaussian / spacing_);
206 
207  // search for the true left adjacent data point (because of rounding errors)
208  for (int j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
209  {
210  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
211  {
212  left_position -= j;
213  break;
214  }
215 
216  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
217  {
218  left_position += j;
219  break;
220  }
221  }
222 
223  // interpolate between the left and right data points in the gaussian to get the true value at position distance_in_gaussian
224  Size right_position = left_position + 1;
225  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
226  // check if the right data point in the gaussian exists
227  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
228  : coeffs_[left_position];
229 #ifdef DEBUG_FILTERING
230 
231  std::cout << "distance_in_gaussian " << distance_in_gaussian << std::endl;
232  std::cout << " right_position " << right_position << std::endl;
233  std::cout << " left_position " << left_position << std::endl;
234  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
235  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
236  std::cout << "interpolated value left " << coeffs_right << std::endl;
237 #endif
238 
239 
240  // search for the corresponding datapoint for (help-1) in the gaussian (take the left most adjacent point)
241  distance_in_gaussian = fabs((*x) - (*(help_x - 1)));
242  left_position = (Size)floor(distance_in_gaussian / spacing_);
243 
244  // search for the true left adjacent data point (because of rounding errors)
245  for (UInt j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
246  {
247  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
248  {
249  left_position -= j;
250  break;
251  }
252 
253  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
254  {
255  left_position += j;
256  break;
257  }
258  }
259 
260  // start the interpolation for the true value in the gaussian
261  right_position = left_position + 1;
262  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
263  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
264  : coeffs_[left_position];
265 #ifdef DEBUG_FILTERING
266 
267  std::cout << " help_x-1 " << *(help_x - 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
268  std::cout << " right_position " << right_position << std::endl;
269  std::cout << " left_position " << left_position << std::endl;
270  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
271  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
272  std::cout << "interpolated value right " << coeffs_left << std::endl;
273 
274  std::cout << " intensity " << fabs(*(help_x - 1) - (*help_x)) / 2. << " * " << *(help_y - 1) << " * " << coeffs_left << " + " << *help_y << "* " << coeffs_right
275  << std::endl;
276 #endif
277 
278 
279  norm += fabs((*(help_x - 1)) - (*help_x)) / 2. * (coeffs_left + coeffs_right);
280 
281  v += fabs((*(help_x - 1)) - (*help_x)) / 2. * (*(help_y - 1) * coeffs_left + (*help_y) * coeffs_right);
282  --help_x;
283  --help_y;
284  }
285 
286 
287  //integrate from middle to end_pos
288  help_x = x;
289  help_y = y;
290 #ifdef DEBUG_FILTERING
291 
292  std::cout << "integrate from middle to endpos " << *help_x << " until " << end_pos << std::endl;
293 #endif
294 
295  while ((help_x != (last - 1)) && (*(help_x + 1) < end_pos))
296  {
297  // search for the corresponding datapoint for help in the gaussian (take the left most adjacent point)
298  double distance_in_gaussian = fabs((*x) - (*help_x));
299  int left_position = (UInt)floor(distance_in_gaussian / spacing_);
300 
301  // search for the true left adjacent data point (because of rounding errors)
302  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
303  {
304  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
305  {
306  left_position -= j;
307  break;
308  }
309 
310  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
311  {
312  left_position += j;
313  break;
314  }
315  }
316  // start the interpolation for the true value in the gaussian
317  Size right_position = left_position + 1;
318  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
319  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
320  : coeffs_[left_position];
321 
322 #ifdef DEBUG_FILTERING
323 
324  std::cout << " help " << *help_x << " distance_in_gaussian " << distance_in_gaussian << std::endl;
325  std::cout << " left_position " << left_position << std::endl;
326  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
327  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
328  std::cout << "interpolated value left " << coeffs_left << std::endl;
329 #endif
330 
331  // search for the corresponding datapoint for (help+1) in the gaussian (take the left most adjacent point)
332  distance_in_gaussian = fabs((*x) - (*(help_x + 1)));
333  left_position = (UInt)floor(distance_in_gaussian / spacing_);
334 
335  // search for the true left adjacent data point (because of rounding errors)
336  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
337  {
338  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
339  {
340  left_position -= j;
341  break;
342  }
343 
344  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
345  {
346  left_position += j;
347  break;
348  }
349  }
350 
351  // start the interpolation for the true value in the gaussian
352  right_position = left_position + 1;
353  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
354  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
355  : coeffs_[left_position];
356 #ifdef DEBUG_FILTERING
357 
358  std::cout << " (help + 1) " << *(help_x + 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
359  std::cout << " left_position " << left_position << std::endl;
360  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
361  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
362  std::cout << "interpolated value right " << coeffs_right << std::endl;
363 
364  std::cout << " intensity " << fabs(*help_x - *(help_x + 1)) / 2.
365  << " * " << *help_y << " * " << coeffs_left << " + " << *(help_y + 1)
366  << "* " << coeffs_right
367  << std::endl;
368 #endif
369  norm += fabs((*help_x) - (*(help_x + 1)) ) / 2. * (coeffs_left + coeffs_right);
370 
371  v += fabs((*help_x) - (*(help_x + 1)) ) / 2. * ((*help_y) * coeffs_left + (*(help_y + 1)) * coeffs_right);
372  ++help_x;
373  ++help_y;
374  }
375 
376  if (v > 0)
377  {
378  return v / norm;
379  }
380  else
381  {
382  return 0;
383  }
384  }
385 
386  };
387 
388 } // namespace OpenMS
This class represents a Gaussian lowpass-filter which works on uniform as well as on non-uniform prof...
Definition: GaussFilterAlgorithm.h:72
std::vector< double > coeffs_
Coefficients.
Definition: GaussFilterAlgorithm.h:171
GaussFilterAlgorithm()
Constructor.
void initialize(double gaussian_width, double spacing, double ppm_tolerance, bool use_ppm_tolerance)
double sigma_
The standard deviation.
Definition: GaussFilterAlgorithm.h:173
bool filter(ConstIterT mz_in_start, ConstIterT mz_in_end, ConstIterT int_in_start, IterT mz_out, IterT int_out)
Smooths two data arrays.
Definition: GaussFilterAlgorithm.h:134
double integrate_(InputPeakIterator x, InputPeakIterator y, InputPeakIterator first, InputPeakIterator last)
Computes the convolution of the raw data at position x and the gaussian kernel.
Definition: GaussFilterAlgorithm.h:183
double spacing_
The spacing of the pre-tabulated kernel coefficients.
Definition: GaussFilterAlgorithm.h:175
double ppm_tolerance_
Definition: GaussFilterAlgorithm.h:179
virtual ~GaussFilterAlgorithm()
Destructor.
bool use_ppm_tolerance_
Definition: GaussFilterAlgorithm.h:178
bool filter(OpenMS::Interfaces::SpectrumPtr spectrum)
Smooths a Spectrum containing profile data.
Definition: GaussFilterAlgorithm.h:83
bool filter(OpenMS::Interfaces::ChromatogramPtr chromatogram)
Smooths a Chromatogram containing profile data.
Definition: GaussFilterAlgorithm.h:107
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
boost::shared_ptr< Chromatogram > ChromatogramPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:156
boost::shared_ptr< BinaryDataArray > BinaryDataArrayPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:80
boost::shared_ptr< Spectrum > SpectrumPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:236
The datastructures used by the OpenSwath interfaces.
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:73
T ppmToMass(T ppm, T mz_ref)
Compute the mass diff in [Th], given a ppm value and a reference point.
Definition: MathFunctions.h:335
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
double norm(T beg, T end)
compute the Euclidean norm of the vector
Definition: StatsHelpers.h:57