OpenMS  2.6.0
GaussFilterAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Eva Lange $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
41 #include <cmath>
42 #include <vector>
43 
44 namespace OpenMS
45 {
68 // #define DEBUG_FILTERING
69 
70  class OPENMS_DLLAPI GaussFilterAlgorithm
71  {
72 public:
75 
77  virtual ~GaussFilterAlgorithm();
78 
83  {
84  // create new arrays for mz / intensity data and set their size
87  mz_array->data.resize(spectrum->getMZArray()->data.size());
88  intensity_array->data.resize(spectrum->getMZArray()->data.size());
89 
90  // apply the filter
91  bool ret_val = filter(
92  spectrum->getMZArray()->data.begin(),
93  spectrum->getMZArray()->data.end(),
94  spectrum->getIntensityArray()->data.begin(),
95  mz_array->data.begin(), intensity_array->data.begin()
96  );
97  // set the data of the spectrum to the new mz / int arrays
98  spectrum->setMZArray(mz_array);
99  spectrum->setIntensityArray(intensity_array);
100  return ret_val;
101  }
102 
107  {
108  // create new arrays for rt / intensity data and set their size
111  rt_array->data.resize(chromatogram->getTimeArray()->data.size());
112  intensity_array->data.resize(chromatogram->getTimeArray()->data.size());
113 
114  // apply the filter
115  bool ret_val = filter(
116  chromatogram->getTimeArray()->data.begin(),
117  chromatogram->getTimeArray()->data.end(),
118  chromatogram->getIntensityArray()->data.begin(),
119  rt_array->data.begin(), intensity_array->data.begin()
120  );
121  // set the data of the chromatogram to the new rt / int arrays
122  chromatogram->setTimeArray(rt_array);
123  chromatogram->setIntensityArray(intensity_array);
124  return ret_val;
125  }
126 
132  template <typename ConstIterT, typename IterT>
133  bool filter(
134  ConstIterT mz_in_start,
135  ConstIterT mz_in_end,
136  ConstIterT int_in_start,
137  IterT mz_out,
138  IterT int_out)
139  {
140  bool found_signal = false;
141 
142  ConstIterT mz_it = mz_in_start;
143  ConstIterT int_it = int_in_start;
144  for (; mz_it != mz_in_end; mz_it++, int_it++)
145  {
146  // if ppm tolerance is used, calculate a reasonable width value for this m/z
147  if (use_ppm_tolerance_)
148  {
149  initialize((*mz_it) * ppm_tolerance_ * 10e-6, spacing_, ppm_tolerance_, use_ppm_tolerance_ );
150  }
151 
152  double new_int = integrate_(mz_it, int_it, mz_in_start, mz_in_end);
153 
154  // store new intensity and m/z into output iterator
155  *mz_out = *mz_it;
156  *int_out = new_int;
157  ++mz_out;
158  ++int_out;
159 
160  if (fabs(new_int) > 0) found_signal = true;
161  }
162  return found_signal;
163  }
164 
165  void initialize(double gaussian_width, double spacing, double ppm_tolerance, bool use_ppm_tolerance);
166 
167 protected:
168 
170  std::vector<double> coeffs_;
172  double sigma_;
174  double spacing_;
175 
176  // tolerance in ppm
179 
181  template <typename InputPeakIterator>
182  double integrate_(InputPeakIterator x /* mz */, InputPeakIterator y /* int */, InputPeakIterator first, InputPeakIterator last)
183  {
184  double v = 0.;
185  // norm the gaussian kernel area to one
186  double norm = 0.;
187  Size middle = coeffs_.size();
188 
189  double start_pos = (( (*x) - (middle * spacing_)) > (*first)) ? ((*x) - (middle * spacing_)) : (*first);
190  double end_pos = (( (*x) + (middle * spacing_)) < (*(last - 1))) ? ((*x) + (middle * spacing_)) : (*(last - 1));
191 
192  InputPeakIterator help_x = x;
193  InputPeakIterator help_y = y;
194 #ifdef DEBUG_FILTERING
195 
196  std::cout << "integrate from middle to start_pos " << *help_x << " until " << start_pos << std::endl;
197 #endif
198 
199  //integrate from middle to start_pos
200  while ((help_x != first) && (*(help_x - 1) > start_pos))
201  {
202  // search for the corresponding datapoint of help in the gaussian (take the left most adjacent point)
203  double distance_in_gaussian = fabs(*x - *help_x);
204  Size left_position = (Size)floor(distance_in_gaussian / spacing_);
205 
206  // search for the true left adjacent data point (because of rounding errors)
207  for (int j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
208  {
209  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
210  {
211  left_position -= j;
212  break;
213  }
214 
215  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
216  {
217  left_position += j;
218  break;
219  }
220  }
221 
222  // interpolate between the left and right data points in the gaussian to get the true value at position distance_in_gaussian
223  Size right_position = left_position + 1;
224  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
225  // check if the right data point in the gaussian exists
226  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
227  : coeffs_[left_position];
228 #ifdef DEBUG_FILTERING
229 
230  std::cout << "distance_in_gaussian " << distance_in_gaussian << std::endl;
231  std::cout << " right_position " << right_position << std::endl;
232  std::cout << " left_position " << left_position << std::endl;
233  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
234  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
235  std::cout << "interpolated value left " << coeffs_right << std::endl;
236 #endif
237 
238 
239  // search for the corresponding datapoint for (help-1) in the gaussian (take the left most adjacent point)
240  distance_in_gaussian = fabs((*x) - (*(help_x - 1)));
241  left_position = (Size)floor(distance_in_gaussian / spacing_);
242 
243  // search for the true left adjacent data point (because of rounding errors)
244  for (UInt j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
245  {
246  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
247  {
248  left_position -= j;
249  break;
250  }
251 
252  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
253  {
254  left_position += j;
255  break;
256  }
257  }
258 
259  // start the interpolation for the true value in the gaussian
260  right_position = left_position + 1;
261  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
262  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
263  : coeffs_[left_position];
264 #ifdef DEBUG_FILTERING
265 
266  std::cout << " help_x-1 " << *(help_x - 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
267  std::cout << " right_position " << right_position << std::endl;
268  std::cout << " left_position " << left_position << std::endl;
269  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
270  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
271  std::cout << "interpolated value right " << coeffs_left << std::endl;
272 
273  std::cout << " intensity " << fabs(*(help_x - 1) - (*help_x)) / 2. << " * " << *(help_y - 1) << " * " << coeffs_left << " + " << *help_y << "* " << coeffs_right
274  << std::endl;
275 #endif
276 
277 
278  norm += fabs((*(help_x - 1)) - (*help_x)) / 2. * (coeffs_left + coeffs_right);
279 
280  v += fabs((*(help_x - 1)) - (*help_x)) / 2. * (*(help_y - 1) * coeffs_left + (*help_y) * coeffs_right);
281  --help_x;
282  --help_y;
283  }
284 
285 
286  //integrate from middle to end_pos
287  help_x = x;
288  help_y = y;
289 #ifdef DEBUG_FILTERING
290 
291  std::cout << "integrate from middle to endpos " << *help_x << " until " << end_pos << std::endl;
292 #endif
293 
294  while ((help_x != (last - 1)) && (*(help_x + 1) < end_pos))
295  {
296  // search for the corresponding datapoint for help in the gaussian (take the left most adjacent point)
297  double distance_in_gaussian = fabs((*x) - (*help_x));
298  int left_position = (UInt)floor(distance_in_gaussian / spacing_);
299 
300  // search for the true left adjacent data point (because of rounding errors)
301  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
302  {
303  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
304  {
305  left_position -= j;
306  break;
307  }
308 
309  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
310  {
311  left_position += j;
312  break;
313  }
314  }
315  // start the interpolation for the true value in the gaussian
316  Size right_position = left_position + 1;
317  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
318  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
319  : coeffs_[left_position];
320 
321 #ifdef DEBUG_FILTERING
322 
323  std::cout << " help " << *help_x << " distance_in_gaussian " << distance_in_gaussian << std::endl;
324  std::cout << " left_position " << left_position << std::endl;
325  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
326  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
327  std::cout << "interpolated value left " << coeffs_left << std::endl;
328 #endif
329 
330  // search for the corresponding datapoint for (help+1) in the gaussian (take the left most adjacent point)
331  distance_in_gaussian = fabs((*x) - (*(help_x + 1)));
332  left_position = (UInt)floor(distance_in_gaussian / spacing_);
333 
334  // search for the true left adjacent data point (because of rounding errors)
335  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
336  {
337  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
338  {
339  left_position -= j;
340  break;
341  }
342 
343  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
344  {
345  left_position += j;
346  break;
347  }
348  }
349 
350  // start the interpolation for the true value in the gaussian
351  right_position = left_position + 1;
352  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
353  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
354  : coeffs_[left_position];
355 #ifdef DEBUG_FILTERING
356 
357  std::cout << " (help + 1) " << *(help_x + 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
358  std::cout << " left_position " << left_position << std::endl;
359  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
360  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
361  std::cout << "interpolated value right " << coeffs_right << std::endl;
362 
363  std::cout << " intensity " << fabs(*help_x - *(help_x + 1)) / 2.
364  << " * " << *help_y << " * " << coeffs_left << " + " << *(help_y + 1)
365  << "* " << coeffs_right
366  << std::endl;
367 #endif
368  norm += fabs((*help_x) - (*(help_x + 1)) ) / 2. * (coeffs_left + coeffs_right);
369 
370  v += fabs((*help_x) - (*(help_x + 1)) ) / 2. * ((*help_y) * coeffs_left + (*(help_y + 1)) * coeffs_right);
371  ++help_x;
372  ++help_y;
373  }
374 
375  if (v > 0)
376  {
377  return v / norm;
378  }
379  else
380  {
381  return 0;
382  }
383  }
384 
385  };
386 
387 } // namespace OpenMS
OpenMS::Interfaces::SpectrumPtr
boost::shared_ptr< Spectrum > SpectrumPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:236
DataStructures.h
Types.h
OpenSwath::norm
double norm(T beg, T end)
compute the norm of the vector
Definition: StatsHelpers.h:57
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::GaussFilterAlgorithm::use_ppm_tolerance_
bool use_ppm_tolerance_
Definition: GaussFilterAlgorithm.h:177
Constants.h
OpenMS::Interfaces::BinaryDataArrayPtr
boost::shared_ptr< BinaryDataArray > BinaryDataArrayPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:80
OpenMS::Interfaces::ChromatogramPtr
boost::shared_ptr< Chromatogram > ChromatogramPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:156
OpenMS::GaussFilterAlgorithm::filter
bool filter(OpenMS::Interfaces::ChromatogramPtr chromatogram)
Smoothes an Chromatogram containing profile data.
Definition: GaussFilterAlgorithm.h:106
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::GaussFilterAlgorithm::sigma_
double sigma_
The standard derivation .
Definition: GaussFilterAlgorithm.h:172
OpenMS::GaussFilterAlgorithm::filter
bool filter(ConstIterT mz_in_start, ConstIterT mz_in_end, ConstIterT int_in_start, IterT mz_out, IterT int_out)
Smoothes an two data arrays containing data.
Definition: GaussFilterAlgorithm.h:133
OpenMS::GaussFilterAlgorithm::coeffs_
std::vector< double > coeffs_
Coefficients.
Definition: GaussFilterAlgorithm.h:170
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
ISpectrumAccess.h
OpenMS::GaussFilterAlgorithm::ppm_tolerance_
double ppm_tolerance_
Definition: GaussFilterAlgorithm.h:178
OpenMS::GaussFilterAlgorithm
This class represents a Gaussian lowpass-filter which works on uniform as well as on non-uniform prof...
Definition: GaussFilterAlgorithm.h:70
OpenMS::Interfaces::BinaryDataArray
The datastructures used by the OpenSwath interfaces.
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:72
OpenMS::GaussFilterAlgorithm::filter
bool filter(OpenMS::Interfaces::SpectrumPtr spectrum)
Smoothes an Spectrum containing profile data.
Definition: GaussFilterAlgorithm.h:82
OpenMS::GaussFilterAlgorithm::integrate_
double integrate_(InputPeakIterator x, InputPeakIterator y, InputPeakIterator first, InputPeakIterator last)
Computes the convolution of the raw data at position x and the gaussian kernel.
Definition: GaussFilterAlgorithm.h:182
OpenMS::GaussFilterAlgorithm::spacing_
double spacing_
The spacing of the pre-tabulated kernel coefficients.
Definition: GaussFilterAlgorithm.h:174