OpenMS
WindowMower.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Mathias Walzer $
6 // $Authors: Mathias Walzer, Timo Sachsenberg$
7 // --------------------------------------------------------------------------
8 //
9 #pragma once
10 
15 
16 #include <set>
17 
18 namespace OpenMS
19 {
20 
28  class OPENMS_DLLAPI WindowMower :
29  public DefaultParamHandler
30  {
31 public:
32 
33  // @name Constructors, destructors and assignment operators
34  // @{
38  ~WindowMower() override;
39 
41  WindowMower(const WindowMower& source);
44  // @}
45 
47  template <typename SpectrumType>
49  {
50  typedef typename SpectrumType::ConstIterator ConstIterator;
51 
52  windowsize_ = (double)param_.getValue("windowsize");
53  peakcount_ = (UInt)param_.getValue("peakcount");
54 
55  //copy spectrum
56  SpectrumType old_spectrum = spectrum;
57  old_spectrum.sortByPosition();
58 
59  //find high peak positions
60  bool end = false;
61  std::set<double> positions;
62  for (ConstIterator it = old_spectrum.begin(); it != old_spectrum.end(); ++it)
63  {
64  // copy the window from the spectrum
65  SpectrumType window;
66  for (ConstIterator it2 = it; (it2->getPosition() - it->getPosition() < windowsize_); )
67  {
68  window.push_back(*it2);
69  if (++it2 == old_spectrum.end())
70  {
71  end = true;
72  break;
73  }
74  }
75 
76  //extract peakcount most intense peaks
77  window.sortByIntensity(true);
78  for (Size i = 0; i < peakcount_; ++i)
79  {
80  if (i < window.size())
81  {
82  positions.insert(window[i].getMZ());
83  }
84  }
85  //abort at the end of the spectrum
86  if (end) break;
87  }
88 
89  // select peaks that were retained
90  std::vector<Size> indices;
91  for (ConstIterator it = spectrum.begin(); it != spectrum.end(); ++it)
92  {
93  if (positions.find(it->getMZ()) != positions.end())
94  {
95  Size index(it - spectrum.begin());
96  indices.push_back(index);
97  }
98  }
99  spectrum.select(indices);
100  }
101 
103 
104  void filterPeakMap(PeakMap& exp);
105 
106  // jumping window version (faster)
107  template <typename SpectrumType>
109  {
110  if (spectrum.empty())
111  {
112  return;
113  }
114 
115  spectrum.sortByPosition();
116 
117  windowsize_ = static_cast<double>(param_.getValue("windowsize"));
118  peakcount_ = static_cast<UInt>(param_.getValue("peakcount"));
119 
120  // copy meta data
121  SpectrumType out = spectrum;
122  out.clear(false);
123 
124  SpectrumType peaks_in_window;
125  double window_start = spectrum[0].getMZ();
126  for (Size i = 0; i != spectrum.size(); ++i)
127  {
128  if (spectrum[i].getMZ() - window_start < windowsize_) // collect peaks in window
129  {
130  peaks_in_window.push_back(spectrum[i]);
131  }
132  else // step over window boundaries
133  {
134  window_start = spectrum[i].getMZ(); // as there might be large gaps between peaks resulting in empty windows, set new window start to next peak
135 
136  // copy N highest peaks to out
137  if (peaks_in_window.size() > peakcount_)
138  {
139  std::partial_sort(peaks_in_window.begin(), peaks_in_window.begin() + peakcount_, peaks_in_window.end(), [](auto &left, auto &right) {typename SpectrumType::PeakType::IntensityLess cmp; return cmp(right, left);});
140  copy(peaks_in_window.begin(), peaks_in_window.begin() + peakcount_, back_inserter(out));
141  }
142  else
143  {
144  std::sort(peaks_in_window.begin(), peaks_in_window.end(), [](auto &left, auto &right) {typename SpectrumType::PeakType::IntensityLess cmp; return cmp(right, left);});
145  copy(peaks_in_window.begin(), peaks_in_window.end(), back_inserter(out));
146  }
147 
148  peaks_in_window.clear(false);
149  peaks_in_window.push_back(spectrum[i]);
150  }
151  }
152 
153  if (!peaks_in_window.empty()) // last window is not empty
154  {
155  // Note that the last window might be much smaller than windowsize.
156  // Therefore the number of peaks copied from this window should be adapted accordingly.
157  // Otherwise a lot of noise peaks are copied from each end of a spectrum.
158 
159  double last_window_size = peaks_in_window.back().getMZ() - window_start;
160  double last_window_size_fraction = last_window_size / windowsize_;
161  Size last_window_peakcount = static_cast<Size>(std::round(last_window_size_fraction * peakcount_));
162 
163  if (peaks_in_window.size() > last_window_peakcount)
164  { // sort for last_window_peakcount highest peaks
165  std::partial_sort(peaks_in_window.begin(), peaks_in_window.begin() + last_window_peakcount, peaks_in_window.end(),
166  [](auto &left, auto &right) {typename SpectrumType::PeakType::IntensityLess cmp; return cmp(right, left);});
167  std::copy(peaks_in_window.begin(), peaks_in_window.begin() + last_window_peakcount, back_inserter(out));
168  }
169  else
170  {
171  std::copy(peaks_in_window.begin(), peaks_in_window.end(), std::back_inserter(out));
172  }
173  }
174 
175  // select peaks that were retained
176  std::vector<Size> indices;
177  for (typename SpectrumType::ConstIterator it = spectrum.begin(); it != spectrum.end(); ++it)
178  {
179  if (std::find(out.begin(), out.end(), *it) != out.end())
180  {
181  Size index(it - spectrum.begin());
182  indices.push_back(index);
183  }
184  }
185  spectrum.select(indices);
186 
187  return;
188  }
189 
190  //TODO reimplement DefaultParamHandler::updateMembers_()
191 
192 private:
193  double windowsize_;
195  };
196 
197 }
198 
199 
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
ContainerType::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSSpectrum.h:110
MSSpectrum & select(const std::vector< Size > &indices)
void sortByPosition()
Lexicographically sorts the peaks by their position.
void sortByIntensity(bool reverse=false)
Lexicographically sorts the peaks by their intensity.
void clear(bool clear_meta_data)
Clears all data and meta data.
Retains the highest peaks in a sliding or jumping window.
Definition: WindowMower.h:30
void filterPeakSpectrumForTopNInJumpingWindow(SpectrumType &spectrum)
Definition: WindowMower.h:108
void filterPeakSpectrum(PeakSpectrum &spectrum)
WindowMower(const WindowMower &source)
copy constructor
void filterPeakMap(PeakMap &exp)
WindowMower()
default constructor
double windowsize_
Definition: WindowMower.h:193
void filterPeakSpectrumForTopNInSlidingWindow(SpectrumType &spectrum)
sliding window version (slower)
Definition: WindowMower.h:48
UInt peakcount_
Definition: WindowMower.h:194
~WindowMower() override
destructor
WindowMower & operator=(const WindowMower &source)
assignment operator
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
T round(T x)
Rounds the value.
Definition: MathFunctions.h:218
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19