OpenMS
EmgScoring.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
44 
46 
47 #include <vector>
48 #include <cmath> // for isnan
49 
50 namespace OpenMS
51 {
52 
60  class EmgScoring
61  {
62 
63  public :
64 
65  EmgScoring() = default;
66 
67  ~EmgScoring() = default;
68 
71  void setFitterParam(const Param& param)
72  {
73  fitter_emg1D_params_ = param;
74  }
75 
78  {
79  return EmgFitter1D().getDefaults();
80  }
81 
83  template<typename SpectrumType, class TransitionT>
84  double calcElutionFitScore(MRMFeature & mrmfeature, MRMTransitionGroup<SpectrumType, TransitionT> & transition_group) const
85  {
86  double avg_score = 0;
87  bool smooth_data = false;
88 
89  for (Size k = 0; k < transition_group.size(); k++)
90  {
91  // get the id, then find the corresponding transition and features within this peakgroup
92  String native_id = transition_group.getChromatograms()[k].getNativeID();
93  Feature f = mrmfeature.getFeature(native_id);
94  OPENMS_PRECONDITION(f.getConvexHulls().size() == 1, "Convex hulls need to have exactly one hull point structure");
95 
96  //TODO think about penalizing aborted fits even more. Currently -1 is just the "lowest" pearson correlation to
97  // a fit that you can have.
98  double fscore = elutionModelFit(f.getConvexHulls()[0].getHullPoints(), smooth_data);
99  avg_score += fscore;
100  }
101 
102  avg_score /= transition_group.size();
103  return avg_score;
104  }
105 
106  // Fxn from FeatureFinderAlgorithmMRM
107  // TODO: check whether we can leave out some of the steps here, e.g. gaussian smoothing
108  double elutionModelFit(const ConvexHull2D::PointArrayType& current_section, bool smooth_data) const
109  {
110  // We need at least 2 datapoints in order to create a fit
111  if (current_section.size() < 2)
112  {
113  return -1;
114  }
115 
116  // local PeakType is a small hack since here we *need* data of type
117  // Peak1D, otherwise our fitter will not accept it.
118  typedef Peak1D LocalPeakType;
119 
120  // -- cut line 301 of FeatureFinderAlgorithmMRM
121  std::vector<LocalPeakType> data_to_fit;
122  prepareFit_(current_section, data_to_fit, smooth_data);
123  std::unique_ptr<InterpolationModel> model_rt;
124  double quality = fitRT_(data_to_fit, model_rt);
125  // cut line 354 of FeatureFinderAlgorithmMRM
126 
127  return quality;
128  }
129 
130  protected:
131  template<class LocalPeakType>
132  double fitRT_(std::vector<LocalPeakType>& rt_input_data, std::unique_ptr<InterpolationModel>& model) const
133  {
134  EmgFitter1D fitter_emg1D;
135  fitter_emg1D.setParameters(fitter_emg1D_params_);
136  // Construct model for rt
137  // NaN is checked in fit1d: if (std::isnan(quality)) quality = -1.0;
138  return fitter_emg1D.fit1d(rt_input_data, model);
139  }
140 
141  // Fxn from FeatureFinderAlgorithmMRM
142  // TODO: check whether we can leave out some of the steps here, e.g. gaussian smoothing
143  template<class LocalPeakType>
144  void prepareFit_(const ConvexHull2D::PointArrayType & current_section, std::vector<LocalPeakType> & data_to_fit, bool smooth_data) const
145  {
146  // typedef Peak1D LocalPeakType;
147  PeakSpectrum filter_spec;
148  // first smooth the data to prevent outliers from destroying the fit
149  for (const auto& pa : current_section)
150  {
151  LocalPeakType p;
152  using IntensityType = typename LocalPeakType::IntensityType;
153  p.setMZ(pa.getX());
154  p.setIntensity(IntensityType(pa.getY()));
155  filter_spec.push_back(p);
156  }
157 
158  // add two peaks at the beginning and at the end for better fit
159  // therefore calculate average distance first
160  std::vector<double> distances;
161  for (Size j = 1; j < filter_spec.size(); ++j)
162  {
163  distances.push_back(filter_spec[j].getMZ() - filter_spec[j - 1].getMZ());
164  }
165  double dist_average = std::accumulate(distances.begin(), distances.end(), 0.0) / (double) distances.size();
166 
167  // append peaks
168  Peak1D new_peak;
169  new_peak.setIntensity(0);
170  new_peak.setMZ(filter_spec.back().getMZ() + dist_average);
171  filter_spec.push_back(new_peak);
172  new_peak.setMZ(filter_spec.back().getMZ() + dist_average);
173  filter_spec.push_back(new_peak);
174  new_peak.setMZ(filter_spec.back().getMZ() + dist_average);
175  filter_spec.push_back(new_peak);
176 
177  // prepend peaks
178  new_peak.setMZ(filter_spec.front().getMZ() - dist_average);
179  filter_spec.insert(filter_spec.begin(), new_peak);
180  new_peak.setMZ(filter_spec.front().getMZ() - dist_average);
181  filter_spec.insert(filter_spec.begin(), new_peak);
182  new_peak.setMZ(filter_spec.front().getMZ() - dist_average);
183  filter_spec.insert(filter_spec.begin(), new_peak);
184 
185  // To get an estimate of the peak quality, we probably should not smooth
186  // and/or transform the data.
187  if (smooth_data)
188  {
189  GaussFilter filter;
190  Param filter_param(filter.getParameters());
191  filter.setParameters(filter_param);
192  filter_param.setValue("gaussian_width", 4 * dist_average);
193  filter.setParameters(filter_param);
194  filter.filter(filter_spec);
195  }
196 
197  // transform the data for fitting and fit RT profile
198  for (Size j = 0; j != filter_spec.size(); ++j)
199  {
200  LocalPeakType p;
201  p.setPosition(filter_spec[j].getMZ());
202  p.setIntensity(filter_spec[j].getIntensity());
203  data_to_fit.push_back(p);
204  }
205  }
206 
208  };
209 
210 }
211 
std::vector< PointType > PointArrayType
Definition: ConvexHull2D.h:76
const Param & getParameters() const
Non-mutable access to the parameters.
void setParameters(const Param &param)
Sets the parameters.
const Param & getDefaults() const
Non-mutable access to the default parameters.
Exponentially modified gaussian distribution fitter (1-dim.) using Levenberg-Marquardt algorithm (Eig...
Definition: EmgFitter1D.h:49
QualityType fit1d(const RawDataArrayType &range, std::unique_ptr< InterpolationModel > &model) override
return interpolation model
Scoring of an elution peak using an exponentially modified gaussian distribution model.
Definition: EmgScoring.h:61
EmgScoring()=default
void setFitterParam(const Param &param)
Definition: EmgScoring.h:71
double elutionModelFit(const ConvexHull2D::PointArrayType &current_section, bool smooth_data) const
Definition: EmgScoring.h:108
Param fitter_emg1D_params_
Definition: EmgScoring.h:207
Param getDefaults()
Get default params for the Emg1D fitting.
Definition: EmgScoring.h:77
double calcElutionFitScore(MRMFeature &mrmfeature, MRMTransitionGroup< SpectrumType, TransitionT > &transition_group) const
calculate the elution profile fit score
Definition: EmgScoring.h:84
double fitRT_(std::vector< LocalPeakType > &rt_input_data, std::unique_ptr< InterpolationModel > &model) const
Definition: EmgScoring.h:132
~EmgScoring()=default
void prepareFit_(const ConvexHull2D::PointArrayType &current_section, std::vector< LocalPeakType > &data_to_fit, bool smooth_data) const
Definition: EmgScoring.h:144
An LC-MS feature.
Definition: Feature.h:72
const std::vector< ConvexHull2D > & getConvexHulls() const
Non-mutable access to the convex hulls.
This class represents a Gaussian lowpass-filter which works on uniform as well as on non-uniform prof...
Definition: GaussFilter.h:73
void filter(MSSpectrum &spectrum)
Smoothes an MSSpectrum containing profile data.
A multi-chromatogram MRM feature.
Definition: MRMFeature.h:52
Feature & getFeature(const String &key)
get a specified feature
The representation of a group of transitions in a targeted proteomics experiment.
Definition: MRMTransitionGroup.h:68
Size size() const
Definition: MRMTransitionGroup.h:125
std::vector< ChromatogramType > & getChromatograms()
Definition: MRMTransitionGroup.h:186
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
Management and storage of parameters / INI files.
Definition: Param.h:70
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:110
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:119
A more convenient string class.
Definition: String.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition: openms/include/OpenMS/CONCEPT/Macros.h:120
const double k
Definition: Constants.h:158
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48