OpenMS
EmgScoring.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Hannes Roest $
6 // $Authors: Hannes Roest $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
14 
18 
20 
21 #include <vector>
22 #include <cmath> // for isnan
23 
24 namespace OpenMS
25 {
26 
34  class EmgScoring
35  {
36 
37  public :
38 
39  EmgScoring() = default;
40 
41  ~EmgScoring() = default;
42 
45  void setFitterParam(const Param& param)
46  {
47  fitter_emg1D_params_ = param;
48  }
49 
52  {
53  return EmgFitter1D().getDefaults();
54  }
55 
57  template<typename SpectrumType, class TransitionT>
58  double calcElutionFitScore(MRMFeature & mrmfeature, MRMTransitionGroup<SpectrumType, TransitionT> & transition_group) const
59  {
60  double avg_score = 0;
61  bool smooth_data = false;
62 
63  for (Size k = 0; k < transition_group.size(); k++)
64  {
65  // get the id, then find the corresponding transition and features within this peakgroup
66  String native_id = transition_group.getChromatograms()[k].getNativeID();
67  Feature f = mrmfeature.getFeature(native_id);
68  OPENMS_PRECONDITION(f.getConvexHulls().size() == 1, "Convex hulls need to have exactly one hull point structure");
69 
70  //TODO think about penalizing aborted fits even more. Currently -1 is just the "lowest" pearson correlation to
71  // a fit that you can have.
72  double fscore = elutionModelFit(f.getConvexHulls()[0].getHullPoints(), smooth_data);
73  avg_score += fscore;
74  }
75 
76  avg_score /= transition_group.size();
77  return avg_score;
78  }
79 
80  // Fxn from FeatureFinderAlgorithmMRM
81  // TODO: check whether we can leave out some of the steps here, e.g. gaussian smoothing
82  double elutionModelFit(const ConvexHull2D::PointArrayType& current_section, bool smooth_data) const
83  {
84  // We need at least 2 datapoints in order to create a fit
85  if (current_section.size() < 2)
86  {
87  return -1;
88  }
89 
90  // local PeakType is a small hack since here we *need* data of type
91  // Peak1D, otherwise our fitter will not accept it.
92  typedef Peak1D LocalPeakType;
93 
94  // -- cut line 301 of FeatureFinderAlgorithmMRM
95  std::vector<LocalPeakType> data_to_fit;
96  prepareFit_(current_section, data_to_fit, smooth_data);
97  std::unique_ptr<InterpolationModel> model_rt;
98  double quality = fitRT_(data_to_fit, model_rt);
99  // cut line 354 of FeatureFinderAlgorithmMRM
100 
101  return quality;
102  }
103 
104  protected:
105  template<class LocalPeakType>
106  double fitRT_(std::vector<LocalPeakType>& rt_input_data, std::unique_ptr<InterpolationModel>& model) const
107  {
108  EmgFitter1D fitter_emg1D;
109  fitter_emg1D.setParameters(fitter_emg1D_params_);
110  // Construct model for rt
111  // NaN is checked in fit1d: if (std::isnan(quality)) quality = -1.0;
112  return fitter_emg1D.fit1d(rt_input_data, model);
113  }
114 
115  // Fxn from FeatureFinderAlgorithmMRM
116  // TODO: check whether we can leave out some of the steps here, e.g. gaussian smoothing
117  template<class LocalPeakType>
118  void prepareFit_(const ConvexHull2D::PointArrayType & current_section, std::vector<LocalPeakType> & data_to_fit, bool smooth_data) const
119  {
120  // typedef Peak1D LocalPeakType;
121  PeakSpectrum filter_spec;
122  // first smooth the data to prevent outliers from destroying the fit
123  for (const auto& pa : current_section)
124  {
125  LocalPeakType p;
126  using IntensityType = typename LocalPeakType::IntensityType;
127  p.setMZ(pa.getX());
128  p.setIntensity(IntensityType(pa.getY()));
129  filter_spec.push_back(p);
130  }
131 
132  // add two peaks at the beginning and at the end for better fit
133  // therefore calculate average distance first
134  std::vector<double> distances;
135  for (Size j = 1; j < filter_spec.size(); ++j)
136  {
137  distances.push_back(filter_spec[j].getMZ() - filter_spec[j - 1].getMZ());
138  }
139  double dist_average = std::accumulate(distances.begin(), distances.end(), 0.0) / (double) distances.size();
140 
141  // append peaks
142  Peak1D new_peak;
143  new_peak.setIntensity(0);
144  new_peak.setMZ(filter_spec.back().getMZ() + dist_average);
145  filter_spec.push_back(new_peak);
146  new_peak.setMZ(filter_spec.back().getMZ() + dist_average);
147  filter_spec.push_back(new_peak);
148  new_peak.setMZ(filter_spec.back().getMZ() + dist_average);
149  filter_spec.push_back(new_peak);
150 
151  // prepend peaks
152  new_peak.setMZ(filter_spec.front().getMZ() - dist_average);
153  filter_spec.insert(filter_spec.begin(), new_peak);
154  new_peak.setMZ(filter_spec.front().getMZ() - dist_average);
155  filter_spec.insert(filter_spec.begin(), new_peak);
156  new_peak.setMZ(filter_spec.front().getMZ() - dist_average);
157  filter_spec.insert(filter_spec.begin(), new_peak);
158 
159  // To get an estimate of the peak quality, we probably should not smooth
160  // and/or transform the data.
161  if (smooth_data)
162  {
163  GaussFilter filter;
164  Param filter_param(filter.getParameters());
165  filter.setParameters(filter_param);
166  filter_param.setValue("gaussian_width", 4 * dist_average);
167  filter.setParameters(filter_param);
168  filter.filter(filter_spec);
169  }
170 
171  // transform the data for fitting and fit RT profile
172  for (Size j = 0; j != filter_spec.size(); ++j)
173  {
174  LocalPeakType p;
175  p.setPosition(filter_spec[j].getMZ());
176  p.setIntensity(filter_spec[j].getIntensity());
177  data_to_fit.push_back(p);
178  }
179  }
180 
182  };
183 
184 }
185 
std::vector< PointType > PointArrayType
Definition: ConvexHull2D.h:50
const Param & getParameters() const
Non-mutable access to the parameters.
void setParameters(const Param &param)
Sets the parameters.
const Param & getDefaults() const
Non-mutable access to the default parameters.
Exponentially modified gaussian distribution fitter (1-dim.) using Levenberg-Marquardt algorithm (Eig...
Definition: EmgFitter1D.h:23
QualityType fit1d(const RawDataArrayType &range, std::unique_ptr< InterpolationModel > &model) override
return interpolation model
Scoring of an elution peak using an exponentially modified gaussian distribution model.
Definition: EmgScoring.h:35
EmgScoring()=default
void setFitterParam(const Param &param)
Definition: EmgScoring.h:45
double elutionModelFit(const ConvexHull2D::PointArrayType &current_section, bool smooth_data) const
Definition: EmgScoring.h:82
Param fitter_emg1D_params_
Definition: EmgScoring.h:181
Param getDefaults()
Get default params for the Emg1D fitting.
Definition: EmgScoring.h:51
double calcElutionFitScore(MRMFeature &mrmfeature, MRMTransitionGroup< SpectrumType, TransitionT > &transition_group) const
calculate the elution profile fit score
Definition: EmgScoring.h:58
double fitRT_(std::vector< LocalPeakType > &rt_input_data, std::unique_ptr< InterpolationModel > &model) const
Definition: EmgScoring.h:106
~EmgScoring()=default
void prepareFit_(const ConvexHull2D::PointArrayType &current_section, std::vector< LocalPeakType > &data_to_fit, bool smooth_data) const
Definition: EmgScoring.h:118
An LC-MS feature.
Definition: Feature.h:46
const std::vector< ConvexHull2D > & getConvexHulls() const
Non-mutable access to the convex hulls.
This class represents a Gaussian lowpass-filter which works on uniform as well as on non-uniform prof...
Definition: GaussFilter.h:47
void filter(MSSpectrum &spectrum)
Smoothes an MSSpectrum containing profile data.
A multi-chromatogram MRM feature.
Definition: MRMFeature.h:26
Feature & getFeature(const String &key)
get a specified feature
The representation of a group of transitions in a targeted proteomics experiment.
Definition: MRMTransitionGroup.h:42
Size size() const
Definition: MRMTransitionGroup.h:99
std::vector< ChromatogramType > & getChromatograms()
Definition: MRMTransitionGroup.h:160
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
Management and storage of parameters / INI files.
Definition: Param.h:44
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:28
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:84
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:93
A more convenient string class.
Definition: String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition: openms/include/OpenMS/CONCEPT/Macros.h:94
const double k
Definition: Constants.h:132
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22