OpenMS
ConfidenceScoring.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Hendrik Weisser $
6 // $Authors: Hannes Roest, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <cmath> // for "exp"
12 #include <limits> // for "infinity"
13 #include <map>
14 
20 
22 
23 namespace OpenMS
24 {
25 
26  class OPENMS_DLLAPI ConfidenceScoring :
27  public ProgressLogger
28  {
29  public:
30 
32  explicit ConfidenceScoring(bool test_mode_ = false);
33 
34  ~ConfidenceScoring() override {}
35 
36  protected:
37 
/// Binomial GLM (logistic model) over the squared RT difference and the intensity distance.
struct GLM_
{
  // Coefficients default to zero so that the model is well-defined (constant 0.5)
  // even before initializeGlm() has been called.
  double intercept = 0.0; ///< constant term of the linear predictor
  double rt_coef = 0.0;   ///< weight applied to the squared RT difference
  double int_coef = 0.0;  ///< weight applied to the intensity distance

  /**
    @brief Evaluates the model.

    @param diff_rt RT difference; enters the linear predictor squared, so its sign is irrelevant
    @param dist_int intensity distance; enters the linear predictor linearly

    @return Logistic transform 1 / (1 + exp(-lm)) of the linear predictor, i.e. a value in [0, 1]
  */
  double operator()(double diff_rt, double dist_int) const
  {
    const double lm = intercept + rt_coef * diff_rt * diff_rt +
                      int_coef * dist_int;
    // qualified call: <cmath> only guarantees the name in namespace std
    return 1.0 / (1.0 + std::exp(-lm));
  }
} glm_;
52 
/// Helper for RT normalization (range 0-100)
struct RTNorm_
{
  double min_rt; ///< RT that is mapped to 0
  double max_rt; ///< RT that is mapped to 100

  /**
    @brief Linearly rescales @p rt so that [min_rt, max_rt] maps onto [0, 100].

    Values outside [min_rt, max_rt] are not clamped. No check is made for
    max_rt == min_rt; in that case the division below is a division by zero.
  */
  double operator()(double rt) const
  {
    const double offset = rt - min_rt;
    const double span = max_rt - min_rt;
    return offset / span * 100;
  }
} rt_norm_;
64 
66 
68 
70 
71  std::map<String, IntList> transition_map_;
72 
74 
77 
79 
81  void chooseDecoys_();
82 
85 
88 
93  double feature_rt, DoubleList& feature_intensities,
94  const std::set<String>& transition_ids = std::set<String>());
95 
97  void scoreFeature_(Feature& feature);
98 
99  public:
100 
101  void initialize(const TargetedExperiment& library, const Size n_decoys, const Size n_transitions, const TransformationDescription& rt_trafo)
102  {
103  library_ = library;
104  n_decoys_ = n_decoys;
105  n_transitions_ = n_transitions;
106  rt_trafo_ = rt_trafo;
107  }
108 
109  void initializeGlm(double intercept, double rt_coef, double int_coef)
110  {
111  glm_.intercept = intercept;
112  glm_.rt_coef = rt_coef;
113  glm_.int_coef = int_coef;
114  }
115 
128  void scoreMap(FeatureMap & features)
129  {
130  // are there enough assays in the library?
131  Size n_assays = library_.getPeptides().size();
132  if (n_assays < 2)
133  {
134  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
135  "There need to be at least 2 assays in the library for ConfidenceScoring.");
136 
137  }
138  if (n_assays - 1 < n_decoys_)
139  {
140  OPENMS_LOG_WARN << "Warning: Parameter 'decoys' (" << n_decoys_
141  << ") is higher than the number of unrelated assays in the "
142  << "library (" << n_assays - 1 << "). "
143  << "Using all unrelated assays as decoys." << std::endl;
144  }
145  if (n_assays - 1 <= n_decoys_) n_decoys_ = 0; // use all available assays
146 
147  decoy_index_.resize(n_assays);
148  for (Size i = 0; i < n_assays; ++i) decoy_index_[i] = boost::numeric_cast<Int>(i);
149 
150  // build mapping between assays and transitions:
151  OPENMS_LOG_DEBUG << "Building transition map..." << std::endl;
152  for (Size i = 0; i < library_.getTransitions().size(); ++i)
153  {
154  const String& ref = library_.getTransitions()[i].getPeptideRef();
155  transition_map_[ref].push_back(boost::numeric_cast<Int>(i));
156  }
157  // find min./max. RT in the library:
158  OPENMS_LOG_DEBUG << "Determining retention time range..." << std::endl;
159  rt_norm_.min_rt = std::numeric_limits<double>::infinity();
160  rt_norm_.max_rt = -std::numeric_limits<double>::infinity();
161  for (std::vector<TargetedExperiment::Peptide>::const_iterator it =
162  library_.getPeptides().begin(); it != library_.getPeptides().end();
163  ++it)
164  {
165  double current_rt = getAssayRT_(*it);
166  if (current_rt == -1.0) continue; // indicates a missing value
167  rt_norm_.min_rt = std::min(rt_norm_.min_rt, current_rt);
168  rt_norm_.max_rt = std::max(rt_norm_.max_rt, current_rt);
169  }
170 
171  // log scoring progress:
172  OPENMS_LOG_DEBUG << "Scoring features..." << std::endl;
173  startProgress(0, features.size(), "scoring features");
174 
175  for (FeatureMap::Iterator feat_it = features.begin();
176  feat_it != features.end(); ++feat_it)
177  {
178  OPENMS_LOG_DEBUG << "Feature " << feat_it - features.begin() + 1
179  << " (ID '" << feat_it->getUniqueId() << "')"<< std::endl;
180  scoreFeature_(*feat_it);
181  setProgress(feat_it - features.begin());
182  }
183  endProgress();
184 
185  }
186 
187  };
188 
189 }
190 
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:454
#define OPENMS_LOG_WARN
Macro for logging a warning, i.e. a piece of information which should be read by the user.
Definition: LogStream.h:444
Definition: ConfidenceScoring.h:28
double scoreAssay_(const TargetedExperiment::Peptide &assay, double feature_rt, DoubleList &feature_intensities, const std::set< String > &transition_ids=std::set< String >())
void scoreMap(FeatureMap &features)
Score a feature map (make sure the class is properly initialized before calling this).
Definition: ConfidenceScoring.h:128
void chooseDecoys_()
Randomize the list of decoy indexes.
std::map< String, IntList > transition_map_
assay (ID) -> transitions (indexes)
Definition: ConfidenceScoring.h:71
TargetedExperiment library_
assay library
Definition: ConfidenceScoring.h:65
Math::RandomShuffler shuffler_
random shuffler for container
Definition: ConfidenceScoring.h:78
IntList decoy_index_
indexes of assays to use as decoys
Definition: ConfidenceScoring.h:67
Size n_decoys_
number of decoys to use (per feature/true assay)
Definition: ConfidenceScoring.h:69
double getAssayRT_(const TargetedExperiment::Peptide &assay)
Get the retention time of an assay.
~ConfidenceScoring() override
Definition: ConfidenceScoring.h:34
TransformationDescription rt_trafo_
RT transformation to map measured RTs to assay RTs.
Definition: ConfidenceScoring.h:76
double manhattanDist_(DoubleList x, DoubleList y)
Manhattan distance.
ConfidenceScoring(bool test_mode_=false)
Constructor.
void scoreFeature_(Feature &feature)
Score a feature.
void initializeGlm(double intercept, double rt_coef, double int_coef)
Definition: ConfidenceScoring.h:109
Size n_transitions_
number of transitions to consider
Definition: ConfidenceScoring.h:73
void initialize(const TargetedExperiment &library, const Size n_decoys, const Size n_transitions, const TransformationDescription &rt_trafo)
Definition: ConfidenceScoring.h:101
A method or algorithm argument contains illegal values.
Definition: Exception.h:624
size_t size() const noexcept
Definition: ExposedVector.h:121
iterator begin() noexcept
Definition: ExposedVector.h:97
iterator end() noexcept
Definition: ExposedVector.h:101
A container for features.
Definition: FeatureMap.h:80
iterator Iterator
Definition: FeatureMap.h:87
An LC-MS feature.
Definition: Feature.h:46
Definition: MathFunctions.h:382
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
A more convenient string class.
Definition: String.h:34
Represents a peptide (amino acid sequence)
Definition: TargetedExperimentHelper.h:334
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:39
const std::vector< Peptide > & getPeptides() const
const std::vector< ReactionMonitoringTransition > & getTransitions() const
returns the transition list
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:37
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:29
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:36
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
Binomial GLM.
Definition: ConfidenceScoring.h:40
double rt_coef
Definition: ConfidenceScoring.h:42
double int_coef
Definition: ConfidenceScoring.h:43
double operator()(double diff_rt, double dist_int) const
Definition: ConfidenceScoring.h:45
double intercept
Definition: ConfidenceScoring.h:41
Helper for RT normalization (range 0-100)
Definition: ConfidenceScoring.h:55
double min_rt
Definition: ConfidenceScoring.h:56
double max_rt
Definition: ConfidenceScoring.h:57
double operator()(double rt) const
Definition: ConfidenceScoring.h:59