Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
ConfidenceScoring.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Hannes Roest, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_OPENSWATH_CONFIDENCESCORING_H
36 #define OPENMS_ANALYSIS_OPENSWATH_CONFIDENCESCORING_H
37 
38 #include <cmath> // for "exp"
39 #include <ctime> // for "time" (random number seed)
40 #include <limits> // for "infinity"
41 #include <boost/bimap.hpp>
42 #include <boost/bimap/multiset_of.hpp>
43 #include <boost/random/uniform_int.hpp>
44 #include <boost/random/mersenne_twister.hpp>
45 #include <boost/random/variate_generator.hpp>
46 
51 
55 
56 namespace OpenMS
57 {
58 
59 class OPENMS_DLLAPI ConfidenceScoring :
60  public ProgressLogger
61 {
62 public:
63 
65  explicit ConfidenceScoring(bool test_mode_=false) :
66  generator_(), rand_gen_(generator_, boost::uniform_int<>())
67  {
68  if (!test_mode_) rand_gen_.engine().seed(time(0)); // seed with current time
69  }
70 
71  virtual ~ConfidenceScoring() {}
72 
73 protected:
74 
76  typedef boost::bimap<double, boost::bimaps::multiset_of<double> >
78 
/**
  @brief Logistic model with three coefficients.

  Evaluates 1 / (1 + exp(-lm)) for the linear predictor
  lm = intercept + rt_coef * diff_rt^2 + int_coef * dist_int.
  The coefficients carry no in-class initializers; they are assigned by
  initializeGlm().
*/
struct
{
  double intercept; ///< constant term of the linear predictor
  double rt_coef;   ///< weight of the squared RT difference
  double int_coef;  ///< weight of the intensity distance

  /**
    @param diff_rt  RT difference (enters the predictor squared)
    @param dist_int intensity distance
    @return value of the logistic function for the given inputs
  */
  double operator()(double diff_rt, double dist_int) const
  {
    const double lm = intercept + rt_coef * diff_rt * diff_rt +
                      int_coef * dist_int;
    return 1.0 / (1.0 + std::exp(-lm));
  }
} glm_;
93 
/**
  @brief Linear rescaling of retention times.

  Maps @p min_rt to 0 and @p max_rt to 100. Both bounds are set by
  scoreMap() from the RTs of the library assays.
*/
struct
{
  double min_rt; ///< RT that is mapped to 0
  double max_rt; ///< RT that is mapped to 100

  /**
    @param rt retention time to rescale
    @return (rt - min_rt) / (max_rt - min_rt) * 100; the result is not
            finite if max_rt equals min_rt (division by zero)
  */
  double operator()(double rt) const
  {
    return (rt - min_rt) / (max_rt - min_rt) * 100;
  }
} rt_norm_;
105 
106  TargetedExperiment library_; // assay library
107 
108  IntList decoy_index_; // indexes of assays to use as decoys
109 
110  Size n_decoys_; // number of decoys to use (per feature/true assay)
111 
112  Map<String, IntList> transition_map_; // assay (ID) -> transitions (indexes)
113 
114  Size n_transitions_; // number of transitions to consider
115 
118 
119  boost::mt19937 generator_; // random number generation engine
120 
122  boost::variate_generator<boost::mt19937&, boost::uniform_int<> > rand_gen_;
123 
125  void chooseDecoys_();
126 
128  double manhattanDist_(DoubleList x, DoubleList y);
129 
131  double getAssayRT_(const TargetedExperiment::Peptide& assay,
132  const String& cv_accession = "MS:1000896");
133 
136  void extractIntensities_(BimapType& intensity_map, Size n_transitions,
137  DoubleList& intensities);
138 
142  double scoreAssay_(const TargetedExperiment::Peptide& assay,
143  double feature_rt, DoubleList& feature_intensities,
144  const std::set<String>& transition_ids = std::set<String>());
145 
147  void scoreFeature_(Feature& feature);
148 
149 public:
150 
151  void initialize(TargetedExperiment library, Size n_decoys, Size n_transitions, TransformationDescription rt_trafo)
152  {
153  library_ = TargetedExperiment(library);
154  n_decoys_ = n_decoys;
155  n_transitions_ = n_transitions;
156  rt_trafo_ = rt_trafo;
157  }
158 
159  void initializeGlm(double intercept, double rt_coef, double int_coef)
160  {
161  glm_.intercept = intercept;
162  glm_.rt_coef = rt_coef;
163  glm_.int_coef = int_coef;
164  }
165 
178  void scoreMap(FeatureMap & features)
179  {
180  // are there enough assays in the library?
181  Size n_assays = library_.getPeptides().size();
182  if (n_assays < 2)
183  {
184  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
185  "There need to be at least 2 assays in the library for ConfidenceScoring.");
186 
187  }
188  if (n_assays - 1 < n_decoys_)
189  {
190  LOG_WARN << "Warning: Parameter 'decoys' (" << n_decoys_
191  << ") is higher than the number of unrelated assays in the "
192  << "library (" << n_assays - 1 << "). "
193  << "Using all unrelated assays as decoys." << std::endl;
194  }
195  if (n_assays - 1 <= n_decoys_) n_decoys_ = 0; // use all available assays
196 
197  decoy_index_.resize(n_assays);
198  for (Size i = 0; i < n_assays; ++i) decoy_index_[i] = boost::numeric_cast<Int>(i);
199 
200  // build mapping between assays and transitions:
201  LOG_DEBUG << "Building transition map..." << std::endl;
202  for (Size i = 0; i < library_.getTransitions().size(); ++i)
203  {
204  const String& ref = library_.getTransitions()[i].getPeptideRef();
205  transition_map_[ref].push_back(boost::numeric_cast<Int>(i));
206  }
207  // find min./max. RT in the library:
208  LOG_DEBUG << "Determining retention time range..." << std::endl;
209  rt_norm_.min_rt = std::numeric_limits<double>::infinity();
210  rt_norm_.max_rt = -std::numeric_limits<double>::infinity();
211  for (std::vector<TargetedExperiment::Peptide>::const_iterator it =
212  library_.getPeptides().begin(); it != library_.getPeptides().end();
213  ++it)
214  {
215  double current_rt = getAssayRT_(*it);
216  if (current_rt == -1.0) continue; // indicates a missing value
217  rt_norm_.min_rt = std::min(rt_norm_.min_rt, current_rt);
218  rt_norm_.max_rt = std::max(rt_norm_.max_rt, current_rt);
219  }
220 
221  // log scoring progress:
222  LOG_DEBUG << "Scoring features..." << std::endl;
223  startProgress(0, features.size(), "scoring features");
224 
225  for (FeatureMap::Iterator feat_it = features.begin();
226  feat_it != features.end(); ++feat_it)
227  {
228  LOG_DEBUG << "Feature " << feat_it - features.begin() + 1
229  << " (ID '" << feat_it->getUniqueId() << "')"<< std::endl;
230  scoreFeature_(*feat_it);
231  setProgress(feat_it - features.begin());
232  }
233  endProgress();
234 
235  }
236 
237 };
238 
239 }
240 
241 #endif // OPENMS_ANALYSIS_OPENSWATH_CONFIDENCESCORING_H
double min_rt
Definition: ConfidenceScoring.h:97
A more convenient string class.
Definition: String.h:57
void initializeGlm(double intercept, double rt_coef, double int_coef)
Definition: ConfidenceScoring.h:159
virtual ~ConfidenceScoring()
Definition: ConfidenceScoring.h:71
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:66
A container for features.
Definition: FeatureMap.h:94
void scoreMap(FeatureMap &features)
Score a feature map -> make sure the class is properly initialized.
Definition: ConfidenceScoring.h:178
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:59
Map< String, IntList > transition_map_
Definition: ConfidenceScoring.h:112
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:459
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
TargetedExperiment library_
Definition: ConfidenceScoring.h:106
TransformationDescription rt_trafo_
RT transformation to map measured RTs to assay RTs.
Definition: ConfidenceScoring.h:117
boost::mt19937 generator_
Definition: ConfidenceScoring.h:119
double int_coef
Definition: ConfidenceScoring.h:84
A method or algorithm argument contains illegal values.
Definition: Exception.h:649
boost::bimap< double, boost::bimaps::multiset_of< double > > BimapType
Mapping: Q3 m/z <-> transition intensity (maybe not unique!)
Definition: ConfidenceScoring.h:77
ConfidenceScoring(bool test_mode_=false)
Constructor.
Definition: ConfidenceScoring.h:65
const std::vector< ReactionMonitoringTransition > & getTransitions() const
returns the transition list
Base::iterator Iterator
Definition: FeatureMap.h:138
double max_rt
Definition: ConfidenceScoring.h:98
IntList decoy_index_
Definition: ConfidenceScoring.h:108
An LC-MS feature.
Definition: Feature.h:70
void initialize(TargetedExperiment library, Size n_decoys, Size n_transitions, TransformationDescription rt_trafo)
Definition: ConfidenceScoring.h:151
Definition: ConfidenceScoring.h:59
double intercept
Definition: ConfidenceScoring.h:82
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
double rt_coef
Definition: ConfidenceScoring.h:83
Size n_decoys_
Definition: ConfidenceScoring.h:110
Size n_transitions_
Definition: ConfidenceScoring.h:114
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:62
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:61
const std::vector< Peptide > & getPeptides() const
UInt64 getUniqueId() const
Non-mutable access to unique id - returns the unique id.
Definition: UniqueIdInterface.h:110
boost::variate_generator< boost::mt19937 &, boost::uniform_int<> > rand_gen_
Random number generator (must be initialized in init. list of c'tor!)
Definition: ConfidenceScoring.h:122
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:51
Definition: TargetedExperimentHelper.h:266

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:21:59 using doxygen 1.8.13