OpenMS  2.6.0
PosteriorErrorProbabilityModel.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: David Wojnar $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
42 
43 #include <vector>
44 #include <map>
45 
46 namespace OpenMS
47 {
// Forward declarations of OpenMS types that appear in the member-function
// signatures of PosteriorErrorProbabilityModel below.
48  class String;
49  class TextFile;
50  class PeptideIdentification;
51  class ProteinIdentification;
52  class PeptideHit;
53  namespace Math
54  {
55 
56 
73  class OPENMS_DLLAPI PosteriorErrorProbabilityModel :
74  public DefaultParamHandler
75  {
76 public:
77 
80 
83 
95  static std::map<String, std::vector<std::vector<double>>> extractAndTransformScores(
96  const std::vector<ProteinIdentification> & protein_ids,
97  const std::vector<PeptideIdentification> & peptide_ids,
98  const bool split_charge,
99  const bool top_hits_only,
100  const bool target_decoy_available,
101  const double fdr_for_targets_smaller);
102 
116  static void updateScores(
117  const PosteriorErrorProbabilityModel & PEP_model,
118  const String & search_engine,
119  const Int charge,
120  const bool prob_correct,
121  const bool split_charge,
122  std::vector<ProteinIdentification> & protein_ids,
123  std::vector<PeptideIdentification> & peptide_ids,
124  bool & unable_to_fit_data,
125  bool & data_might_not_be_well_fit);
126 
135  bool fit(std::vector<double> & search_engine_scores, const String& outlier_handling);
136 
145  bool fitGumbelGauss(std::vector<double>& search_engine_scores, const String& outlier_handling);
146 
154  bool fit(std::vector<double> & search_engine_scores, std::vector<double> & probabilities, const String& outlier_handling);
155 
157  void fillDensities(const std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density);
159  void fillLogDensities(const std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density);
161  void fillLogDensitiesGumbel(const std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density);
163  double computeLogLikelihood(const std::vector<double> & incorrect_density, const std::vector<double> & correct_density);
164 
169  double computeLLAndIncorrectPosteriorsFromLogDensities(
170  const std::vector<double>& incorrect_log_density,
171  const std::vector<double>& correct_log_density,
172  std::vector<double>& incorrect_posterior);
173 
180  std::pair<double, double> pos_neg_mean_weighted_posteriors(const std::vector<double> &x_scores,
181  const std::vector<double> &incorrect_posteriors);
182 
189  std::pair<double, double> pos_neg_sigma_weighted_posteriors(const std::vector<double> &x_scores,
190  const std::vector<double> &incorrect_posteriors,
191  const std::pair<double, double>& means);
192 
195  {
196  return correctly_assigned_fit_param_;
197  }
198 
201  {
202  return incorrectly_assigned_fit_param_;
203  }
204 
207  {
208  return incorrectly_assigned_fit_gumbel_param_;
209  }
210 
212  double getNegativePrior() const
213  {
214  return negative_prior_;
215  }
216 
218  static double getGumbel_(double x, const GaussFitter::GaussFitResult & params)
219  {
220  double z = exp((params.x0 - x) / params.sigma);
221  return (z * exp(-1 * z)) / params.sigma;
222  }
223 
228  double computeProbability(double score) const;
229 
231  TextFile initPlots(std::vector<double> & x_scores);
232 
234  const String getGumbelGnuplotFormula(const GaussFitter::GaussFitResult & params) const;
235 
237  const String getGaussGnuplotFormula(const GaussFitter::GaussFitResult & params) const;
238 
240  const String getBothGnuplotFormula(const GaussFitter::GaussFitResult & incorrect, const GaussFitter::GaussFitResult & correct) const;
241 
243  void plotTargetDecoyEstimation(std::vector<double> & target, std::vector<double> & decoy);
244 
246  inline double getSmallestScore()
247  {
248  return smallest_score_;
249  }
250 
252  void tryGnuplot(const String& gp_file);
253 
254 private:
256  void processOutliers_(std::vector<double>& x_scores, const String& outlier_handling) const;
257 
262  static double transformScore_(const String& engine, const PeptideHit& hit, const String& current_score_type);
263 
268  static double getScore_(const StringList& requested_score_types, const PeptideHit & hit, const String& actual_score_type);
269 
288  const String (PosteriorErrorProbabilityModel::* getNegativeGnuplotFormula_)(const GaussFitter::GaussFitResult & params) const;
290  const String (PosteriorErrorProbabilityModel::* getPositiveGnuplotFormula_)(const GaussFitter::GaussFitResult & params) const;
291  };
292  }
293 }
294 
DefaultParamHandler.h
GumbelMaxLikelihoodFitter.h
OpenMS::Math::GumbelMaxLikelihoodFitter::GumbelDistributionFitResult
struct to represent the parameters of a gumbel distribution
Definition: GumbelMaxLikelihoodFitter.h:65
DPosition.h
GaussFitter.h
OpenMS::Math::PosteriorErrorProbabilityModel::getIncorrectlyAssignedGumbelFitResult
GumbelMaxLikelihoodFitter::GumbelDistributionFitResult getIncorrectlyAssignedGumbelFitResult() const
returns estimated Gumbel parameters for incorrectly assigned sequences. Fit should be used before.
Definition: PosteriorErrorProbabilityModel.h:206
OpenMS::Math::GaussFitter::GaussFitResult
struct of parameters of a Gaussian distribution
Definition: GaussFitter.h:65
OpenMS::String
A more convenient string class.
Definition: String.h:59
GumbelDistributionFitter.h
OpenMS::TextFile
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
OpenMS::Math::PosteriorErrorProbabilityModel::incorrectly_assigned_fit_param_
GaussFitter::GaussFitResult incorrectly_assigned_fit_param_
stores parameters for incorrectly assigned sequences. If gumbel fit was used, A can be ignored....
Definition: PosteriorErrorProbabilityModel.h:275
OpenMS::Math::PosteriorErrorProbabilityModel::correctly_assigned_fit_param_
GaussFitter::GaussFitResult correctly_assigned_fit_param_
stores gauss parameters
Definition: PosteriorErrorProbabilityModel.h:278
OpenMS::Math::PosteriorErrorProbabilityModel::max_correctly_
double max_correctly_
peak of the gauss distribution (correctly assigned sequences)
Definition: PosteriorErrorProbabilityModel.h:284
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::Math::PosteriorErrorProbabilityModel::getNegativePrior
double getNegativePrior() const
returns the estimated negative prior probability.
Definition: PosteriorErrorProbabilityModel.h:212
OpenMS::Math::GaussFitter::GaussFitResult::sigma
double sigma
parameter sigma of Gaussian distribution (width)
Definition: GaussFitter.h:80
int
OpenMS::Math::PosteriorErrorProbabilityModel::getCorrectlyAssignedFitResult
GaussFitter::GaussFitResult getCorrectlyAssignedFitResult() const
returns estimated parameters for correctly assigned sequences. Fit should be used before.
Definition: PosteriorErrorProbabilityModel.h:194
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::Math::GaussFitter::GaussFitResult::x0
double x0
parameter x0 of Gaussian distribution (center position)
Definition: GaussFitter.h:77
OpenMS::Math::PosteriorErrorProbabilityModel::negative_prior_
double negative_prior_
stores final prior probability for negative peptides
Definition: PosteriorErrorProbabilityModel.h:280
OpenMS::Math::PosteriorErrorProbabilityModel::max_incorrectly_
double max_incorrectly_
peak of the incorrectly assigned sequences distribution
Definition: PosteriorErrorProbabilityModel.h:282
OpenMS::Math::PosteriorErrorProbabilityModel::smallest_score_
double smallest_score_
smallest score which was used for fitting the model
Definition: PosteriorErrorProbabilityModel.h:286
OpenMS::Math::PosteriorErrorProbabilityModel::incorrectly_assigned_fit_gumbel_param_
GumbelMaxLikelihoodFitter::GumbelDistributionFitResult incorrectly_assigned_fit_gumbel_param_
Definition: PosteriorErrorProbabilityModel.h:276
OpenMS::Math::PosteriorErrorProbabilityModel
Implements a mixture model of the inverse gumbel and the gauss distribution or a gaussian mixture.
Definition: PosteriorErrorProbabilityModel.h:73
OpenMS::Math::PosteriorErrorProbabilityModel::getGumbel_
static double getGumbel_(double x, const GaussFitter::GaussFitResult &params)
computes the gumbel density at position x with parameters params.
Definition: PosteriorErrorProbabilityModel.h:218
OpenMS::Math::PosteriorErrorProbabilityModel::getSmallestScore
double getSmallestScore()
returns the smallest score used in the last fit
Definition: PosteriorErrorProbabilityModel.h:246
OpenMS::Math::PosteriorErrorProbabilityModel::getIncorrectlyAssignedFitResult
GaussFitter::GaussFitResult getIncorrectlyAssignedFitResult() const
returns estimated parameters for incorrectly assigned sequences. Fit should be used before.
Definition: PosteriorErrorProbabilityModel.h:200
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:55