25 class PeptideIdentification;
26 class ProteinIdentification;
28 class PeptideIdentificationList;
72 const std::vector<ProteinIdentification> & protein_ids,
74 const bool split_charge,
75 const bool top_hits_only,
76 const bool target_decoy_available,
77 const double fdr_for_targets_smaller);
94 const std::string & search_engine,
96 const bool prob_correct,
97 const bool split_charge,
98 std::vector<ProteinIdentification> & protein_ids,
100 bool & unable_to_fit_data,
101 bool & data_might_not_be_well_fit);
112 bool fit(std::vector<double>& search_engine_scores,
const std::string& outlier_handling);
123 bool fitGumbelGauss(std::vector<double>& search_engine_scores,
const std::string& outlier_handling);
133 bool fit(std::vector<double>& search_engine_scores, std::vector<double>& probabilities,
const std::string& outlier_handling);
136 void fillDensities(
const std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density);
138 void fillLogDensities(
const std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density);
140 void fillLogDensitiesGumbel(
const std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density);
142 double computeLogLikelihood(
const std::vector<double> & incorrect_density,
const std::vector<double> & correct_density)
const;
151 const std::vector<double>& incorrect_log_density,
152 const std::vector<double>& correct_log_density,
153 std::vector<double>& incorrect_posterior)
const;
162 const std::vector<double> &incorrect_posteriors);
172 const std::vector<double> &incorrect_posteriors,
173 const std::pair<double, double>& pos_neg_mean);
178 return correctly_assigned_fit_param_;
184 return incorrectly_assigned_fit_param_;
190 return incorrectly_assigned_fit_gumbel_param_;
196 return negative_prior_;
202 double z = exp((params.
x0 - x) / params.
sigma);
203 return (z * exp(-1 * z)) / params.
sigma;
230 return smallest_score_;
238 void processOutliers_(std::vector<double>& x_scores,
const std::string& outlier_handling)
const;
250 static double getScore_(
const std::vector<std::string>& requested_score_types,
const PeptideHit & hit,
const std::string& actual_score_type);
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Implements a fitter for Gaussian functions.
Definition GaussFitter.h:35
Implements a mixture model of the inverse gumbel and the gauss distribution or a gaussian mixture.
Definition PosteriorErrorProbabilityModel.h:51
PosteriorErrorProbabilityModel & operator=(const PosteriorErrorProbabilityModel &rhs)
assignment operator (not implemented)
double computeLLAndIncorrectPosteriorsFromLogDensities(const std::vector< double > &incorrect_log_density, const std::vector< double > &correct_log_density, std::vector< double > &incorrect_posterior) const
bool fitGumbelGauss(std::vector< double > &search_engine_scores, const std::string &outlier_handling)
fits the distributions to the data points (search_engine_scores). Estimated parameters for the distrib...
static std::map< std::string, std::vector< std::vector< double > > > extractAndTransformScores(const std::vector< ProteinIdentification > &protein_ids, const PeptideIdentificationList &peptide_ids, const bool split_charge, const bool top_hits_only, const bool target_decoy_available, const double fdr_for_targets_smaller)
extract and transform score types to a range and score orientation that the PEP model can handle
GumbelMaxLikelihoodFitter::GumbelDistributionFitResult incorrectly_assigned_fit_gumbel_param_
Definition PosteriorErrorProbabilityModel.h:258
TextFile initPlots(std::vector< double > &x_scores)
initializes the plots
void plotTargetDecoyEstimation(std::vector< double > &target, std::vector< double > &decoy)
plots the estimated distribution against target and decoy hits
static double transformScore_(const std::string &engine, const PeptideHit &hit, const std::string &current_score_type)
GaussFitter::GaussFitResult incorrectly_assigned_fit_param_
stores parameters for incorrectly assigned sequences. If gumbel fit was used, A can be ignored....
Definition PosteriorErrorProbabilityModel.h:257
bool fit(std::vector< double > &search_engine_scores, const std::string &outlier_handling)
fits the distributions to the data points (search_engine_scores). Estimated parameters for the distrib...
const std::string getBothGnuplotFormula(const GaussFitter::GaussFitResult &incorrect, const GaussFitter::GaussFitResult &correct) const
returns the gnuplot formula of the fitted mixture distribution.
double max_correctly_
peak of the gauss distribution (correctly assigned sequences)
Definition PosteriorErrorProbabilityModel.h:266
double computeProbability(double score) const
void fillDensities(const std::vector< double > &x_scores, std::vector< double > &incorrect_density, std::vector< double > &correct_density)
Writes the distributions densities into the two vectors for a set of scores. Incorrect_densities repr...
bool fit(std::vector< double > &search_engine_scores, std::vector< double > &probabilities, const std::string &outlier_handling)
fits the distributions to the data points (search_engine_scores) and writes the computed probabilities...
const std::string getGumbelGnuplotFormula(const GaussFitter::GaussFitResult &params) const
returns the gnuplot formula of the fitted gumbel distribution. Only x0 and sigma are used as local pa...
static void updateScores(const PosteriorErrorProbabilityModel &PEP_model, const std::string &search_engine, const Int charge, const bool prob_correct, const bool split_charge, std::vector< ProteinIdentification > &protein_ids, PeptideIdentificationList &peptide_ids, bool &unable_to_fit_data, bool &data_might_not_be_well_fit)
update score entries with PEP (or 1-PEP) estimates
PosteriorErrorProbabilityModel(const PosteriorErrorProbabilityModel &rhs)
Copy constructor (not implemented)
void processOutliers_(std::vector< double > &x_scores, const std::string &outlier_handling) const
transform different score types to a range and score orientation that the model can handle (engine st...
const std::string getGaussGnuplotFormula(const GaussFitter::GaussFitResult &params) const
returns the gnuplot formula of the fitted gauss distribution.
GaussFitter::GaussFitResult getIncorrectlyAssignedFitResult() const
returns estimated parameters for incorrectly assigned sequences. Fit should be used before.
Definition PosteriorErrorProbabilityModel.h:182
double getNegativePrior() const
returns the estimated negative prior probability.
Definition PosteriorErrorProbabilityModel.h:194
void fillLogDensities(const std::vector< double > &x_scores, std::vector< double > &incorrect_density, std::vector< double > &correct_density)
Writes the log distributions densities into the two vectors for a set of scores. Incorrect_densities ...
double negative_prior_
stores final prior probability for negative peptides
Definition PosteriorErrorProbabilityModel.h:262
void fillLogDensitiesGumbel(const std::vector< double > &x_scores, std::vector< double > &incorrect_density, std::vector< double > &correct_density)
Writes the log distributions of gumbel and gauss densities into the two vectors for a set of scores....
~PosteriorErrorProbabilityModel() override
Destructor.
GaussFitter::GaussFitResult correctly_assigned_fit_param_
stores gauss parameters
Definition PosteriorErrorProbabilityModel.h:260
double max_incorrectly_
peak of the incorrectly assigned sequences distribution
Definition PosteriorErrorProbabilityModel.h:264
PosteriorErrorProbabilityModel()
default constructor
GaussFitter::GaussFitResult getCorrectlyAssignedFitResult() const
returns estimated parameters for correctly assigned sequences. Fit should be used before.
Definition PosteriorErrorProbabilityModel.h:176
static double getGumbel_(double x, const GaussFitter::GaussFitResult &params)
computes the gumbel density at position x with parameters params.
Definition PosteriorErrorProbabilityModel.h:200
double smallest_score_
smallest score which was used for fitting the model
Definition PosteriorErrorProbabilityModel.h:268
std::pair< double, double > pos_neg_mean_weighted_posteriors(const std::vector< double > &x_scores, const std::vector< double > &incorrect_posteriors)
double getSmallestScore() const
returns the smallest score used in the last fit
Definition PosteriorErrorProbabilityModel.h:228
static double getScore_(const std::vector< std::string > &requested_score_types, const PeptideHit &hit, const std::string &actual_score_type)
GumbelMaxLikelihoodFitter::GumbelDistributionFitResult getIncorrectlyAssignedGumbelFitResult() const
returns estimated Gumbel parameters for incorrectly assigned sequences. Fit should be used before.
Definition PosteriorErrorProbabilityModel.h:188
void tryGnuplot(const std::string &gp_file)
try to invoke 'gnuplot' on the file to create PDF automatically
std::pair< double, double > pos_neg_sigma_weighted_posteriors(const std::vector< double > &x_scores, const std::vector< double > &incorrect_posteriors, const std::pair< double, double > &pos_neg_mean)
double computeLogLikelihood(const std::vector< double > &incorrect_density, const std::vector< double > &correct_density) const
computes the Likelihood with a log-likelihood function.
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
int Int
Signed integer type.
Definition Types.h:72
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
struct of parameters of a Gaussian distribution
Definition GaussFitter.h:40
double sigma
parameter sigma of Gaussian distribution (width)
Definition GaussFitter.h:54
double x0
parameter x0 of Gaussian distribution (center position)
Definition GaussFitter.h:51
struct to represent the parameters of a gumbel distribution
Definition GumbelMaxLikelihoodFitter.h:38