19 #include <boost/regex.hpp>
24 class PeptideIdentification;
67 double cut_off = DBL_MAX;
78 double suitability = 0;
82 double suitability_no_rerank = 0;
85 double suitability_corr_no_rerank = 0;
116 double corr_factor = -1;
119 double num_top_novo_corr = 0;
126 double suitability_corr = 0;
253 double getDecoyCutOff_(
const std::vector<PeptideIdentification>& pep_ids,
double reranking_cutoff_percentile)
const;
335 std::vector<FASTAFile::FASTAEntry>
getSubsampledFasta_(
const std::vector<FASTAFile::FASTAEntry>& fasta_data,
double subsampling_rate)
const;
426 double getScoreMatchingFDR_(
const std::vector<PeptideIdentification>& pep_ids,
double FDR,
const String& score_name,
bool higher_score_better)
const;
437 std::vector<FASTAFile::FASTAEntry>
getSubsampledFasta(
const std::vector<FASTAFile::FASTAEntry>& fasta_data,
double subsampling_rate)
Definition: DBSuitability.h:431
std::vector< FASTAFile::FASTAEntry > getSubsampledFasta(const std::vector< FASTAFile::FASTAEntry > &fasta_data, double subsampling_rate)
Definition: DBSuitability.h:437
~DBSuitability_friend()=default
UInt numberOfUniqueProteins(const std::vector< PeptideIdentification > &peps, UInt number_of_hits=1)
Definition: DBSuitability.h:452
double calculateCorrectionFactor(const DBSuitability::SuitabilityData &data, const DBSuitability::SuitabilityData &data_sampled, double sampling_rate)
Definition: DBSuitability.h:447
DBSuitability suit_
Definition: DBSuitability.h:479
DBSuitability_friend()=default
double getScoreMatchingFDR(const std::vector< PeptideIdentification > &pep_ids, double FDR, String score_name, bool higher_score_better)
Definition: DBSuitability.h:462
Size getIndexWithMedianNovoHits(const std::vector< DBSuitability::SuitabilityData > &data)
Definition: DBSuitability.h:457
void appendDecoys(std::vector< FASTAFile::FASTAEntry > &fasta)
Definition: DBSuitability.h:442
This class holds the functionality of calculating the database suitability.
Definition: DBSuitability.h:47
void compute(std::vector< PeptideIdentification > &&pep_ids, const MSExperiment &exp, const std::vector< FASTAFile::FASTAEntry > &original_fasta, const std::vector< FASTAFile::FASTAEntry > &novo_fasta, const ProteinIdentification::SearchParameters &search_params)
Computes suitability of a database used to search an mzML.
std::vector< FASTAFile::FASTAEntry > getSubsampledFasta_(const std::vector< FASTAFile::FASTAEntry > &fasta_data, double subsampling_rate) const
Creates a subsampled fasta with the given subsampling rate.
bool checkScoreBetterThanThreshold_(const PeptideHit &hit, double threshold, bool higher_score_better) const
Tests if a PeptideHit has a score better than the given threshold.
Size getIndexWithMedianNovoHits_(const std::vector< SuitabilityData > &data) const
Finds the SuitabilityData object with the median number of de novo hits.
std::vector< PeptideIdentification > runIdentificationSearch_(const MSExperiment &exp, const std::vector< FASTAFile::FASTAEntry > &fasta_data, const String &adapter_name, Param &parameters) const
Executes the workflow from search adapter, followed by PeptideIndexer and finishes with FDR.
void writeIniFile_(const Param &parameters, const String &filename) const
Writes parameters into a given file.
std::pair< String, Param > extractSearchAdapterInfoFromMetaValues_(const ProteinIdentification::SearchParameters &meta_values) const
Looks through meta values of SearchParameters to find out which search adapter was used.
UInt numberOfUniqueProteins_(const std::vector< PeptideIdentification > &peps, UInt number_of_hits=1) const
Determines the number of unique proteins found in the protein accessions of PeptideIdentifications.
~DBSuitability() override=default
Destructor.
double getDecoyCutOff_(const std::vector< PeptideIdentification > &pep_ids, double reranking_cutoff_percentile) const
Calculates an xcorr cut-off based on decoy hits.
double calculateCorrectionFactor_(const SuitabilityData &data, const SuitabilityData &data_sampled, double sampling_rate) const
Calculates the correction factor from two suitability calculations.
const std::vector< SuitabilityData > & getResults() const
Returns results calculated by this metric.
std::vector< SuitabilityData > results_
result vector
Definition: DBSuitability.h:218
void appendDecoys_(std::vector< FASTAFile::FASTAEntry > &fasta) const
Calculates and appends decoys to a given vector of FASTAEntry.
void calculateSuitability_(const std::vector< PeptideIdentification > &pep_ids, SuitabilityData &data) const
Calculates all suitability data from a combined deNovo+database search.
double getScoreMatchingFDR_(const std::vector< PeptideIdentification > &pep_ids, double FDR, const String &score_name, bool higher_score_better) const
Extracts the worst score that still passes a FDR (q-value) threshold.
bool isNovoHit_(const PeptideHit &hit) const
Tests if a PeptideHit is considered a deNovo hit.
const boost::regex decoy_pattern_
pattern for finding a decoy string
Definition: DBSuitability.h:221
double extractScore_(const PeptideHit &pep_hit) const
Returns the cross correlation score normalized by MW (if existing), else if the 'force' flag is set t...
double getDecoyDiff_(const PeptideIdentification &pep_id) const
Calculates the xcorr difference between the top two hits marked as decoy.
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
Management and storage of parameters / INI files.
Definition: Param.h:44
Representation of a peptide hit.
Definition: PeptideHit.h:31
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:39
A more convenient string class.
Definition: String.h:34
unsigned int UInt
Unsigned integer type.
Definition: Types.h:68
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
struct to store results
Definition: DBSuitability.h:51
double getCorrectedSuitability() const
void setCorrectionFactor(double factor)
SuitabilityData simulateNoReRanking() const
Returns a SuitabilityData object containing the data if re-ranking didn't happen.
double getCorrectedNovoHits() const
double getCorrectionFactor() const
Search parameters of the DB search.
Definition: ProteinIdentification.h:247