37 #include <OpenMS/config.h> 94 template <
class HitType>
104 higher_score_better(higher_score_better_)
109 if (higher_score_better)
111 return hit.getScore() >= score;
113 return hit.getScore() <= score;
122 template <
class HitType>
134 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
140 Size hit_rank = hit.getRank();
145 return hit_rank <= rank;
154 template <
class HitType>
170 if (found.
isEmpty())
return false;
171 if (value.
isEmpty())
return true;
172 return found == value;
177 template <
class HitType>
193 if (found.
isEmpty())
return false;
194 return double(found) <= value;
199 template <
class HitType>
207 target_decoy(
"target_decoy",
"decoy"), is_decoy(
"isDecoy",
"true")
215 return target_decoy(hit) || is_decoy(hit);
224 template <
class HitType>
232 accessions(accessions_)
238 for (std::set<String>::iterator it = present_accessions.begin();
239 it != present_accessions.end(); ++it)
241 if (accessions.count(*it) > 0)
return true;
262 template <
class HitType,
class Entry>
271 for(
typename std::vector<Entry>::iterator rec_it = records.begin();
272 rec_it != records.end(); ++rec_it)
274 items[getKey(*rec_it)] = &(*rec_it);
287 return items.count(getHitKey(hit)) > 0;
297 if(!exists(evidence)){
298 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '"+ getHitKey(evidence) +
"'. peptide evidence accession not in data");
300 return *(items.find(getHitKey(evidence))->second);
317 struct HasMinPeptideLength;
323 struct HasLowMZError;
330 struct HasMatchingModification;
337 struct HasMatchingSequence;
340 struct HasNoEvidence;
358 digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
369 [&](
const Int missed_cleavages)
372 bool max_filter = max_cleavages_ != disabledValue() ?
373 missed_cleavages > max_cleavages_ :
false;
374 bool min_filter = min_cleavages_ != disabledValue() ?
375 missed_cleavages < min_cleavages_ :
false;
376 return max_filter || min_filter;
382 hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)), hits.end());
405 bool ignore_missed_cleavages,
406 bool methionine_cleavage) :
407 accession_resolver_(entries),
408 digestion_(digestion),
409 ignore_missed_cleavages_(ignore_missed_cleavages),
410 methionine_cleavage_(methionine_cleavage)
421 if (accession_resolver_.
exists(evidence))
425 evidence.
getStart(), evidence.
getEnd() - evidence.
getStart(), ignore_missed_cleavages_, methionine_cleavage_);
431 LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
436 <<
"' not found in fasta file!" << std::endl;
444 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this,peptides);
457 template <
class IdentificationType>
464 return id.getHits().empty();
491 template <
class Container,
class Predicate>
494 items.erase(std::remove_if(items.begin(), items.end(), pred),
499 template <
class Container,
class Predicate>
502 items.erase(std::remove_if(items.begin(), items.end(), std::not1(pred)),
513 template <
class IdentificationType>
517 for (
typename std::vector<IdentificationType>::const_iterator id_it =
518 ids.begin(); id_it != ids.end(); ++id_it)
520 counter += id_it->getHits().size();
537 template <
class IdentificationType>
539 const std::vector<IdentificationType>& identifications,
540 bool assume_sorted,
typename IdentificationType::HitType& best_hit)
542 if (identifications.empty())
return false;
544 typename std::vector<IdentificationType>::const_iterator best_id_it =
545 identifications.end();
546 typename std::vector<typename IdentificationType::HitType>::const_iterator
549 for (
typename std::vector<IdentificationType>::const_iterator id_it =
550 identifications.begin(); id_it != identifications.end(); ++id_it)
552 if (id_it->getHits().empty())
continue;
554 if (best_id_it == identifications.end())
557 best_hit_it = id_it->getHits().begin();
559 else if (best_id_it->getScoreType() != id_it->getScoreType())
561 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
564 bool higher_better = best_id_it->isHigherScoreBetter();
565 for (
typename std::vector<typename IdentificationType::HitType>::
566 const_iterator hit_it = id_it->getHits().begin(); hit_it !=
567 id_it->getHits().end(); ++hit_it)
569 if ((higher_better && (hit_it->getScore() >
570 best_hit_it->getScore())) ||
571 (!higher_better && (hit_it->getScore() <
572 best_hit_it->getScore())))
574 best_hit_it = hit_it;
576 if (assume_sorted)
break;
580 if (best_id_it == identifications.end())
585 best_hit = *best_hit_it;
596 static void extractPeptideSequences(
597 const std::vector<PeptideIdentification>& peptides,
598 std::set<String>& sequences,
bool ignore_mods =
false);
606 template<
class Ev
idenceFilter>
608 EvidenceFilter& filter,
609 std::vector<PeptideIdentification>& peptides)
611 for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
612 pep_it != peptides.end(); ++pep_it)
614 for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
615 hit_it != pep_it->getHits().end(); ++hit_it )
617 std::vector<PeptideEvidence> evidences;
618 remove_copy_if(hit_it->getPeptideEvidences().begin(),
619 hit_it->getPeptideEvidences().end(),
620 back_inserter(evidences),
622 hit_it->setPeptideEvidences(evidences);
635 template <
class IdentificationType>
638 for (
typename std::vector<IdentificationType>::iterator it = ids.begin();
639 it != ids.end(); ++it)
646 static void removeUnreferencedProteins(
647 std::vector<ProteinIdentification>& proteins,
648 const std::vector<PeptideIdentification>& peptides);
657 static void updateProteinReferences(
658 std::vector<PeptideIdentification>& peptides,
659 const std::vector<ProteinIdentification>& proteins,
660 bool remove_peptides_without_reference =
false);
670 static bool updateProteinGroups(
671 std::vector<ProteinIdentification::ProteinGroup>& groups,
672 const std::vector<ProteinHit>& hits);
681 template <
class IdentificationType>
684 struct HasNoHits<IdentificationType> empty_filter;
685 removeMatchingItems(ids, empty_filter);
693 template <
class IdentificationType>
695 double threshold_score)
697 for (
typename std::vector<IdentificationType>::iterator id_it =
698 ids.begin(); id_it != ids.end(); ++id_it)
700 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
701 threshold_score, id_it->isHigherScoreBetter());
702 keepMatchingItems(id_it->getHits(), score_filter);
711 template <class IdentificationType>
712 static void filterHitsBySignificance(std::vector<IdentificationType>& ids,
713 double threshold_fraction = 1.0)
715 for (
typename std::vector<IdentificationType>::iterator id_it =
716 ids.begin(); id_it != ids.end(); ++id_it)
718 double threshold_score = (threshold_fraction *
719 id_it->getSignificanceThreshold());
720 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
721 threshold_score, id_it->isHigherScoreBetter());
722 keepMatchingItems(id_it->getHits(), score_filter);
731 template <class IdentificationType>
732 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
734 for (
typename std::vector<IdentificationType>::iterator id_it =
735 ids.begin(); id_it != ids.end(); ++id_it)
738 if (n < id_it->getHits().size()) id_it->getHits().resize(n);
756 template <
class IdentificationType>
763 struct HasMaxRank<typename IdentificationType::HitType>
764 rank_filter(min_rank - 1);
765 for (typename std::vector<IdentificationType>::iterator id_it =
766 ids.begin(); id_it != ids.end(); ++id_it)
768 removeMatchingItems(id_it->getHits(), rank_filter);
771 if (max_rank >= min_rank)
773 struct HasMaxRank<typename IdentificationType::HitType>
774 rank_filter(max_rank);
775 for (typename std::vector<IdentificationType>::iterator id_it =
776 ids.begin(); id_it != ids.end(); ++id_it)
778 keepMatchingItems(id_it->getHits(), rank_filter);
790 template <
class IdentificationType>
795 for (typename std::vector<IdentificationType>::iterator id_it =
796 ids.begin(); id_it != ids.end(); ++id_it)
798 removeMatchingItems(id_it->getHits(), decoy_filter);
809 template <
class IdentificationType>
811 const std::set<String> accessions)
814 acc_filter(accessions);
815 for (typename std::vector<IdentificationType>::iterator id_it =
816 ids.begin(); id_it != ids.end(); ++id_it)
818 removeMatchingItems(id_it->getHits(), acc_filter);
829 template <
class IdentificationType>
831 const std::set<String> accessions)
834 acc_filter(accessions);
835 for (typename std::vector<IdentificationType>::iterator id_it =
836 ids.begin(); id_it != ids.end(); ++id_it)
838 keepMatchingItems(id_it->getHits(), acc_filter);
856 static void keepBestPeptideHits(
857 std::vector<PeptideIdentification>& peptides,
bool strict =
false);
867 static void filterPeptidesByLength(
868 std::vector<PeptideIdentification>& peptides,
Size min_length,
869 Size max_length = UINT_MAX);
879 static void filterPeptidesByCharge(
880 std::vector<PeptideIdentification>& peptides,
Int min_charge,
884 static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
885 double min_rt,
double max_rt);
888 static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
889 double min_mz,
double max_mz);
902 static void filterPeptidesByMZError(
903 std::vector<PeptideIdentification>& peptides,
double mass_error,
913 template <
class Filter>
914 static void filterPeptideEvidences(
916 std::vector<PeptideIdentification>& peptides);
929 static void filterPeptidesByRTPredictPValue(
930 std::vector<PeptideIdentification>& peptides,
931 const String& metavalue_key,
double threshold = 0.05);
934 static void removePeptidesWithMatchingModifications(
935 std::vector<PeptideIdentification>& peptides,
936 const std::set<String>& modifications);
939 static void keepPeptidesWithMatchingModifications(
940 std::vector<PeptideIdentification>& peptides,
941 const std::set<String>& modifications);
950 static void removePeptidesWithMatchingSequences(
951 std::vector<PeptideIdentification>& peptides,
952 const std::vector<PeptideIdentification>& bad_peptides,
953 bool ignore_mods =
false);
962 static void keepPeptidesWithMatchingSequences(
963 std::vector<PeptideIdentification>& peptides,
964 const std::vector<PeptideIdentification>& good_peptides,
965 bool ignore_mods =
false);
968 static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
976 static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
977 peptides,
bool seq_only =
false);
987 double peptide_threshold_score,
988 double protein_threshold_score)
992 protein_threshold_score);
998 exp_it != experiment.
end(); ++exp_it)
1000 filterHitsByScore(exp_it->getPeptideIdentifications(),
1001 peptide_threshold_score);
1002 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1003 updateProteinReferences(exp_it->getPeptideIdentifications(),
1011 double peptide_threshold_fraction,
1012 double protein_threshold_fraction)
1016 protein_threshold_fraction);
1022 exp_it != experiment.
end(); ++exp_it)
1024 filterHitsBySignificance(exp_it->getPeptideIdentifications(),
1025 peptide_threshold_fraction);
1026 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1027 updateProteinReferences(exp_it->getPeptideIdentifications(),
1038 std::vector<PeptideIdentification> all_peptides;
1042 exp_it != experiment.
end(); ++exp_it)
1044 std::vector<PeptideIdentification>& peptides =
1045 exp_it->getPeptideIdentifications();
1046 keepNBestHits(peptides, n);
1047 removeEmptyIdentifications(peptides);
1048 updateProteinReferences(peptides,
1050 all_peptides.insert(all_peptides.end(), peptides.begin(),
1061 const std::vector<FASTAFile::FASTAEntry>& proteins)
1063 std::set<String> accessions;
1064 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1065 proteins.begin(); it != proteins.end(); ++it)
1067 accessions.insert(it->identifier);
1077 exp_it != experiment.
end(); ++exp_it)
1079 if (exp_it->getMSLevel() == 2)
1081 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1083 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1084 updateHitRanks(exp_it->getPeptideIdentifications());
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:123
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:380
bool ignore_missed_cleavages_
Definition: IDFilter.h:400
ItemMap items
Definition: IDFilter.h:267
const String & getAccession() const
returns the accession of the protein
ProteaseDigestion & digestion_
Definition: IDFilter.h:399
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:266
bool exists(const HitType &hit) const
Definition: IDFilter.h:285
A more convenient string class.
Definition: String.h:57
bool filterByMissedCleavages(const String &sequence, std::function< bool(const Int)> filter) const
Filter based on the number of missed cleavages.
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:810
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:514
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:251
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:111
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:246
GetMatchingItems()
Definition: IDFilter.h:278
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:357
double score
Definition: IDFilter.h:99
Int max_cleavages_
Definition: IDFilter.h:353
Is this a decoy hit?
Definition: IDFilter.h:200
PeptideEvidence argument_type
Definition: IDFilter.h:395
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:458
HasMaxRank(Size rank_)
Definition: IDFilter.h:129
bool operator()(const HitType &hit) const
Definition: IDFilter.h:210
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:102
bool operator()(const HitType &hit) const
Definition: IDFilter.h:107
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
Iterator begin()
Definition: MSExperiment.h:157
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:393
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:235
const std::set< String > & accessions
Definition: IDFilter.h:229
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:830
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:62
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-terminus).
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:295
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:791
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:694
static Int disabledValue()
Definition: IDFilter.h:361
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:462
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
#define LOG_WARN
Macro for logging a warning, i.e. a piece of information which should be read by the user.
Definition: LogStream.h:450
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:348
Iterator end()
Definition: MSExperiment.h:167
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:442
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1059
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:607
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:636
bool operator()(const HitType &hit) const
Definition: IDFilter.h:138
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:290
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
Size rank
Definition: IDFilter.h:127
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:413
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:492
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:269
Representation of a peptide hit.
Definition: PeptideHit.h:54
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:398
const String & getProteinAccession() const
get the accession of the protein the peptide matches. If not available, the empty string is returned.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1034
IdentificationType argument_type
Definition: IDFilter.h:460
HasDecoyAnnotation()
Definition: IDFilter.h:206
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:95
Int min_cleavages_
Definition: IDFilter.h:352
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:225
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:280
HitType argument_type
Definition: IDFilter.h:202
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:347
HitType argument_type
Definition: IDFilter.h:97
static void filterHitsBySignificance(PeakMap &experiment, double peptide_threshold_fraction, double protein_threshold_fraction)
Filters an MS/MS experiment according to fractions of the significance thresholds.
Definition: IDFilter.h:1010
bool methionine_cleavage_
Definition: IDFilter.h:401
Representation of a protein hit.
Definition: ProteinHit.h:53
Invalid value exception.
Definition: Exception.h:335
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:403
bool operator()(PeptideHit &p)
Definition: IDFilter.h:365
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:682
bool higher_score_better
Definition: IDFilter.h:100
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition: Types.h:127
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:363
Builds a map index of data that have a String index to find matches and return the objects...
Definition: IDFilter.h:263
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:231
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:757
HitType argument_type
Definition: IDFilter.h:227
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:986
HitType argument_type
Definition: IDFilter.h:265
HitType argument_type
Definition: IDFilter.h:125
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
String toUnmodifiedString() const
returns the peptide as string without any modifications
String identifier
Definition: FASTAFile.h:78
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:538
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:75
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
int Int
Signed integer type.
Definition: Types.h:102
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random Asp-Pro (D|P) cleavage.
EnzymaticDigestion & digestion_
Definition: IDFilter.h:351
PeptideHit argument_type
Definition: IDFilter.h:356
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:500