35 #ifndef OPENMS_FILTERING_ID_IDFILTER_H
36 #define OPENMS_FILTERING_ID_IDFILTER_H
38 #include <OpenMS/config.h>
94 template <
class HitType>
103 score(score), higher_score_better(higher_score_better)
108 if (higher_score_better)
110 return hit.getScore() >= score;
112 return hit.getScore() <= score;
121 template <
class HitType>
133 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
139 Size hit_rank = hit.getRank();
144 return hit_rank <= rank;
153 template <
class HitType>
162 key(key), value(value)
168 if (found.
isEmpty())
return false;
169 if (value.
isEmpty())
return true;
170 return found == value;
175 template <
class HitType>
184 key(key), value(value)
190 if (found.
isEmpty())
return false;
191 return double(found) <= value;
196 template <
class HitType>
204 target_decoy(
"target_decoy",
"decoy"), is_decoy(
"isDecoy",
"true")
212 return target_decoy(hit) || is_decoy(hit);
221 template <
class HitType>
229 accessions(accessions)
235 for (std::set<String>::iterator it = present_accessions.begin();
236 it != present_accessions.end(); ++it)
238 if (accessions.count(*it) > 0)
return true;
259 template <
class HitType,
class Entry>
268 for(
typename std::vector<Entry>::iterator rec_it = records.begin();
269 rec_it != records.end(); ++rec_it)
271 items[getKey(*rec_it)] = &(*rec_it);
284 return items.count(getHitKey(hit)) > 0;
294 if(!exists(evidence)){
295 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '"+ getHitKey(evidence) +
"'. peptide evidence accession not in data");
297 return *(items.find(getHitKey(evidence))->second);
314 struct HasMinPeptideLength;
320 struct HasLowMZError;
327 struct HasMatchingModification;
334 struct HasMatchingSequence;
337 struct HasNoEvidence;
356 bool ignore_missed_cleavages,
357 bool methionine_cleavage) :
358 accession_resolver_(entries),
359 digestion_(digestion),
360 ignore_missed_cleavages_(ignore_missed_cleavages),
361 methionine_cleavage_(methionine_cleavage)
372 if(accession_resolver_.
exists(evidence))
376 evidence.
getStart(), evidence.
getEnd() - evidence.
getStart(), methionine_cleavage_, ignore_missed_cleavages_);
382 LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
386 LOG_WARN <<
"Peptide accession '" <<
388 "' not found in fasta file!" << std::endl;
396 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this,peptides);
409 template <
class IdentificationType>
416 return id.getHits().empty();
443 template <
class Container,
class Predicate>
446 items.erase(std::remove_if(items.begin(), items.end(), pred),
451 template <
class Container,
class Predicate>
454 items.erase(std::remove_if(items.begin(), items.end(), std::not1(pred)),
465 template <
class IdentificationType>
469 for (
typename std::vector<IdentificationType>::const_iterator id_it =
470 ids.begin(); id_it != ids.end(); ++id_it)
472 counter += id_it->getHits().size();
489 template <
class IdentificationType>
491 const std::vector<IdentificationType>& identifications,
492 bool assume_sorted,
typename IdentificationType::HitType& best_hit)
494 if (identifications.empty())
return false;
496 typename std::vector<IdentificationType>::const_iterator best_id_it =
497 identifications.end();
498 typename std::vector<typename IdentificationType::HitType>::const_iterator
501 for (
typename std::vector<IdentificationType>::const_iterator id_it =
502 identifications.begin(); id_it != identifications.end(); ++id_it)
504 if (id_it->getHits().empty())
continue;
506 if (best_id_it == identifications.end())
509 best_hit_it = id_it->getHits().begin();
511 else if (best_id_it->getScoreType() != id_it->getScoreType())
513 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
516 bool higher_better = best_id_it->isHigherScoreBetter();
517 for (
typename std::vector<typename IdentificationType::HitType>::
518 const_iterator hit_it = id_it->getHits().begin(); hit_it !=
519 id_it->getHits().end(); ++hit_it)
521 if ((higher_better && (hit_it->getScore() >
522 best_hit_it->getScore())) ||
523 (!higher_better && (hit_it->getScore() <
524 best_hit_it->getScore())))
526 best_hit_it = hit_it;
528 if (assume_sorted)
break;
532 if (best_id_it == identifications.end())
537 best_hit = *best_hit_it;
548 static void extractPeptideSequences(
549 const std::vector<PeptideIdentification>& peptides,
550 std::set<String>& sequences,
bool ignore_mods =
false);
558 template<
class EvidenceFilter>
560 EvidenceFilter& filter,
561 std::vector<PeptideIdentification>& peptides)
563 for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
564 pep_it != peptides.end(); ++pep_it)
566 for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
567 hit_it != pep_it->getHits().end(); ++hit_it )
569 std::vector<PeptideEvidence> evidences;
570 remove_copy_if(hit_it->getPeptideEvidences().begin(),
571 hit_it->getPeptideEvidences().end(),
572 back_inserter(evidences),
574 hit_it->setPeptideEvidences(evidences);
586 template <
class IdentificationType>
589 for (
typename std::vector<IdentificationType>::iterator it = ids.begin();
590 it != ids.end(); ++it)
597 static void removeUnreferencedProteins(
598 std::vector<ProteinIdentification>& proteins,
599 const std::vector<PeptideIdentification>& peptides);
608 static void updateProteinReferences(
609 std::vector<PeptideIdentification>& peptides,
610 const std::vector<ProteinIdentification>& proteins,
611 bool remove_peptides_without_reference =
false);
621 static bool updateProteinGroups(
622 std::vector<ProteinIdentification::ProteinGroup>& groups,
623 const std::vector<ProteinHit>& hits);
632 template <
class IdentificationType>
635 struct HasNoHits<IdentificationType> empty_filter;
636 removeMatchingItems(ids, empty_filter);
644 template <
class IdentificationType>
646 double threshold_score)
648 for (
typename std::vector<IdentificationType>::iterator id_it =
649 ids.begin(); id_it != ids.end(); ++id_it)
651 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
652 threshold_score, id_it->isHigherScoreBetter());
653 keepMatchingItems(id_it->getHits(), score_filter);
662 template <class IdentificationType>
663 static void filterHitsBySignificance(std::vector<IdentificationType>& ids,
664 double threshold_fraction = 1.0)
666 for (
typename std::vector<IdentificationType>::iterator id_it =
667 ids.begin(); id_it != ids.end(); ++id_it)
669 double threshold_score = (threshold_fraction *
670 id_it->getSignificanceThreshold());
671 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
672 threshold_score, id_it->isHigherScoreBetter());
673 keepMatchingItems(id_it->getHits(), score_filter);
682 template <class IdentificationType>
683 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
685 for (
typename std::vector<IdentificationType>::iterator id_it =
686 ids.begin(); id_it != ids.end(); ++id_it)
689 if (n < id_it->getHits().size()) id_it->getHits().resize(n);
707 template <
class IdentificationType>
714 struct HasMaxRank<typename IdentificationType::HitType>
715 rank_filter(min_rank - 1);
716 for (typename std::vector<IdentificationType>::iterator id_it =
717 ids.begin(); id_it != ids.end(); ++id_it)
719 removeMatchingItems(id_it->getHits(), rank_filter);
722 if (max_rank >= min_rank)
724 struct HasMaxRank<typename IdentificationType::HitType>
725 rank_filter(max_rank);
726 for (typename std::vector<IdentificationType>::iterator id_it =
727 ids.begin(); id_it != ids.end(); ++id_it)
729 keepMatchingItems(id_it->getHits(), rank_filter);
741 template <
class IdentificationType>
746 for (typename std::vector<IdentificationType>::iterator id_it =
747 ids.begin(); id_it != ids.end(); ++id_it)
749 removeMatchingItems(id_it->getHits(), decoy_filter);
760 template <
class IdentificationType>
762 const std::set<String> accessions)
765 acc_filter(accessions);
766 for (typename std::vector<IdentificationType>::iterator id_it =
767 ids.begin(); id_it != ids.end(); ++id_it)
769 removeMatchingItems(id_it->getHits(), acc_filter);
780 template <
class IdentificationType>
782 const std::set<String> accessions)
785 acc_filter(accessions);
786 for (typename std::vector<IdentificationType>::iterator id_it =
787 ids.begin(); id_it != ids.end(); ++id_it)
789 keepMatchingItems(id_it->getHits(), acc_filter);
807 static void keepBestPeptideHits(
808 std::vector<PeptideIdentification>& peptides,
bool strict =
false);
818 static void filterPeptidesByLength(
819 std::vector<PeptideIdentification>& peptides,
Size min_length,
820 Size max_length = UINT_MAX);
830 static void filterPeptidesByCharge(
831 std::vector<PeptideIdentification>& peptides,
Int min_charge,
835 static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
836 double min_rt,
double max_rt);
839 static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
840 double min_mz,
double max_mz);
853 static void filterPeptidesByMZError(
854 std::vector<PeptideIdentification>& peptides,
double mass_error,
864 template <
class Filter>
865 static void filterPeptideEvidences(
867 std::vector<PeptideIdentification>& peptides);
880 static void filterPeptidesByRTPredictPValue(
881 std::vector<PeptideIdentification>& peptides,
882 const String& metavalue_key,
double threshold = 0.05);
885 static void removePeptidesWithMatchingModifications(
886 std::vector<PeptideIdentification>& peptides,
887 const std::set<String>& modifications);
890 static void keepPeptidesWithMatchingModifications(
891 std::vector<PeptideIdentification>& peptides,
892 const std::set<String>& modifications);
901 static void removePeptidesWithMatchingSequences(
902 std::vector<PeptideIdentification>& peptides,
903 const std::vector<PeptideIdentification>& bad_peptides,
904 bool ignore_mods =
false);
913 static void keepPeptidesWithMatchingSequences(
914 std::vector<PeptideIdentification>& peptides,
915 const std::vector<PeptideIdentification>& good_peptides,
916 bool ignore_mods =
false);
919 static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
927 static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
928 peptides,
bool seq_only =
false);
938 double peptide_threshold_score,
939 double protein_threshold_score)
943 protein_threshold_score);
949 exp_it != experiment.
end(); ++exp_it)
951 filterHitsByScore(exp_it->getPeptideIdentifications(),
952 peptide_threshold_score);
953 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
954 updateProteinReferences(exp_it->getPeptideIdentifications(),
962 double peptide_threshold_fraction,
963 double protein_threshold_fraction)
967 protein_threshold_fraction);
973 exp_it != experiment.
end(); ++exp_it)
975 filterHitsBySignificance(exp_it->getPeptideIdentifications(),
976 peptide_threshold_fraction);
977 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
978 updateProteinReferences(exp_it->getPeptideIdentifications(),
989 std::vector<PeptideIdentification> all_peptides;
993 exp_it != experiment.
end(); ++exp_it)
995 std::vector<PeptideIdentification>& peptides =
996 exp_it->getPeptideIdentifications();
997 keepNBestHits(peptides, n);
998 removeEmptyIdentifications(peptides);
999 updateProteinReferences(peptides,
1001 all_peptides.insert(all_peptides.end(), peptides.begin(),
1012 const std::vector<FASTAFile::FASTAEntry>& proteins)
1014 std::set<String> accessions;
1015 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1016 proteins.begin(); it != proteins.end(); ++it)
1018 accessions.insert(it->identifier);
1028 exp_it != experiment.
end(); ++exp_it)
1030 if (exp_it->getMSLevel() == 2)
1032 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1034 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1035 updateHitRanks(exp_it->getPeptideIdentifications());
1047 #endif // OPENMS_FILTERING_ID_IDFILTER_H
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:122
bool ignore_missed_cleavages_
Definition: IDFilter.h:351
ItemMap items
Definition: IDFilter.h:264
const String & getAccession() const
returns the accession of the protein
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:263
bool exists(const HitType &hit) const
Definition: IDFilter.h:282
A more convenient string class.
Definition: String.h:57
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:761
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:466
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:248
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, EnzymaticDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:354
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:116
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:243
GetMatchingItems()
Definition: IDFilter.h:275
HasMatchingAccession(const std::set< String > &accessions)
Definition: IDFilter.h:228
double score
Definition: IDFilter.h:99
Is this a decoy hit?
Definition: IDFilter.h:197
PeptideEvidence argument_type
Definition: IDFilter.h:346
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:410
bool operator()(const HitType &hit) const
Definition: IDFilter.h:207
bool operator()(const HitType &hit) const
Definition: IDFilter.h:106
Iterator begin()
Definition: MSExperiment.h:162
Is the peptide evidence a digestion product of some protein?
Definition: IDFilter.h:344
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:232
const std::set< String > & accessions
Definition: IDFilter.h:226
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:781
Class for the enzymatic digestion of proteins.
Definition: EnzymaticDigestion.h:61
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-terminus)...
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:292
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:742
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:645
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:414
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:57
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
Iterator end()
Definition: MSExperiment.h:172
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:394
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1010
EnzymaticDigestion & digestion_
Definition: IDFilter.h:350
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:559
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:587
HasMaxRank(Size rank)
Definition: IDFilter.h:128
bool operator()(const HitType &hit) const
Definition: IDFilter.h:137
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:287
A method or algorithm argument contains illegal values.
Definition: Exception.h:649
Size rank
Definition: IDFilter.h:126
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:364
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:444
bool isValidProduct(const AASequence &protein, Size pep_pos, Size pep_length, bool methionine_cleavage=false, bool ignore_missed_cleavages=true) const
Returns true if peptide at position pep_pos with length pep_length within protein protein was generated...
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:266
Representation of a peptide hit.
Definition: PeptideHit.h:55
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:349
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned...
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:985
HasGoodScore(double score, bool higher_score_better)
Definition: IDFilter.h:102
IdentificationType argument_type
Definition: IDFilter.h:412
HasDecoyAnnotation()
Definition: IDFilter.h:203
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:95
Representation of a peptide evidence.
Definition: PeptideEvidence.h:51
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:222
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:277
HitType argument_type
Definition: IDFilter.h:199
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:348
HitType argument_type
Definition: IDFilter.h:97
static void filterHitsBySignificance(PeakMap &experiment, double peptide_threshold_fraction, double protein_threshold_fraction)
Filters an MS/MS experiment according to fractions of the significance thresholds.
Definition: IDFilter.h:961
bool methionine_cleavage_
Definition: IDFilter.h:352
Representation of a protein hit.
Definition: ProteinHit.h:54
Invalid value exception.
Definition: Exception.h:336
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:633
bool higher_score_better
Definition: IDFilter.h:100
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:364
Builds a map index of data that have a String index to find matches and return the objects...
Definition: IDFilter.h:260
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:708
HitType argument_type
Definition: IDFilter.h:224
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:937
HitType argument_type
Definition: IDFilter.h:262
HitType argument_type
Definition: IDFilter.h:124
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:74
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
String identifier
Definition: FASTAFile.h:76
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:490
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:75
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
int Int
Signed integer type.
Definition: Types.h:103
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:452