47 #include <OpenMS/config.h>
53 #include <unordered_set>
100 template<
class HitType>
107 HasGoodScore(
double score_,
bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
113 if (higher_score_better)
115 return hit.getScore() >= score;
117 return hit.getScore() <= score;
126 template<
class HitType>
136 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
142 Size hit_rank = hit.getRank();
147 return hit_rank <= rank;
156 template<
class HitType>
174 return found == value;
179 template<
class HitType>
195 return double(found) <= value;
200 template<
class HitType>
215 return target_decoy(hit) || is_decoy(hit);
224 template<
class HitType>
238 if (accessions.count(it) > 0)
260 template<
class HitType>
274 if (accessions.count(it) > 0)
296 template<
class HitType,
class Entry>
304 for (
typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
306 items[getKey(*rec_it)] = &(*rec_it);
321 return items.count(getHitKey(hit)) > 0;
331 if (!exists(evidence))
333 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '" + getHitKey(evidence) +
"'. peptide evidence accession not in data");
335 return *(items.find(getHitKey(evidence))->second);
350 struct HasMinPeptideLength;
356 struct HasLowMZError;
363 struct HasMatchingModification;
370 struct HasMatchingSequence;
373 struct HasNoEvidence;
403 const auto& fun = [&](
const Int missed_cleavages) {
404 bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ :
false;
405 bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ :
false;
406 return max_filter || min_filter;
413 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)), hits.end());
433 accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
445 if (accession_resolver_.
exists(evidence))
448 ignore_missed_cleavages_, methionine_cleavage_);
454 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
466 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this, peptides);
477 template<
class IdentificationType>
483 return id.getHits().empty();
510 template<
class Container,
class Predicate>
513 items.erase(std::remove_if(items.begin(), items.end(), pred), items.end());
517 template<
class Container,
class Predicate>
520 items.erase(std::remove_if(items.begin(), items.end(), std::not_fn(pred)), items.end());
524 template<
class Container,
class Predicate>
527 auto part = std::partition(items.begin(), items.end(), std::not_fn(pred));
528 std::move(part, items.end(), std::back_inserter(target));
529 items.erase(part, items.end());
533 template<
class IDContainer,
class Predicate>
536 for (
auto& item : items)
538 removeMatchingItems(item.getHits(), pred);
543 template<
class IDContainer,
class Predicate>
546 for (
auto& item : items)
548 keepMatchingItems(item.getHits(), pred);
552 template<
class MapType,
class Predicate>
555 for (
auto& feat : prot_and_pep_ids)
557 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
559 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
562 template<
class MapType,
class Predicate>
565 for (
auto& feat : prot_and_pep_ids)
567 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
569 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
572 template<
class MapType,
class Predicate>
575 for (
auto& feat : prot_and_pep_ids)
577 removeMatchingItems(feat.getPeptideIdentifications(), pred);
579 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
589 template<
class IdentificationType>
593 for (
typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
595 counter += id_it->getHits().size();
613 template<
class IdentificationType>
614 static bool getBestHit(
const std::vector<IdentificationType>& identifications,
bool assume_sorted,
typename IdentificationType::HitType& best_hit)
616 if (identifications.empty())
619 typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
620 typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
622 for (
typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
624 if (id_it->getHits().empty())
627 if (best_id_it == identifications.end())
630 best_hit_it = id_it->getHits().begin();
632 else if (best_id_it->getScoreType() != id_it->getScoreType())
634 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
637 bool higher_better = best_id_it->isHigherScoreBetter();
638 for (
typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
640 if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
642 best_hit_it = hit_it;
649 if (best_id_it == identifications.end())
654 best_hit = *best_hit_it;
665 static void extractPeptideSequences(
const std::vector<PeptideIdentification>& peptides, std::set<String>& sequences,
bool ignore_mods =
false);
679 template<
class EvidenceFilter>
682 for (std::vector<PeptideIdentification>::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
684 for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
686 std::vector<PeptideEvidence> evidences;
687 remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
688 hit_it->setPeptideEvidences(evidences);
700 template<
class IdentificationType>
703 for (
typename std::vector<IdentificationType>::iterator it = ids.begin(); it != ids.end(); ++it)
725 static void updateProteinReferences(std::vector<PeptideIdentification>& peptides,
const std::vector<ProteinIdentification>& proteins,
bool remove_peptides_without_reference =
false);
753 static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups,
const std::vector<ProteinHit>& hits);
761 static void removeUngroupedProteins(
const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
769 template<
class IdentificationType>
772 struct HasNoHits<IdentificationType> empty_filter;
773 removeMatchingItems(ids, empty_filter);
781 template<
class IdentificationType>
784 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
786 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
787 keepMatchingItems(id_it->getHits(), score_filter);
797 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
804 template<class IdentificationType>
805 static void filterHitsByScore(IdentificationType& id, double threshold_score)
807 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
808 keepMatchingItems(id.getHits(), score_filter);
816 template<class IdentificationType>
817 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
819 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
822 if (n < id_it->getHits().size())
823 id_it->getHits().resize(n);
841 template<
class IdentificationType>
847 struct HasMaxRank<typename IdentificationType::HitType> rank_filter(min_rank - 1);
848 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
850 removeMatchingItems(id_it->getHits(), rank_filter);
853 if (max_rank >= min_rank)
855 struct HasMaxRank<typename IdentificationType::HitType> rank_filter(max_rank);
856 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
858 keepMatchingItems(id_it->getHits(), rank_filter);
870 template<
class IdentificationType>
874 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
876 removeMatchingItems(id_it->getHits(), decoy_filter);
887 template<
class IdentificationType>
891 for (auto& id_it : ids)
893 removeMatchingItems(id_it.getHits(), acc_filter);
904 template<
class IdentificationType>
908 for (auto& id_it : ids)
910 keepMatchingItems(id_it.getHits(), acc_filter);
949 static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
double min_rt,
double max_rt);
952 static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
double min_mz,
double max_mz);
974 template<
class Filter>
1044 filterHitsByScore(exp_it->getPeptideIdentifications(), peptide_threshold_score);
1045 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1057 std::vector<PeptideIdentification> all_peptides;
1062 std::vector<PeptideIdentification>& peptides = exp_it->getPeptideIdentifications();
1063 keepNBestHits(peptides, n);
1064 removeEmptyIdentifications(peptides);
1066 all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end());
1077 template<
class MapType>
1082 for (
auto& feat : map)
1084 keepNBestHits(feat.getPeptideIdentifications(), n);
1086 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1089 template<
class MapType>
1093 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1097 static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1099 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1101 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1104 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1106 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1108 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1112 template<
class MapType>
1118 for (
const auto& idrun : prot_ids)
1123 for (
auto& feat : prot_and_pep_ids)
1125 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1128 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1131 template<
class MapType>
1134 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1136 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1141 static void annotateBestPerPeptidePerRun(
const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
1142 Size nr_best_spectrum)
1145 for (
const auto&
id : prot_ids)
1149 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1156 Size nr_best_spectrum)
1158 for (
auto& pep : pep_ids)
1161 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1171 for (
auto& pep : pep_ids)
1173 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1187 auto pepIt = pep.
getHits().begin();
1188 auto pepItEnd = nr_best_spectrum == 0 || pep.
getHits().size() <= nr_best_spectrum ? pep.
getHits().end() : pep.
getHits().begin() + nr_best_spectrum;
1189 for (; pepIt != pepItEnd; ++pepIt)
1203 int lookup_charge = 0;
1204 if (!ignore_charges)
1210 auto it_inserted = best_pep.emplace(std::move(lookup_seq),
ChargeToPepHitP());
1211 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1213 PeptideHit*& p = it_inserted_chg.first->second;
1214 if (!it_inserted_chg.second)
1238 std::set<String> accessions;
1239 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it = proteins.begin(); it != proteins.end(); ++it)
1241 accessions.insert(it->identifier);
1251 if (exp_it->getMSLevel() == 2)
1253 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(), accessions);
1254 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1255 updateHitRanks(exp_it->getPeptideIdentifications());
#define OPENMS_LOG_WARN
Macro to be used when a warning (a piece of information which should be read by the user) should be logged.
Definition: LogStream.h:470
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:92
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:388
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:64
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:650
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Invalid value exception.
Definition: Exception.h:329
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:382
Int max_cleavages_
Definition: IDFilter.h:386
EnzymaticDigestion & digestion_
Definition: IDFilter.h:384
PeptideHit argument_type
Definition: IDFilter.h:389
Int min_cleavages_
Definition: IDFilter.h:385
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:401
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:411
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:390
static Int disabledValue()
Definition: IDFilter.h:394
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:79
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:782
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:525
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1053
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1113
static void removeUnreferencedProteins(ProteinIdentification &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:90
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:553
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1090
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1141
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:511
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1155
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:89
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:770
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:544
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:871
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:518
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1181
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:563
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1097
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:680
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:842
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1078
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:534
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:573
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:590
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:614
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:701
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1132
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1236
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:888
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1104
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:905
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1168
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1034
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:88
Definition: IdentificationData.h:113
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
Iterator begin()
Definition: MSExperiment.h:182
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:103
Iterator end()
Definition: MSExperiment.h:192
Representation of a peptide evidence.
Definition: PeptideEvidence.h:51
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:57
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:60
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:60
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:76
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:72
String identifier
Definition: FASTAFile.h:73
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:423
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:432
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:427
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:464
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:437
bool ignore_missed_cleavages_
Definition: IDFilter.h:429
PeptideEvidence argument_type
Definition: IDFilter.h:424
ProteaseDigestion & digestion_
Definition: IDFilter.h:428
bool methionine_cleavage_
Definition: IDFilter.h:430
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:297
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:299
GetMatchingItems()
Definition: IDFilter.h:310
ItemMap items
Definition: IDFilter.h:300
HitType argument_type
Definition: IDFilter.h:298
bool exists(const HitType &hit) const
Definition: IDFilter.h:319
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:329
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:302
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:314
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:324
Is this a decoy hit?
Definition: IDFilter.h:201
bool operator()(const HitType &hit) const
Definition: IDFilter.h:210
HitType argument_type
Definition: IDFilter.h:202
HasDecoyAnnotation()
Definition: IDFilter.h:206
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:101
bool operator()(const HitType &hit) const
Definition: IDFilter.h:111
double score
Definition: IDFilter.h:104
HitType argument_type
Definition: IDFilter.h:102
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:107
bool higher_score_better
Definition: IDFilter.h:105
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:225
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:230
HitType argument_type
Definition: IDFilter.h:226
const std::unordered_set< String > & accessions
Definition: IDFilter.h:228
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:234
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:249
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:244
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:261
HitType argument_type
Definition: IDFilter.h:262
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:270
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:285
const std::set< String > & accessions
Definition: IDFilter.h:264
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:266
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:280
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:127
bool operator()(const HitType &hit) const
Definition: IDFilter.h:140
HitType argument_type
Definition: IDFilter.h:128
Size rank
Definition: IDFilter.h:130
HasMaxRank(Size rank_)
Definition: IDFilter.h:132
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:478
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:481
IdentificationType argument_type
Definition: IDFilter.h:479
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:46