37 #include <OpenMS/config.h>
54 #include <unordered_set>
101 template <
class HitType>
111 higher_score_better(higher_score_better_)
116 if (higher_score_better)
118 return hit.getScore() >= score;
120 return hit.getScore() <= score;
129 template <
class HitType>
141 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
147 Size hit_rank = hit.getRank();
152 return hit_rank <= rank;
161 template <
class HitType>
177 if (found.
isEmpty())
return false;
178 if (value.
isEmpty())
return true;
179 return found == value;
184 template <
class HitType>
200 if (found.
isEmpty())
return false;
201 return double(found) <= value;
206 template <
class HitType>
214 target_decoy(
"target_decoy",
"decoy"), is_decoy(
"isDecoy",
"true")
222 return target_decoy(hit) || is_decoy(hit);
231 template <
class HitType>
239 accessions(accessions_)
246 if (accessions.count(it) > 0)
return true;
267 template <
class HitType>
275 accessions(accessions_)
282 if (accessions.count(it) > 0)
return true;
303 template <
class HitType,
class Entry>
312 for(
typename std::vector<Entry>::iterator rec_it = records.begin();
313 rec_it != records.end(); ++rec_it)
315 items[getKey(*rec_it)] = &(*rec_it);
328 return items.count(getHitKey(hit)) > 0;
338 if(!exists(evidence)){
339 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '"+ getHitKey(evidence) +
"'. peptide evidence accession not in data");
341 return *(items.find(getHitKey(evidence))->second);
357 struct HasMinPeptideLength;
363 struct HasLowMZError;
370 struct HasMatchingModification;
377 struct HasMatchingSequence;
380 struct HasNoEvidence;
398 digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
407 const auto& fun = [&](
const Int missed_cleavages)
410 bool max_filter = max_cleavages_ != disabledValue() ?
411 missed_cleavages > max_cleavages_ :
false;
412 bool min_filter = min_cleavages_ != disabledValue() ?
413 missed_cleavages < min_cleavages_ :
false;
414 return max_filter || min_filter;
423 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)),
447 bool ignore_missed_cleavages,
448 bool methionine_cleavage) :
449 accession_resolver_(entries),
450 digestion_(digestion),
451 ignore_missed_cleavages_(ignore_missed_cleavages),
452 methionine_cleavage_(methionine_cleavage)
463 if (accession_resolver_.
exists(evidence))
467 evidence.
getStart(), evidence.
getEnd() - evidence.
getStart(), ignore_missed_cleavages_, methionine_cleavage_);
473 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
478 <<
"' not found in fasta file!" << std::endl;
486 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this,peptides);
498 template <
class IdentificationType>
505 return id.getHits().empty();
532 template <
class Container,
class Predicate>
535 items.erase(std::remove_if(items.begin(), items.end(), pred),
540 template <
class Container,
class Predicate>
543 items.erase(std::remove_if(items.begin(), items.end(), std::not1(pred)),
548 template <
class Container,
class Predicate>
551 auto part = std::partition(items.begin(), items.end(), std::not1(pred));
552 std::move(part, items.end(), std::back_inserter(target));
553 items.erase(part, items.end());
557 template <
class IDContainer,
class Predicate>
560 for (
auto& item : items)
562 removeMatchingItems(item.getHits(), pred);
567 template <
class IDContainer,
class Predicate>
570 for (
auto& item : items)
572 keepMatchingItems(item.getHits(), pred);
576 template <
class MapType,
class Predicate>
579 for (
auto& feat : prot_and_pep_ids)
581 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
583 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
586 template <
class MapType,
class Predicate>
589 for (
auto& feat : prot_and_pep_ids)
591 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
593 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
596 template <
class MapType,
class Predicate>
599 for (
auto& feat : prot_and_pep_ids)
601 removeMatchingItems(feat.getPeptideIdentifications(), pred);
603 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
613 template <
class IdentificationType>
617 for (
typename std::vector<IdentificationType>::const_iterator id_it =
618 ids.begin(); id_it != ids.end(); ++id_it)
620 counter += id_it->getHits().size();
638 template <
class IdentificationType>
640 const std::vector<IdentificationType>& identifications,
641 bool assume_sorted,
typename IdentificationType::HitType& best_hit)
643 if (identifications.empty())
return false;
645 typename std::vector<IdentificationType>::const_iterator best_id_it =
646 identifications.end();
647 typename std::vector<typename IdentificationType::HitType>::const_iterator
650 for (
typename std::vector<IdentificationType>::const_iterator id_it =
651 identifications.begin(); id_it != identifications.end(); ++id_it)
653 if (id_it->getHits().empty())
continue;
655 if (best_id_it == identifications.end())
658 best_hit_it = id_it->getHits().begin();
660 else if (best_id_it->getScoreType() != id_it->getScoreType())
662 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
665 bool higher_better = best_id_it->isHigherScoreBetter();
666 for (
typename std::vector<typename IdentificationType::HitType>::
667 const_iterator hit_it = id_it->getHits().begin(); hit_it !=
668 id_it->getHits().end(); ++hit_it)
670 if ((higher_better && (hit_it->getScore() >
671 best_hit_it->getScore())) ||
672 (!higher_better && (hit_it->getScore() <
673 best_hit_it->getScore())))
675 best_hit_it = hit_it;
677 if (assume_sorted)
break;
681 if (best_id_it == identifications.end())
686 best_hit = *best_hit_it;
698 const std::vector<PeptideIdentification>& peptides,
699 std::set<String>& sequences,
bool ignore_mods =
false);
713 template<
class EvidenceFilter>
715 EvidenceFilter& filter,
716 std::vector<PeptideIdentification>& peptides)
718 for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
719 pep_it != peptides.end(); ++pep_it)
721 for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
722 hit_it != pep_it->getHits().end(); ++hit_it )
724 std::vector<PeptideEvidence> evidences;
725 remove_copy_if(hit_it->getPeptideEvidences().begin(),
726 hit_it->getPeptideEvidences().end(),
727 back_inserter(evidences),
729 hit_it->setPeptideEvidences(evidences);
741 template <
class IdentificationType>
744 for (
typename std::vector<IdentificationType>::iterator it = ids.begin();
745 it != ids.end(); ++it)
757 std::vector<ProteinIdentification>& proteins,
758 const std::vector<PeptideIdentification>& peptides);
762 const std::vector<PeptideIdentification>& peptides);
772 std::vector<PeptideIdentification>& peptides,
773 const std::vector<ProteinIdentification>& proteins,
774 bool remove_peptides_without_reference =
false);
785 bool remove_peptides_without_reference =
false);
797 bool remove_peptides_without_reference =
false);
808 std::vector<ProteinIdentification::ProteinGroup>& groups,
809 const std::vector<ProteinHit>& hits);
818 const std::vector<ProteinIdentification::ProteinGroup>& groups,
819 std::vector<ProteinHit>& hits);
827 template <
class IdentificationType>
830 struct HasNoHits<IdentificationType> empty_filter;
831 removeMatchingItems(ids, empty_filter);
839 template <
class IdentificationType>
841 double threshold_score)
843 for (
typename std::vector<IdentificationType>::iterator id_it =
844 ids.begin(); id_it != ids.end(); ++id_it)
846 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
847 threshold_score, id_it->isHigherScoreBetter());
848 keepMatchingItems(id_it->getHits(), score_filter);
858 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps,
859 double threshold_score, bool higher_better);
866 template <class IdentificationType>
867 static void filterHitsByScore(IdentificationType& id,
868 double threshold_score)
870 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
871 threshold_score, id->isHigherScoreBetter());
872 keepMatchingItems(id->getHits(), score_filter);
880 template <class IdentificationType>
881 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
883 for (
typename std::vector<IdentificationType>::iterator id_it =
884 ids.begin(); id_it != ids.end(); ++id_it)
887 if (n < id_it->getHits().size()) id_it->getHits().resize(n);
905 template <
class IdentificationType>
912 struct HasMaxRank<typename IdentificationType::HitType>
913 rank_filter(min_rank - 1);
914 for (typename std::vector<IdentificationType>::iterator id_it =
915 ids.begin(); id_it != ids.end(); ++id_it)
917 removeMatchingItems(id_it->getHits(), rank_filter);
920 if (max_rank >= min_rank)
922 struct HasMaxRank<typename IdentificationType::HitType>
923 rank_filter(max_rank);
924 for (typename std::vector<IdentificationType>::iterator id_it =
925 ids.begin(); id_it != ids.end(); ++id_it)
927 keepMatchingItems(id_it->getHits(), rank_filter);
939 template <
class IdentificationType>
944 for (typename std::vector<IdentificationType>::iterator id_it =
945 ids.begin(); id_it != ids.end(); ++id_it)
947 removeMatchingItems(id_it->getHits(), decoy_filter);
958 template <
class IdentificationType>
960 const std::set<String> accessions)
963 for (auto& id_it : ids)
965 removeMatchingItems(id_it.getHits(), acc_filter);
976 template <
class IdentificationType>
978 const std::set<String>& accessions)
981 for (auto& id_it : ids)
983 keepMatchingItems(id_it.getHits(), acc_filter);
1000 std::vector<PeptideIdentification>& peptides,
bool strict =
false);
1011 std::vector<PeptideIdentification>& peptides,
Size min_length,
1012 Size max_length = UINT_MAX);
1023 std::vector<PeptideIdentification>& peptides,
Int min_charge,
1028 double min_rt,
double max_rt);
1032 double min_mz,
double max_mz);
1046 std::vector<PeptideIdentification>& peptides,
double mass_error,
1056 template <
class Filter>
1059 std::vector<PeptideIdentification>& peptides);
1073 std::vector<PeptideIdentification>& peptides,
1074 const String& metavalue_key,
double threshold = 0.05);
1078 std::vector<PeptideIdentification>& peptides,
1079 const std::set<String>& modifications);
1082 std::vector<PeptideIdentification>& peptides,
1087 std::vector<PeptideIdentification>& peptides,
1088 const std::set<String>& modifications);
1098 std::vector<PeptideIdentification>& peptides,
1099 const std::vector<PeptideIdentification>& bad_peptides,
1100 bool ignore_mods =
false);
1110 std::vector<PeptideIdentification>& peptides,
1111 const std::vector<PeptideIdentification>& good_peptides,
1112 bool ignore_mods =
false);
1124 peptides,
bool seq_only =
false);
1134 double peptide_threshold_score,
1135 double protein_threshold_score)
1139 protein_threshold_score);
1145 exp_it != experiment.
end(); ++exp_it)
1147 filterHitsByScore(exp_it->getPeptideIdentifications(),
1148 peptide_threshold_score);
1149 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1150 updateProteinReferences(exp_it->getPeptideIdentifications(),
1161 std::vector<PeptideIdentification> all_peptides;
1165 exp_it != experiment.
end(); ++exp_it)
1167 std::vector<PeptideIdentification>& peptides =
1168 exp_it->getPeptideIdentifications();
1169 keepNBestHits(peptides, n);
1170 removeEmptyIdentifications(peptides);
1171 updateProteinReferences(peptides,
1173 all_peptides.insert(all_peptides.end(), peptides.begin(),
1186 template <
class MapType>
1191 for (
auto& feat : map)
1193 keepNBestHits(feat.getPeptideIdentifications(), n);
1195 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1198 template <
class MapType>
1202 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1206 static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1208 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1210 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1213 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1215 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1217 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1221 template <
class MapType>
1227 for (
const auto& idrun : prot_ids)
1232 for (
auto& feat : prot_and_pep_ids)
1234 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1237 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1240 template <
class MapType>
1243 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1245 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1250 static void annotateBestPerPeptidePerRun(
const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1253 for (
const auto&
id : prot_ids)
1257 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1265 for (
auto &pep : pep_ids)
1268 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1278 for (
auto& pep : pep_ids)
1280 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1294 auto pepIt = pep.
getHits().begin();
1295 auto pepItEnd = nr_best_spectrum == 0 || pep.
getHits().size() <= nr_best_spectrum ? pep.
getHits().end() : pep.
getHits().begin() + nr_best_spectrum;
1296 for (; pepIt != pepItEnd; ++pepIt)
1310 int lookup_charge = 0;
1311 if (!ignore_charges)
1317 auto it_inserted = best_pep.emplace(std::move(lookup_seq),
ChargeToPepHitP());
1318 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1320 PeptideHit* &p = it_inserted_chg.first->second;
1321 if (!it_inserted_chg.second)
1348 const std::vector<FASTAFile::FASTAEntry>& proteins)
1350 std::set<String> accessions;
1351 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1352 proteins.begin(); it != proteins.end(); ++it)
1354 accessions.insert(it->identifier);
1364 exp_it != experiment.
end(); ++exp_it)
1366 if (exp_it->getMSLevel() == 2)
1368 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1370 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1371 updateHitRanks(exp_it->getPeptideIdentifications());
1391 template <
typename PredicateType>
1396 if (cleanup_affected) id_data.
cleanup();
#define OPENMS_LOG_WARN
Macro to use when a warning (a piece of information that should be read by the user) is to be logged.
Definition: LogStream.h:460
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:90
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:384
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:65
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:650
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Invalid value exception.
Definition: Exception.h:329
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:389
Int max_cleavages_
Definition: IDFilter.h:393
EnzymaticDigestion & digestion_
Definition: IDFilter.h:391
PeptideHit argument_type
Definition: IDFilter.h:396
Int min_cleavages_
Definition: IDFilter.h:392
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:405
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:421
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:397
static Int disabledValue()
Definition: IDFilter.h:401
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:79
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:840
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:549
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1157
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1222
static void removeUnreferencedProteins(ProteinIdentification &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:91
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:577
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1199
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1250
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:533
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1263
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:90
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:828
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:568
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:940
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:541
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1288
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:587
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1206
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:714
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:906
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1187
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:558
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:597
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:614
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:639
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:742
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1241
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1346
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:959
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1213
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:977
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterObservationMatchesByFunctor(IdentificationData &id_data, PredicateType &&func, bool cleanup_affected=false)
Helper function for filtering observation matches (e.g. PSMs) in IdentificationData.
Definition: IDFilter.h:1392
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1275
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1133
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:89
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:95
ObservationMatches observation_matches_
Definition: IdentificationData.h:656
static void removeFromSetIf_(ContainerType &container, PredicateType predicate)
Remove elements from a set (or ordered multi_index_container) if they fulfill a predicate.
Definition: IdentificationData.h:837
void cleanup(bool require_observation_match=true, bool require_identified_sequence=true, bool require_parent_match=true, bool require_parent_group=false, bool require_match_group=false)
Clean up the data structure after filtering parts of it.
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:73
Iterator begin()
Definition: MSExperiment.h:150
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:104
Iterator end()
Definition: MSExperiment.h:160
Representation of a peptide evidence.
Definition: PeptideEvidence.h:51
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:57
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as a const reference
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:61
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:60
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:72
String identifier
Definition: FASTAFile.h:73
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:436
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:445
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:440
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:484
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:455
bool ignore_missed_cleavages_
Definition: IDFilter.h:442
PeptideEvidence argument_type
Definition: IDFilter.h:437
ProteaseDigestion & digestion_
Definition: IDFilter.h:441
bool methionine_cleavage_
Definition: IDFilter.h:443
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:305
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:307
GetMatchingItems()
Definition: IDFilter.h:319
ItemMap items
Definition: IDFilter.h:308
HitType argument_type
Definition: IDFilter.h:306
bool exists(const HitType &hit) const
Definition: IDFilter.h:326
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:336
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:310
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:321
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:331
Is this a decoy hit?
Definition: IDFilter.h:208
bool operator()(const HitType &hit) const
Definition: IDFilter.h:217
HitType argument_type
Definition: IDFilter.h:209
HasDecoyAnnotation()
Definition: IDFilter.h:213
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:103
bool operator()(const HitType &hit) const
Definition: IDFilter.h:114
double score
Definition: IDFilter.h:106
HitType argument_type
Definition: IDFilter.h:104
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:109
bool higher_score_better
Definition: IDFilter.h:107
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:233
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:238
HitType argument_type
Definition: IDFilter.h:234
const std::unordered_set< String > & accessions
Definition: IDFilter.h:236
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:242
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:256
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:251
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:269
HitType argument_type
Definition: IDFilter.h:270
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:278
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:292
const std::set< String > & accessions
Definition: IDFilter.h:272
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:274
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:287
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:131
bool operator()(const HitType &hit) const
Definition: IDFilter.h:145
HitType argument_type
Definition: IDFilter.h:132
Size rank
Definition: IDFilter.h:134
HasMaxRank(Size rank_)
Definition: IDFilter.h:136
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:500
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:503
IdentificationType argument_type
Definition: IDFilter.h:501
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:46