37 #include <OpenMS/config.h>
53 #include <unordered_set>
100 template <
class HitType>
110 higher_score_better(higher_score_better_)
115 if (higher_score_better)
117 return hit.getScore() >= score;
119 return hit.getScore() <= score;
128 template <
class HitType>
140 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
146 Size hit_rank = hit.getRank();
151 return hit_rank <= rank;
160 template <
class HitType>
176 if (found.
isEmpty())
return false;
177 if (value.
isEmpty())
return true;
178 return found == value;
183 template <
class HitType>
199 if (found.
isEmpty())
return false;
200 return double(found) <= value;
205 template <
class HitType>
213 target_decoy(
"target_decoy",
"decoy"), is_decoy(
"isDecoy",
"true")
221 return target_decoy(hit) || is_decoy(hit);
230 template <
class HitType>
238 accessions(accessions_)
245 if (accessions.count(it) > 0)
return true;
266 template <
class HitType>
274 accessions(accessions_)
281 if (accessions.count(it) > 0)
return true;
302 template <
class HitType,
class Entry>
311 for(
typename std::vector<Entry>::iterator rec_it = records.begin();
312 rec_it != records.end(); ++rec_it)
314 items[getKey(*rec_it)] = &(*rec_it);
327 return items.count(getHitKey(hit)) > 0;
337 if(!exists(evidence)){
338 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '"+ getHitKey(evidence) +
"'. peptide evidence accession not in data");
340 return *(items.find(getHitKey(evidence))->second);
356 struct HasMinPeptideLength;
362 struct HasLowMZError;
369 struct HasMatchingModification;
376 struct HasMatchingSequence;
379 struct HasNoEvidence;
397 digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
406 const auto& fun = [&](
const Int missed_cleavages)
409 bool max_filter = max_cleavages_ != disabledValue() ?
410 missed_cleavages > max_cleavages_ :
false;
411 bool min_filter = min_cleavages_ != disabledValue() ?
412 missed_cleavages < min_cleavages_ :
false;
413 return max_filter || min_filter;
422 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)),
446 bool ignore_missed_cleavages,
447 bool methionine_cleavage) :
448 accession_resolver_(entries),
449 digestion_(digestion),
450 ignore_missed_cleavages_(ignore_missed_cleavages),
451 methionine_cleavage_(methionine_cleavage)
462 if (accession_resolver_.
exists(evidence))
466 evidence.
getStart(), evidence.
getEnd() - evidence.
getStart(), ignore_missed_cleavages_, methionine_cleavage_);
472 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
477 <<
"' not found in fasta file!" << std::endl;
485 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this,peptides);
497 template <
class IdentificationType>
504 return id.getHits().empty();
531 template <
class Container,
class Predicate>
534 items.erase(std::remove_if(items.begin(), items.end(), pred),
/**
   @brief Keep items that satisfy a condition in a container (e.g. vector), removing all others

   The relative order of the retained items is preserved.

   @param items Container that is filtered in place
   @param pred Unary predicate; items for which it returns false are erased
*/
template <class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  // Negate with a lambda instead of std::not1: std::not1 is deprecated in
  // C++17, removed in C++20 and only accepts predicates that expose an
  // 'argument_type' typedef. The lambda accepts any callable (incl. lambdas).
  items.erase(std::remove_if(items.begin(), items.end(),
                             [&pred](const typename Container::value_type& item)
                             {
                               return !pred(item);
                             }),
              items.end());
}
/**
   @brief Move items that satisfy a condition to a container (e.g. vector)

   Matching items are appended to @p target and erased from @p items.
   std::partition is used, so the relative order of the items (in both
   containers) is not guaranteed to be preserved.

   @param items Container to take matching items from (modified in place)
   @param pred Unary predicate selecting the items to move
   @param target Container the matching items are appended to; must not be the same object as @p items
*/
template <class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  // Negate with a lambda instead of std::not1: std::not1 is deprecated in
  // C++17, removed in C++20 and only accepts predicates that expose an
  // 'argument_type' typedef.
  auto part = std::partition(items.begin(), items.end(),
                             [&pred](const typename Container::value_type& item)
                             {
                               return !pred(item);
                             });
  // everything from 'part' onwards satisfies 'pred'
  std::move(part, items.end(), std::back_inserter(target));
  items.erase(part, items.end());
}
556 template <
class IDContainer,
class Predicate>
559 for (
auto& item : items)
561 removeMatchingItems(item.getHits(), pred);
566 template <
class IDContainer,
class Predicate>
569 for (
auto& item : items)
571 keepMatchingItems(item.getHits(), pred);
575 template <
class MapType,
class Predicate>
578 for (
auto& feat : prot_and_pep_ids)
580 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
582 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
585 template <
class MapType,
class Predicate>
588 for (
auto& feat : prot_and_pep_ids)
590 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
592 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
595 template <
class MapType,
class Predicate>
598 for (
auto& feat : prot_and_pep_ids)
600 removeMatchingItems(feat.getPeptideIdentifications(), pred);
602 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
612 template <
class IdentificationType>
616 for (
typename std::vector<IdentificationType>::const_iterator id_it =
617 ids.begin(); id_it != ids.end(); ++id_it)
619 counter += id_it->getHits().size();
637 template <
class IdentificationType>
639 const std::vector<IdentificationType>& identifications,
640 bool assume_sorted,
typename IdentificationType::HitType& best_hit)
642 if (identifications.empty())
return false;
644 typename std::vector<IdentificationType>::const_iterator best_id_it =
645 identifications.end();
646 typename std::vector<typename IdentificationType::HitType>::const_iterator
649 for (
typename std::vector<IdentificationType>::const_iterator id_it =
650 identifications.begin(); id_it != identifications.end(); ++id_it)
652 if (id_it->getHits().empty())
continue;
654 if (best_id_it == identifications.end())
657 best_hit_it = id_it->getHits().begin();
659 else if (best_id_it->getScoreType() != id_it->getScoreType())
661 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
664 bool higher_better = best_id_it->isHigherScoreBetter();
665 for (
typename std::vector<typename IdentificationType::HitType>::
666 const_iterator hit_it = id_it->getHits().begin(); hit_it !=
667 id_it->getHits().end(); ++hit_it)
669 if ((higher_better && (hit_it->getScore() >
670 best_hit_it->getScore())) ||
671 (!higher_better && (hit_it->getScore() <
672 best_hit_it->getScore())))
674 best_hit_it = hit_it;
676 if (assume_sorted)
break;
680 if (best_id_it == identifications.end())
685 best_hit = *best_hit_it;
697 const std::vector<PeptideIdentification>& peptides,
698 std::set<String>& sequences,
bool ignore_mods =
false);
712 template<
class EvidenceFilter>
714 EvidenceFilter& filter,
715 std::vector<PeptideIdentification>& peptides)
717 for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
718 pep_it != peptides.end(); ++pep_it)
720 for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
721 hit_it != pep_it->getHits().end(); ++hit_it )
723 std::vector<PeptideEvidence> evidences;
724 remove_copy_if(hit_it->getPeptideEvidences().begin(),
725 hit_it->getPeptideEvidences().end(),
726 back_inserter(evidences),
728 hit_it->setPeptideEvidences(evidences);
740 template <
class IdentificationType>
743 for (
typename std::vector<IdentificationType>::iterator it = ids.begin();
744 it != ids.end(); ++it)
756 std::vector<ProteinIdentification>& proteins,
757 const std::vector<PeptideIdentification>& peptides);
767 std::vector<PeptideIdentification>& peptides,
768 const std::vector<ProteinIdentification>& proteins,
769 bool remove_peptides_without_reference =
false);
780 bool remove_peptides_without_reference =
false);
791 std::vector<ProteinIdentification::ProteinGroup>& groups,
792 const std::vector<ProteinHit>& hits);
801 const std::vector<ProteinIdentification::ProteinGroup>& groups,
802 std::vector<ProteinHit>& hits);
810 template <
class IdentificationType>
813 struct HasNoHits<IdentificationType> empty_filter;
814 removeMatchingItems(ids, empty_filter);
822 template <
class IdentificationType>
824 double threshold_score)
826 for (
typename std::vector<IdentificationType>::iterator id_it =
827 ids.begin(); id_it != ids.end(); ++id_it)
829 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
830 threshold_score, id_it->isHigherScoreBetter());
831 keepMatchingItems(id_it->getHits(), score_filter);
841 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps,
842 double threshold_score, bool higher_better);
849 template <class IdentificationType>
850 static void filterHitsByScore(IdentificationType& id,
851 double threshold_score)
853 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
854 threshold_score, id->isHigherScoreBetter());
855 keepMatchingItems(id->getHits(), score_filter);
863 template <class IdentificationType>
864 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
866 for (
typename std::vector<IdentificationType>::iterator id_it =
867 ids.begin(); id_it != ids.end(); ++id_it)
870 if (n < id_it->getHits().size()) id_it->getHits().resize(n);
888 template <
class IdentificationType>
895 struct HasMaxRank<typename IdentificationType::HitType>
896 rank_filter(min_rank - 1);
897 for (typename std::vector<IdentificationType>::iterator id_it =
898 ids.begin(); id_it != ids.end(); ++id_it)
900 removeMatchingItems(id_it->getHits(), rank_filter);
903 if (max_rank >= min_rank)
905 struct HasMaxRank<typename IdentificationType::HitType>
906 rank_filter(max_rank);
907 for (typename std::vector<IdentificationType>::iterator id_it =
908 ids.begin(); id_it != ids.end(); ++id_it)
910 keepMatchingItems(id_it->getHits(), rank_filter);
922 template <
class IdentificationType>
927 for (typename std::vector<IdentificationType>::iterator id_it =
928 ids.begin(); id_it != ids.end(); ++id_it)
930 removeMatchingItems(id_it->getHits(), decoy_filter);
941 template <
class IdentificationType>
943 const std::set<String> accessions)
946 for (auto& id_it : ids)
948 removeMatchingItems(id_it.getHits(), acc_filter);
959 template <
class IdentificationType>
961 const std::set<String>& accessions)
964 for (auto& id_it : ids)
966 keepMatchingItems(id_it.getHits(), acc_filter);
983 std::vector<PeptideIdentification>& peptides,
bool strict =
false);
994 std::vector<PeptideIdentification>& peptides,
Size min_length,
995 Size max_length = UINT_MAX);
1006 std::vector<PeptideIdentification>& peptides,
Int min_charge,
1011 double min_rt,
double max_rt);
1015 double min_mz,
double max_mz);
1029 std::vector<PeptideIdentification>& peptides,
double mass_error,
1039 template <
class Filter>
1042 std::vector<PeptideIdentification>& peptides);
1056 std::vector<PeptideIdentification>& peptides,
1057 const String& metavalue_key,
double threshold = 0.05);
1061 std::vector<PeptideIdentification>& peptides,
1062 const std::set<String>& modifications);
1065 std::vector<PeptideIdentification>& peptides,
1070 std::vector<PeptideIdentification>& peptides,
1071 const std::set<String>& modifications);
1081 std::vector<PeptideIdentification>& peptides,
1082 const std::vector<PeptideIdentification>& bad_peptides,
1083 bool ignore_mods =
false);
1093 std::vector<PeptideIdentification>& peptides,
1094 const std::vector<PeptideIdentification>& good_peptides,
1095 bool ignore_mods =
false);
1107 peptides,
bool seq_only =
false);
1117 double peptide_threshold_score,
1118 double protein_threshold_score)
1122 protein_threshold_score);
1128 exp_it != experiment.
end(); ++exp_it)
1130 filterHitsByScore(exp_it->getPeptideIdentifications(),
1131 peptide_threshold_score);
1132 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1133 updateProteinReferences(exp_it->getPeptideIdentifications(),
1144 std::vector<PeptideIdentification> all_peptides;
1148 exp_it != experiment.
end(); ++exp_it)
1150 std::vector<PeptideIdentification>& peptides =
1151 exp_it->getPeptideIdentifications();
1152 keepNBestHits(peptides, n);
1153 removeEmptyIdentifications(peptides);
1154 updateProteinReferences(peptides,
1156 all_peptides.insert(all_peptides.end(), peptides.begin(),
1169 template <
class MapType>
1174 for (
auto& feat : map)
1176 keepNBestHits(feat.getPeptideIdentifications(), n);
1178 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1181 template <
class MapType>
1185 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1189 static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1191 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1193 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1196 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1198 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1200 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1204 template <
class MapType>
1210 for (
const auto& idrun : prot_ids)
1215 for (
auto& feat : prot_and_pep_ids)
1217 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1220 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1223 template <
class MapType>
1226 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1228 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1233 static void annotateBestPerPeptidePerRun(
const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1236 for (
const auto&
id : prot_ids)
1240 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1248 for (
auto &pep : pep_ids)
1251 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1261 for (
auto& pep : pep_ids)
1263 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1277 auto pepIt = pep.
getHits().begin();
1278 auto pepItEnd = nr_best_spectrum == 0 || pep.
getHits().size() <= nr_best_spectrum ? pep.
getHits().end() : pep.
getHits().begin() + nr_best_spectrum;
1279 for (; pepIt != pepItEnd; ++pepIt)
1293 int lookup_charge = 0;
1294 if (!ignore_charges)
1300 auto it_inserted = best_pep.emplace(std::move(lookup_seq),
ChargeToPepHitP());
1301 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1303 PeptideHit* &p = it_inserted_chg.first->second;
1304 if (!it_inserted_chg.second)
1331 const std::vector<FASTAFile::FASTAEntry>& proteins)
1333 std::set<String> accessions;
1334 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1335 proteins.begin(); it != proteins.end(); ++it)
1337 accessions.insert(it->identifier);
1347 exp_it != experiment.
end(); ++exp_it)
1349 if (exp_it->getMSLevel() == 2)
1351 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1353 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1354 updateHitRanks(exp_it->getPeptideIdentifications());
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:88
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:379
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:63
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:656
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Invalid value exception.
Definition: Exception.h:329
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:388
Int max_cleavages_
Definition: IDFilter.h:392
EnzymaticDigestion & digestion_
Definition: IDFilter.h:390
PeptideHit argument_type
Definition: IDFilter.h:395
Int min_cleavages_
Definition: IDFilter.h:391
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:404
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:420
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:396
static Int disabledValue()
Definition: IDFilter.h:400
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:78
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:823
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:548
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1140
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1205
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:90
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:576
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1182
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1233
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:532
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1246
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:89
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:811
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:567
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:923
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:540
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1271
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:586
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1189
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:713
static void keepBestMatchPerQuery(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:889
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1170
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:557
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:596
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void filterQueryMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:613
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:638
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:741
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1224
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1329
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:942
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1196
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:960
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1258
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1116
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:88
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:90
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:80
Iterator begin()
Definition: MSExperiment.h:157
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:111
Iterator end()
Definition: MSExperiment.h:167
Representation of a peptide evidence.
Definition: PeptideEvidence.h:51
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:57
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:61
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:60
const String & getAccession() const
returns the accession of the protein
A more convenient string class.
Definition: String.h:61
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:72
String identifier
Definition: FASTAFile.h:73
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:435
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:444
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:439
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:483
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:454
bool ignore_missed_cleavages_
Definition: IDFilter.h:441
PeptideEvidence argument_type
Definition: IDFilter.h:436
ProteaseDigestion & digestion_
Definition: IDFilter.h:440
bool methionine_cleavage_
Definition: IDFilter.h:442
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:304
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:306
GetMatchingItems()
Definition: IDFilter.h:318
ItemMap items
Definition: IDFilter.h:307
HitType argument_type
Definition: IDFilter.h:305
bool exists(const HitType &hit) const
Definition: IDFilter.h:325
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:335
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:309
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:320
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:330
Is this a decoy hit?
Definition: IDFilter.h:207
bool operator()(const HitType &hit) const
Definition: IDFilter.h:216
HitType argument_type
Definition: IDFilter.h:208
HasDecoyAnnotation()
Definition: IDFilter.h:212
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:102
bool operator()(const HitType &hit) const
Definition: IDFilter.h:113
double score
Definition: IDFilter.h:105
HitType argument_type
Definition: IDFilter.h:103
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:108
bool higher_score_better
Definition: IDFilter.h:106
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:232
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:237
HitType argument_type
Definition: IDFilter.h:233
const std::unordered_set< String > & accessions
Definition: IDFilter.h:235
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:241
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:255
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:250
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:268
HitType argument_type
Definition: IDFilter.h:269
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:277
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:291
const std::set< String > & accessions
Definition: IDFilter.h:271
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:273
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:286
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:130
bool operator()(const HitType &hit) const
Definition: IDFilter.h:144
HitType argument_type
Definition: IDFilter.h:131
Size rank
Definition: IDFilter.h:133
HasMaxRank(Size rank_)
Definition: IDFilter.h:135
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:499
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:502
IdentificationType argument_type
Definition: IDFilter.h:500
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:44