22 #include <OpenMS/config.h>
28 #include <unordered_set>
75 template<
class HitType>
82 HasGoodScore(
double score_,
bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
88 if (higher_score_better)
90 return hit.getScore() >= score;
92 return hit.getScore() <= score;
101 template<
class HitType>
111 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
117 Size hit_rank = hit.getRank();
122 return hit_rank <= rank;
131 template<
class HitType>
149 return found == value;
154 template<
class HitType>
170 return double(found) <= value;
181 template<
class HitType>
214 return static_cast<double>(found) >= value;
234 template<
class HitType>
247 target_decoy(
"target_decoy",
"decoy"),
248 is_decoy(
"isDecoy",
"true")
265 return target_decoy(hit) || is_decoy(hit);
274 template<
class HitType>
281 accessions(accessions_)
289 if (accessions.count(it) > 0)
311 template<
class HitType>
325 if (accessions.count(it) > 0)
347 template<
class HitType,
class Entry>
355 for (
typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
357 items[getKey(*rec_it)] = &(*rec_it);
372 return items.count(getHitKey(hit)) > 0;
382 if (!exists(evidence))
384 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '" + getHitKey(evidence) +
"'. peptide evidence accession not in data");
386 return *(items.find(getHitKey(evidence))->second);
401 struct HasMinPeptideLength;
407 struct HasLowMZError;
414 struct HasMatchingModification;
421 struct HasMatchingSequence;
424 struct HasNoEvidence;
454 const auto& fun = [&](
const Int missed_cleavages) {
455 bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ :
false;
456 bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ :
false;
457 return max_filter || min_filter;
464 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)), hits.end());
484 accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
496 if (accession_resolver_.
exists(evidence))
499 ignore_missed_cleavages_, methionine_cleavage_);
505 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
517 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this, peptides);
528 template<
class IdentificationType>
534 return id.getHits().empty();
561 template<
class Container,
class Predicate>
564 items.erase(std::remove_if(items.begin(), items.end(), pred), items.end());
568 template<
class Container,
class Predicate>
571 items.erase(std::remove_if(items.begin(), items.end(), std::not_fn(pred)), items.end());
575 template<
class Container,
class Predicate>
578 auto part = std::partition(items.begin(), items.end(), std::not_fn(pred));
579 std::move(part, items.end(), std::back_inserter(target));
580 items.erase(part, items.end());
584 template<
class IDContainer,
class Predicate>
587 for (
auto& item : items)
589 removeMatchingItems(item.getHits(), pred);
594 template<
class IDContainer,
class Predicate>
597 for (
auto& item : items)
599 keepMatchingItems(item.getHits(), pred);
603 template<
class MapType,
class Predicate>
606 for (
auto& feat : prot_and_pep_ids)
608 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
610 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
613 template<
class MapType,
class Predicate>
616 for (
auto& feat : prot_and_pep_ids)
618 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
620 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
623 template<
class MapType,
class Predicate>
626 for (
auto& feat : prot_and_pep_ids)
628 removeMatchingItems(feat.getPeptideIdentifications(), pred);
630 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
640 template<
class IdentificationType>
644 for (
typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
646 counter += id_it->getHits().size();
664 template<
class IdentificationType>
665 static bool getBestHit(
const std::vector<IdentificationType>& identifications,
bool assume_sorted,
typename IdentificationType::HitType& best_hit)
667 if (identifications.empty())
670 typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
671 typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
673 for (
typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
675 if (id_it->getHits().empty())
678 if (best_id_it == identifications.end())
681 best_hit_it = id_it->getHits().begin();
683 else if (best_id_it->getScoreType() != id_it->getScoreType())
685 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
688 bool higher_better = best_id_it->isHigherScoreBetter();
689 for (
typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
691 if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
693 best_hit_it = hit_it;
700 if (best_id_it == identifications.end())
705 best_hit = *best_hit_it;
716 static void extractPeptideSequences(
const std::vector<PeptideIdentification>& peptides, std::set<String>& sequences,
bool ignore_mods =
false);
730 template<
class EvidenceFilter>
733 for (std::vector<PeptideIdentification>::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
735 for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
737 std::vector<PeptideEvidence> evidences;
738 remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
739 hit_it->setPeptideEvidences(evidences);
751 template<
class IdentificationType>
754 for (
typename std::vector<IdentificationType>::iterator it = ids.begin(); it != ids.end(); ++it)
776 static void updateProteinReferences(std::vector<PeptideIdentification>& peptides,
const std::vector<ProteinIdentification>& proteins,
bool remove_peptides_without_reference =
false);
804 static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups,
const std::vector<ProteinHit>& hits);
812 static void removeUngroupedProteins(
const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
820 template<
class IdentificationType>
823 struct HasNoHits<IdentificationType> empty_filter;
824 removeMatchingItems(ids, empty_filter);
832 template<
class IdentificationType>
835 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
837 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
838 keepMatchingItems(id_it->getHits(), score_filter);
855 template<class IdentificationType>
856 static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
859 bool at_least_one_found =
false;
860 for (IdentificationType&
id : ids)
862 if (switcher.
isScoreType(
id.getScoreType(), score_type))
864 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
865 keepMatchingItems(id.getHits(), score_filter);
871 if (!metaval.empty())
875 struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
876 keepMatchingItems(id.getHits(), score_filter);
880 struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
881 keepMatchingItems(id.getHits(), score_filter);
883 at_least_one_found = true;
887 if (!at_least_one_found)
OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
896 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
903 template<class IdentificationType>
904 static void filterHitsByScore(IdentificationType& id, double threshold_score)
906 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
907 keepMatchingItems(id.getHits(), score_filter);
915 template<class IdentificationType>
916 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
918 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
921 if (n < id_it->getHits().size())
922 id_it->getHits().resize(n);
940 template<class IdentificationType>
941 static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
946 struct HasMaxRank<typename IdentificationType::HitType> rank_filter(min_rank - 1);
947 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
949 removeMatchingItems(id_it->getHits(), rank_filter);
952 if (max_rank >= min_rank)
954 struct HasMaxRank<typename IdentificationType::HitType> rank_filter(max_rank);
955 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
957 keepMatchingItems(id_it->getHits(), rank_filter);
969 template<
class IdentificationType>
973 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
975 removeMatchingItems(id_it->getHits(), decoy_filter);
986 template<
class IdentificationType>
990 for (auto& id_it : ids)
992 removeMatchingItems(id_it.getHits(), acc_filter);
1003 template<
class IdentificationType>
1007 for (auto& id_it : ids)
1009 keepMatchingItems(id_it.getHits(), acc_filter);
1048 static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
double min_rt,
double max_rt);
1051 static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
double min_mz,
double max_mz);
1073 template<
class Filter>
1143 filterHitsByScore(exp_it->getPeptideIdentifications(), peptide_threshold_score);
1144 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1156 std::vector<PeptideIdentification> all_peptides;
1161 std::vector<PeptideIdentification>& peptides = exp_it->getPeptideIdentifications();
1162 keepNBestHits(peptides, n);
1163 removeEmptyIdentifications(peptides);
1165 all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end());
1176 template<
class MapType>
1181 for (
auto& feat : map)
1183 keepNBestHits(feat.getPeptideIdentifications(), n);
1185 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1188 template<
class MapType>
1192 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1196 static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1198 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1200 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1203 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1205 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1207 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1211 template<
class MapType>
1217 for (
const auto& idrun : prot_ids)
1222 for (
auto& feat : prot_and_pep_ids)
1224 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1227 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1230 template<
class MapType>
1233 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1235 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1240 static void annotateBestPerPeptidePerRun(
const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
1241 Size nr_best_spectrum)
1244 for (
const auto&
id : prot_ids)
1248 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1255 Size nr_best_spectrum)
1257 for (
auto& pep : pep_ids)
1260 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1270 for (
auto& pep : pep_ids)
1272 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1286 auto pepIt = pep.
getHits().begin();
1287 auto pepItEnd = nr_best_spectrum == 0 || pep.
getHits().size() <= nr_best_spectrum ? pep.
getHits().end() : pep.
getHits().begin() + nr_best_spectrum;
1288 for (; pepIt != pepItEnd; ++pepIt)
1302 int lookup_charge = 0;
1303 if (!ignore_charges)
1309 auto it_inserted = best_pep.emplace(std::move(lookup_seq),
ChargeToPepHitP());
1310 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1312 PeptideHit*& p = it_inserted_chg.first->second;
1313 if (!it_inserted_chg.second)
1337 std::set<String> accessions;
1338 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it = proteins.begin(); it != proteins.end(); ++it)
1340 accessions.insert(it->identifier);
1350 if (exp_it->getMSLevel() == 2)
1352 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(), accessions);
1353 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1354 updateHitRanks(exp_it->getPeptideIdentifications());
#define OPENMS_LOG_WARN
Macro for logging a warning, i.e. a piece of information which should be read by the user.
Definition: LogStream.h:444
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:66
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:362
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:629
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:316
Invalid value exception.
Definition: Exception.h:305
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:433
Int max_cleavages_
Definition: IDFilter.h:437
EnzymaticDigestion & digestion_
Definition: IDFilter.h:435
PeptideHit argument_type
Definition: IDFilter.h:440
Int min_cleavages_
Definition: IDFilter.h:436
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:452
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:462
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:441
static Int disabledValue()
Definition: IDFilter.h:445
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:54
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:833
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:576
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1152
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1212
static void removeUnreferencedProteins(ProteinIdentification &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:65
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:604
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1189
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1240
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:562
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1254
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:64
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:821
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:595
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:970
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:569
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1280
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:614
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1196
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:731
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1177
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:585
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:624
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:641
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:665
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:752
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1231
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1335
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:987
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1203
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:1004
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1267
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1133
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:63
Definition: IDScoreSwitcherAlgorithm.h:25
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
Searches for a specified score type within an identification object and its meta values.
Definition: IDScoreSwitcherAlgorithm.h:310
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition: IDScoreSwitcherAlgorithm.h:97
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score_name is of ScoreType type.
Definition: IDScoreSwitcherAlgorithm.h:44
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:77
Iterator begin() noexcept
Definition: MSExperiment.h:156
Iterator end()
Definition: MSExperiment.h:171
Representation of a peptide evidence.
Definition: PeptideEvidence.h:25
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:31
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:39
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:50
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:46
String identifier
Definition: FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:474
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:483
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:478
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:515
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:488
bool ignore_missed_cleavages_
Definition: IDFilter.h:480
PeptideEvidence argument_type
Definition: IDFilter.h:475
ProteaseDigestion & digestion_
Definition: IDFilter.h:479
bool methionine_cleavage_
Definition: IDFilter.h:481
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:348
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:350
GetMatchingItems()
Definition: IDFilter.h:361
ItemMap items
Definition: IDFilter.h:351
HitType argument_type
Definition: IDFilter.h:349
bool exists(const HitType &hit) const
Definition: IDFilter.h:370
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:380
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:353
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:365
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:375
Is this a decoy hit?
Definition: IDFilter.h:236
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition: IDFilter.h:260
HitType argument_type
Definition: IDFilter.h:237
HasDecoyAnnotation()
Default constructor.
Definition: IDFilter.h:246
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:76
bool operator()(const HitType &hit) const
Definition: IDFilter.h:86
double score
Definition: IDFilter.h:79
HitType argument_type
Definition: IDFilter.h:77
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:82
bool higher_score_better
Definition: IDFilter.h:80
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:275
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:280
HitType argument_type
Definition: IDFilter.h:276
const std::unordered_set< String > & accessions
Definition: IDFilter.h:278
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:285
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:300
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:295
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:312
HitType argument_type
Definition: IDFilter.h:313
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:321
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:336
const std::set< String > & accessions
Definition: IDFilter.h:315
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:317
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:331
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:102
bool operator()(const HitType &hit) const
Definition: IDFilter.h:115
HitType argument_type
Definition: IDFilter.h:103
Size rank
Definition: IDFilter.h:105
HasMaxRank(Size rank_)
Definition: IDFilter.h:107
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:529
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:532
IdentificationType argument_type
Definition: IDFilter.h:530
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20