24#include <OpenMS/config.h>
30#include <unordered_set>
37 std::is_same_v<T, PeptideIdentification> || std::is_same_v<T, ProteinIdentification>;
41 std::is_same_v<T, FeatureMap> || std::is_same_v<T, ConsensusMap>;
46 !std::is_same_v<T, std::vector<PeptideIdentification>> &&
47 !std::is_same_v<T, std::vector<ProteinIdentification>> &&
48 !std::is_same_v<T, PeptideIdentificationList>;
92 template<
class HitType>
99 HasGoodScore(
double score_,
bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
105 if (higher_score_better)
107 return hit.getScore() >= score;
109 return hit.getScore() <= score;
118 template<
class HitType>
136 return found == value;
141 template<
class HitType>
157 return double(found) <= value;
168 template<
class HitType>
201 return static_cast<double>(found) >= value;
221 template<
class HitType>
234 target_decoy(
"target_decoy",
"decoy"),
235 is_decoy(
"isDecoy",
"true")
252 return target_decoy(hit) || is_decoy(hit);
264 template<
class HitType,
class SetType>
278 if (accessions.count(acc) > 0)
296 template<
class HitType>
300 template<
class HitType>
308 template<
class HitType,
class Entry>
316 for (
typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
318 items[getKey(*rec_it)] = &(*rec_it);
333 return items.count(getHitKey(hit)) > 0;
343 if (!exists(evidence))
345 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '" + getHitKey(evidence) +
"'. peptide evidence accession not in data");
347 return *(items.find(getHitKey(evidence))->second);
362 struct HasMinPeptideLength;
368 struct HasLowMZError;
375 struct HasMatchingModification;
382 struct HasMatchingSequence;
385 struct HasNoEvidence;
415 const auto& fun = [&](
const Int missed_cleavages) {
416 bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ :
false;
417 bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ :
false;
418 return max_filter || min_filter;
425 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)), hits.end());
445 accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
457 if (accession_resolver_.
exists(evidence))
460 ignore_missed_cleavages_, methionine_cleavage_);
466 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
478 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this, peptides);
489 template<
class IdentificationType>
495 return id.getHits().empty();
522 template<
class Container,
class Predicate>
525 items.erase(std::remove_if(items.begin(), items.end(), pred), items.end());
529 template<
class Container,
class Predicate>
532 items.erase(std::remove_if(items.begin(), items.end(), std::not_fn(pred)), items.end());
536 template<
class Container,
class Predicate>
539 auto part = std::partition(items.begin(), items.end(), std::not_fn(pred));
540 std::move(part, items.end(), std::back_inserter(target));
541 items.erase(part, items.end());
545 template<
class IDContainer,
class Predicate>
548 for (
auto& item : items)
550 removeMatchingItems(item.getHits(), pred);
555 template<
class IDContainer,
class Predicate>
558 for (
auto& item : items)
560 keepMatchingItems(item.getHits(), pred);
564 template<
class MapType,
class Predicate>
567 for (
auto& feat : prot_and_pep_ids)
569 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
571 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
574 template<
class MapType,
class Predicate>
577 for (
auto& feat : prot_and_pep_ids)
579 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
581 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
584 template<IsFeatureOrConsensusMap MapType,
class Predicate>
587 for (
auto& feat : prot_and_pep_ids)
589 removeMatchingItems(feat.getPeptideIdentifications(), pred);
591 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
595 template<
class Predicate>
598 removeMatchingItems(pep_ids, pred);
608 template<
class IdentificationType>
612 for (
typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
614 counter += id_it->getHits().size();
623 for (
const auto&
id : ids)
625 counter +=
id.getHits().size();
633 std::vector<PeptideIdentification>& vec = ids.
getData();
634 filterHitsByRank(vec, min_rank, max_rank);
640 std::vector<PeptideIdentification>& vec = ids.
getData();
641 removeHitsMatchingProteins(vec, accessions);
647 std::vector<PeptideIdentification>& vec = ids.
getData();
648 keepHitsMatchingProteins(vec, accessions);
654 std::vector<PeptideIdentification>& vec = ids.
getData();
655 return getBestHit(vec, assume_sorted, best_hit);
661 std::vector<PeptideIdentification>& vec = ids.
getData();
662 removeEmptyIdentifications(vec);
678 template<
class IdentificationType>
679 static bool getBestHit(
const std::vector<IdentificationType>& identifications,
bool assume_sorted,
typename IdentificationType::HitType& best_hit)
681 if (identifications.empty())
684 typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
685 typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
687 for (
typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
689 if (id_it->getHits().empty())
692 if (best_id_it == identifications.end())
695 best_hit_it = id_it->getHits().begin();
697 else if (best_id_it->getScoreType() != id_it->getScoreType())
699 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
702 bool higher_better = best_id_it->isHigherScoreBetter();
703 for (
typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
705 if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
707 best_hit_it = hit_it;
714 if (best_id_it == identifications.end())
719 best_hit = *best_hit_it;
744 template<
class EvidenceFilter>
749 for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
751 std::vector<PeptideEvidence> evidences;
752 remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
753 hit_it->setPeptideEvidences(evidences);
829 static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups,
const std::vector<ProteinHit>& hits);
837 static void removeUngroupedProteins(
const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
845 template<IsPeptideOrProteinIdentification IdentificationType>
848 struct HasNoHits<IdentificationType> empty_filter;
849 removeMatchingItems(ids, empty_filter);
857 template<
class IdentificationType>
860 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
862 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
863 keepMatchingItems(id_it->getHits(), score_filter);
880 template<class IdentificationType>
881 static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
884 bool at_least_one_found =
false;
885 for (IdentificationType&
id : ids)
887 if (switcher.
isScoreType(
id.getScoreType(), score_type))
889 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
890 keepMatchingItems(id.getHits(), score_filter);
895 auto result = switcher.
findScoreType<IdentificationType>(id, score_type);
896 if (!result.score_name.empty())
898 String metaval = result.score_name;
901 struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
902 keepMatchingItems(id.getHits(), score_filter);
906 struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
907 keepMatchingItems(id.getHits(), score_filter);
909 at_least_one_found = true;
913 if (!at_least_one_found)
OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
922 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
929 template<class IdentificationType>
930 static void filterHitsByScore(IdentificationType& id, double threshold_score)
932 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
933 keepMatchingItems(id.getHits(), score_filter);
941 template<class IdentificationType>
942 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
944 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
947 if (n < id_it->getHits().size())
948 id_it->getHits().resize(n);
960 static void keepNBestHits(PeptideIdentificationList& pep_ids, Size n)
962 std::vector<PeptideIdentification>& vec = pep_ids.getData();
963 keepNBestHits(vec, n);
980 template<class IdentificationType>
981 static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
985 auto& hits = id.getHits();
986 if (hits.empty()) continue;
991 if (max_rank < min_rank) max_rank = hits.size();
994 double last_score = hits.front().getScore();
998 std::remove_if(hits.begin(), hits.end(),
999 [&](const auto& hit) {
1000 if (hit.getScore() != last_score)
1003 last_score = hit.getScore();
1005 return rank < min_rank || rank > max_rank;
1019 template<class IdentificationType>
1020 static void removeDecoyHits(std::vector<IdentificationType>& ids)
1022 struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
1023 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
1025 removeMatchingItems(id_it->getHits(), decoy_filter);
1036 template<class IdentificationType>
1037 static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
1039 HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1040 for (auto& id_it : ids)
1042 removeMatchingItems(id_it.getHits(), acc_filter);
1053 template<IsPeptideOrProteinIdentification IdentificationType>
1054 static void keepHitsMatchingProteins(IdentificationType& id, const std::set<String>& accessions)
1056 HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1057 keepMatchingItems(id.getHits(), acc_filter);
1067 template<class IdentificationType>
1068 static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
1070 for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions);
1085 static void keepBestPeptideHits(PeptideIdentificationList& peptides, bool strict = false);
1095 static void filterPeptidesByLength(PeptideIdentificationList& peptides, Size min_length, Size max_length = UINT_MAX);
1105 static void filterPeptidesByCharge(PeptideIdentificationList& peptides, Int min_charge, Int max_charge);
1108 static void filterPeptidesByRT(PeptideIdentificationList& peptides, double min_rt, double max_rt);
1111 static void filterPeptidesByMZ(PeptideIdentificationList& peptides, double min_mz, double max_mz);
1124 static void filterPeptidesByMZError(PeptideIdentificationList& peptides, double mass_error, bool unit_ppm);
1133 template<class Filter>
1134 static void filterPeptideEvidences(Filter& filter, PeptideIdentificationList& peptides);
1147 static void filterPeptidesByRTPredictPValue(PeptideIdentificationList& peptides, const String& metavalue_key, double threshold = 0.05);
1150 static void removePeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1152 static void removePeptidesWithMatchingRegEx(PeptideIdentificationList& peptides, const String& regex);
1155 static void keepPeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1164 static void removePeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& bad_peptides, bool ignore_mods = false);
1173 static void keepPeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& good_peptides, bool ignore_mods = false);
1176 static void keepUniquePeptidesPerProtein(PeptideIdentificationList& peptides);
1184 static void removeDuplicatePeptideHits(PeptideIdentificationList& peptides, bool seq_only = false);
1193 static void filterHitsByScore(AnnotatedMSRun& annotated_data,
1194 double peptide_threshold_score,
1195 double protein_threshold_score)
1198 filterHitsByScore(annotated_data.getProteinIdentifications(),
1199 protein_threshold_score);
1204 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1206 filterHitsByScore(peptide_id, peptide_threshold_score);
1208 removeDanglingProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
1212 static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n)
1216 PeptideIdentificationList all_peptides;
1218 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1221 PeptideIdentificationList temp_vec = {peptide_id};
1222 keepNBestHits(temp_vec, n);
1224 if (!temp_vec.empty())
1226 peptide_id = temp_vec[0];
1230 peptide_id.getHits().clear();
1235 temp_vec = {peptide_id};
1236 removeDanglingProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
1237 all_peptides.push_back(peptide_id);
1240 removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides);
1245 static void keepNBestSpectra(PeptideIdentificationList& peptides, Size n);
1248 template<class MapType>
1249 static void keepNBestPeptideHits(MapType& map, Size n)
1253 for (auto& feat : map)
1255 keepNBestHits(feat.getPeptideIdentifications(), n);
1257 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1260 template<IsNotIdentificationVector MapType>
1261 static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1263 const auto pred = HasNoHits<PeptideIdentification>();
1264 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1268 static void keepBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1270 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1271 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1272 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1275 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1277 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1278 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1279 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1283 template<class MapType>
1284 static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1286 const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1288 RunToSequenceToChargeToPepHitP best_peps_per_run;
1289 for (const auto& idrun : prot_ids)
1291 best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1294 for (auto& feat : prot_and_pep_ids)
1296 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1299 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1302 template<class MapType>
1303 static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1305 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1306 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1307 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1312 static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1313 Size nr_best_spectrum)
1315 RunToSequenceToChargeToPepHitP best_peps_per_run;
1316 for (const auto& id : prot_ids)
1318 best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1320 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1326 static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1327 Size nr_best_spectrum)
1329 for (auto& pep : pep_ids)
1331 SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1332 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1339 static void annotateBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1341 SequenceToChargeToPepHitP best_pep;
1342 for (auto& pep : pep_ids)
1344 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1352 static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1354 bool higher_score_better = pep.isHigherScoreBetter();
1358 auto pepIt = pep.getHits().begin();
1359 auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1360 for (; pepIt != pepItEnd; ++pepIt)
1362 PeptideHit& hit = *pepIt;
1367 lookup_seq = hit.getSequence().toUnmodifiedString();
1371 lookup_seq = hit.getSequence().toString();
1374 int lookup_charge = 0;
1375 if (!ignore_charges)
1377 lookup_charge = hit.getCharge();
1381 auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1382 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1384 PeptideHit*& p = it_inserted_chg.first->second;
1385 if (!it_inserted_chg.second)
1387 if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1389 p->setMetaValue(
"best_per_peptide", 0);
1390 hit.setMetaValue(
"best_per_peptide", 1);
1396 hit.setMetaValue(
"best_per_peptide", 0);
1401 hit.setMetaValue(
"best_per_peptide", 1);
1409 const std::vector<FASTAFile::FASTAEntry>& proteins)
1411 std::set<String> accessions;
1412 for (
auto it = proteins.begin(); it != proteins.end(); ++it)
1414 accessions.insert(it->identifier);
1422 for (
auto [spectrum, peptide_id] : experiment)
1424 if (spectrum.getMSLevel() == 2)
1426 keepHitsMatchingProteins(peptide_id, accessions);
1473 removeDecoyHits(ids.
getData());
1478 filterHitsByScore(ids.
getData(), threshold_score);
1483 removeUnreferencedProteins(proteins, ids.
getData());
#define OPENMS_LOG_WARN
Macro for warnings.
Definition LogStream.h:550
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
Class for storing MS run data with peptide and protein identifications.
Definition AnnotatedMSRun.h:38
PeptideIdentificationList & getPeptideIdentifications()
Get all peptide identifications for all spectra.
std::vector< ProteinIdentification > & getProteinIdentifications()
Get the protein identification.
Definition AnnotatedMSRun.h:85
A container for consensus elements.
Definition ConsensusMap.h:68
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition DataValue.h:34
bool isEmpty() const
Test if the value is empty.
Class for the enzymatic digestion of sequences.
Definition EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition Exception.h:317
Invalid value exception.
Definition Exception.h:306
const VecMember & getData() const
read-only access to the underlying data
Definition ExposedVector.h:328
typename VecMember::iterator iterator
Definition ExposedVector.h:68
iterator begin() noexcept
Definition ExposedVector.h:104
iterator end() noexcept
Definition ExposedVector.h:108
Filter Peptide Hit by its digestion product.
Definition IDFilter.h:394
Int max_cleavages_
Definition IDFilter.h:398
EnzymaticDigestion & digestion_
Definition IDFilter.h:396
PeptideHit argument_type
Definition IDFilter.h:401
Int min_cleavages_
Definition IDFilter.h:397
bool operator()(PeptideHit &p) const
Definition IDFilter.h:413
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition IDFilter.h:423
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition IDFilter.h:402
static Int disabledValue()
Definition IDFilter.h:406
Collection of functions for filtering peptide and protein identifications.
Definition IDFilter.h:71
static void removeHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:638
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition IDFilter.h:858
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDanglingProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes dangling protein references from peptide hits using a reference protein run.
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition IDFilter.h:537
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e.g. spectrum).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition IDFilter.h:82
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition IDFilter.h:565
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition IDFilter.h:523
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition IDFilter.h:81
static void removeDecoyHits(PeptideIdentificationList &ids)
Definition IDFilter.h:1471
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition IDFilter.h:846
static void removeEmptyIdentifications(PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:659
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protein IDs)
Definition IDFilter.h:556
static void removeDanglingProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes dangling protein references from peptide hits in a ConsensusMap.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void keepHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:645
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition IDFilter.h:530
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void keepHitsMatchingProteins(AnnotatedMSRun &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters AnnotatedMSRun according to the given proteins.
Definition IDFilter.h:1407
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition IDFilter.h:575
static void filterHitsByRank(PeptideIdentificationList &ids, Size min_rank, Size max_rank)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:631
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static Size countHits(const PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:620
static bool getBestHit(PeptideIdentificationList &ids, bool assume_sorted, PeptideHit &best_hit)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition IDFilter.h:652
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protein IDs)
Definition IDFilter.h:546
static void filterHitsByScore(PeptideIdentificationList &ids, double threshold_score)
Definition IDFilter.h:1476
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition IDFilter.h:585
static void FilterPeptideEvidences(EvidenceFilter &filter, PeptideIdentificationList &peptides)
remove peptide evidences based on a filter
Definition IDFilter.h:745
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition IDFilter.h:609
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &ids)
Definition IDFilter.h:1481
static void removeDanglingProteinReferences(PeptideIdentificationList &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes dangling protein references from peptide hits.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition IDFilter.h:679
static void extractPeptideSequences(const PeptideIdentificationList &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void removeMatchingPeptideIdentifications(PeptideIdentificationList &pep_ids, Predicate &pred)
Definition IDFilter.h:596
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void removeUnreferencedProteins(ProteinIdentification &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition IDFilter.h:80
This class is used to switch identification scores within identification or consensus feature maps.
Definition IDScoreSwitcherAlgorithm.h:42
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition IDScoreSwitcherAlgorithm.h:139
bool isScoreType(const String &score_name, const ScoreType &type) const
Checks if the given score name corresponds to a specific score type.
Definition IDScoreSwitcherAlgorithm.h:75
ScoreSearchResult findScoreType(const IDType &id, ScoreType score_type) const
Searches for a general score type (e.g. PEP, QVAL) in an identification data structure.
Definition IDScoreSwitcherAlgorithm.h:176
Definition IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
Representation of a peptide evidence.
Definition PeptideEvidence.h:28
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available, the UNKNOWN_POSITION constant is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-terminus).
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random Asp-Pro (D|P) cleavage.
Representation of a protein hit.
Definition ProteinHit.h:35
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition ProteinIdentification.h:54
A more convenient string class.
Definition String.h:34
Concept to exclude std::vector of identification types (used to disambiguate template overloads)
Definition IDFilter.h:45
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
FASTA entry type (identifier, description and sequence). The first String corresponds to the identifier.
Definition FASTAFile.h:46
String identifier
Definition FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition IDFilter.h:435
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition IDFilter.h:444
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition IDFilter.h:439
void filterPeptideEvidences(PeptideIdentificationList &peptides)
Definition IDFilter.h:476
bool operator()(const PeptideEvidence &evidence) const
Definition IDFilter.h:449
bool ignore_missed_cleavages_
Definition IDFilter.h:441
PeptideEvidence argument_type
Definition IDFilter.h:436
ProteaseDigestion & digestion_
Definition IDFilter.h:440
bool methionine_cleavage_
Definition IDFilter.h:442
Builds a map index of data that have a String index to find matches and return the objects.
Definition IDFilter.h:309
std::map< String, Entry * > ItemMap
Definition IDFilter.h:311
GetMatchingItems()
Definition IDFilter.h:322
const String & getHitKey(const PeptideEvidence &p) const
Definition IDFilter.h:336
ItemMap items
Definition IDFilter.h:312
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition IDFilter.h:326
HitType argument_type
Definition IDFilter.h:310
bool exists(const HitType &hit) const
Definition IDFilter.h:331
GetMatchingItems(std::vector< Entry > &records)
Definition IDFilter.h:314
const Entry & getValue(const PeptideEvidence &evidence) const
Definition IDFilter.h:341
Is this a decoy hit?
Definition IDFilter.h:223
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition IDFilter.h:247
HitType argument_type
Definition IDFilter.h:224
HasDecoyAnnotation()
Default constructor.
Definition IDFilter.h:233
Is the score of this hit at least as good as the given value?
Definition IDFilter.h:93
bool operator()(const HitType &hit) const
Definition IDFilter.h:103
double score
Definition IDFilter.h:96
HitType argument_type
Definition IDFilter.h:94
HasGoodScore(double score_, bool higher_score_better_)
Definition IDFilter.h:99
bool higher_score_better
Definition IDFilter.h:97
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition IDFilter.h:265
HasMatchingAccessionImpl(const SetType &accessions_)
Definition IDFilter.h:270
const SetType & accessions
Definition IDFilter.h:268
HitType argument_type
Definition IDFilter.h:266
bool operator()(const PeptideHit &hit) const
Definition IDFilter.h:274
bool operator()(const PeptideEvidence &evidence) const
Definition IDFilter.h:289
bool operator()(const ProteinHit &hit) const
Definition IDFilter.h:284
Is the list of hits of this peptide/protein ID empty?
Definition IDFilter.h:490
bool operator()(const IdentificationType &id) const
Definition IDFilter.h:493
IdentificationType argument_type
Definition IDFilter.h:491
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition MetaData.h:20