OpenMS  2.8.0
IDFilter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/config.h>
48 
49 #include <algorithm>
50 #include <climits>
51 #include <vector>
52 #include <set>
53 #include <map>
54 #include <unordered_set>
55 
56 namespace OpenMS
57 {
78  class OPENMS_DLLAPI IDFilter
79  {
80 public:
81 
83  IDFilter() = default;
84 
86  virtual ~IDFilter() = default;
87 
89  typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
90  typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
91  typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
92 
99 
101  template <class HitType>
103  {
104  typedef HitType argument_type; // for use as a predicate
105 
106  double score;
108 
109  HasGoodScore(double score_, bool higher_score_better_) :
110  score(score_),
111  higher_score_better(higher_score_better_)
112  {}
113 
114  bool operator()(const HitType& hit) const
115  {
116  if (higher_score_better)
117  {
118  return hit.getScore() >= score;
119  }
120  return hit.getScore() <= score;
121  }
122  };
123 
129  template <class HitType>
130  struct HasMaxRank
131  {
132  typedef HitType argument_type; // for use as a predicate
133 
135 
136  HasMaxRank(Size rank_):
137  rank(rank_)
138  {
139  if (rank_ == 0)
140  {
141  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The cut-off value for rank filtering must not be zero!");
142  }
143  }
144 
145  bool operator()(const HitType& hit) const
146  {
147  Size hit_rank = hit.getRank();
148  if (hit_rank == 0)
149  {
150  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No rank assigned to peptide or protein hit");
151  }
152  return hit_rank <= rank;
153  }
154  };
155 
161  template <class HitType>
163  {
164  typedef HitType argument_type; // for use as a predicate
165 
168 
169  HasMetaValue(const String& key_, const DataValue& value_):
170  key(key_),
171  value(value_)
172  {}
173 
174  bool operator()(const HitType& hit) const
175  {
176  DataValue found = hit.getMetaValue(key);
177  if (found.isEmpty()) return false; // meta value "key" not set
178  if (value.isEmpty()) return true; // "key" is set, value doesn't matter
179  return found == value;
180  }
181  };
182 
184  template <class HitType>
186  {
187  typedef HitType argument_type; // for use as a predicate
188 
190  double value;
191 
192  HasMaxMetaValue(const String& key_, const double& value_):
193  key(key_),
194  value(value_)
195  {}
196 
197  bool operator()(const HitType& hit) const
198  {
199  DataValue found = hit.getMetaValue(key);
200  if (found.isEmpty()) return false; // meta value "key" not set
201  return double(found) <= value;
202  }
203  };
204 
206  template <class HitType>
208  {
209  typedef HitType argument_type; // for use as a predicate
210 
211  struct HasMetaValue<HitType> target_decoy, is_decoy;
212 
214  target_decoy("target_decoy", "decoy"), is_decoy("isDecoy", "true")
215  {}
216 
217  bool operator()(const HitType& hit) const
218  {
219  // @TODO: this could be done slightly more efficiently by returning
220  // false if the "target_decoy" meta value is "target" or "target+decoy",
221  // without checking for an "isDecoy" meta value in that case
222  return target_decoy(hit) || is_decoy(hit);
223  }
224  };
225 
231  template <class HitType>
233  {
234  typedef HitType argument_type; // for use as a predicate
235 
236  const std::unordered_set<String>& accessions;
237 
238  HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_):
239  accessions(accessions_)
240  {}
241 
242  bool operator()(const PeptideHit& hit) const
243  {
244  for (const auto& it : hit.extractProteinAccessionsSet())
245  {
246  if (accessions.count(it) > 0) return true;
247  }
248  return false;
249  }
250 
251  bool operator()(const ProteinHit& hit) const
252  {
253  return (accessions.count(hit.getAccession()) > 0);
254  }
255 
256  bool operator()(const PeptideEvidence& evidence) const
257  {
258  return (accessions.count(evidence.getProteinAccession()) > 0);
259  }
260  };
261 
267  template <class HitType>
269  {
270  typedef HitType argument_type; // for use as a predicate
271 
272  const std::set<String>& accessions;
273 
274  HasMatchingAccession(const std::set<String>& accessions_):
275  accessions(accessions_)
276  {}
277 
278  bool operator()(const PeptideHit& hit) const
279  {
280  for (const auto& it : hit.extractProteinAccessionsSet())
281  {
282  if (accessions.count(it) > 0) return true;
283  }
284  return false;
285  }
286 
287  bool operator()(const ProteinHit& hit) const
288  {
289  return (accessions.count(hit.getAccession()) > 0);
290  }
291 
292  bool operator()(const PeptideEvidence& evidence) const
293  {
294  return (accessions.count(evidence.getProteinAccession()) > 0);
295  }
296  };
297 
303  template <class HitType, class Entry>
305  {
306  typedef HitType argument_type; // for use as a predicate
307  typedef std::map<String, Entry*> ItemMap;//Store pointers to avoid copying data
309 
310  GetMatchingItems(std::vector<Entry>& records)
311  {
312  for(typename std::vector<Entry>::iterator rec_it = records.begin();
313  rec_it != records.end(); ++rec_it)
314  {
315  items[getKey(*rec_it)] = &(*rec_it);
316  }
317  }
318 
320 
321  const String& getKey(const FASTAFile::FASTAEntry& entry) const
322  {
323  return entry.identifier;
324  }
325 
326  bool exists(const HitType& hit) const
327  {
328  return items.count(getHitKey(hit)) > 0;
329  }
330 
331  const String& getHitKey(const PeptideEvidence& p) const
332  {
333  return p.getProteinAccession();
334  }
335 
336  const Entry& getValue(const PeptideEvidence& evidence) const
337  {
338  if(!exists(evidence)){
339  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '"+ getHitKey(evidence) + "'. peptide evidence accession not in data");
340  }
341  return *(items.find(getHitKey(evidence))->second);
342  }
343 
344  };
345 
347 
348 
355 
357  struct HasMinPeptideLength;
358 
360  struct HasMinCharge;
361 
363  struct HasLowMZError;
364 
370  struct HasMatchingModification;
371 
377  struct HasMatchingSequence;
378 
380  struct HasNoEvidence;
381 
382 
389  {
390  private:
394 
395  public:
398  digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
399  {}
400 
401  static inline Int disabledValue(){ return -1; }
402 
405  bool operator()(PeptideHit& p) const
406  {
407  const auto& fun = [&](const Int missed_cleavages)
408  {
409 
410  bool max_filter = max_cleavages_ != disabledValue() ?
411  missed_cleavages > max_cleavages_ : false;
412  bool min_filter = min_cleavages_ != disabledValue() ?
413  missed_cleavages < min_cleavages_ : false;
414  return max_filter || min_filter;
415  };
416  return digestion_.filterByMissedCleavages(
418  fun);
419  }
420 
421  void filterPeptideSequences(std::vector<PeptideHit>& hits)
422  {
423  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)),
424  hits.end());
425  }
426 
427  };
428 
429 
436  {
438 
439  // Build an accession index to avoid the linear search cost
444 
445  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries,
446  ProteaseDigestion& digestion,
447  bool ignore_missed_cleavages,
448  bool methionine_cleavage) :
449  accession_resolver_(entries),
450  digestion_(digestion),
451  ignore_missed_cleavages_(ignore_missed_cleavages),
452  methionine_cleavage_(methionine_cleavage)
453  {}
454 
455  bool operator()(const PeptideEvidence& evidence) const
456  {
457  if(!evidence.hasValidLimits())
458  {
459  OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
460  return true;
461  }
462 
463  if (accession_resolver_.exists(evidence))
464  {
465  return digestion_.isValidProduct(
466  AASequence::fromString(accession_resolver_.getValue(evidence).sequence),
467  evidence.getStart(), evidence.getEnd() - evidence.getStart(), ignore_missed_cleavages_, methionine_cleavage_);
468  }
469  else
470  {
471  if (evidence.getProteinAccession().empty())
472  {
473  OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
474  }
475  else
476  {
477  OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession()
478  << "' not found in fasta file!" << std::endl;
479  }
480  return true;
481  }
482  }
483 
484  void filterPeptideEvidences(std::vector<PeptideIdentification>& peptides)
485  {
486  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this,peptides);
487  }
488 
489  };
490 
492 
493 
496 
/// Predicate: is the list of hits of an identification (as returned by getHits()) empty?
template <class IdentificationType>
struct HasNoHits
{
  typedef IdentificationType argument_type; // for use as a predicate

  /// Returns true if @p id has no hits
  bool operator()(const IdentificationType& id) const
  {
    const auto& hits = id.getHits();
    return hits.empty();
  }
};
508 
510 
511 
514 
516  struct HasRTInRange;
517 
519  struct HasMZInRange;
520 
522 
523 
530 
/// Removes all elements of @p items for which @p pred returns true.
/// The relative order of the remaining elements is preserved.
template <class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  const auto new_end = std::remove_if(items.begin(), items.end(), pred);
  items.erase(new_end, items.end());
}
538 
/**
  @brief Keeps only the elements of @p items for which @p pred returns true

  The relative order of the kept elements is preserved. The predicate is
  negated with a lambda instead of `std::not1` (deprecated in C++17, removed in
  C++20), so @p pred no longer has to provide an `argument_type` typedef.

  @param items Container to filter in place
  @param pred Unary predicate callable with a const reference to an element
*/
template <class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  const auto rejects = [&pred](const typename Container::value_type& item)
  {
    return !pred(item);
  };
  items.erase(std::remove_if(items.begin(), items.end(), rejects),
              items.end());
}
546 
/**
  @brief Moves all elements of @p items for which @p pred returns true to the end of @p target

  Uses `std::partition`, so the relative order of the elements (both the ones
  that stay and the ones that are moved) is not guaranteed to be preserved.
  The predicate is negated with a lambda instead of `std::not1` (deprecated in
  C++17, removed in C++20), so @p pred does not need an `argument_type` typedef.

  @param items Source container; matching elements are removed from it
  @param pred Unary predicate callable with a const reference to an element
  @param target Container that the matching elements are appended to
*/
template <class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  const auto stays = [&pred](const typename Container::value_type& item)
  {
    return !pred(item);
  };
  const auto first_match = std::partition(items.begin(), items.end(), stays);
  std::move(first_match, items.end(), std::back_inserter(target));
  items.erase(first_match, items.end());
}
555 
/// For every identification in @p items, removes the hits (getHits()) for which @p pred returns true.
template <class IDContainer, class Predicate>
static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto id_it = items.begin(); id_it != items.end(); ++id_it)
  {
    removeMatchingItems(id_it->getHits(), pred);
  }
}
565 
/// For every identification in @p items, keeps only the hits (getHits()) for which @p pred returns true.
template <class IDContainer, class Predicate>
static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto id_it = items.begin(); id_it != items.end(); ++id_it)
  {
    keepMatchingItems(id_it->getHits(), pred);
  }
}
575 
/**
  @brief Keeps only the peptide hits matching @p pred in all peptide identifications of a map

  Applied to the peptide identifications of every element of @p prot_and_pep_ids
  and to its unassigned peptide identifications.

  @param prot_and_pep_ids Map-like container (iterable, elements provide getPeptideIdentifications();
                          the map provides getUnassignedPeptideIdentifications())
  @param pred Predicate on peptide hits; taken by const reference (consistent with
              keepMatchingItemsUnroll), so temporaries are accepted as well
*/
template <class MapType, class Predicate>
static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
585 
/**
  @brief Removes the peptide hits matching @p pred from all peptide identifications of a map

  Applied to the peptide identifications of every element of @p prot_and_pep_ids
  and to its unassigned peptide identifications.

  @param prot_and_pep_ids Map-like container (iterable, elements provide getPeptideIdentifications();
                          the map provides getUnassignedPeptideIdentifications())
  @param pred Predicate on peptide hits; taken by const reference (consistent with
              removeMatchingItemsUnroll), so temporaries are accepted as well
*/
template <class MapType, class Predicate>
static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
595 
/**
  @brief Removes whole peptide identifications matching @p pred from a map

  Applied to the peptide identifications of every element of @p prot_and_pep_ids
  and to its unassigned peptide identifications.

  @param prot_and_pep_ids Map-like container (iterable, elements provide getPeptideIdentifications();
                          the map provides getUnassignedPeptideIdentifications())
  @param pred Predicate on peptide identifications; taken by const reference (consistent
              with removeMatchingItems), so temporaries are accepted as well
*/
template <class MapType, class Predicate>
static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    removeMatchingItems(feat.getPeptideIdentifications(), pred);
  }
  removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
605 
607 
608 
611 
613  template <class IdentificationType>
614  static Size countHits(const std::vector<IdentificationType>& ids)
615  {
616  Size counter = 0;
617  for (typename std::vector<IdentificationType>::const_iterator id_it =
618  ids.begin(); id_it != ids.end(); ++id_it)
619  {
620  counter += id_it->getHits().size();
621  }
622  return counter;
623  }
624 
638  template <class IdentificationType>
639  static bool getBestHit(
640  const std::vector<IdentificationType>& identifications,
641  bool assume_sorted, typename IdentificationType::HitType& best_hit)
642  {
643  if (identifications.empty()) return false;
644 
645  typename std::vector<IdentificationType>::const_iterator best_id_it =
646  identifications.end();
647  typename std::vector<typename IdentificationType::HitType>::const_iterator
648  best_hit_it;
649 
650  for (typename std::vector<IdentificationType>::const_iterator id_it =
651  identifications.begin(); id_it != identifications.end(); ++id_it)
652  {
653  if (id_it->getHits().empty()) continue;
654 
655  if (best_id_it == identifications.end()) // no previous "best" hit
656  {
657  best_id_it = id_it;
658  best_hit_it = id_it->getHits().begin();
659  }
660  else if (best_id_it->getScoreType() != id_it->getScoreType())
661  {
662  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
663  }
664 
665  bool higher_better = best_id_it->isHigherScoreBetter();
666  for (typename std::vector<typename IdentificationType::HitType>::
667  const_iterator hit_it = id_it->getHits().begin(); hit_it !=
668  id_it->getHits().end(); ++hit_it)
669  {
670  if ((higher_better && (hit_it->getScore() >
671  best_hit_it->getScore())) ||
672  (!higher_better && (hit_it->getScore() <
673  best_hit_it->getScore())))
674  {
675  best_hit_it = hit_it;
676  }
677  if (assume_sorted) break; // only consider the first hit
678  }
679  }
680 
681  if (best_id_it == identifications.end())
682  {
683  return false; // no hits in any IDs
684  }
685 
686  best_hit = *best_hit_it;
687  return true;
688  }
689 
698  const std::vector<PeptideIdentification>& peptides,
699  std::set<String>& sequences, bool ignore_mods = false);
700 
706  static std::map<String,std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
707 
713  template<class EvidenceFilter>
715  EvidenceFilter& filter,
716  std::vector<PeptideIdentification>& peptides)
717  {
718  for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
719  pep_it != peptides.end(); ++pep_it)
720  {
721  for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
722  hit_it != pep_it->getHits().end(); ++hit_it )
723  {
724  std::vector<PeptideEvidence> evidences;
725  remove_copy_if(hit_it->getPeptideEvidences().begin(),
726  hit_it->getPeptideEvidences().end(),
727  back_inserter(evidences),
728  std::not1(filter));
729  hit_it->setPeptideEvidences(evidences);
730  }
731  }
732  }
733 
735 
736 
739 
/// Calls assignRanks() on every identification in @p ids.
template <class IdentificationType>
static void updateHitRanks(std::vector<IdentificationType>& ids)
{
  for (IdentificationType& id : ids)
  {
    id.assignRanks();
  }
}
750 
753  static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
754 
757  std::vector<ProteinIdentification>& proteins,
758  const std::vector<PeptideIdentification>& peptides);
761  ProteinIdentification& proteins,
762  const std::vector<PeptideIdentification>& peptides);
763 
772  std::vector<PeptideIdentification>& peptides,
773  const std::vector<ProteinIdentification>& proteins,
774  bool remove_peptides_without_reference = false);
775 
784  ConsensusMap& cmap,
785  bool remove_peptides_without_reference = false);
786 
795  ConsensusMap& cmap,
796  const ProteinIdentification& ref_run,
797  bool remove_peptides_without_reference = false);
798 
807  static bool updateProteinGroups(
808  std::vector<ProteinIdentification::ProteinGroup>& groups,
809  const std::vector<ProteinHit>& hits);
810 
818  const std::vector<ProteinIdentification::ProteinGroup>& groups,
819  std::vector<ProteinHit>& hits);
821 
822 
825 
827  template <class IdentificationType>
828  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
829  {
830  struct HasNoHits<IdentificationType> empty_filter;
831  removeMatchingItems(ids, empty_filter);
832  }
833 
839  template <class IdentificationType>
840  static void filterHitsByScore(std::vector<IdentificationType>& ids,
841  double threshold_score)
842  {
843  for (typename std::vector<IdentificationType>::iterator id_it =
844  ids.begin(); id_it != ids.end(); ++id_it)
845  {
846  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
847  threshold_score, id_it->isHigherScoreBetter());
848  keepMatchingItems(id_it->getHits(), score_filter);
849  }
850  }
851 
858  static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps,
859  double threshold_score, bool higher_better);
860 
866  template <class IdentificationType>
867  static void filterHitsByScore(IdentificationType& id,
868  double threshold_score)
869  {
870  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
871  threshold_score, id->isHigherScoreBetter());
872  keepMatchingItems(id->getHits(), score_filter);
873  }
874 
880  template <class IdentificationType>
881  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
882  {
883  for (typename std::vector<IdentificationType>::iterator id_it =
884  ids.begin(); id_it != ids.end(); ++id_it)
885  {
886  id_it->sort();
887  if (n < id_it->getHits().size()) id_it->getHits().resize(n);
888  }
889  }
890 
905  template <class IdentificationType>
906  static void filterHitsByRank(std::vector<IdentificationType>& ids,
907  Size min_rank, Size max_rank)
908  {
909  updateHitRanks(ids);
910  if (min_rank > 1)
911  {
912  struct HasMaxRank<typename IdentificationType::HitType>
913  rank_filter(min_rank - 1);
914  for (typename std::vector<IdentificationType>::iterator id_it =
915  ids.begin(); id_it != ids.end(); ++id_it)
916  {
917  removeMatchingItems(id_it->getHits(), rank_filter);
918  }
919  }
920  if (max_rank >= min_rank)
921  {
922  struct HasMaxRank<typename IdentificationType::HitType>
923  rank_filter(max_rank);
924  for (typename std::vector<IdentificationType>::iterator id_it =
925  ids.begin(); id_it != ids.end(); ++id_it)
926  {
927  keepMatchingItems(id_it->getHits(), rank_filter);
928  }
929  }
930  }
931 
939  template <class IdentificationType>
940  static void removeDecoyHits(std::vector<IdentificationType>& ids)
941  {
942  struct HasDecoyAnnotation<typename IdentificationType::HitType>
943  decoy_filter;
944  for (typename std::vector<IdentificationType>::iterator id_it =
945  ids.begin(); id_it != ids.end(); ++id_it)
946  {
947  removeMatchingItems(id_it->getHits(), decoy_filter);
948  }
949  }
950 
958  template <class IdentificationType>
959  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids,
960  const std::set<String> accessions)
961  {
962  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
963  for (auto& id_it : ids)
964  {
965  removeMatchingItems(id_it.getHits(), acc_filter);
966  }
967  }
968 
976  template <class IdentificationType>
977  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids,
978  const std::set<String>& accessions)
979  {
980  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
981  for (auto& id_it : ids)
982  {
983  keepMatchingItems(id_it.getHits(), acc_filter);
984  }
985  }
986 
988 
989 
992 
999  static void keepBestPeptideHits(
1000  std::vector<PeptideIdentification>& peptides, bool strict = false);
1001 
1011  std::vector<PeptideIdentification>& peptides, Size min_length,
1012  Size max_length = UINT_MAX);
1013 
1023  std::vector<PeptideIdentification>& peptides, Int min_charge,
1024  Int max_charge);
1025 
1027  static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
1028  double min_rt, double max_rt);
1029 
1031  static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
1032  double min_mz, double max_mz);
1033 
1046  std::vector<PeptideIdentification>& peptides, double mass_error,
1047  bool unit_ppm);
1048 
1049 
1056  template <class Filter>
1058  Filter& filter,
1059  std::vector<PeptideIdentification>& peptides);
1060 
1073  std::vector<PeptideIdentification>& peptides,
1074  const String& metavalue_key, double threshold = 0.05);
1075 
1078  std::vector<PeptideIdentification>& peptides,
1079  const std::set<String>& modifications);
1080 
1082  std::vector<PeptideIdentification>& peptides,
1083  const String& regex);
1084 
1087  std::vector<PeptideIdentification>& peptides,
1088  const std::set<String>& modifications);
1089 
1098  std::vector<PeptideIdentification>& peptides,
1099  const std::vector<PeptideIdentification>& bad_peptides,
1100  bool ignore_mods = false);
1101 
1110  std::vector<PeptideIdentification>& peptides,
1111  const std::vector<PeptideIdentification>& good_peptides,
1112  bool ignore_mods = false);
1113 
1115  static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
1116  peptides);
1117 
1123  static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
1124  peptides, bool seq_only = false);
1125 
1127 
1128 
1131 
1133  static void filterHitsByScore(PeakMap& experiment,
1134  double peptide_threshold_score,
1135  double protein_threshold_score)
1136  {
1137  // filter protein hits:
1138  filterHitsByScore(experiment.getProteinIdentifications(),
1139  protein_threshold_score);
1140  // don't remove empty protein IDs - they contain search meta data and may
1141  // be referenced by peptide IDs (via run ID)
1142 
1143  // filter peptide hits:
1144  for (PeakMap::Iterator exp_it = experiment.begin();
1145  exp_it != experiment.end(); ++exp_it)
1146  {
1147  filterHitsByScore(exp_it->getPeptideIdentifications(),
1148  peptide_threshold_score);
1149  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1150  updateProteinReferences(exp_it->getPeptideIdentifications(),
1151  experiment.getProteinIdentifications());
1152  }
1153  // @TODO: remove proteins that aren't referenced by peptides any more?
1154  }
1155 
1157  static void keepNBestHits(PeakMap& experiment, Size n)
1158  {
1159  // don't filter the protein hits by "N best" here - filter the peptides
1160  // and update the protein hits!
1161  std::vector<PeptideIdentification> all_peptides; // IDs from all spectra
1162 
1163  // filter peptide hits:
1164  for (PeakMap::Iterator exp_it = experiment.begin();
1165  exp_it != experiment.end(); ++exp_it)
1166  {
1167  std::vector<PeptideIdentification>& peptides =
1168  exp_it->getPeptideIdentifications();
1169  keepNBestHits(peptides, n);
1170  removeEmptyIdentifications(peptides);
1171  updateProteinReferences(peptides,
1172  experiment.getProteinIdentifications());
1173  all_peptides.insert(all_peptides.end(), peptides.begin(),
1174  peptides.end());
1175  }
1176  // update protein hits:
1177  removeUnreferencedProteins(experiment.getProteinIdentifications(),
1178  all_peptides);
1179  }
1180 
1183  static void keepNBestSpectra(std::vector<PeptideIdentification>& peptides, Size n);
1184 
1186  template <class MapType>
1187  static void keepNBestPeptideHits(MapType& map, Size n)
1188  {
1189  // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1190  // which sorts Hits first.
1191  for (auto& feat : map)
1192  {
1193  keepNBestHits(feat.getPeptideIdentifications(), n);
1194  }
1195  keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1196  }
1197 
1198  template <class MapType>
1199  static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1200  {
1201  const auto pred = HasNoHits<PeptideIdentification>();
1202  removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1203  }
1204 
1206  static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1207  {
1208  annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1209  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1210  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1211  }
1212 
1213  static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1214  {
1215  annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1216  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1217  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1218  }
1219 
1220  //TODO allow skipping unassigned?
1221  template <class MapType>
1222  static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1223  {
1224  const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1225 
1226  RunToSequenceToChargeToPepHitP best_peps_per_run;
1227  for (const auto& idrun : prot_ids)
1228  {
1229  best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1230  }
1231 
1232  for (auto& feat : prot_and_pep_ids)
1233  {
1234  annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1235  }
1236 
1237  annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1238  }
1239 
1240  template <class MapType>
1241  static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1242  {
1243  annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1244  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1245  keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1246  }
1247 
1250  static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1251  {
1252  RunToSequenceToChargeToPepHitP best_peps_per_run;
1253  for (const auto& id : prot_ids)
1254  {
1255  best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1256  }
1257  annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1258  }
1259 
1263  static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1264  {
1265  for (auto &pep : pep_ids)
1266  {
1267  SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1268  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1269  }
1270  }
1271 
1275  static void annotateBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1276  {
1277  SequenceToChargeToPepHitP best_pep;
1278  for (auto& pep : pep_ids)
1279  {
1280  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1281  }
1282  }
1283 
1288  static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1289  {
1290  bool higher_score_better = pep.isHigherScoreBetter();
1291  //make sure that first = best hit
1292  pep.sort();
1293 
1294  auto pepIt = pep.getHits().begin();
1295  auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1296  for (; pepIt != pepItEnd; ++pepIt)
1297  {
1298  PeptideHit &hit = *pepIt;
1299 
1300  String lookup_seq;
1301  if (ignore_mods)
1302  {
1303  lookup_seq = hit.getSequence().toUnmodifiedString();
1304  }
1305  else
1306  {
1307  lookup_seq = hit.getSequence().toString();
1308  }
1309 
1310  int lookup_charge = 0;
1311  if (!ignore_charges)
1312  {
1313  lookup_charge = hit.getCharge();
1314  }
1315 
1316  // try to insert
1317  auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1318  auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1319 
1320  PeptideHit* &p = it_inserted_chg.first->second; //now this gets either the old one if already present, or this
1321  if (!it_inserted_chg.second) //was already present -> possibly update
1322  {
1323  if (
1324  (higher_score_better && (hit.getScore() > p->getScore())) ||
1325  (!higher_score_better && (hit.getScore() < p->getScore()))
1326  )
1327  {
1328  p->setMetaValue("best_per_peptide", 0);
1329  hit.setMetaValue("best_per_peptide", 1);
1330  p = &hit;
1331  }
1332  else //note that this was def. not the best
1333  {
1334  // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1335  hit.setMetaValue("best_per_peptide", 0);
1336  }
1337  }
1338  else //newly inserted -> first for that sequence (and optionally charge)
1339  {
1340  hit.setMetaValue("best_per_peptide", 1);
1341  }
1342  }
1343  }
1344 
1347  PeakMap& experiment,
1348  const std::vector<FASTAFile::FASTAEntry>& proteins)
1349  {
1350  std::set<String> accessions;
1351  for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1352  proteins.begin(); it != proteins.end(); ++it)
1353  {
1354  accessions.insert(it->identifier);
1355  }
1356 
1357  // filter protein hits:
1358  keepHitsMatchingProteins(experiment.getProteinIdentifications(),
1359  accessions);
1360  updateHitRanks(experiment.getProteinIdentifications());
1361 
1362  // filter peptide hits:
1363  for (PeakMap::Iterator exp_it = experiment.begin();
1364  exp_it != experiment.end(); ++exp_it)
1365  {
1366  if (exp_it->getMSLevel() == 2)
1367  {
1368  keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1369  accessions);
1370  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1371  updateHitRanks(exp_it->getPeptideIdentifications());
1372  }
1373  }
1374  }
1375 
1377 
1378 
1381 
1391  template <typename PredicateType>
1393  IdentificationData& id_data, PredicateType&& func, bool cleanup_affected = false)
1394  {
1395  id_data.removeFromSetIf_(id_data.observation_matches_, func);
1396  if (cleanup_affected) id_data.cleanup();
1397  }
1398 
1410  IdentificationData& id_data,
1412 
1425  IdentificationData& id_data,
1426  IdentificationData::ScoreTypeRef score_ref, double cutoff);
1427 
1433  static void removeDecoys(IdentificationData& id_data);
1435 
1436  };
1437 
1438 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro to use when a warning (a piece of information which should be read by the user) should be logged.
Definition: LogStream.h:460
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:90
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:384
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:65
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:650
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Invalid value exception.
Definition: Exception.h:329
Not all required information provided.
Definition: Exception.h:188
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:389
Int max_cleavages_
Definition: IDFilter.h:393
EnzymaticDigestion & digestion_
Definition: IDFilter.h:391
PeptideHit argument_type
Definition: IDFilter.h:396
Int min_cleavages_
Definition: IDFilter.h:392
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:405
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:421
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:397
static Int disabledValue()
Definition: IDFilter.h:401
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:79
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:840
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:549
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1157
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1222
static void removeUnreferencedProteins(ProteinIdentification &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:91
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:577
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1199
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1250
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:533
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1263
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:90
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:828
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:568
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:940
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:541
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1288
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:587
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1206
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:714
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:906
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1187
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:558
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:597
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:614
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:639
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:742
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1241
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1346
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:959
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1213
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:977
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterObservationMatchesByFunctor(IdentificationData &id_data, PredicateType &&func, bool cleanup_affected=false)
Helper function for filtering observation matches (e.g. PSMs) in IdentificationData.
Definition: IDFilter.h:1392
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1275
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1133
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:89
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:95
ObservationMatches observation_matches_
Definition: IdentificationData.h:656
static void removeFromSetIf_(ContainerType &container, PredicateType predicate)
Remove elements from a set (or ordered multi_index_container) if they fulfill a predicate.
Definition: IdentificationData.h:837
void cleanup(bool require_observation_match=true, bool require_identified_sequence=true, bool require_parent_match=true, bool require_parent_group=false, bool require_match_group=false)
Clean up the data structure after filtering parts of it.
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:73
Iterator begin()
Definition: MSExperiment.h:150
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:104
Iterator end()
Definition: MSExperiment.h:160
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
Representation of a peptide evidence.
Definition: PeptideEvidence.h:51
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:57
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:61
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:60
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:72
String identifier
Definition: FASTAFile.h:73
Is the peptide evidence a digestion product of some protein?
Definition: IDFilter.h:436
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:445
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:440
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:484
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:455
bool ignore_missed_cleavages_
Definition: IDFilter.h:442
PeptideEvidence argument_type
Definition: IDFilter.h:437
ProteaseDigestion & digestion_
Definition: IDFilter.h:441
bool methionine_cleavage_
Definition: IDFilter.h:443
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:305
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:307
GetMatchingItems()
Definition: IDFilter.h:319
ItemMap items
Definition: IDFilter.h:308
HitType argument_type
Definition: IDFilter.h:306
bool exists(const HitType &hit) const
Definition: IDFilter.h:326
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:336
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:310
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:321
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:331
Is this a decoy hit?
Definition: IDFilter.h:208
bool operator()(const HitType &hit) const
Definition: IDFilter.h:217
HitType argument_type
Definition: IDFilter.h:209
HasDecoyAnnotation()
Definition: IDFilter.h:213
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:103
bool operator()(const HitType &hit) const
Definition: IDFilter.h:114
double score
Definition: IDFilter.h:106
HitType argument_type
Definition: IDFilter.h:104
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:109
bool higher_score_better
Definition: IDFilter.h:107
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:233
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:238
HitType argument_type
Definition: IDFilter.h:234
const std::unordered_set< String > & accessions
Definition: IDFilter.h:236
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:242
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:256
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:251
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:269
HitType argument_type
Definition: IDFilter.h:270
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:278
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:292
const std::set< String > & accessions
Definition: IDFilter.h:272
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:274
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:287
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:186
bool operator()(const HitType &hit) const
Definition: IDFilter.h:197
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:192
HitType argument_type
Definition: IDFilter.h:187
String key
Definition: IDFilter.h:189
double value
Definition: IDFilter.h:190
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:131
bool operator()(const HitType &hit) const
Definition: IDFilter.h:145
HitType argument_type
Definition: IDFilter.h:132
Size rank
Definition: IDFilter.h:134
HasMaxRank(Size rank_)
Definition: IDFilter.h:136
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:163
bool operator()(const HitType &hit) const
Definition: IDFilter.h:174
DataValue value
Definition: IDFilter.h:167
HitType argument_type
Definition: IDFilter.h:164
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:169
String key
Definition: IDFilter.h:166
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:500
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:503
IdentificationType argument_type
Definition: IDFilter.h:501
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:46