OpenMS  2.4.0
IDFilter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/config.h>
46 
47 #include <algorithm>
48 #include <climits>
49 #include <vector>
50 #include <set>
51 #include <map>
52 
53 namespace OpenMS
54 {
75  class OPENMS_DLLAPI IDFilter
76  {
77 public:
78 
80  IDFilter();
81 
83  virtual ~IDFilter();
84 
85 
91 
/// Predicate: is the score of this hit at least as good as the given value?
template <class HitType>
struct HasGoodScore
{
  typedef HitType argument_type; // for use as a predicate

  double score; ///< cut-off value; a hit scoring exactly this value passes
  bool higher_score_better; ///< orientation: true if larger scores are better

  /// Stores the cut-off @p score_ and the score orientation.
  HasGoodScore(double score_, bool higher_score_better_) :
    score(score_),
    higher_score_better(higher_score_better_)
  {}

  /// Returns true if the score of @p hit is equal to or better than the cut-off.
  bool operator()(const HitType& hit) const
  {
    if (higher_score_better)
    {
      return hit.getScore() >= score;
    }
    return hit.getScore() <= score;
  }
};
116 
122  template <class HitType>
123  struct HasMaxRank
124  {
125  typedef HitType argument_type; // for use as a predicate
126 
128 
129  HasMaxRank(Size rank_):
130  rank(rank_)
131  {
132  if (rank_ == 0)
133  {
134  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The cut-off value for rank filtering must not be zero!");
135  }
136  }
137 
138  bool operator()(const HitType& hit) const
139  {
140  Size hit_rank = hit.getRank();
141  if (hit_rank == 0)
142  {
143  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No rank assigned to peptide or protein hit");
144  }
145  return hit_rank <= rank;
146  }
147  };
148 
154  template <class HitType>
156  {
157  typedef HitType argument_type; // for use as a predicate
158 
161 
162  HasMetaValue(const String& key_, const DataValue& value_):
163  key(key_),
164  value(value_)
165  {}
166 
167  bool operator()(const HitType& hit) const
168  {
169  DataValue found = hit.getMetaValue(key);
170  if (found.isEmpty()) return false; // meta value "key" not set
171  if (value.isEmpty()) return true; // "key" is set, value doesn't matter
172  return found == value;
173  }
174  };
175 
177  template <class HitType>
179  {
180  typedef HitType argument_type; // for use as a predicate
181 
183  double value;
184 
185  HasMaxMetaValue(const String& key_, const double& value_):
186  key(key_),
187  value(value_)
188  {}
189 
190  bool operator()(const HitType& hit) const
191  {
192  DataValue found = hit.getMetaValue(key);
193  if (found.isEmpty()) return false; // meta value "key" not set
194  return double(found) <= value;
195  }
196  };
197 
199  template <class HitType>
201  {
202  typedef HitType argument_type; // for use as a predicate
203 
204  struct HasMetaValue<HitType> target_decoy, is_decoy;
205 
207  target_decoy("target_decoy", "decoy"), is_decoy("isDecoy", "true")
208  {}
209 
210  bool operator()(const HitType& hit) const
211  {
212  // @TODO: this could be done slightly more efficiently by returning
213  // false if the "target_decoy" meta value is "target" or "target+decoy",
214  // without checking for an "isDecoy" meta value in that case
215  return target_decoy(hit) || is_decoy(hit);
216  }
217  };
218 
224  template <class HitType>
226  {
227  typedef HitType argument_type; // for use as a predicate
228 
229  const std::set<String>& accessions;
230 
231  HasMatchingAccession(const std::set<String>& accessions_):
232  accessions(accessions_)
233  {}
234 
235  bool operator()(const PeptideHit& hit) const
236  {
237  std::set<String> present_accessions = hit.extractProteinAccessionsSet();
238  for (std::set<String>::iterator it = present_accessions.begin();
239  it != present_accessions.end(); ++it)
240  {
241  if (accessions.count(*it) > 0) return true;
242  }
243  return false;
244  }
245 
246  bool operator()(const ProteinHit& hit) const
247  {
248  return (accessions.count(hit.getAccession()) > 0);
249  }
250 
251  bool operator()(const PeptideEvidence& evidence) const
252  {
253  return (accessions.count(evidence.getProteinAccession()) > 0);
254  }
255  };
256 
262  template <class HitType, class Entry>
264  {
265  typedef HitType argument_type; // for use as a predicate
266  typedef std::map<String, Entry*> ItemMap;//Store pointers to avoid copying data
268 
269  GetMatchingItems(std::vector<Entry>& records)
270  {
271  for(typename std::vector<Entry>::iterator rec_it = records.begin();
272  rec_it != records.end(); ++rec_it)
273  {
274  items[getKey(*rec_it)] = &(*rec_it);
275  }
276  }
277 
279 
280  const String& getKey(const FASTAFile::FASTAEntry& entry) const
281  {
282  return entry.identifier;
283  }
284 
285  bool exists(const HitType& hit) const
286  {
287  return items.count(getHitKey(hit)) > 0;
288  }
289 
290  const String& getHitKey(const PeptideEvidence& p) const
291  {
292  return p.getProteinAccession();
293  }
294 
295  const Entry& getValue(const PeptideEvidence& evidence) const
296  {
297  if(!exists(evidence)){
298  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '"+ getHitKey(evidence) + "'. peptide evidence accession not in data");
299  }
300  return *(items.find(getHitKey(evidence))->second);
301  }
302 
303  };
304 
305 
307 
308 
314 
317  struct HasMinPeptideLength;
318 
320  struct HasMinCharge;
321 
323  struct HasLowMZError;
324 
330  struct HasMatchingModification;
331 
337  struct HasMatchingSequence;
338 
340  struct HasNoEvidence;
341 
342 
349  {
350  private:
354 
355  public:
358  digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
359  {}
360 
361  static inline Int disabledValue(){ return -1; }
362 
366  {
367  return digestion_.filterByMissedCleavages(
369  [&](const Int missed_cleavages)
370  {
371 
372  bool max_filter = max_cleavages_ != disabledValue() ?
373  missed_cleavages > max_cleavages_ : false;
374  bool min_filter = min_cleavages_ != disabledValue() ?
375  missed_cleavages < min_cleavages_ : false;
376  return max_filter || min_filter;
377  });
378  }
379 
380  void filterPeptideSequences(std::vector<PeptideHit>& hits)
381  {
382  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)), hits.end());
383  }
384 
385  };
386 
387 
394  {
396 
397  // Build an accession index to avoid the linear search cost
402 
403  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries,
404  ProteaseDigestion& digestion,
405  bool ignore_missed_cleavages,
406  bool methionine_cleavage) :
407  accession_resolver_(entries),
408  digestion_(digestion),
409  ignore_missed_cleavages_(ignore_missed_cleavages),
410  methionine_cleavage_(methionine_cleavage)
411  {}
412 
413  bool operator()(const PeptideEvidence& evidence) const
414  {
415  if(!evidence.hasValidLimits())
416  {
417  LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
418  return true;
419  }
420 
421  if (accession_resolver_.exists(evidence))
422  {
423  return digestion_.isValidProduct(
424  AASequence::fromString(accession_resolver_.getValue(evidence).sequence),
425  evidence.getStart(), evidence.getEnd() - evidence.getStart(), ignore_missed_cleavages_, methionine_cleavage_);
426  }
427  else
428  {
429  if (evidence.getProteinAccession().empty())
430  {
431  LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
432  }
433  else
434  {
435  LOG_WARN << "Peptide accession '" << evidence.getProteinAccession()
436  << "' not found in fasta file!" << std::endl;
437  }
438  return true;
439  }
440  }
441 
442  void filterPeptideEvidences(std::vector<PeptideIdentification>& peptides)
443  {
444  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this,peptides);
445  }
446 
447  };
448 
449 
451 
452 
455 
/// Predicate: is the list of hits of this peptide/protein ID empty?
template <class IdentificationType>
struct HasNoHits
{
  typedef IdentificationType argument_type; // for use as a predicate

  /// Returns true when @p id holds no hits.
  bool operator()(const IdentificationType& id) const
  {
    const bool without_hits = id.getHits().empty();
    return without_hits;
  }
};
467 
469 
470 
473 
475  struct HasRTInRange;
476 
478  struct HasMZInRange;
479 
481 
482 
488 
/// Remove items that satisfy a condition from a container (e.g. vector).
/// Items for which @p pred returns true are erased; the order of the
/// remaining items is unchanged.
template <class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  // remove_if moves the surviving items to the front ...
  auto first_discarded = std::remove_if(items.begin(), items.end(), pred);
  // ... and erase() then cuts off the left-over tail.
  items.erase(first_discarded, items.end());
}
497 
/// Keep items that satisfy a condition in a container (e.g. vector),
/// removing all others. The order of the kept items is unchanged.
template <class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  // The predicate is negated with a lambda rather than std::not1, which is
  // deprecated in C++17 and removed in C++20. Predicates therefore no longer
  // need an "argument_type" typedef, so plain lambdas work as well.
  items.erase(std::remove_if(items.begin(), items.end(),
                             [&pred](const typename Container::value_type& item)
                             {
                               return !pred(item);
                             }),
              items.end());
}
505 
507 
508 
511 
513  template <class IdentificationType>
514  static Size countHits(const std::vector<IdentificationType>& ids)
515  {
516  Size counter = 0;
517  for (typename std::vector<IdentificationType>::const_iterator id_it =
518  ids.begin(); id_it != ids.end(); ++id_it)
519  {
520  counter += id_it->getHits().size();
521  }
522  return counter;
523  }
524 
537  template <class IdentificationType>
538  static bool getBestHit(
539  const std::vector<IdentificationType>& identifications,
540  bool assume_sorted, typename IdentificationType::HitType& best_hit)
541  {
542  if (identifications.empty()) return false;
543 
544  typename std::vector<IdentificationType>::const_iterator best_id_it =
545  identifications.end();
546  typename std::vector<typename IdentificationType::HitType>::const_iterator
547  best_hit_it;
548 
549  for (typename std::vector<IdentificationType>::const_iterator id_it =
550  identifications.begin(); id_it != identifications.end(); ++id_it)
551  {
552  if (id_it->getHits().empty()) continue;
553 
554  if (best_id_it == identifications.end()) // no previous "best" hit
555  {
556  best_id_it = id_it;
557  best_hit_it = id_it->getHits().begin();
558  }
559  else if (best_id_it->getScoreType() != id_it->getScoreType())
560  {
561  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
562  }
563 
564  bool higher_better = best_id_it->isHigherScoreBetter();
565  for (typename std::vector<typename IdentificationType::HitType>::
566  const_iterator hit_it = id_it->getHits().begin(); hit_it !=
567  id_it->getHits().end(); ++hit_it)
568  {
569  if ((higher_better && (hit_it->getScore() >
570  best_hit_it->getScore())) ||
571  (!higher_better && (hit_it->getScore() <
572  best_hit_it->getScore())))
573  {
574  best_hit_it = hit_it;
575  }
576  if (assume_sorted) break; // only consider the first hit
577  }
578  }
579 
580  if (best_id_it == identifications.end())
581  {
582  return false; // no hits in any IDs
583  }
584 
585  best_hit = *best_hit_it;
586  return true;
587  }
588 
596  static void extractPeptideSequences(
597  const std::vector<PeptideIdentification>& peptides,
598  std::set<String>& sequences, bool ignore_mods = false);
599 
606  template<class EvidenceFilter>
608  EvidenceFilter& filter,
609  std::vector<PeptideIdentification>& peptides)
610  {
611  for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
612  pep_it != peptides.end(); ++pep_it)
613  {
614  for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
615  hit_it != pep_it->getHits().end(); ++hit_it )
616  {
617  std::vector<PeptideEvidence> evidences;
618  remove_copy_if(hit_it->getPeptideEvidences().begin(),
619  hit_it->getPeptideEvidences().end(),
620  back_inserter(evidences),
621  std::not1(filter));
622  hit_it->setPeptideEvidences(evidences);
623  }
624  }
625  }
626 
627 
629 
630 
633 
/// Updates the hit ranks on all peptide or protein IDs by calling
/// assignRanks() on every element of @p ids.
template <class IdentificationType>
static void updateHitRanks(std::vector<IdentificationType>& ids)
{
  for (IdentificationType& id : ids)
  {
    id.assignRanks();
  }
}
644 
646  static void removeUnreferencedProteins(
647  std::vector<ProteinIdentification>& proteins,
648  const std::vector<PeptideIdentification>& peptides);
649 
657  static void updateProteinReferences(
658  std::vector<PeptideIdentification>& peptides,
659  const std::vector<ProteinIdentification>& proteins,
660  bool remove_peptides_without_reference = false);
661 
670  static bool updateProteinGroups(
671  std::vector<ProteinIdentification::ProteinGroup>& groups,
672  const std::vector<ProteinHit>& hits);
673 
675 
676 
679 
681  template <class IdentificationType>
682  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
683  {
684  struct HasNoHits<IdentificationType> empty_filter;
685  removeMatchingItems(ids, empty_filter);
686  }
687 
693  template <class IdentificationType>
694  static void filterHitsByScore(std::vector<IdentificationType>& ids,
695  double threshold_score)
696  {
697  for (typename std::vector<IdentificationType>::iterator id_it =
698  ids.begin(); id_it != ids.end(); ++id_it)
699  {
700  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
701  threshold_score, id_it->isHigherScoreBetter());
702  keepMatchingItems(id_it->getHits(), score_filter);
703  }
704  }
705 
711  template <class IdentificationType>
712  static void filterHitsBySignificance(std::vector<IdentificationType>& ids,
713  double threshold_fraction = 1.0)
714  {
715  for (typename std::vector<IdentificationType>::iterator id_it =
716  ids.begin(); id_it != ids.end(); ++id_it)
717  {
718  double threshold_score = (threshold_fraction *
719  id_it->getSignificanceThreshold());
720  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
721  threshold_score, id_it->isHigherScoreBetter());
722  keepMatchingItems(id_it->getHits(), score_filter);
723  }
724  }
725 
731  template <class IdentificationType>
732  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
733  {
734  for (typename std::vector<IdentificationType>::iterator id_it =
735  ids.begin(); id_it != ids.end(); ++id_it)
736  {
737  id_it->sort();
738  if (n < id_it->getHits().size()) id_it->getHits().resize(n);
739  }
740  }
741 
756  template <class IdentificationType>
757  static void filterHitsByRank(std::vector<IdentificationType>& ids,
758  Size min_rank, Size max_rank)
759  {
760  updateHitRanks(ids);
761  if (min_rank > 1)
762  {
763  struct HasMaxRank<typename IdentificationType::HitType>
764  rank_filter(min_rank - 1);
765  for (typename std::vector<IdentificationType>::iterator id_it =
766  ids.begin(); id_it != ids.end(); ++id_it)
767  {
768  removeMatchingItems(id_it->getHits(), rank_filter);
769  }
770  }
771  if (max_rank >= min_rank)
772  {
773  struct HasMaxRank<typename IdentificationType::HitType>
774  rank_filter(max_rank);
775  for (typename std::vector<IdentificationType>::iterator id_it =
776  ids.begin(); id_it != ids.end(); ++id_it)
777  {
778  keepMatchingItems(id_it->getHits(), rank_filter);
779  }
780  }
781  }
782 
790  template <class IdentificationType>
791  static void removeDecoyHits(std::vector<IdentificationType>& ids)
792  {
793  struct HasDecoyAnnotation<typename IdentificationType::HitType>
794  decoy_filter;
795  for (typename std::vector<IdentificationType>::iterator id_it =
796  ids.begin(); id_it != ids.end(); ++id_it)
797  {
798  removeMatchingItems(id_it->getHits(), decoy_filter);
799  }
800  }
801 
809  template <class IdentificationType>
810  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids,
811  const std::set<String> accessions)
812  {
813  struct HasMatchingAccession<typename IdentificationType::HitType>
814  acc_filter(accessions);
815  for (typename std::vector<IdentificationType>::iterator id_it =
816  ids.begin(); id_it != ids.end(); ++id_it)
817  {
818  removeMatchingItems(id_it->getHits(), acc_filter);
819  }
820  }
821 
829  template <class IdentificationType>
830  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids,
831  const std::set<String> accessions)
832  {
833  struct HasMatchingAccession<typename IdentificationType::HitType>
834  acc_filter(accessions);
835  for (typename std::vector<IdentificationType>::iterator id_it =
836  ids.begin(); id_it != ids.end(); ++id_it)
837  {
838  keepMatchingItems(id_it->getHits(), acc_filter);
839  }
840  }
841 
842 
843 
845 
846 
849 
856  static void keepBestPeptideHits(
857  std::vector<PeptideIdentification>& peptides, bool strict = false);
858 
867  static void filterPeptidesByLength(
868  std::vector<PeptideIdentification>& peptides, Size min_length,
869  Size max_length = UINT_MAX);
870 
879  static void filterPeptidesByCharge(
880  std::vector<PeptideIdentification>& peptides, Int min_charge,
881  Int max_charge);
882 
884  static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
885  double min_rt, double max_rt);
886 
888  static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
889  double min_mz, double max_mz);
890 
902  static void filterPeptidesByMZError(
903  std::vector<PeptideIdentification>& peptides, double mass_error,
904  bool unit_ppm);
905 
906 
913  template <class Filter>
914  static void filterPeptideEvidences(
915  Filter& filter,
916  std::vector<PeptideIdentification>& peptides);
917 
929  static void filterPeptidesByRTPredictPValue(
930  std::vector<PeptideIdentification>& peptides,
931  const String& metavalue_key, double threshold = 0.05);
932 
934  static void removePeptidesWithMatchingModifications(
935  std::vector<PeptideIdentification>& peptides,
936  const std::set<String>& modifications);
937 
939  static void keepPeptidesWithMatchingModifications(
940  std::vector<PeptideIdentification>& peptides,
941  const std::set<String>& modifications);
942 
950  static void removePeptidesWithMatchingSequences(
951  std::vector<PeptideIdentification>& peptides,
952  const std::vector<PeptideIdentification>& bad_peptides,
953  bool ignore_mods = false);
954 
962  static void keepPeptidesWithMatchingSequences(
963  std::vector<PeptideIdentification>& peptides,
964  const std::vector<PeptideIdentification>& good_peptides,
965  bool ignore_mods = false);
966 
968  static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
969  peptides);
970 
976  static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
977  peptides, bool seq_only = false);
978 
980 
981 
984 
986  static void filterHitsByScore(PeakMap& experiment,
987  double peptide_threshold_score,
988  double protein_threshold_score)
989  {
990  // filter protein hits:
991  filterHitsByScore(experiment.getProteinIdentifications(),
992  protein_threshold_score);
993  // don't remove empty protein IDs - they contain search meta data and may
994  // be referenced by peptide IDs (via run ID)
995 
996  // filter peptide hits:
997  for (PeakMap::Iterator exp_it = experiment.begin();
998  exp_it != experiment.end(); ++exp_it)
999  {
1000  filterHitsByScore(exp_it->getPeptideIdentifications(),
1001  peptide_threshold_score);
1002  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1003  updateProteinReferences(exp_it->getPeptideIdentifications(),
1004  experiment.getProteinIdentifications());
1005  }
1006  // @TODO: remove proteins that aren't referenced by peptides any more?
1007  }
1008 
1010  static void filterHitsBySignificance(PeakMap& experiment,
1011  double peptide_threshold_fraction,
1012  double protein_threshold_fraction)
1013  {
1014  // filter protein hits:
1015  filterHitsBySignificance(experiment.getProteinIdentifications(),
1016  protein_threshold_fraction);
1017  // don't remove empty protein IDs - they contain search meta data and may
1018  // be referenced by peptide IDs (via run ID)
1019 
1020  // filter peptide hits:
1021  for (PeakMap::Iterator exp_it = experiment.begin();
1022  exp_it != experiment.end(); ++exp_it)
1023  {
1024  filterHitsBySignificance(exp_it->getPeptideIdentifications(),
1025  peptide_threshold_fraction);
1026  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1027  updateProteinReferences(exp_it->getPeptideIdentifications(),
1028  experiment.getProteinIdentifications());
1029  }
1030  // @TODO: remove proteins that aren't referenced by peptides any more?
1031  }
1032 
1034  static void keepNBestHits(PeakMap& experiment, Size n)
1035  {
1036  // don't filter the protein hits by "N best" here - filter the peptides
1037  // and update the protein hits!
1038  std::vector<PeptideIdentification> all_peptides; // IDs from all spectra
1039 
1040  // filter peptide hits:
1041  for (PeakMap::Iterator exp_it = experiment.begin();
1042  exp_it != experiment.end(); ++exp_it)
1043  {
1044  std::vector<PeptideIdentification>& peptides =
1045  exp_it->getPeptideIdentifications();
1046  keepNBestHits(peptides, n);
1047  removeEmptyIdentifications(peptides);
1048  updateProteinReferences(peptides,
1049  experiment.getProteinIdentifications());
1050  all_peptides.insert(all_peptides.end(), peptides.begin(),
1051  peptides.end());
1052  }
1053  // update protein hits:
1054  removeUnreferencedProteins(experiment.getProteinIdentifications(),
1055  all_peptides);
1056  }
1057 
1060  PeakMap& experiment,
1061  const std::vector<FASTAFile::FASTAEntry>& proteins)
1062  {
1063  std::set<String> accessions;
1064  for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1065  proteins.begin(); it != proteins.end(); ++it)
1066  {
1067  accessions.insert(it->identifier);
1068  }
1069 
1070  // filter protein hits:
1071  keepHitsMatchingProteins(experiment.getProteinIdentifications(),
1072  accessions);
1073  updateHitRanks(experiment.getProteinIdentifications());
1074 
1075  // filter peptide hits:
1076  for (PeakMap::Iterator exp_it = experiment.begin();
1077  exp_it != experiment.end(); ++exp_it)
1078  {
1079  if (exp_it->getMSLevel() == 2)
1080  {
1081  keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1082  accessions);
1083  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1084  updateHitRanks(exp_it->getPeptideIdentifications());
1085  }
1086  }
1087  }
1088 
1090 
1091 
1092  };
1093 
1094 } // namespace OpenMS
1095 
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:123
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:380
bool ignore_missed_cleavages_
Definition: IDFilter.h:400
ItemMap items
Definition: IDFilter.h:267
const String & getAccession() const
returns the accession of the protein
ProteaseDigestion & digestion_
Definition: IDFilter.h:399
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:266
bool exists(const HitType &hit) const
Definition: IDFilter.h:285
A more convenient string class.
Definition: String.h:57
bool filterByMissedCleavages(const String &sequence, std::function< bool(const Int)> filter) const
Filter based on the number of missed cleavages.
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:810
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:514
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:251
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:111
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:246
GetMatchingItems()
Definition: IDFilter.h:278
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:357
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:155
double score
Definition: IDFilter.h:99
Int max_cleavages_
Definition: IDFilter.h:353
Is this a decoy hit?
Definition: IDFilter.h:200
PeptideEvidence argument_type
Definition: IDFilter.h:395
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:458
HasMaxRank(Size rank_)
Definition: IDFilter.h:129
bool operator()(const HitType &hit) const
Definition: IDFilter.h:210
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:102
bool operator()(const HitType &hit) const
Definition: IDFilter.h:107
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
Iterator begin()
Definition: MSExperiment.h:157
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:393
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:235
const std::set< String > & accessions
Definition: IDFilter.h:229
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:830
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:185
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:62
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
double value
Definition: IDFilter.h:183
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:162
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:295
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:791
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:694
static Int disabledValue()
Definition: IDFilter.h:361
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:462
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:450
HitType argument_type
Definition: IDFilter.h:180
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:348
Iterator end()
Definition: MSExperiment.h:167
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:442
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1059
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:607
String key
Definition: IDFilter.h:182
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:636
bool operator()(const HitType &hit) const
Definition: IDFilter.h:138
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:290
HitType argument_type
Definition: IDFilter.h:157
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
Size rank
Definition: IDFilter.h:127
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:413
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:492
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:269
Representation of a peptide hit.
Definition: PeptideHit.h:54
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:398
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned...
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1034
IdentificationType argument_type
Definition: IDFilter.h:460
bool operator()(const HitType &hit) const
Definition: IDFilter.h:190
HasDecoyAnnotation()
Definition: IDFilter.h:206
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:95
DataValue value
Definition: IDFilter.h:160
Int min_cleavages_
Definition: IDFilter.h:352
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:225
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:280
HitType argument_type
Definition: IDFilter.h:202
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:347
HitType argument_type
Definition: IDFilter.h:97
static void filterHitsBySignificance(PeakMap &experiment, double peptide_threshold_fraction, double protein_threshold_fraction)
Filters an MS/MS experiment according to fractions of the significance thresholds.
Definition: IDFilter.h:1010
bool methionine_cleavage_
Definition: IDFilter.h:401
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:178
Representation of a protein hit.
Definition: ProteinHit.h:53
Invalid value exception.
Definition: Exception.h:335
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:403
bool operator()(PeptideHit &p)
Definition: IDFilter.h:365
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:682
bool higher_score_better
Definition: IDFilter.h:100
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:363
Builds a map index of data that have a String index to find matches and return the objects...
Definition: IDFilter.h:263
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:231
bool operator()(const HitType &hit) const
Definition: IDFilter.h:167
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:757
String key
Definition: IDFilter.h:159
HitType argument_type
Definition: IDFilter.h:227
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:986
HitType argument_type
Definition: IDFilter.h:265
HitType argument_type
Definition: IDFilter.h:125
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
String toUnmodifiedString() const
returns the peptide as string without any modifications
String identifier
Definition: FASTAFile.h:78
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:538
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:75
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
int Int
Signed integer type.
Definition: Types.h:102
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
EnzymaticDigestion & digestion_
Definition: IDFilter.h:351
PeptideHit argument_type
Definition: IDFilter.h:356
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:500
Not all required information provided.
Definition: Exception.h:195