Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
IDFilter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FILTERING_ID_IDFILTER_H
36 #define OPENMS_FILTERING_ID_IDFILTER_H
37 
38 #include <OpenMS/config.h>
46 
47 #include <algorithm>
48 #include <climits>
49 #include <vector>
50 #include <set>
51 #include <map>
52 
53 namespace OpenMS
54 {
75  class OPENMS_DLLAPI IDFilter
76  {
77 public:
78 
80  IDFilter();
81 
83  virtual ~IDFilter();
84 
85 
91 
/// Predicate: is the score of this hit at least as good as the given value?
/// "At least as good" means ">= score" if higher scores are better and
/// "<= score" otherwise.
94  template <class HitType>
95  struct HasGoodScore
96  {
97  typedef HitType argument_type; // for use as a predicate
98 
// cut-off score that every hit is compared against
99  double score;
// NOTE(review): the declaration of the member "higher_score_better"
// (listing line 100) is not shown in this listing.
101 
// Stores the cut-off score and the score orientation.
102  HasGoodScore(double score, bool higher_score_better):
103  score(score), higher_score_better(higher_score_better)
104  {}
105 
// Returns true if the hit's score passes the cut-off (the cut-off itself passes).
106  bool operator()(const HitType& hit) const
107  {
108  if (higher_score_better)
109  {
110  return hit.getScore() >= score;
111  }
// lower scores are better: accept scores at or below the cut-off
112  return hit.getScore() <= score;
113  }
114  };
115 
/// Predicate: is the rank of this hit below or at the given cut-off?
121  template <class HitType>
122  struct HasMaxRank
123  {
124  typedef HitType argument_type; // for use as a predicate
125 
// NOTE(review): listing lines 126 (member "Size rank") and 128 (constructor
// head "HasMaxRank(Size rank)") are not shown in this listing; what follows is
// the constructor's initializer list and body.
127 
129  rank(rank)
130  {
// a cut-off of zero is rejected with Exception::IllegalArgument
131  if (rank == 0)
132  {
133  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The cut-off value for rank filtering must not be zero!");
134  }
135  }
136 
// Returns true if the hit's rank is <= the cut-off.
// Throws Exception::MissingInformation if the hit reports rank 0.
137  bool operator()(const HitType& hit) const
138  {
139  Size hit_rank = hit.getRank();
// rank 0 is treated as "no rank assigned"
140  if (hit_rank == 0)
141  {
142  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No rank assigned to peptide or protein hit");
143  }
144  return hit_rank <= rank;
145  }
146  };
147 
/// Predicate: is a meta value with given key and value set on this hit?
// NOTE(review): listing lines 154 (struct head), 158 ("String key") and
// 159 ("DataValue value") are not shown in this listing.
153  template <class HitType>
155  {
156  typedef HitType argument_type; // for use as a predicate
157 
160 
// Stores the meta value key to look up and the value it is compared against.
161  HasMetaValue(const String& key, const DataValue& value):
162  key(key), value(value)
163  {}
164 
// Returns false if the key is not set on the hit; true if the key is set and
// either the stored value is empty or it equals the value found on the hit.
165  bool operator()(const HitType& hit) const
166  {
167  DataValue found = hit.getMetaValue(key);
168  if (found.isEmpty()) return false; // meta value "key" not set
169  if (value.isEmpty()) return true; // "key" is set, value doesn't matter
170  return found == value;
171  }
172  };
173 
/// Predicate: does a meta value of this hit have at most the given value?
// NOTE(review): listing lines 176 (struct head) and 180 ("String key") are not
// shown in this listing.
175  template <class HitType>
177  {
178  typedef HitType argument_type; // for use as a predicate
179 
// upper bound (inclusive) for the meta value
181  double value;
182 
// Stores the meta value key to look up and the inclusive upper bound.
183  HasMaxMetaValue(const String& key, const double& value):
184  key(key), value(value)
185  {}
186 
// Returns false if the key is not set on the hit; otherwise converts the
// found meta value to double and checks that it is <= the bound.
187  bool operator()(const HitType& hit) const
188  {
189  DataValue found = hit.getMetaValue(key);
190  if (found.isEmpty()) return false; // meta value "key" not set
191  return double(found) <= value;
192  }
193  };
194 
/// Predicate: is this a decoy hit?
// NOTE(review): listing lines 197 (struct head) and 203 (head of the default
// constructor "HasDecoyAnnotation()") are not shown in this listing.
196  template <class HitType>
198  {
199  typedef HitType argument_type; // for use as a predicate
200 
// two meta value checks; a hit counts as decoy if either one matches
201  struct HasMetaValue<HitType> target_decoy, is_decoy;
202 
// initializer list: "target_decoy" must equal "decoy", "isDecoy" must equal "true"
204  target_decoy("target_decoy", "decoy"), is_decoy("isDecoy", "true")
205  {}
206 
// Returns true if either of the two annotations marks the hit as decoy.
207  bool operator()(const HitType& hit) const
208  {
209  // @TODO: this could be done slightly more efficiently by returning
210  // false if the "target_decoy" meta value is "target" or "target+decoy",
211  // without checking for an "isDecoy" meta value in that case
212  return target_decoy(hit) || is_decoy(hit);
213  }
214  };
215 
/// Predicate: given a list of protein accessions, do any occur in the
/// annotation(s) of this hit?
// NOTE(review): listing line 222 (struct head) is not shown in this listing.
221  template <class HitType>
223  {
224  typedef HitType argument_type; // for use as a predicate
225 
// Held by reference, not copied.
// NOTE(review): the referenced set presumably has to outlive this predicate.
226  const std::set<String>& accessions;
227 
228  HasMatchingAccession(const std::set<String>& accessions):
229  accessions(accessions)
230  {}
231 
// Peptide hit: true if at least one accession extracted from the hit is in
// the given set.
232  bool operator()(const PeptideHit& hit) const
233  {
234  std::set<String> present_accessions = hit.extractProteinAccessionsSet();
235  for (std::set<String>::iterator it = present_accessions.begin();
236  it != present_accessions.end(); ++it)
237  {
238  if (accessions.count(*it) > 0) return true;
239  }
240  return false;
241  }
242 
// Protein hit: true if the hit's accession is in the given set.
243  bool operator()(const ProteinHit& hit) const
244  {
245  return (accessions.count(hit.getAccession()) > 0);
246  }
247 
// Peptide evidence: true if the evidence's protein accession is in the given set.
248  bool operator()(const PeptideEvidence& evidence) const
249  {
250  return (accessions.count(evidence.getProteinAccession()) > 0);
251  }
252  };
253 
/// Builds a map index of data that have a String index, to find matches and
/// return the objects.
// NOTE(review): listing lines 260 (struct head) and 275 (default constructor
// "GetMatchingItems()") are not shown in this listing.
259  template <class HitType, class Entry>
261  {
262  typedef HitType argument_type; // for use as a predicate
263  typedef std::map<String, Entry*> ItemMap;//Store pointers to avoid copying data
264  ItemMap items;
265 
// Indexes every record by its key. The map stores the addresses of the elements
// of "records"; a later record with the same key overwrites an earlier one.
// NOTE(review): "records" presumably has to stay alive and unmodified while
// this index is in use, since only pointers are kept.
266  GetMatchingItems(std::vector<Entry>& records)
267  {
268  for(typename std::vector<Entry>::iterator rec_it = records.begin();
269  rec_it != records.end(); ++rec_it)
270  {
271  items[getKey(*rec_it)] = &(*rec_it);
272  }
273  }
274 
276 
// Key of a FASTA entry: its identifier.
277  const String& getKey(const FASTAFile::FASTAEntry& entry) const
278  {
279  return entry.identifier;
280  }
281 
// Returns true if the key derived from "hit" is present in the index.
282  bool exists(const HitType& hit) const
283  {
284  return items.count(getHitKey(hit)) > 0;
285  }
286 
// Key of a peptide evidence: its protein accession.
287  const String& getHitKey(const PeptideEvidence& p) const
288  {
289  return p.getProteinAccession();
290  }
291 
// Returns the indexed entry for the evidence's protein accession.
// Throws Exception::InvalidParameter if the accession is not in the index.
292  const Entry& getValue(const PeptideEvidence& evidence) const
293  {
294  if(!exists(evidence)){
295  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '"+ getHitKey(evidence) + "'. peptide evidence accession not in data");
296  }
297  return *(items.find(getHitKey(evidence))->second);
298  }
299 
300  };
301 
302 
304 
305 
311 
314  struct HasMinPeptideLength;
315 
317  struct HasMinCharge;
318 
320  struct HasLowMZError;
321 
327  struct HasMatchingModification;
328 
334  struct HasMatchingSequence;
335 
337  struct HasNoEvidence;
338 
/// Predicate: is the peptide evidence a digestion product of some protein?
// NOTE(review): listing lines 344 (struct head), 346 ("PeptideEvidence
// argument_type") and 349-352 (members accession_resolver_, digestion_,
// ignore_missed_cleavages_, methionine_cleavage_) are not shown in this listing.
345  {
347 
348  // Build an accession index to avoid the linear search cost
353 
// Indexes the FASTA entries by accession and stores the digestion settings.
354  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries,
355  EnzymaticDigestion& digestion,
356  bool ignore_missed_cleavages,
357  bool methionine_cleavage) :
358  accession_resolver_(entries),
359  digestion_(digestion),
360  ignore_missed_cleavages_(ignore_missed_cleavages),
361  methionine_cleavage_(methionine_cleavage)
362  {}
363 
// Returns the result of EnzymaticDigestion::isValidProduct() for the evidence's
// position within its protein sequence. Returns true (after logging a warning)
// whenever that check cannot be made: invalid limits, empty accession, or
// accession not found among the indexed entries.
364  bool operator()(const PeptideEvidence& evidence) const
365  {
366  if(!evidence.hasValidLimits())
367  {
368  LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
369  return true;
370  }
371 
372  if(accession_resolver_.exists(evidence))
373  {
// peptide length is passed as getEnd() - getStart()
374  return digestion_.isValidProduct(
375  AASequence::fromString(accession_resolver_.getValue(evidence).sequence),
376  evidence.getStart(), evidence.getEnd() - evidence.getStart(), methionine_cleavage_, ignore_missed_cleavages_);
377  }
378  else
379  {
380  if(evidence.getProteinAccession().empty())
381  {
382  LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
383  }
384  else
385  {
386  LOG_WARN << "Peptide accession '" <<
387  evidence.getProteinAccession() <<
388  "' not found in fasta file!" << std::endl;
389  }
390  return true;
391  }
392  }
393 
// Convenience wrapper: applies this filter to all peptide evidences of the
// given peptide identifications via IDFilter::FilterPeptideEvidences.
394  void filterPeptideEvidences(std::vector<PeptideIdentification>& peptides)
395  {
396  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this,peptides);
397  }
398 
399  };
400 
401 
403 
404 
407 
/// Predicate: is the list of hits of this peptide/protein ID empty?
template <class IdentificationType>
struct HasNoHits
{
  /// argument type, needed when the predicate is wrapped (e.g. by std::not1)
  typedef IdentificationType argument_type;

  /// @return true if @p id carries no hits at all
  bool operator()(const IdentificationType& id) const
  {
    const bool has_hits = !id.getHits().empty();
    return !has_hits;
  }
};
419 
421 
422 
425 
427  struct HasRTInRange;
428 
430  struct HasMZInRange;
431 
433 
434 
440 
/// Remove items that satisfy a condition from a container (e.g. vector).
///
/// @param items Container to filter in place
/// @param pred Unary predicate; items for which it returns true are removed
template <class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  // erase-remove idiom: move the surviving items to the front, then cut the tail
  typename Container::iterator new_end =
    std::remove_if(items.begin(), items.end(), pred);
  items.erase(new_end, items.end());
}
449 
/// Keep items that satisfy a condition in a container (e.g. vector), removing
/// all others.
///
/// @param items Container to filter in place
/// @param pred Unary predicate (must provide an @c argument_type typedef, as it
///             is negated with std::not1); items for which it returns true are kept
template <class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  // remove everything for which the negated predicate holds
  typename Container::iterator first_rejected =
    std::remove_if(items.begin(), items.end(), std::not1(pred));
  items.erase(first_rejected, items.end());
}
457 
459 
460 
463 
465  template <class IdentificationType>
466  static Size countHits(const std::vector<IdentificationType>& ids)
467  {
468  Size counter = 0;
469  for (typename std::vector<IdentificationType>::const_iterator id_it =
470  ids.begin(); id_it != ids.end(); ++id_it)
471  {
472  counter += id_it->getHits().size();
473  }
474  return counter;
475  }
476 
489  template <class IdentificationType>
490  static bool getBestHit(
491  const std::vector<IdentificationType>& identifications,
492  bool assume_sorted, typename IdentificationType::HitType& best_hit)
493  {
494  if (identifications.empty()) return false;
495 
496  typename std::vector<IdentificationType>::const_iterator best_id_it =
497  identifications.end();
498  typename std::vector<typename IdentificationType::HitType>::const_iterator
499  best_hit_it;
500 
501  for (typename std::vector<IdentificationType>::const_iterator id_it =
502  identifications.begin(); id_it != identifications.end(); ++id_it)
503  {
504  if (id_it->getHits().empty()) continue;
505 
506  if (best_id_it == identifications.end()) // no previous "best" hit
507  {
508  best_id_it = id_it;
509  best_hit_it = id_it->getHits().begin();
510  }
511  else if (best_id_it->getScoreType() != id_it->getScoreType())
512  {
513  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
514  }
515 
516  bool higher_better = best_id_it->isHigherScoreBetter();
517  for (typename std::vector<typename IdentificationType::HitType>::
518  const_iterator hit_it = id_it->getHits().begin(); hit_it !=
519  id_it->getHits().end(); ++hit_it)
520  {
521  if ((higher_better && (hit_it->getScore() >
522  best_hit_it->getScore())) ||
523  (!higher_better && (hit_it->getScore() <
524  best_hit_it->getScore())))
525  {
526  best_hit_it = hit_it;
527  }
528  if (assume_sorted) break; // only consider the first hit
529  }
530  }
531 
532  if (best_id_it == identifications.end())
533  {
534  return false; // no hits in any IDs
535  }
536 
537  best_hit = *best_hit_it;
538  return true;
539  }
540 
548  static void extractPeptideSequences(
549  const std::vector<PeptideIdentification>& peptides,
550  std::set<String>& sequences, bool ignore_mods = false);
551 
/// Remove peptide evidences based on a filter.
// NOTE(review): listing line 559 (function head
// "static void FilterPeptideEvidences(") is not shown in this listing.
// For every peptide hit, only the evidences for which "filter" returns true are
// kept. The filter is wrapped in std::not1, so it needs an "argument_type" typedef.
558  template<class EvidenceFilter>
560  EvidenceFilter& filter,
561  std::vector<PeptideIdentification>& peptides)
562  {
563  for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
564  pep_it != peptides.end(); ++pep_it)
565  {
566  for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
567  hit_it != pep_it->getHits().end(); ++hit_it )
568  {
// collect the evidences accepted by the filter, then replace the hit's evidence list
569  std::vector<PeptideEvidence> evidences;
570  remove_copy_if(hit_it->getPeptideEvidences().begin(),
571  hit_it->getPeptideEvidences().end(),
572  back_inserter(evidences),
573  std::not1(filter));
574  hit_it->setPeptideEvidences(evidences);
575  }
576  }
577  }
578 
580 
581 
584 
/// Updates the hit ranks on all peptide or protein IDs.
///
/// @param ids Identifications on which assignRanks() is called, one by one
template <class IdentificationType>
static void updateHitRanks(std::vector<IdentificationType>& ids)
{
  typedef typename std::vector<IdentificationType>::size_type Index;
  for (Index i = 0; i < ids.size(); ++i)
  {
    ids[i].assignRanks();
  }
}
595 
597  static void removeUnreferencedProteins(
598  std::vector<ProteinIdentification>& proteins,
599  const std::vector<PeptideIdentification>& peptides);
600 
608  static void updateProteinReferences(
609  std::vector<PeptideIdentification>& peptides,
610  const std::vector<ProteinIdentification>& proteins,
611  bool remove_peptides_without_reference = false);
612 
621  static bool updateProteinGroups(
622  std::vector<ProteinIdentification::ProteinGroup>& groups,
623  const std::vector<ProteinHit>& hits);
624 
626 
627 
630 
632  template <class IdentificationType>
633  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
634  {
635  struct HasNoHits<IdentificationType> empty_filter;
636  removeMatchingItems(ids, empty_filter);
637  }
638 
644  template <class IdentificationType>
645  static void filterHitsByScore(std::vector<IdentificationType>& ids,
646  double threshold_score)
647  {
648  for (typename std::vector<IdentificationType>::iterator id_it =
649  ids.begin(); id_it != ids.end(); ++id_it)
650  {
651  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
652  threshold_score, id_it->isHigherScoreBetter());
653  keepMatchingItems(id_it->getHits(), score_filter);
654  }
655  }
656 
662  template <class IdentificationType>
663  static void filterHitsBySignificance(std::vector<IdentificationType>& ids,
664  double threshold_fraction = 1.0)
665  {
666  for (typename std::vector<IdentificationType>::iterator id_it =
667  ids.begin(); id_it != ids.end(); ++id_it)
668  {
669  double threshold_score = (threshold_fraction *
670  id_it->getSignificanceThreshold());
671  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
672  threshold_score, id_it->isHigherScoreBetter());
673  keepMatchingItems(id_it->getHits(), score_filter);
674  }
675  }
676 
682  template <class IdentificationType>
683  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
684  {
685  for (typename std::vector<IdentificationType>::iterator id_it =
686  ids.begin(); id_it != ids.end(); ++id_it)
687  {
688  id_it->sort();
689  if (n < id_it->getHits().size()) id_it->getHits().resize(n);
690  }
691  }
692 
707  template <class IdentificationType>
708  static void filterHitsByRank(std::vector<IdentificationType>& ids,
709  Size min_rank, Size max_rank)
710  {
711  updateHitRanks(ids);
712  if (min_rank > 1)
713  {
714  struct HasMaxRank<typename IdentificationType::HitType>
715  rank_filter(min_rank - 1);
716  for (typename std::vector<IdentificationType>::iterator id_it =
717  ids.begin(); id_it != ids.end(); ++id_it)
718  {
719  removeMatchingItems(id_it->getHits(), rank_filter);
720  }
721  }
722  if (max_rank >= min_rank)
723  {
724  struct HasMaxRank<typename IdentificationType::HitType>
725  rank_filter(max_rank);
726  for (typename std::vector<IdentificationType>::iterator id_it =
727  ids.begin(); id_it != ids.end(); ++id_it)
728  {
729  keepMatchingItems(id_it->getHits(), rank_filter);
730  }
731  }
732  }
733 
741  template <class IdentificationType>
742  static void removeDecoyHits(std::vector<IdentificationType>& ids)
743  {
744  struct HasDecoyAnnotation<typename IdentificationType::HitType>
745  decoy_filter;
746  for (typename std::vector<IdentificationType>::iterator id_it =
747  ids.begin(); id_it != ids.end(); ++id_it)
748  {
749  removeMatchingItems(id_it->getHits(), decoy_filter);
750  }
751  }
752 
760  template <class IdentificationType>
761  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids,
762  const std::set<String> accessions)
763  {
764  struct HasMatchingAccession<typename IdentificationType::HitType>
765  acc_filter(accessions);
766  for (typename std::vector<IdentificationType>::iterator id_it =
767  ids.begin(); id_it != ids.end(); ++id_it)
768  {
769  removeMatchingItems(id_it->getHits(), acc_filter);
770  }
771  }
772 
780  template <class IdentificationType>
781  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids,
782  const std::set<String> accessions)
783  {
784  struct HasMatchingAccession<typename IdentificationType::HitType>
785  acc_filter(accessions);
786  for (typename std::vector<IdentificationType>::iterator id_it =
787  ids.begin(); id_it != ids.end(); ++id_it)
788  {
789  keepMatchingItems(id_it->getHits(), acc_filter);
790  }
791  }
792 
793 
794 
796 
797 
800 
807  static void keepBestPeptideHits(
808  std::vector<PeptideIdentification>& peptides, bool strict = false);
809 
818  static void filterPeptidesByLength(
819  std::vector<PeptideIdentification>& peptides, Size min_length,
820  Size max_length = UINT_MAX);
821 
830  static void filterPeptidesByCharge(
831  std::vector<PeptideIdentification>& peptides, Int min_charge,
832  Int max_charge);
833 
835  static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
836  double min_rt, double max_rt);
837 
839  static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
840  double min_mz, double max_mz);
841 
853  static void filterPeptidesByMZError(
854  std::vector<PeptideIdentification>& peptides, double mass_error,
855  bool unit_ppm);
856 
857 
864  template <class Filter>
865  static void filterPeptideEvidences(
866  Filter& filter,
867  std::vector<PeptideIdentification>& peptides);
868 
880  static void filterPeptidesByRTPredictPValue(
881  std::vector<PeptideIdentification>& peptides,
882  const String& metavalue_key, double threshold = 0.05);
883 
885  static void removePeptidesWithMatchingModifications(
886  std::vector<PeptideIdentification>& peptides,
887  const std::set<String>& modifications);
888 
890  static void keepPeptidesWithMatchingModifications(
891  std::vector<PeptideIdentification>& peptides,
892  const std::set<String>& modifications);
893 
901  static void removePeptidesWithMatchingSequences(
902  std::vector<PeptideIdentification>& peptides,
903  const std::vector<PeptideIdentification>& bad_peptides,
904  bool ignore_mods = false);
905 
913  static void keepPeptidesWithMatchingSequences(
914  std::vector<PeptideIdentification>& peptides,
915  const std::vector<PeptideIdentification>& good_peptides,
916  bool ignore_mods = false);
917 
919  static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
920  peptides);
921 
927  static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
928  peptides, bool seq_only = false);
929 
931 
932 
935 
937  static void filterHitsByScore(PeakMap& experiment,
938  double peptide_threshold_score,
939  double protein_threshold_score)
940  {
941  // filter protein hits:
942  filterHitsByScore(experiment.getProteinIdentifications(),
943  protein_threshold_score);
944  // don't remove empty protein IDs - they contain search meta data and may
945  // be referenced by peptide IDs (via run ID)
946 
947  // filter peptide hits:
948  for (PeakMap::Iterator exp_it = experiment.begin();
949  exp_it != experiment.end(); ++exp_it)
950  {
951  filterHitsByScore(exp_it->getPeptideIdentifications(),
952  peptide_threshold_score);
953  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
954  updateProteinReferences(exp_it->getPeptideIdentifications(),
955  experiment.getProteinIdentifications());
956  }
957  // @TODO: remove proteins that aren't referenced by peptides any more?
958  }
959 
961  static void filterHitsBySignificance(PeakMap& experiment,
962  double peptide_threshold_fraction,
963  double protein_threshold_fraction)
964  {
965  // filter protein hits:
966  filterHitsBySignificance(experiment.getProteinIdentifications(),
967  protein_threshold_fraction);
968  // don't remove empty protein IDs - they contain search meta data and may
969  // be referenced by peptide IDs (via run ID)
970 
971  // filter peptide hits:
972  for (PeakMap::Iterator exp_it = experiment.begin();
973  exp_it != experiment.end(); ++exp_it)
974  {
975  filterHitsBySignificance(exp_it->getPeptideIdentifications(),
976  peptide_threshold_fraction);
977  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
978  updateProteinReferences(exp_it->getPeptideIdentifications(),
979  experiment.getProteinIdentifications());
980  }
981  // @TODO: remove proteins that aren't referenced by peptides any more?
982  }
983 
985  static void keepNBestHits(PeakMap& experiment, Size n)
986  {
987  // don't filter the protein hits by "N best" here - filter the peptides
988  // and update the protein hits!
989  std::vector<PeptideIdentification> all_peptides; // IDs from all spectra
990 
991  // filter peptide hits:
992  for (PeakMap::Iterator exp_it = experiment.begin();
993  exp_it != experiment.end(); ++exp_it)
994  {
995  std::vector<PeptideIdentification>& peptides =
996  exp_it->getPeptideIdentifications();
997  keepNBestHits(peptides, n);
998  removeEmptyIdentifications(peptides);
999  updateProteinReferences(peptides,
1000  experiment.getProteinIdentifications());
1001  all_peptides.insert(all_peptides.end(), peptides.begin(),
1002  peptides.end());
1003  }
1004  // update protein hits:
1005  removeUnreferencedProteins(experiment.getProteinIdentifications(),
1006  all_peptides);
1007  }
1008 
/// Filters an MS/MS experiment according to the given proteins.
// NOTE(review): listing line 1010 (function head
// "static void keepHitsMatchingProteins(") is not shown in this listing.
1011  PeakMap& experiment,
1012  const std::vector<FASTAFile::FASTAEntry>& proteins)
1013  {
// collect the identifiers of all given FASTA entries
1014  std::set<String> accessions;
1015  for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1016  proteins.begin(); it != proteins.end(); ++it)
1017  {
1018  accessions.insert(it->identifier);
1019  }
1020 
1021  // filter protein hits:
1022  keepHitsMatchingProteins(experiment.getProteinIdentifications(),
1023  accessions);
// ranks are re-assigned after hits were removed
1024  updateHitRanks(experiment.getProteinIdentifications());
1025 
1026  // filter peptide hits:
1027  for (PeakMap::Iterator exp_it = experiment.begin();
1028  exp_it != experiment.end(); ++exp_it)
1029  {
// only spectra with MS level 2 are filtered; all others are left untouched
1030  if (exp_it->getMSLevel() == 2)
1031  {
1032  keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1033  accessions);
1034  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1035  updateHitRanks(exp_it->getPeptideIdentifications());
1036  }
1037  }
1038  }
1039 
1041 
1042 
1043  };
1044 
1045 } // namespace OpenMS
1046 
1047 #endif // OPENMS_FILTERING_ID_IDFILTER_H
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:122
bool ignore_missed_cleavages_
Definition: IDFilter.h:351
ItemMap items
Definition: IDFilter.h:264
const String & getAccession() const
returns the accession of the protein
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:263
bool exists(const HitType &hit) const
Definition: IDFilter.h:282
A more convenient string class.
Definition: String.h:57
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:761
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:466
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:248
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, EnzymaticDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:354
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:116
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:243
GetMatchingItems()
Definition: IDFilter.h:275
HasMatchingAccession(const std::set< String > &accessions)
Definition: IDFilter.h:228
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:154
double score
Definition: IDFilter.h:99
Is this a decoy hit?
Definition: IDFilter.h:197
PeptideEvidence argument_type
Definition: IDFilter.h:346
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:410
bool operator()(const HitType &hit) const
Definition: IDFilter.h:207
bool operator()(const HitType &hit) const
Definition: IDFilter.h:106
Iterator begin()
Definition: MSExperiment.h:162
HasMaxMetaValue(const String &key, const double &value)
Definition: IDFilter.h:183
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:344
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:232
const std::set< String > & accessions
Definition: IDFilter.h:226
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:781
Class for the enzymatic digestion of proteins.
Definition: EnzymaticDigestion.h:61
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
double value
Definition: IDFilter.h:181
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:292
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:742
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:645
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:414
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:57
#define LOG_WARN
Macro to use if a warning (a piece of information which should be read by the user) should be logged...
Definition: LogStream.h:451
HitType argument_type
Definition: IDFilter.h:178
Iterator end()
Definition: MSExperiment.h:172
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:394
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1010
EnzymaticDigestion & digestion_
Definition: IDFilter.h:350
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:559
String key
Definition: IDFilter.h:180
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:587
HasMaxRank(Size rank)
Definition: IDFilter.h:128
bool operator()(const HitType &hit) const
Definition: IDFilter.h:137
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:287
HitType argument_type
Definition: IDFilter.h:156
A method or algorithm argument contains illegal values.
Definition: Exception.h:649
Size rank
Definition: IDFilter.h:126
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:364
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:444
bool isValidProduct(const AASequence &protein, Size pep_pos, Size pep_length, bool methionine_cleavage=false, bool ignore_missed_cleavages=true) const
Returns true if peptide at position pep_pos with length pep_length within protein protein was generat...
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:266
Representation of a peptide hit.
Definition: PeptideHit.h:55
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:349
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned...
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:985
HasGoodScore(double score, bool higher_score_better)
Definition: IDFilter.h:102
IdentificationType argument_type
Definition: IDFilter.h:412
bool operator()(const HitType &hit) const
Definition: IDFilter.h:187
HasDecoyAnnotation()
Definition: IDFilter.h:203
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:95
DataValue value
Definition: IDFilter.h:159
Representation of a peptide evidence.
Definition: PeptideEvidence.h:51
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:222
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:277
HitType argument_type
Definition: IDFilter.h:199
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:348
HitType argument_type
Definition: IDFilter.h:97
static void filterHitsBySignificance(PeakMap &experiment, double peptide_threshold_fraction, double protein_threshold_fraction)
Filters an MS/MS experiment according to fractions of the significance thresholds.
Definition: IDFilter.h:961
bool methionine_cleavage_
Definition: IDFilter.h:352
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:176
Representation of a protein hit.
Definition: ProteinHit.h:54
Invalid value exception.
Definition: Exception.h:336
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:633
bool higher_score_better
Definition: IDFilter.h:100
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:364
Builds a map index of data that have a String index to find matches and return the objects...
Definition: IDFilter.h:260
bool operator()(const HitType &hit) const
Definition: IDFilter.h:165
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:708
String key
Definition: IDFilter.h:158
HitType argument_type
Definition: IDFilter.h:224
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:937
HitType argument_type
Definition: IDFilter.h:262
HitType argument_type
Definition: IDFilter.h:124
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:74
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
String identifier
Definition: FASTAFile.h:76
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:490
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:75
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
HasMetaValue(const String &key, const DataValue &value)
Definition: IDFilter.h:161
int Int
Signed integer type.
Definition: Types.h:103
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:452
Not all required information provided.
Definition: Exception.h:196

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:01 using doxygen 1.8.13