OpenMS  2.6.0
IDFilter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/config.h>
47 
48 #include <algorithm>
49 #include <climits>
50 #include <vector>
51 #include <set>
52 #include <map>
53 #include <unordered_set>
54 
55 namespace OpenMS
56 {
77  class OPENMS_DLLAPI IDFilter
78  {
79 public:
80 
82  IDFilter();
83 
85  virtual ~IDFilter();
86 
88  typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
89  typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
90  typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
91 
97 
/// Predicate: is a hit's score at least as good as a cut-off?
/// "Good" means >= the cut-off when higher scores are better, <= otherwise.
template <class HitType>
struct HasGoodScore
{
  typedef HitType argument_type; // for use as a predicate

  double score;             ///< cut-off value (inclusive)
  bool higher_score_better; ///< orientation of the score

  HasGoodScore(double score_, bool higher_score_better_) :
    score(score_),
    higher_score_better(higher_score_better_)
  {}

  /// @return true if the score of @p hit passes the cut-off
  bool operator()(const HitType& hit) const
  {
    return higher_score_better ? (hit.getScore() >= score)
                               : (hit.getScore() <= score);
  }
};
122 
128  template <class HitType>
129  struct HasMaxRank
130  {
131  typedef HitType argument_type; // for use as a predicate
132 
134 
135  HasMaxRank(Size rank_):
136  rank(rank_)
137  {
138  if (rank_ == 0)
139  {
140  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The cut-off value for rank filtering must not be zero!");
141  }
142  }
143 
144  bool operator()(const HitType& hit) const
145  {
146  Size hit_rank = hit.getRank();
147  if (hit_rank == 0)
148  {
149  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No rank assigned to peptide or protein hit");
150  }
151  return hit_rank <= rank;
152  }
153  };
154 
160  template <class HitType>
162  {
163  typedef HitType argument_type; // for use as a predicate
164 
167 
168  HasMetaValue(const String& key_, const DataValue& value_):
169  key(key_),
170  value(value_)
171  {}
172 
173  bool operator()(const HitType& hit) const
174  {
175  DataValue found = hit.getMetaValue(key);
176  if (found.isEmpty()) return false; // meta value "key" not set
177  if (value.isEmpty()) return true; // "key" is set, value doesn't matter
178  return found == value;
179  }
180  };
181 
183  template <class HitType>
185  {
186  typedef HitType argument_type; // for use as a predicate
187 
189  double value;
190 
191  HasMaxMetaValue(const String& key_, const double& value_):
192  key(key_),
193  value(value_)
194  {}
195 
196  bool operator()(const HitType& hit) const
197  {
198  DataValue found = hit.getMetaValue(key);
199  if (found.isEmpty()) return false; // meta value "key" not set
200  return double(found) <= value;
201  }
202  };
203 
205  template <class HitType>
207  {
208  typedef HitType argument_type; // for use as a predicate
209 
210  struct HasMetaValue<HitType> target_decoy, is_decoy;
211 
213  target_decoy("target_decoy", "decoy"), is_decoy("isDecoy", "true")
214  {}
215 
216  bool operator()(const HitType& hit) const
217  {
218  // @TODO: this could be done slightly more efficiently by returning
219  // false if the "target_decoy" meta value is "target" or "target+decoy",
220  // without checking for an "isDecoy" meta value in that case
221  return target_decoy(hit) || is_decoy(hit);
222  }
223  };
224 
230  template <class HitType>
232  {
233  typedef HitType argument_type; // for use as a predicate
234 
235  const std::unordered_set<String>& accessions;
236 
237  HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_):
238  accessions(accessions_)
239  {}
240 
241  bool operator()(const PeptideHit& hit) const
242  {
243  for (const auto& it : hit.extractProteinAccessionsSet())
244  {
245  if (accessions.count(it) > 0) return true;
246  }
247  return false;
248  }
249 
250  bool operator()(const ProteinHit& hit) const
251  {
252  return (accessions.count(hit.getAccession()) > 0);
253  }
254 
255  bool operator()(const PeptideEvidence& evidence) const
256  {
257  return (accessions.count(evidence.getProteinAccession()) > 0);
258  }
259  };
260 
266  template <class HitType>
268  {
269  typedef HitType argument_type; // for use as a predicate
270 
271  const std::set<String>& accessions;
272 
273  HasMatchingAccession(const std::set<String>& accessions_):
274  accessions(accessions_)
275  {}
276 
277  bool operator()(const PeptideHit& hit) const
278  {
279  for (const auto& it : hit.extractProteinAccessionsSet())
280  {
281  if (accessions.count(it) > 0) return true;
282  }
283  return false;
284  }
285 
286  bool operator()(const ProteinHit& hit) const
287  {
288  return (accessions.count(hit.getAccession()) > 0);
289  }
290 
291  bool operator()(const PeptideEvidence& evidence) const
292  {
293  return (accessions.count(evidence.getProteinAccession()) > 0);
294  }
295  };
296 
302  template <class HitType, class Entry>
304  {
305  typedef HitType argument_type; // for use as a predicate
306  typedef std::map<String, Entry*> ItemMap;//Store pointers to avoid copying data
308 
309  GetMatchingItems(std::vector<Entry>& records)
310  {
311  for(typename std::vector<Entry>::iterator rec_it = records.begin();
312  rec_it != records.end(); ++rec_it)
313  {
314  items[getKey(*rec_it)] = &(*rec_it);
315  }
316  }
317 
319 
320  const String& getKey(const FASTAFile::FASTAEntry& entry) const
321  {
322  return entry.identifier;
323  }
324 
325  bool exists(const HitType& hit) const
326  {
327  return items.count(getHitKey(hit)) > 0;
328  }
329 
330  const String& getHitKey(const PeptideEvidence& p) const
331  {
332  return p.getProteinAccession();
333  }
334 
335  const Entry& getValue(const PeptideEvidence& evidence) const
336  {
337  if(!exists(evidence)){
338  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '"+ getHitKey(evidence) + "'. peptide evidence accession not in data");
339  }
340  return *(items.find(getHitKey(evidence))->second);
341  }
342 
343  };
344 
346 
347 
353 
356  struct HasMinPeptideLength;
357 
359  struct HasMinCharge;
360 
362  struct HasLowMZError;
363 
369  struct HasMatchingModification;
370 
376  struct HasMatchingSequence;
377 
379  struct HasNoEvidence;
380 
381 
388  {
389  private:
393 
394  public:
397  digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
398  {}
399 
400  static inline Int disabledValue(){ return -1; }
401 
404  bool operator()(PeptideHit& p) const
405  {
406  const auto& fun = [&](const Int missed_cleavages)
407  {
408 
409  bool max_filter = max_cleavages_ != disabledValue() ?
410  missed_cleavages > max_cleavages_ : false;
411  bool min_filter = min_cleavages_ != disabledValue() ?
412  missed_cleavages < min_cleavages_ : false;
413  return max_filter || min_filter;
414  };
415  return digestion_.filterByMissedCleavages(
417  fun);
418  }
419 
420  void filterPeptideSequences(std::vector<PeptideHit>& hits)
421  {
422  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)),
423  hits.end());
424  }
425 
426  };
427 
428 
435  {
437 
438  // Build an accession index to avoid the linear search cost
443 
444  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries,
445  ProteaseDigestion& digestion,
446  bool ignore_missed_cleavages,
447  bool methionine_cleavage) :
448  accession_resolver_(entries),
449  digestion_(digestion),
450  ignore_missed_cleavages_(ignore_missed_cleavages),
451  methionine_cleavage_(methionine_cleavage)
452  {}
453 
454  bool operator()(const PeptideEvidence& evidence) const
455  {
456  if(!evidence.hasValidLimits())
457  {
458  OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
459  return true;
460  }
461 
462  if (accession_resolver_.exists(evidence))
463  {
464  return digestion_.isValidProduct(
465  AASequence::fromString(accession_resolver_.getValue(evidence).sequence),
466  evidence.getStart(), evidence.getEnd() - evidence.getStart(), ignore_missed_cleavages_, methionine_cleavage_);
467  }
468  else
469  {
470  if (evidence.getProteinAccession().empty())
471  {
472  OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
473  }
474  else
475  {
476  OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession()
477  << "' not found in fasta file!" << std::endl;
478  }
479  return true;
480  }
481  }
482 
483  void filterPeptideEvidences(std::vector<PeptideIdentification>& peptides)
484  {
485  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this,peptides);
486  }
487 
488  };
489 
491 
492 
495 
/// Predicate: does an identification contain no hits at all?
template <class IdentificationType>
struct HasNoHits
{
  typedef IdentificationType argument_type; // for use as a predicate

  /// @return true if getHits() of @p id is empty
  bool operator()(const IdentificationType& id) const
  {
    const bool no_hits = id.getHits().empty();
    return no_hits;
  }
};
507 
509 
510 
513 
515  struct HasRTInRange;
516 
518  struct HasMZInRange;
519 
521 
522 
528 
/// Erases from @p items every element for which @p pred returns true.
template <class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  const auto first_removed = std::remove_if(items.begin(), items.end(), pred);
  items.erase(first_removed, items.end());
}
537 
/// Keeps only the elements of @p items for which @p pred returns true.
/// The predicate may be any callable (functor or lambda); it does not need
/// an argument_type typedef.
template <class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  // negate with a lambda: std::not1 is deprecated in C++17, removed in C++20,
  // and only accepts functors that declare argument_type
  const auto fails = [&pred](const typename Container::value_type& item)
  {
    return !pred(item);
  };
  items.erase(std::remove_if(items.begin(), items.end(), fails), items.end());
}
545 
/// Moves every element of @p items for which @p pred returns true to the end
/// of @p target and erases it from @p items.
/// Uses std::partition, so the relative order of the elements is not
/// guaranteed to be preserved. The predicate may be any callable.
template <class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  // negate with a lambda: std::not1 is deprecated in C++17, removed in C++20,
  // and only accepts functors that declare argument_type
  const auto stays = [&pred](const typename Container::value_type& item)
  {
    return !pred(item);
  };
  auto part = std::partition(items.begin(), items.end(), stays);
  std::move(part, items.end(), std::back_inserter(target));
  items.erase(part, items.end());
}
554 
/// For every ID in @p items, erases the hits for which @p pred returns true.
template <class IDContainer, class Predicate>
static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto& id : items)
  {
    auto& hits = id.getHits();
    removeMatchingItems(hits, pred);
  }
}
564 
/// For every ID in @p items, keeps only the hits for which @p pred returns true.
template <class IDContainer, class Predicate>
static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto& id : items)
  {
    auto& hits = id.getHits();
    keepMatchingItems(hits, pred);
  }
}
574 
/// Keeps only the peptide hits matching @p pred, both in the peptide IDs of
/// each element of the map and in the map's unassigned peptide IDs.
template <class MapType, class Predicate>
static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
{
  for (auto& element : prot_and_pep_ids)
  {
    keepMatchingItemsUnroll(element.getPeptideIdentifications(), pred);
  }

  auto& unassigned = prot_and_pep_ids.getUnassignedPeptideIdentifications();
  keepMatchingItemsUnroll(unassigned, pred);
}
584 
/// Removes the peptide hits matching @p pred, both from the peptide IDs of
/// each element of the map and from the map's unassigned peptide IDs.
template <class MapType, class Predicate>
static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, Predicate& pred)
{
  for (auto& element : prot_and_pep_ids)
  {
    removeMatchingItemsUnroll(element.getPeptideIdentifications(), pred);
  }

  auto& unassigned = prot_and_pep_ids.getUnassignedPeptideIdentifications();
  removeMatchingItemsUnroll(unassigned, pred);
}
594 
/// Removes whole peptide IDs matching @p pred, both from each element of the
/// map and from the map's unassigned peptide IDs.
template <class MapType, class Predicate>
static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, Predicate& pred)
{
  for (auto& element : prot_and_pep_ids)
  {
    removeMatchingItems(element.getPeptideIdentifications(), pred);
  }

  auto& unassigned = prot_and_pep_ids.getUnassignedPeptideIdentifications();
  removeMatchingItems(unassigned, pred);
}
604 
606 
607 
610 
612  template <class IdentificationType>
613  static Size countHits(const std::vector<IdentificationType>& ids)
614  {
615  Size counter = 0;
616  for (typename std::vector<IdentificationType>::const_iterator id_it =
617  ids.begin(); id_it != ids.end(); ++id_it)
618  {
619  counter += id_it->getHits().size();
620  }
621  return counter;
622  }
623 
636  template <class IdentificationType>
637  static bool getBestHit(
638  const std::vector<IdentificationType>& identifications,
639  bool assume_sorted, typename IdentificationType::HitType& best_hit)
640  {
641  if (identifications.empty()) return false;
642 
643  typename std::vector<IdentificationType>::const_iterator best_id_it =
644  identifications.end();
645  typename std::vector<typename IdentificationType::HitType>::const_iterator
646  best_hit_it;
647 
648  for (typename std::vector<IdentificationType>::const_iterator id_it =
649  identifications.begin(); id_it != identifications.end(); ++id_it)
650  {
651  if (id_it->getHits().empty()) continue;
652 
653  if (best_id_it == identifications.end()) // no previous "best" hit
654  {
655  best_id_it = id_it;
656  best_hit_it = id_it->getHits().begin();
657  }
658  else if (best_id_it->getScoreType() != id_it->getScoreType())
659  {
660  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
661  }
662 
663  bool higher_better = best_id_it->isHigherScoreBetter();
664  for (typename std::vector<typename IdentificationType::HitType>::
665  const_iterator hit_it = id_it->getHits().begin(); hit_it !=
666  id_it->getHits().end(); ++hit_it)
667  {
668  if ((higher_better && (hit_it->getScore() >
669  best_hit_it->getScore())) ||
670  (!higher_better && (hit_it->getScore() <
671  best_hit_it->getScore())))
672  {
673  best_hit_it = hit_it;
674  }
675  if (assume_sorted) break; // only consider the first hit
676  }
677  }
678 
679  if (best_id_it == identifications.end())
680  {
681  return false; // no hits in any IDs
682  }
683 
684  best_hit = *best_hit_it;
685  return true;
686  }
687 
695  static void extractPeptideSequences(
696  const std::vector<PeptideIdentification>& peptides,
697  std::set<String>& sequences, bool ignore_mods = false);
698 
704  static std::map<String,std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
705 
711  template<class EvidenceFilter>
713  EvidenceFilter& filter,
714  std::vector<PeptideIdentification>& peptides)
715  {
716  for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
717  pep_it != peptides.end(); ++pep_it)
718  {
719  for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
720  hit_it != pep_it->getHits().end(); ++hit_it )
721  {
722  std::vector<PeptideEvidence> evidences;
723  remove_copy_if(hit_it->getPeptideEvidences().begin(),
724  hit_it->getPeptideEvidences().end(),
725  back_inserter(evidences),
726  std::not1(filter));
727  hit_it->setPeptideEvidences(evidences);
728  }
729  }
730  }
731 
733 
734 
737 
/// Calls assignRanks() on every ID in @p ids.
template <class IdentificationType>
static void updateHitRanks(std::vector<IdentificationType>& ids)
{
  for (IdentificationType& id : ids)
  {
    id.assignRanks();
  }
}
748 
751  static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
752 
754  static void removeUnreferencedProteins(
755  std::vector<ProteinIdentification>& proteins,
756  const std::vector<PeptideIdentification>& peptides);
757 
765  static void updateProteinReferences(
766  std::vector<PeptideIdentification>& peptides,
767  const std::vector<ProteinIdentification>& proteins,
768  bool remove_peptides_without_reference = false);
769 
777  static void updateProteinReferences(
778  ConsensusMap& cmap,
779  bool remove_peptides_without_reference = false);
780 
789  static bool updateProteinGroups(
790  std::vector<ProteinIdentification::ProteinGroup>& groups,
791  const std::vector<ProteinHit>& hits);
792 
799  static void removeUngroupedProteins(
800  const std::vector<ProteinIdentification::ProteinGroup>& groups,
801  std::vector<ProteinHit>& hits);
803 
804 
807 
809  template <class IdentificationType>
810  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
811  {
812  struct HasNoHits<IdentificationType> empty_filter;
813  removeMatchingItems(ids, empty_filter);
814  }
815 
821  template <class IdentificationType>
822  static void filterHitsByScore(std::vector<IdentificationType>& ids,
823  double threshold_score)
824  {
825  for (typename std::vector<IdentificationType>::iterator id_it =
826  ids.begin(); id_it != ids.end(); ++id_it)
827  {
828  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
829  threshold_score, id_it->isHigherScoreBetter());
830  keepMatchingItems(id_it->getHits(), score_filter);
831  }
832  }
833 
840  static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps,
841  double threshold_score, bool higher_better);
842 
848  template <class IdentificationType>
849  static void filterHitsByScore(IdentificationType& id,
850  double threshold_score)
851  {
852  struct HasGoodScore<typename IdentificationType::HitType> score_filter(
853  threshold_score, id->isHigherScoreBetter());
854  keepMatchingItems(id->getHits(), score_filter);
855  }
856 
862  template <class IdentificationType>
863  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
864  {
865  for (typename std::vector<IdentificationType>::iterator id_it =
866  ids.begin(); id_it != ids.end(); ++id_it)
867  {
868  id_it->sort();
869  if (n < id_it->getHits().size()) id_it->getHits().resize(n);
870  }
871  }
872 
887  template <class IdentificationType>
888  static void filterHitsByRank(std::vector<IdentificationType>& ids,
889  Size min_rank, Size max_rank)
890  {
891  updateHitRanks(ids);
892  if (min_rank > 1)
893  {
894  struct HasMaxRank<typename IdentificationType::HitType>
895  rank_filter(min_rank - 1);
896  for (typename std::vector<IdentificationType>::iterator id_it =
897  ids.begin(); id_it != ids.end(); ++id_it)
898  {
899  removeMatchingItems(id_it->getHits(), rank_filter);
900  }
901  }
902  if (max_rank >= min_rank)
903  {
904  struct HasMaxRank<typename IdentificationType::HitType>
905  rank_filter(max_rank);
906  for (typename std::vector<IdentificationType>::iterator id_it =
907  ids.begin(); id_it != ids.end(); ++id_it)
908  {
909  keepMatchingItems(id_it->getHits(), rank_filter);
910  }
911  }
912  }
913 
921  template <class IdentificationType>
922  static void removeDecoyHits(std::vector<IdentificationType>& ids)
923  {
924  struct HasDecoyAnnotation<typename IdentificationType::HitType>
925  decoy_filter;
926  for (typename std::vector<IdentificationType>::iterator id_it =
927  ids.begin(); id_it != ids.end(); ++id_it)
928  {
929  removeMatchingItems(id_it->getHits(), decoy_filter);
930  }
931  }
932 
940  template <class IdentificationType>
941  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids,
942  const std::set<String> accessions)
943  {
944  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
945  for (auto& id_it : ids)
946  {
947  removeMatchingItems(id_it.getHits(), acc_filter);
948  }
949  }
950 
958  template <class IdentificationType>
959  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids,
960  const std::set<String>& accessions)
961  {
962  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
963  for (auto& id_it : ids)
964  {
965  keepMatchingItems(id_it.getHits(), acc_filter);
966  }
967  }
968 
970 
971 
974 
981  static void keepBestPeptideHits(
982  std::vector<PeptideIdentification>& peptides, bool strict = false);
983 
992  static void filterPeptidesByLength(
993  std::vector<PeptideIdentification>& peptides, Size min_length,
994  Size max_length = UINT_MAX);
995 
1004  static void filterPeptidesByCharge(
1005  std::vector<PeptideIdentification>& peptides, Int min_charge,
1006  Int max_charge);
1007 
1009  static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
1010  double min_rt, double max_rt);
1011 
1013  static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
1014  double min_mz, double max_mz);
1015 
1027  static void filterPeptidesByMZError(
1028  std::vector<PeptideIdentification>& peptides, double mass_error,
1029  bool unit_ppm);
1030 
1031 
1038  template <class Filter>
1039  static void filterPeptideEvidences(
1040  Filter& filter,
1041  std::vector<PeptideIdentification>& peptides);
1042 
1054  static void filterPeptidesByRTPredictPValue(
1055  std::vector<PeptideIdentification>& peptides,
1056  const String& metavalue_key, double threshold = 0.05);
1057 
1059  static void removePeptidesWithMatchingModifications(
1060  std::vector<PeptideIdentification>& peptides,
1061  const std::set<String>& modifications);
1062 
1064  static void keepPeptidesWithMatchingModifications(
1065  std::vector<PeptideIdentification>& peptides,
1066  const std::set<String>& modifications);
1067 
1075  static void removePeptidesWithMatchingSequences(
1076  std::vector<PeptideIdentification>& peptides,
1077  const std::vector<PeptideIdentification>& bad_peptides,
1078  bool ignore_mods = false);
1079 
1087  static void keepPeptidesWithMatchingSequences(
1088  std::vector<PeptideIdentification>& peptides,
1089  const std::vector<PeptideIdentification>& good_peptides,
1090  bool ignore_mods = false);
1091 
1093  static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
1094  peptides);
1095 
1101  static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
1102  peptides, bool seq_only = false);
1103 
1105 
1106 
1109 
1111  static void filterHitsByScore(PeakMap& experiment,
1112  double peptide_threshold_score,
1113  double protein_threshold_score)
1114  {
1115  // filter protein hits:
1116  filterHitsByScore(experiment.getProteinIdentifications(),
1117  protein_threshold_score);
1118  // don't remove empty protein IDs - they contain search meta data and may
1119  // be referenced by peptide IDs (via run ID)
1120 
1121  // filter peptide hits:
1122  for (PeakMap::Iterator exp_it = experiment.begin();
1123  exp_it != experiment.end(); ++exp_it)
1124  {
1125  filterHitsByScore(exp_it->getPeptideIdentifications(),
1126  peptide_threshold_score);
1127  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1128  updateProteinReferences(exp_it->getPeptideIdentifications(),
1129  experiment.getProteinIdentifications());
1130  }
1131  // @TODO: remove proteins that aren't referenced by peptides any more?
1132  }
1133 
1135  static void keepNBestHits(PeakMap& experiment, Size n)
1136  {
1137  // don't filter the protein hits by "N best" here - filter the peptides
1138  // and update the protein hits!
1139  std::vector<PeptideIdentification> all_peptides; // IDs from all spectra
1140 
1141  // filter peptide hits:
1142  for (PeakMap::Iterator exp_it = experiment.begin();
1143  exp_it != experiment.end(); ++exp_it)
1144  {
1145  std::vector<PeptideIdentification>& peptides =
1146  exp_it->getPeptideIdentifications();
1147  keepNBestHits(peptides, n);
1148  removeEmptyIdentifications(peptides);
1149  updateProteinReferences(peptides,
1150  experiment.getProteinIdentifications());
1151  all_peptides.insert(all_peptides.end(), peptides.begin(),
1152  peptides.end());
1153  }
1154  // update protein hits:
1155  removeUnreferencedProteins(experiment.getProteinIdentifications(),
1156  all_peptides);
1157  }
1158 
1160  template <class MapType>
1161  static void keepNBestPeptideHits(MapType& map, Size n)
1162  {
1163  // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1164  // which sorts Hits first.
1165  for (auto& feat : map)
1166  {
1167  keepNBestHits(feat.getPeptideIdentifications(), n);
1168  }
1169  keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1170  }
1171 
1172  template <class MapType>
1173  static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1174  {
1175  const auto pred = HasNoHits<PeptideIdentification>();
1176  removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1177  }
1178 
1180  static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1181  {
1182  annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1183  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1184  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1185  }
1186 
1187  static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1188  {
1189  annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1190  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1191  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1192  }
1193 
1194  //TODO allow skipping unassigned?
1195  template <class MapType>
1196  static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1197  {
1198  const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1199 
1200  RunToSequenceToChargeToPepHitP best_peps_per_run;
1201  for (const auto& idrun : prot_ids)
1202  {
1203  best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1204  }
1205 
1206  for (auto& feat : prot_and_pep_ids)
1207  {
1208  annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1209  }
1210 
1211  annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1212  }
1213 
1214  template <class MapType>
1215  static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1216  {
1217  annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1218  HasMetaValue<PeptideHit> best_per_peptide{"best_per_peptide", 1};
1219  keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1220  }
1221 
1224  static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1225  {
1226  RunToSequenceToChargeToPepHitP best_peps_per_run;
1227  for (const auto& id : prot_ids)
1228  {
1229  best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1230  }
1231  annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1232  }
1233 
1237  static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1238  {
1239  for (auto &pep : pep_ids)
1240  {
1241  SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1242  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1243  }
1244  }
1245 
1249  static void annotateBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1250  {
1251  SequenceToChargeToPepHitP best_pep;
1252  for (auto& pep : pep_ids)
1253  {
1254  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1255  }
1256  }
1257 
1262  static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1263  {
1264  bool higher_score_better = pep.isHigherScoreBetter();
1265  //make sure that first = best hit
1266  pep.sort();
1267 
1268  auto pepIt = pep.getHits().begin();
1269  auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1270  for (; pepIt != pepItEnd; ++pepIt)
1271  {
1272  PeptideHit &hit = *pepIt;
1273 
1274  String lookup_seq;
1275  if (ignore_mods)
1276  {
1277  lookup_seq = hit.getSequence().toUnmodifiedString();
1278  }
1279  else
1280  {
1281  lookup_seq = hit.getSequence().toString();
1282  }
1283 
1284  int lookup_charge = 0;
1285  if (!ignore_charges)
1286  {
1287  lookup_charge = hit.getCharge();
1288  }
1289 
1290  // try to insert
1291  auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1292  auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1293 
1294  PeptideHit* &p = it_inserted_chg.first->second; //now this gets either the old one if already present, or this
1295  if (!it_inserted_chg.second) //was already present -> possibly update
1296  {
1297  if (
1298  (higher_score_better && (hit.getScore() > p->getScore())) ||
1299  (!higher_score_better && (hit.getScore() < p->getScore()))
1300  )
1301  {
1302  p->setMetaValue("best_per_peptide", 0);
1303  hit.setMetaValue("best_per_peptide", 1);
1304  p = &hit;
1305  }
1306  else //note that this was def. not the best
1307  {
1308  // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1309  hit.setMetaValue("best_per_peptide", 0);
1310  }
1311  }
1312  else //newly inserted -> first for that sequence (and optionally charge)
1313  {
1314  hit.setMetaValue("best_per_peptide", 1);
1315  }
1316  }
1317  }
1318 
1321  PeakMap& experiment,
1322  const std::vector<FASTAFile::FASTAEntry>& proteins)
1323  {
1324  std::set<String> accessions;
1325  for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1326  proteins.begin(); it != proteins.end(); ++it)
1327  {
1328  accessions.insert(it->identifier);
1329  }
1330 
1331  // filter protein hits:
1332  keepHitsMatchingProteins(experiment.getProteinIdentifications(),
1333  accessions);
1334  updateHitRanks(experiment.getProteinIdentifications());
1335 
1336  // filter peptide hits:
1337  for (PeakMap::Iterator exp_it = experiment.begin();
1338  exp_it != experiment.end(); ++exp_it)
1339  {
1340  if (exp_it->getMSLevel() == 2)
1341  {
1342  keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1343  accessions);
1344  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1345  updateHitRanks(exp_it->getPeptideIdentifications());
1346  }
1347  }
1348  }
1349 
1351 
1352 
1355  static void keepBestMatchPerQuery(
1356  IdentificationData& id_data,
1358 
1359  static void filterQueryMatchesByScore(
1360  IdentificationData& id_data,
1361  IdentificationData::ScoreTypeRef score_ref, double cutoff);
1362 
1363  static void removeDecoys(IdentificationData& id_data);
1365 
1366  };
1367 
1368 } // namespace OpenMS
1369 
LogStream.h
OpenMS::FileTypes::IDXML
OpenMS identification format (.idXML)
Definition: FileTypes.h:66
OpenMS::PeptideHit::getCharge
Int getCharge() const
returns the charge of the peptide
OpenMS::IDFilter::HasGoodScore::HasGoodScore
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:108
ConsensusXMLFile.h
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
OpenMS::MzIdentMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in a MzIdentML file.
OpenMS::ProtXMLFile
Used to load (storing not supported, yet) ProtXML files.
Definition: ProtXMLFile.h:70
OpenMS::IDFilter::removeMatchingItems
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:532
OpenMS::SequestOutfile
Representation of a Sequest output file.
Definition: SequestOutfile.h:61
OpenMS::TextFile::end
ConstIterator end() const
Gives access to the underlying text buffer.
OpenMS::DataValue::INT_LIST
integer list
Definition: DataValue.h:71
OpenMS::XTandemXMLFile::load
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, ModificationDefinitionsSet &mod_def_set)
loads data from an X! Tandem XML file
FileHandler.h
OpenMS::MascotXMLFile
Used to load Mascot XML files.
Definition: MascotXMLFile.h:57
FileTypes.h
TextFile.h
OpenMS::EnzymaticDigestion::NamesOfSpecificity
static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY]
Names of the Specificity.
Definition: EnzymaticDigestion.h:77
OpenMS::SpectrumMetaDataLookup::addMissingSpectrumReferences
static bool addMissingSpectrumReferences(std::vector< PeptideIdentification > &peptides, const String &filename, bool stop_on_error=false, bool override_spectra_data=false, bool override_spectra_references=false, std::vector< ProteinIdentification > proteins=std::vector< ProteinIdentification >())
Add missing "spectrum_reference"s to peptide identifications based on raw data.
IDRipper.h
OpenMS::UInt64
OPENMS_UINT64_TYPE UInt64
Unsigned integer type (64bit)
Definition: Types.h:77
OpenMS::PercolatorOutfile::SIZE_OF_SCORETYPE
Definition: PercolatorOutfile.h:58
OpenMS::IDFilter::keepUniquePeptidesPerProtein
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
OpenMS::IDFilter::keepBestPerPeptidePerRun
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1215
XQuestResultXMLFile.h
double
OpenMS::ProtXMLFile::load
void load(const String &filename, ProteinIdentification &protein_ids, PeptideIdentification &peptide_ids)
Loads the identifications of a ProtXML file without identifier.
OpenMS::ConsensusXMLFile::store
void store(const String &filename, const ConsensusMap &consensus_map)
Stores a consensus map to file.
OpenMS::IDFilter::PeptideDigestionFilter::digestion_
EnzymaticDigestion & digestion_
Definition: IDFilter.h:390
OpenMS::IDFilter::keepNBestHits
static void keepNBestHits(std::vector< IdentificationType > &ids, Size n)
Filters peptide or protein identifications according to the score of the hits, keeping the n best hit...
Definition: IDFilter.h:863
OpenMS::Exception::IllegalArgument
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
OpenMS::DataValue::STRING_VALUE
string value
Definition: DataValue.h:67
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::PercolatorOutfile::score_type_names
static const std::string score_type_names[SIZE_OF_SCORETYPE]
Names of Percolator scores (to match ScoreType)
Definition: PercolatorOutfile.h:61
ProtXMLFile.h
OpenMS::PeptideHit::getSequence
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
OpenMS::IDFilter::HasMaxRank::operator()
bool operator()(const HitType &hit) const
Definition: IDFilter.h:144
OpenMS::ProteaseDigestion::setEnzyme
void setEnzyme(const String &name)
Sets the enzyme for the digestion (by name)
OpenMS::IDFilter::moveMatchingItems
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:548
EnzymaticDigestion.h
OpenMS::IDFilter::HasGoodScore::operator()
bool operator()(const HitType &hit) const
Definition: IDFilter.h:113
OpenMS::IDFilter::HasNoHits::argument_type
IdentificationType argument_type
Definition: IDFilter.h:500
MascotXMLFile.h
OpenMS::IDFilter::keepHitsMatchingProteins
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:959
OpenMS::IDFilter::HasMaxMetaValue::operator()
bool operator()(const HitType &hit) const
Definition: IDFilter.h:196
OpenMS::IDFilter::HasGoodScore::score
double score
Definition: IDFilter.h:105
OpenMS::FASTAFile::FASTAEntry::identifier
String identifier
Definition: FASTAFile.h:78
OpenMS::DigestionEnzymeDB< DigestionEnzymeProtein, ProteaseDB >::getInstance
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:69
OpenMS::ModificationDefinitionsSet
Representation of a set of modification definitions.
Definition: ModificationDefinitionsSet.h:58
OpenMS::IDFilter::removeHitsMatchingProteins
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:941
OpenMS::IDFilter::GetMatchingItems::getHitKey
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:330
OpenMS::Exception::InvalidValue
Invalid value exception.
Definition: Exception.h:335
OpenMS::IDFilter::PeptideDigestionFilter::operator()
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:404
OpenMS::FASTAFile
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
OpenMS::FileHandler::getTypeByFileName
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
OpenMS::IDFilter::filterPeptidesByMZ
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
OpenMS::String::toDouble
double toDouble() const
Conversion to double.
OpenMS::Constants::k
const double k
OpenMS::FileTypes::MZML
MzML file (.mzML)
Definition: FileTypes.h:72
OpenMS::FASTAFile::FASTAEntry::sequence
String sequence
Definition: FASTAFile.h:80
OpenMS::Param::setValue
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
OpenMS::FileTypes::PEPXML
TPP pepXML file (.pepXML)
Definition: FileTypes.h:75
OpenMS::DataValue::STRING_LIST
string list
Definition: DataValue.h:70
OpenMS::ProteinIdentification::setSearchEngineVersion
void setSearchEngineVersion(const String &search_engine_version)
Sets the search engine version.
OpenMS::XQuestResultXMLFile::load
void load(const String &filename, std::vector< PeptideIdentification > &pep_ids, std::vector< ProteinIdentification > &prot_ids)
Load the content of the xquest.xml file into the provided data structures.
OpenMS::String
A more convenient string class.
Definition: String.h:59
OpenMS::MSExperiment::begin
Iterator begin()
Definition: MSExperiment.h:157
OpenMS::IDFilter::PeptideDigestionFilter::argument_type
PeptideHit argument_type
Definition: IDFilter.h:395
OpenMS::IDFilter::RunToSequenceToChargeToPepHitP
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:90
OpenMS::IDFilter::HasMaxMetaValue
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:184
OpenMS::IDFilter::GetMatchingItems::exists
bool exists(const HitType &hit) const
Definition: IDFilter.h:325
OpenMS::EnzymaticDigestion
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:62
OpenMS::String::trim
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
OpenMS::DataValue::isEmpty
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:375
OpenMS::OMSSAXMLFile
Used to load OMSSAXML files.
Definition: OMSSAXMLFile.h:60
OpenMS::IDFilter::HasMetaValue::HasMetaValue
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:168
PeptideEvidence.h
OpenMS::IDFilter::HasMatchingAccession::HasMatchingAccession
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:273
OpenMS::MSExperiment
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
OpenMS::FileTypes::CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition: FileTypes.h:67
OpenMS::Exception::ConversionError
Invalid conversion exception.
Definition: Exception.h:362
OpenMS::IDFilter::updateProteinReferences
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
OpenMS::DigestionEnzymeDB::getAllNames
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
OpenMS::TextFile
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
OpenMS::IDFilter::DigestionFilter::operator()
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:454
OpenMS::FASTAFile::FASTAEntry::description
String description
Definition: FASTAFile.h:79
OpenMS::PeakFileOptions::addMSLevel
void addMSLevel(int level)
adds a desired MS level for peaks to load
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
OpenMS::ProteinHit
Representation of a protein hit.
Definition: ProteinHit.h:58
OpenMS::IDFilter::HasMaxRank::rank
Size rank
Definition: IDFilter.h:133
OpenMS::PercolatorOutfile::load
void load(const String &filename, ProteinIdentification &proteins, std::vector< PeptideIdentification > &peptides, SpectrumMetaDataLookup &lookup, enum ScoreType output_score=QVALUE)
Loads a Percolator output file.
OpenMS::IDFilter::HasMatchingAccession::operator()
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:286
OpenMS::IDFilter::updateProteinGroups
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
OpenMS::FileTypes::TSV
any TSV file, for example msInspect file or OpenSWATH transition file (see TransitionTSVFile)
Definition: FileTypes.h:87
OpenMS::DataValue::EMPTY_VALUE
empty value
Definition: DataValue.h:73
OpenMS::IDFilter::GetMatchingItems::GetMatchingItems
GetMatchingItems()
Definition: IDFilter.h:318
OpenMS::Internal::ClassTest::exception
int exception
(Used by various macros. Indicates a rough category of the exception being caught....
OpenMS::PercolatorOutfile::ScoreType
ScoreType
Types of Percolator scores.
Definition: PercolatorOutfile.h:58
OpenMS::AASequence::toString
String toString() const
returns the peptide as string with modifications embedded in brackets
XTandemXMLFile.h
OpenMS::FileTypes::MZIDENTML
mzIdentML (HUPO PSI AnalysisXML followup format) (.mzid)
Definition: FileTypes.h:77
OpenMS::ProgressLogger::startProgress
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
OpenMS::IDFilter::HasMetaValue::argument_type
HitType argument_type
Definition: IDFilter.h:163
OpenMS::IDFilter::PeptideDigestionFilter::disabledValue
static Int disabledValue()
Definition: IDFilter.h:400
OpenMS::IDFilter::HasMaxMetaValue::value
double value
Definition: IDFilter.h:189
OpenMS::AASequence::toUnmodifiedString
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
OpenMS::IDFilter::GetMatchingItems::GetMatchingItems
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:309
OpenMS::FileHandler::getOptions
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
OpenMS::PercolatorOutfile
Class for reading Percolator tab-delimited output files.
Definition: PercolatorOutfile.h:52
OpenMS::IDFilter::DigestionFilter::argument_type
PeptideEvidence argument_type
Definition: IDFilter.h:436
OpenMS::FileTypes::UNKNOWN
Unknown file extension.
Definition: FileTypes.h:60
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro to be used if a warning (a piece of information which should be read by the user) should be logged.
Definition: LogStream.h:460
OpenMS::EnzymaticDigestion::getSpecificityByName
static Specificity getSpecificityByName(const String &name)
OpenMS::FileTypes::FASTA
FASTA file (.fasta)
Definition: FileTypes.h:92
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
OpenMS::ProgressLogger::endProgress
void endProgress() const
Ends the progress display.
OpenMS::IDFilter::keepPeptidesWithMatchingModifications
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
Constants.h
OpenMS::FASTAFile::FASTAEntry
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
OpenMS::IDFilter::HasMatchingAccessionUnordered::operator()
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:255
OpenMS::IDFilter::filterHitsByRank
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:888
OpenMS::EnzymaticDigestion::filterByMissedCleavages
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
OpenMS::EnzymaticDigestion::setMissedCleavages
void setMissedCleavages(Size missed_cleavages)
Sets the number of missed cleavages for the digestion (default is 0). This setting is ignored when lo...
OpenMS::IDFilter::removeMatchingItemsUnroll
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:557
OpenMS::IDFilter::HasMatchingAccessionUnordered::HasMatchingAccessionUnordered
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:237
OpenMS::MSExperiment::size
Size size() const
Definition: MSExperiment.h:127
OpenMS::ExperimentalSettings::getProteinIdentifications
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
OpenMS::PeptideIdentification::setHits
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
OpenMS::EnzymaticDigestion::SPEC_FULL
fully enzyme specific, e.g., tryptic (ends with KR, AA-before is KR), or peptide is at protein termin...
Definition: EnzymaticDigestion.h:70
OpenMS::PeptideIdentification::getHits
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
OpenMS::IDFilter::HasMatchingAccession
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:267
OpenMS::IDFilter::GetMatchingItems::ItemMap
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:306
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::SequestOutfile::load
void load(const String &result_filename, std::vector< PeptideIdentification > &peptide_identifications, ProteinIdentification &protein_identification, const double p_value_threshold, std::vector< double > &pvalues, const String &database="", const bool ignore_proteins_per_peptide=false)
loads data from a Sequest outfile
OpenMS::Exception::InvalidParameter
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:347
OpenMS::ListUtils::concatenate
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:193
OpenMS::IDFilter::keepMatchingItems
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:540
OpenMS::IDFilter::HasDecoyAnnotation::operator()
bool operator()(const HitType &hit) const
Definition: IDFilter.h:216
OpenMS::ModificationsDB::getInstance
static ModificationsDB * getInstance()
Returns a pointer to the modifications DB (singleton)
SpectrumAnnotator.h
OpenMS::IDFilter::HasMatchingAccessionUnordered::operator()
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:250
OpenMS::SpectrumAlignment
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:67
OpenMS::IDFilter::HasMatchingAccession::accessions
const std::set< String > & accessions
Definition: IDFilter.h:271
OpenMS::PercolatorOutfile::getScoreType
static enum ScoreType getScoreType(String score_type_name)
Return a score type given its name.
OpenMS::IDFilter::DigestionFilter::methionine_cleavage_
bool methionine_cleavage_
Definition: IDFilter.h:442
OpenMS::MzIdentMLFile
File adapter for MzIdentML files.
Definition: MzIdentMLFile.h:67
OpenMS::Int
int Int
Signed integer type.
Definition: Types.h:102
OpenMS::IDFilter::removeDuplicatePeptideHits
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
OpenMS::ProteinHit::getAccession
const String & getAccession() const
returns the accession of the protein
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::MSExperiment::Iterator
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:111
OpenMS::IDFilter::filterPeptidesByLength
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
OpenMS::IDFilter::PeptideDigestionFilter::filterPeptideSequences
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:420
OpenMS::IDFilter::HasMatchingAccession::argument_type
HitType argument_type
Definition: IDFilter.h:269
OpenMS::XQuestResultXMLFile
Used to load and store xQuest result files.
Definition: XQuestResultXMLFile.h:55
OpenMS::MetaInfoInterface::removeMetaValue
void removeMetaValue(const String &name)
Removes the DataValue corresponding to name if it exists.
ProteaseDigestion.h
OpenMS::IDFilter::removeUnreferencedProteins
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
OpenMS::IDFilter::HasMaxRank::HasMaxRank
HasMaxRank(Size rank_)
Definition: IDFilter.h:135
OpenMS::DataValue::INT_VALUE
integer value
Definition: DataValue.h:68
OpenMS::ProteinIdentification::SearchParameters::variable_modifications
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:267
OpenMS::Constants::UserParam::CONCAT_PEPTIDE
const std::string CONCAT_PEPTIDE
OpenMS::IDFilter::SequenceToChargeToPepHitP
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:89
OpenMS::ProgressLogger
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
ProteaseDB.h
OpenMS::MascotXMLFile::load
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, const SpectrumMetaDataLookup &lookup)
Loads data from a Mascot XML file.
OpenMS::PeptideIdentification::sort
void sort()
Sorts the hits by score.
OpenMS::MetaInfoInterface::setMetaValue
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
OpenMS::DataValue
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
OpenMS::SpectrumLookup::findByNativeID
Size findByNativeID(const String &native_id) const
Look up spectrum by native ID.
OpenMS::IDFilter::annotateBestPerPeptide
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1249
FASTAFile.h
OpenMS::IDFilter::GetMatchingItems::getKey
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:320
OpenMS::IDFilter::filterPeptidesByRTPredictPValue
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
int
OpenMS::PeptideEvidence::hasValidLimits
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
OpenMS::IDFilter::HasMaxMetaValue::argument_type
HitType argument_type
Definition: IDFilter.h:186
OpenMS::SpectrumMetaDataLookup::readSpectra
void readSpectra(const SpectrumContainer &spectra, const String &scan_regexp=default_scan_regexp, bool get_precursor_rt=false)
Read spectra and store their meta data.
Definition: SpectrumMetaDataLookup.h:213
OpenMS::IDFilter::HasMatchingAccessionUnordered::argument_type
HitType argument_type
Definition: IDFilter.h:233
OpenMS::IDFilter::annotateBestPerPeptideWithData
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1262
OpenMS::IDFilter::keepNBestHits
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1135
OpenMS::File::isDirectory
static bool isDirectory(const String &path)
Return true if the given path specifies a directory.
OpenMS::FileHandler
Facilitates file handling by file type recognition.
Definition: FileHandler.h:62
OpenMS::SpectrumLookup
Helper class for looking up spectra based on different attributes.
Definition: SpectrumLookup.h:67
OpenMS::SpectrumAnnotator::addIonMatchStatistics
void addIonMatchStatistics(PeptideIdentification &pi, MSSpectrum &spec, const TheoreticalSpectrumGenerator &tg, const SpectrumAlignment &sa) const
Adds ion match statistics to the PeptideIdentification pi.
OpenMS::IDFilter::keepBestPeptideHits
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
OpenMS::IDFilter
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:77
OpenMS::IdentificationDataInternal::IteratorWrapper
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:43
OpenMS::FileHandler::loadExperiment
bool loadExperiment(const String &filename, MSExperiment &exp, FileTypes::Type force_type=FileTypes::UNKNOWN, ProgressLogger::LogType log=ProgressLogger::NONE, const bool rewrite_source_file=true, const bool compute_hash=true)
Loads a file into an MSExperiment.
ProteinIdentification.h
OpenMS::IDFilter::GetMatchingItems::argument_type
HitType argument_type
Definition: IDFilter.h:305
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:58
OpenMS::PeptideEvidence
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
OpenMS::FileTypes::XQUESTXML
xQuest XML file format for protein-protein cross-link identifications (.xquest.xml)
Definition: FileTypes.h:112
OpenMS::IDFilter::filterPeptidesByCharge
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
OpenMS::SpectrumLookup::readSpectra
void readSpectra(const SpectrumContainer &spectra, const String &scan_regexp=default_scan_regexp)
Read and index spectra for later look-up.
Definition: SpectrumLookup.h:103
OpenMS::IDFilter::HasDecoyAnnotation::argument_type
HitType argument_type
Definition: IDFilter.h:208
OpenMS::IDFilter::filterPeptidesByRT
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
OpenMS::SpectrumLookup::addReferenceFormat
void addReferenceFormat(const String &regexp)
Register a possible format for a spectrum reference.
OpenMS::IDFilter::DigestionFilter::filterPeptideEvidences
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:483
OpenMS::PepXMLFile::store
void store(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids, const String &mz_file="", const String &mz_name="", bool peptideprophet_analyzed=false, double rt_tolerance=0.01)
Stores idXML as PepXML file.
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
OpenMS::IDFilter::updateHitRanks
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:740
OpenMS::IDFilter::GetMatchingItems::getValue
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:335
OpenMS::IDFilter::HasMaxMetaValue::HasMaxMetaValue
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:191
OMSSAXMLFile.h
PepXMLFile.h
OpenMS::DefaultParamHandler::setParameters
void setParameters(const Param &param)
Sets the parameters.
OpenMS::IDFilter::keepPeptidesWithMatchingSequences
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
OpenMS::IDFilter::HasMatchingAccession::operator()
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:291
OpenMS::FASTAFile::writeNext
void writeNext(const FASTAEntry &protein)
Stores the data given by protein. Call writeStart() once before calling writeNext().
OpenMS::DefaultParamHandler::getDefaults
const Param & getDefaults() const
Non-mutable access to the default parameters.
OpenMS::IDFilter::HasDecoyAnnotation::HasDecoyAnnotation
HasDecoyAnnotation()
Definition: IDFilter.h:212
OpenMS::TextFile::Iterator
std::vector< String >::iterator Iterator
Mutable iterator.
Definition: TextFile.h:54
OpenMS::FileTypes::MASCOTXML
Mascot XML file format for peptide identifications (.xml)
Definition: FileTypes.h:84
OpenMS::IDFilter::HasMaxRank::argument_type
HitType argument_type
Definition: IDFilter.h:131
OpenMS::PeptideHit::extractProteinAccessionsSet
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
OpenMS::DataValue::valueType
DataType valueType() const
returns the type of value stored
Definition: DataValue.h:365
OpenMS::IDFilter::countHits
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:613
seqan::find
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
OpenMS::FileTypes::OMSSAXML
OMSSA XML file format for peptide identifications (.xml)
Definition: FileTypes.h:83
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:80
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::IDFilter::HasGoodScore::higher_score_better
bool higher_score_better
Definition: IDFilter.h:106
OpenMS::ConsensusXMLFile::load
void load(const String &filename, ConsensusMap &map)
Loads a consensus map from file and calls updateRanges.
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::DataValue::EMPTY
static const DataValue EMPTY
Empty data value for comparisons.
Definition: DataValue.h:62
SequestOutfile.h
OpenMS::FASTAFile::writeStart
void writeStart(const String &filename)
Prepares a FASTA file given by 'filename' for streamed writing using writeNext().
OpenMS::PeptideHit::setSequence
void setSequence(const AASequence &sequence)
sets the peptide sequence
OpenMS::DataValue::DOUBLE_VALUE
double value
Definition: DataValue.h:69
ModificationsDB.h
OpenMS::MzIdentMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &poid, std::vector< PeptideIdentification > &peid)
Loads the identifications from a MzIdentML file.
OpenMS::FileTypes::PROTXML
TPP protXML file (.protXML)
Definition: FileTypes.h:76
OpenMS::ProteaseDigestion::isValidProduct
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
OpenMS::IDFilter::annotateBestPerPeptidePerRun
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1224
OpenMS::IDFilter::annotateBestPerPeptidePerRun
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1196
OpenMS::IDFilter::GetMatchingItems
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:303
OpenMS::IDFilter::keepMatchingPeptideHits
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:576
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
OpenMS::MascotXMLFile::initializeLookup
static void initializeLookup(SpectrumMetaDataLookup &lookup, const PeakMap &experiment, const String &scan_regex="")
Initializes a helper object for looking up spectrum meta data (RT, m/z)
OpenMS::IDFilter::DigestionFilter::ignore_missed_cleavages_
bool ignore_missed_cleavages_
Definition: IDFilter.h:441
OpenMS::ProteinIdentification::setSearchEngine
void setSearchEngine(const String &search_engine)
Sets the search engine type.
MSExperiment.h
OpenMS::StringConversions::toString
String toString(const T &i)
fallback template for general purpose using Boost::Karma; more specializations below
Definition: StringUtils.h:127
OpenMS::IDFilter::DigestionFilter
Is the peptide evidence a digestion product of some protein?
Definition: IDFilter.h:434
OpenMS::TextFile::begin
ConstIterator begin() const
Gives access to the underlying text buffer.
OpenMS::SpectrumMetaDataLookup
Helper class for looking up spectrum meta data.
Definition: SpectrumMetaDataLookup.h:142
OpenMS::SignedSize
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
OpenMS::TheoreticalSpectrumGenerator
Generates theoretical spectra for peptides with various options.
Definition: TheoreticalSpectrumGenerator.h:68
OpenMS::IDFilter::GetMatchingItems::items
ItemMap items
Definition: IDFilter.h:307
OpenMS::IdentificationData
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:89
OpenMS::PeptideEvidence::getStart
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
OpenMS::EnzymaticDigestion::getEnzymeName
String getEnzymeName() const
Returns the enzyme for the digestion.
OpenMS::IDFilter::keepMatchingItemsUnroll
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:567
OPENMS_LOG_ERROR
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
OpenMS::File::absolutePath
static String absolutePath(const String &file)
Replaces the relative path in the argument with the absolute path.
OpenMS::IDFilter::keepBestPerPeptide
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1180
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
OpenMS::Exception::ParseError
Parse Error exception.
Definition: Exception.h:622
AASequence.h
OpenMS::IDFilter::removeMatchingPeptideIdentifications
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:596
OpenMS::FASTAFile::load
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
OpenMS::IDFilter::HasNoHits
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:498
OpenMS::ProteaseDigestion
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
OpenMS::PeptideIdentification::getReferencingHits
static std::vector< PeptideHit > getReferencingHits(const std::vector< PeptideHit > &, const std::set< String > &accession)
returns all peptide hits which reference a given protein accession (i.e. filter by protein accessi...
OpenMS::MSExperiment::end
Iterator end()
Definition: MSExperiment.h:167
OpenMS::IDFilter::filterPeptidesByMZError
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
OpenMS::MSExperiment::getSpectra
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
OpenMS::PeptideHit::getScore
double getScore() const
returns the PSM score
OpenMS::IDFilter::HasMetaValue::value
DataValue value
Definition: IDFilter.h:166
OpenMS::IDFilter::HasMetaValue::key
String key
Definition: IDFilter.h:165
OpenMS::IDFilter::PeptideDigestionFilter::max_cleavages_
Int max_cleavages_
Definition: IDFilter.h:392
OpenMS::PepXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const String &experiment_name, const SpectrumMetaDataLookup &lookup)
Loads peptide sequences with modifications out of a PepXML file.
OpenMS::IDFilter::HasMatchingAccession::operator()
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:277
OpenMS::IDFilter::HasMatchingAccessionUnordered
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:231
OpenMS::IDFilter::HasMatchingAccessionUnordered::operator()
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:241
OpenMS::IDFilter::removeEmptyIdentifications
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:810
OpenMS::FileTypes::nameToType
static Type nameToType(const String &name)
Converts a file type name into a Type.
OpenMS::IDFilter::removePeptidesWithMatchingModifications
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
OpenMS::Exception::BaseException::getMessage
const char * getMessage() const noexcept
Returns the message.
OpenMS::IDFilter::DigestionFilter::digestion_
ProteaseDigestion & digestion_
Definition: IDFilter.h:440
OpenMS::IDFilter::HasGoodScore
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:101
OpenMS::IDFilter::HasGoodScore::argument_type
HitType argument_type
Definition: IDFilter.h:103
OpenMS::IDFilter::ChargeToPepHitP
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:88
OpenMS::XTandemXMLFile
Used to load XTandemXML files.
Definition: XTandemXMLFile.h:56
OpenMS::ModificationsDB::getAllSearchModifications
void getAllSearchModifications(std::vector< String > &modifications) const
Collects all modifications that can be used for identification searches.
OpenMS::IDFilter::DigestionFilter::DigestionFilter
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:444
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:73
OpenMS::IDFilter::getBestHit
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:637
OpenMS::IDFilter::removePeptidesWithMatchingSequences
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
IdentificationData.h
OpenMS::IDFilter::PeptideDigestionFilter
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:387
OpenMS::DataValue::DOUBLE_LIST
double list
Definition: DataValue.h:72
OpenMS::IDFilter::keepHitsMatchingProteins
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1320
OpenMS::IDFilter::keepNBestPeptideHits
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1161
OpenMS::IDFilter::DigestionFilter::accession_resolver_
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:439
IDFilter.h
OpenMS::PepXMLFile
Used to load and store PepXML files.
Definition: PepXMLFile.h:63
OpenMS::String::ensureLastChar
String & ensureLastChar(char end)
Makes sure the string ends with the character end.
OpenMS::AASequence::fromString
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
OpenMS::IDFilter::filterHitsByScore
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:822
OpenMS::IDFilter::PeptideDigestionFilter::min_cleavages_
Int min_cleavages_
Definition: IDFilter.h:391
OpenMS::IDFilter::HasMetaValue::operator()
bool operator()(const HitType &hit) const
Definition: IDFilter.h:173
OpenMS::Exception::MissingInformation
Not all required information provided.
Definition: Exception.h:195
OpenMS::PeptideEvidence::getProteinAccession
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:465
OpenMS::EnzymaticDigestion::setSpecificity
void setSpecificity(Specificity spec)
Sets the specificity for the digestion (default is SPEC_FULL).
OpenMS::IDFilter::removeUngroupedProteins
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
OpenMS::ProteinIdentification::setIdentifier
void setIdentifier(const String &id)
Sets the identifier.
OpenMS::IDFilter::HasMatchingAccessionUnordered::accessions
const std::unordered_set< String > & accessions
Definition: IDFilter.h:235
OpenMS::File::fileList
static bool fileList(const String &dir, const String &file_pattern, StringList &output, bool full_path=false)
Retrieves a list of files matching file_pattern in directory dir (returns filenames without paths unl...
OpenMS::EnzymaticDigestion::getSpecificity
Specificity getSpecificity() const
Returns the specificity for the digestion.
OpenMS::Exception::ElementNotFound
Element could not be found exception.
Definition: Exception.h:662
OpenMS::IDFilter::removeDecoyHits
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:922
OpenMS::XQuestResultXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in an xQuest XML file.
OpenMS::IDFilter::filterHitsByScore
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1111
PeptideIdentification.h
OpenMS::ProteinIdentification::SearchParameters
Search parameters of the DB search.
Definition: ProteinIdentification.h:258
OpenMS::OMSSAXMLFile::load
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, bool load_proteins=true, bool load_empty_hits=true)
loads data from an OMSSAXML file
OpenMS::IDFilter::annotateBestPerPeptidePerRunWithData
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1237
OpenMS::IDFilter::HasMetaValue
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:161
OpenMS::PeptideIdentification::isHigherScoreBetter
bool isHigherScoreBetter() const
returns the peptide score orientation
OpenMS::IDFilter::HasDecoyAnnotation
Is this a decoy hit?
Definition: IDFilter.h:206
OpenMS::TextFile::load
void load(const String &filename, bool trim_lines=false, Int first_n=-1, bool skip_empty_lines=false)
Loads data from a text file.
PercolatorOutfile.h
StandardTypes.h
OpenMS::FileTypes::XML
any XML format
Definition: FileTypes.h:98
OpenMS::String::toInt
Int toInt() const
Conversion to int.
File.h
MzIdentMLFile.h
OpenMS::PeptideEvidence::getEnd
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
OpenMS::ProgressLogger::setLogType
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
OpenMS::IDFilter::filterGroupsByScore
static void filterGroupsByScore(std::vector< ProteinIdentification::ProteinGroup > &grps, double threshold_score, bool higher_better)
Filters protein groups according to the score of the groups.
OpenMS::SpectrumAnnotator
Annotates spectra from identifications and theoretical spectra or identifications from spectra and th...
Definition: SpectrumAnnotator.h:60
OpenMS::IDFilter::removeEmptyIdentifications
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1173
TOPPBase.h
OpenMS::IDFilter::removeMatchingPeptideHits
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:586
OpenMS::ConsensusXMLFile
This class provides Input functionality for ConsensusMaps and Output functionality for alignments and...
Definition: ConsensusXMLFile.h:62
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:63
OpenMS::SpectrumMetaDataLookup::addMissingRTsToPeptideIDs
static bool addMissingRTsToPeptideIDs(std::vector< PeptideIdentification > &peptides, const String &filename, bool stop_on_error=false)
Add missing retention time values to peptide identifications based on raw data.
OpenMS::FileTypes::PSMS
Percolator tab-delimited output (PSM level)
Definition: FileTypes.h:107
OpenMS::IDFilter::HasMaxRank
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:129
OpenMS::IDFilter::FilterPeptideEvidences
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:712
OpenMS::IDFilter::PeptideDigestionFilter::PeptideDigestionFilter
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:396
OpenMS::IDFilter::HasMaxMetaValue::key
String key
Definition: IDFilter.h:188
OpenMS::ProgressLogger::CMD
Command line progress.
Definition: ProgressLogger.h:72
OpenMS::IDFilter::HasNoHits::operator()
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:502
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:55
OpenMS::IDFilter::keepBestPerPeptidePerRun
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1187