|
OpenMS
2.5.0
|
Go to the documentation of this file.
37 #include <OpenMS/config.h>
53 #include <unordered_set>
100 template <
class HitType>
110 higher_score_better(higher_score_better_)
115 if (higher_score_better)
117 return hit.getScore() >= score;
119 return hit.getScore() <= score;
128 template <
class HitType>
140 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
146 Size hit_rank = hit.getRank();
151 return hit_rank <= rank;
160 template <
class HitType>
176 if (found.
isEmpty())
return false;
177 if (value.
isEmpty())
return true;
178 return found == value;
183 template <
class HitType>
199 if (found.
isEmpty())
return false;
200 return double(found) <= value;
205 template <
class HitType>
213 target_decoy(
"target_decoy",
"decoy"), is_decoy(
"isDecoy",
"true")
221 return target_decoy(hit) || is_decoy(hit);
230 template <
class HitType>
238 accessions(accessions_)
245 if (accessions.count(it) > 0)
return true;
266 template <
class HitType>
274 accessions(accessions_)
281 if (accessions.count(it) > 0)
return true;
302 template <
class HitType,
class Entry>
311 for(
typename std::vector<Entry>::iterator rec_it = records.begin();
312 rec_it != records.end(); ++rec_it)
314 items[getKey(*rec_it)] = &(*rec_it);
327 return items.count(getHitKey(hit)) > 0;
337 if(!exists(evidence)){
338 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '"+ getHitKey(evidence) +
"'. peptide evidence accession not in data");
340 return *(items.find(getHitKey(evidence))->second);
356 struct HasMinPeptideLength;
362 struct HasLowMZError;
369 struct HasMatchingModification;
376 struct HasMatchingSequence;
379 struct HasNoEvidence;
397 digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
408 [&](
const Int missed_cleavages)
411 bool max_filter = max_cleavages_ != disabledValue() ?
412 missed_cleavages > max_cleavages_ :
false;
413 bool min_filter = min_cleavages_ != disabledValue() ?
414 missed_cleavages < min_cleavages_ :
false;
415 return max_filter || min_filter;
421 hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)),
445 bool ignore_missed_cleavages,
446 bool methionine_cleavage) :
447 accession_resolver_(entries),
448 digestion_(digestion),
449 ignore_missed_cleavages_(ignore_missed_cleavages),
450 methionine_cleavage_(methionine_cleavage)
461 if (accession_resolver_.
exists(evidence))
465 evidence.
getStart(), evidence.
getEnd() - evidence.
getStart(), ignore_missed_cleavages_, methionine_cleavage_);
471 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
476 <<
"' not found in fasta file!" << std::endl;
484 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this,peptides);
496 template <
class IdentificationType>
503 return id.getHits().empty();
530 template <
class Container,
class Predicate>
533 items.erase(std::remove_if(items.begin(), items.end(), pred),
538 template <
class Container,
class Predicate>
541 items.erase(std::remove_if(items.begin(), items.end(), std::not1(pred)),
546 template <
class IDContainer,
class Predicate>
549 for (
auto& item : items)
551 removeMatchingItems(item.getHits(), pred);
556 template <
class IDContainer,
class Predicate>
559 for (
auto& item : items)
561 keepMatchingItems(item.getHits(), pred);
565 template <
class MapType,
class Predicate>
568 for (
auto& feat : prot_and_pep_ids)
570 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
572 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
575 template <
class MapType,
class Predicate>
578 for (
auto& feat : prot_and_pep_ids)
580 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
582 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
592 template <
class IdentificationType>
596 for (
typename std::vector<IdentificationType>::const_iterator id_it =
597 ids.begin(); id_it != ids.end(); ++id_it)
599 counter += id_it->getHits().size();
616 template <
class IdentificationType>
618 const std::vector<IdentificationType>& identifications,
619 bool assume_sorted,
typename IdentificationType::HitType& best_hit)
621 if (identifications.empty())
return false;
623 typename std::vector<IdentificationType>::const_iterator best_id_it =
624 identifications.end();
625 typename std::vector<typename IdentificationType::HitType>::const_iterator
628 for (
typename std::vector<IdentificationType>::const_iterator id_it =
629 identifications.begin(); id_it != identifications.end(); ++id_it)
631 if (id_it->getHits().empty())
continue;
633 if (best_id_it == identifications.end())
636 best_hit_it = id_it->getHits().begin();
638 else if (best_id_it->getScoreType() != id_it->getScoreType())
640 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
643 bool higher_better = best_id_it->isHigherScoreBetter();
644 for (
typename std::vector<typename IdentificationType::HitType>::
645 const_iterator hit_it = id_it->getHits().begin(); hit_it !=
646 id_it->getHits().end(); ++hit_it)
648 if ((higher_better && (hit_it->getScore() >
649 best_hit_it->getScore())) ||
650 (!higher_better && (hit_it->getScore() <
651 best_hit_it->getScore())))
653 best_hit_it = hit_it;
655 if (assume_sorted)
break;
659 if (best_id_it == identifications.end())
664 best_hit = *best_hit_it;
675 static void extractPeptideSequences(
676 const std::vector<PeptideIdentification>& peptides,
677 std::set<String>& sequences,
bool ignore_mods =
false);
685 template<
class EvidenceFilter>
687 EvidenceFilter& filter,
688 std::vector<PeptideIdentification>& peptides)
690 for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
691 pep_it != peptides.end(); ++pep_it)
693 for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
694 hit_it != pep_it->getHits().end(); ++hit_it )
696 std::vector<PeptideEvidence> evidences;
697 remove_copy_if(hit_it->getPeptideEvidences().begin(),
698 hit_it->getPeptideEvidences().end(),
699 back_inserter(evidences),
701 hit_it->setPeptideEvidences(evidences);
713 template <
class IdentificationType>
716 for (
typename std::vector<IdentificationType>::iterator it = ids.begin();
717 it != ids.end(); ++it)
724 static void removeUnreferencedProteins(
725 std::vector<ProteinIdentification>& proteins,
726 const std::vector<PeptideIdentification>& peptides);
735 static void updateProteinReferences(
736 std::vector<PeptideIdentification>& peptides,
737 const std::vector<ProteinIdentification>& proteins,
738 bool remove_peptides_without_reference =
false);
747 static void updateProteinReferences(
749 bool remove_peptides_without_reference =
false);
759 static bool updateProteinGroups(
760 std::vector<ProteinIdentification::ProteinGroup>& groups,
761 const std::vector<ProteinHit>& hits);
770 template <
class IdentificationType>
773 struct HasNoHits<IdentificationType> empty_filter;
774 removeMatchingItems(ids, empty_filter);
782 template <
class IdentificationType>
784 double threshold_score)
786 for (
typename std::vector<IdentificationType>::iterator id_it =
787 ids.begin(); id_it != ids.end(); ++id_it)
789 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
790 threshold_score, id_it->isHigherScoreBetter());
791 keepMatchingItems(id_it->getHits(), score_filter);
800 template <class IdentificationType>
801 static void filterHitsByScore(IdentificationType& id,
802 double threshold_score)
804 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
805 threshold_score, id->isHigherScoreBetter());
806 keepMatchingItems(id->getHits(), score_filter);
814 template <class IdentificationType>
815 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
817 for (
typename std::vector<IdentificationType>::iterator id_it =
818 ids.begin(); id_it != ids.end(); ++id_it)
821 if (n < id_it->getHits().size()) id_it->getHits().resize(n);
839 template <
class IdentificationType>
846 struct HasMaxRank<typename IdentificationType::HitType>
847 rank_filter(min_rank - 1);
848 for (typename std::vector<IdentificationType>::iterator id_it =
849 ids.begin(); id_it != ids.end(); ++id_it)
851 removeMatchingItems(id_it->getHits(), rank_filter);
854 if (max_rank >= min_rank)
856 struct HasMaxRank<typename IdentificationType::HitType>
857 rank_filter(max_rank);
858 for (typename std::vector<IdentificationType>::iterator id_it =
859 ids.begin(); id_it != ids.end(); ++id_it)
861 keepMatchingItems(id_it->getHits(), rank_filter);
873 template <
class IdentificationType>
878 for (typename std::vector<IdentificationType>::iterator id_it =
879 ids.begin(); id_it != ids.end(); ++id_it)
881 removeMatchingItems(id_it->getHits(), decoy_filter);
892 template <
class IdentificationType>
894 const std::set<String> accessions)
897 for (auto& id_it : ids)
899 removeMatchingItems(id_it.getHits(), acc_filter);
910 template <
class IdentificationType>
912 const std::set<String>& accessions)
915 for (auto& id_it : ids)
917 keepMatchingItems(id_it.getHits(), acc_filter);
933 static void keepBestPeptideHits(
934 std::vector<PeptideIdentification>& peptides,
bool strict =
false);
944 static void filterPeptidesByLength(
945 std::vector<PeptideIdentification>& peptides,
Size min_length,
946 Size max_length = UINT_MAX);
956 static void filterPeptidesByCharge(
957 std::vector<PeptideIdentification>& peptides,
Int min_charge,
961 static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
962 double min_rt,
double max_rt);
965 static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
966 double min_mz,
double max_mz);
979 static void filterPeptidesByMZError(
980 std::vector<PeptideIdentification>& peptides,
double mass_error,
990 template <
class Filter>
991 static void filterPeptideEvidences(
993 std::vector<PeptideIdentification>& peptides);
1006 static void filterPeptidesByRTPredictPValue(
1007 std::vector<PeptideIdentification>& peptides,
1008 const String& metavalue_key,
double threshold = 0.05);
1011 static void removePeptidesWithMatchingModifications(
1012 std::vector<PeptideIdentification>& peptides,
1013 const std::set<String>& modifications);
1016 static void keepPeptidesWithMatchingModifications(
1017 std::vector<PeptideIdentification>& peptides,
1018 const std::set<String>& modifications);
1027 static void removePeptidesWithMatchingSequences(
1028 std::vector<PeptideIdentification>& peptides,
1029 const std::vector<PeptideIdentification>& bad_peptides,
1030 bool ignore_mods =
false);
1039 static void keepPeptidesWithMatchingSequences(
1040 std::vector<PeptideIdentification>& peptides,
1041 const std::vector<PeptideIdentification>& good_peptides,
1042 bool ignore_mods =
false);
1045 static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
1053 static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
1054 peptides,
bool seq_only =
false);
1064 double peptide_threshold_score,
1065 double protein_threshold_score)
1069 protein_threshold_score);
1075 exp_it != experiment.
end(); ++exp_it)
1077 filterHitsByScore(exp_it->getPeptideIdentifications(),
1078 peptide_threshold_score);
1079 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1080 updateProteinReferences(exp_it->getPeptideIdentifications(),
1091 std::vector<PeptideIdentification> all_peptides;
1095 exp_it != experiment.
end(); ++exp_it)
1097 std::vector<PeptideIdentification>& peptides =
1098 exp_it->getPeptideIdentifications();
1099 keepNBestHits(peptides, n);
1100 removeEmptyIdentifications(peptides);
1101 updateProteinReferences(peptides,
1103 all_peptides.insert(all_peptides.end(), peptides.begin(),
1112 template <
class MapType>
1117 for (
auto& feat : map)
1119 keepNBestHits(feat.getPeptideIdentifications(), n);
1121 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1124 template <
class MapType>
1131 static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1133 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1135 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1138 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1140 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1142 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1145 template <
class MapType>
1151 for (
const auto& idrun : prot_ids)
1156 for (
auto& feat : prot_and_pep_ids)
1158 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1161 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1164 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1169 static void annotateBestPerPeptidePerRun(
const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1172 for (
const auto&
id : prot_ids)
1176 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1184 for (
auto &pep : pep_ids)
1187 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1197 for (
auto& pep : pep_ids)
1199 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1213 auto pepIt = pep.
getHits().begin();
1214 auto pepItEnd = nr_best_spectrum == 0 || pep.
getHits().size() <= nr_best_spectrum ? pep.
getHits().end() : pep.
getHits().begin() + nr_best_spectrum;
1215 for (; pepIt != pepItEnd; ++pepIt)
1229 int lookup_charge = 0;
1230 if (!ignore_charges)
1236 auto it_inserted = best_pep.emplace(std::move(lookup_seq),
ChargeToPepHitP());
1237 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1239 PeptideHit* &p = it_inserted_chg.first->second;
1240 if (!it_inserted_chg.second)
1267 const std::vector<FASTAFile::FASTAEntry>& proteins)
1269 std::set<String> accessions;
1270 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1271 proteins.begin(); it != proteins.end(); ++it)
1273 accessions.insert(it->identifier);
1283 exp_it != experiment.
end(); ++exp_it)
1285 if (exp_it->getMSLevel() == 2)
1287 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1289 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1290 updateHitRanks(exp_it->getPeptideIdentifications());
1300 static void keepBestMatchPerQuery(
1304 static void filterQueryMatchesByScore(
OpenMS identification format (.idXML)
Definition: FileTypes.h:66
Int getCharge() const
returns the charge of the peptide
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:108
Base class for TOPP applications.
Definition: TOPPBase.h:144
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in a MzIdentML file.
Used to load (storing not supported, yet) ProtXML files.
Definition: ProtXMLFile.h:70
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:531
Representation of a Sequest output file.
Definition: SequestOutfile.h:61
ConstIterator end() const
Gives access to the underlying text buffer.
integer list
Definition: DataValue.h:71
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, ModificationDefinitionsSet &mod_def_set)
loads data from an X! Tandem XML file
Used to load Mascot XML files.
Definition: MascotXMLFile.h:57
static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY]
Names of the Specificity.
Definition: EnzymaticDigestion.h:74
Definition: PercolatorOutfile.h:58
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1146
void load(const String &filename, ProteinIdentification &protein_ids, PeptideIdentification &peptide_ids)
Loads the identifications of a ProtXML file without identifier.
EnzymaticDigestion & digestion_
Definition: IDFilter.h:390
static void keepNBestHits(std::vector< IdentificationType > &ids, Size n)
Filters peptide or protein identifications according to the score of the hits, keeping the n best hit...
Definition: IDFilter.h:815
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
string value
Definition: DataValue.h:67
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
static const std::string score_type_names[SIZE_OF_SCORETYPE]
Names of Percolator scores (to match ScoreType)
Definition: PercolatorOutfile.h:61
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
bool operator()(const HitType &hit) const
Definition: IDFilter.h:144
void setEnzyme(const String &name)
Sets the enzyme for the digestion (by name)
bool operator()(const HitType &hit) const
Definition: IDFilter.h:113
IdentificationType argument_type
Definition: IDFilter.h:499
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:911
double score
Definition: IDFilter.h:105
String identifier
Definition: FASTAFile.h:78
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:69
Representation of a set of modification definitions.
Definition: ModificationDefinitionsSet.h:58
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:893
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:330
Invalid value exception.
Definition: Exception.h:335
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
double toDouble() const
Conversion to double.
MzML file (.mzML)
Definition: FileTypes.h:72
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
TPP pepXML file (.pepXML)
Definition: FileTypes.h:75
string list
Definition: DataValue.h:70
void setSearchEngineVersion(const String &search_engine_version)
Sets the search engine version.
void load(const String &filename, std::vector< PeptideIdentification > &pep_ids, std::vector< ProteinIdentification > &prot_ids)
Load the content of the xquest.xml file into the provided data structures.
A more convenient string class.
Definition: String.h:58
Iterator begin()
Definition: MSExperiment.h:157
PeptideHit argument_type
Definition: IDFilter.h:395
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:90
bool exists(const HitType &hit) const
Definition: IDFilter.h:325
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:62
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:375
Used to load OMSSAXML files.
Definition: OMSSAXMLFile.h:60
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:273
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
Invalid conversion exception.
Definition: Exception.h:362
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:453
void addMSLevel(int level)
adds a desired MS level for peaks to load
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Representation of a protein hit.
Definition: ProteinHit.h:57
Size rank
Definition: IDFilter.h:133
void load(const String &filename, ProteinIdentification &proteins, std::vector< PeptideIdentification > &peptides, SpectrumMetaDataLookup &lookup, enum ScoreType output_score=QVALUE)
Loads a Percolator output file.
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:286
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
any TSV file, for example msInspect file or OpenSWATH transition file (see TransitionTSVFile)
Definition: FileTypes.h:87
empty value
Definition: DataValue.h:73
GetMatchingItems()
Definition: IDFilter.h:318
int exception
(Used by various macros. Indicates a rough category of the exception being caught....
ScoreType
Types of Percolator scores.
Definition: PercolatorOutfile.h:58
String toString() const
returns the peptide as string with modifications embedded in brackets
mzIdentML (HUPO PSI AnalysisXML followup format) (.mzid)
Definition: FileTypes.h:77
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
static Int disabledValue()
Definition: IDFilter.h:400
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:309
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
Class for reading Percolator tab-delimited output files.
Definition: PercolatorOutfile.h:52
PeptideEvidence argument_type
Definition: IDFilter.h:435
Unknown file extension.
Definition: FileTypes.h:60
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
static Specificity getSpecificityByName(const String &name)
FASTA file (.fasta)
Definition: FileTypes.h:92
Representation of a protein identification run.
Definition: ProteinIdentification.h:71
void endProgress() const
Ends the progress display.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:255
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:840
void setMissedCleavages(Size missed_cleavages)
Sets the number of missed cleavages for the digestion (default is 0). This setting is ignored when lo...
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:547
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:237
Size size() const
Definition: MSExperiment.h:127
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
fully enzyme specific, e.g., tryptic (ends with KR, AA-before is KR), or peptide is at protein termin...
Definition: EnzymaticDigestion.h:68
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:267
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:306
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
void load(const String &result_filename, std::vector< PeptideIdentification > &peptide_identifications, ProteinIdentification &protein_identification, const double p_value_threshold, std::vector< double > &pvalues, const String &database="", const bool ignore_proteins_per_peptide=false)
loads data from a Sequest outfile
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:347
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:193
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:539
bool operator()(const HitType &hit) const
Definition: IDFilter.h:216
static ModificationsDB * getInstance()
Returns a pointer to the modifications DB (singleton)
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:250
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:67
const std::set< String > & accessions
Definition: IDFilter.h:271
static enum ScoreType getScoreType(String score_type_name)
Return a score type given its name.
bool methionine_cleavage_
Definition: IDFilter.h:441
File adapter for MzIdentML files.
Definition: MzIdentMLFile.h:67
int Int
Signed integer type.
Definition: Types.h:102
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
const String & getAccession() const
returns the accession of the protein
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:111
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:419
HitType argument_type
Definition: IDFilter.h:269
Used to load and store xQuest result files.
Definition: XQuestResultXMLFile.h:55
HasMaxRank(Size rank_)
Definition: IDFilter.h:135
integer value
Definition: DataValue.h:68
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:230
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:89
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, const SpectrumMetaDataLookup &lookup)
Loads data from a Mascot XML file.
void sort()
Sorts the hits by score.
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
Size findByNativeID(const String &native_id) const
Look up spectrum by native ID.
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1194
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:320
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
HitType argument_type
Definition: IDFilter.h:233
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1207
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1087
static bool isDirectory(const String &path)
Return true if the given path specifies a directory.
Facilitates file handling by file type recognition.
Definition: FileHandler.h:62
Helper class for looking up spectra based on different attributes.
Definition: SpectrumLookup.h:67
void addIonMatchStatistics(PeptideIdentification &pi, MSSpectrum &spec, const TheoreticalSpectrumGenerator &tg, const SpectrumAlignment &sa) const
Adds ion match statistics to pi PeptideIdentification.
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:77
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:43
bool loadExperiment(const String &filename, MSExperiment &exp, FileTypes::Type force_type=FileTypes::UNKNOWN, ProgressLogger::LogType log=ProgressLogger::NONE, const bool rewrite_source_file=true, const bool compute_hash=true)
Loads a file into an MSExperiment.
HitType argument_type
Definition: IDFilter.h:305
Type
Actual file types enum.
Definition: FileTypes.h:58
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
xQuest XML file format for protein-protein cross-link identifications (.xquest.xml)
Definition: FileTypes.h:112
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
void readSpectra(const SpectrumContainer &spectra, const String &scan_regexp=default_scan_regexp)
Read and index spectra for later look-up.
Definition: SpectrumLookup.h:103
HitType argument_type
Definition: IDFilter.h:208
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
void addReferenceFormat(const String &regexp)
Register a possible format for a spectrum reference.
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:482
void store(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids, const String &mz_file="", const String &mz_name="", bool peptideprophet_analyzed=false, double rt_tolerance=0.01)
Stores idXML as PepXML file.
bool filterByMissedCleavages(const String &sequence, std::function< bool(const Int)> filter) const
Filter based on the number of missed cleavages.
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:714
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:335
void setParameters(const Param &param)
Sets the parameters.
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:291
const Param & getDefaults() const
Non-mutable access to the default parameters.
HasDecoyAnnotation()
Definition: IDFilter.h:212
std::vector< String >::iterator Iterator
Mutable iterator.
Definition: TextFile.h:54
Mascot XML file format for peptide identifications (.xml)
Definition: FileTypes.h:84
HitType argument_type
Definition: IDFilter.h:131
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
DataType valueType() const
returns the type of value stored
Definition: DataValue.h:365
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:593
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
OMSSA XML file format for peptide identifications (.xml)
Definition: FileTypes.h:83
A container for consensus elements.
Definition: ConsensusMap.h:79
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
bool higher_score_better
Definition: IDFilter.h:106
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
static const DataValue EMPTY
Empty data value for comparisons.
Definition: DataValue.h:62
void setSequence(const AASequence &sequence)
sets the peptide sequence
bool operator()(PeptideHit &p)
Definition: IDFilter.h:404
double value
Definition: DataValue.h:69
void load(const String &filename, std::vector< ProteinIdentification > &poid, std::vector< PeptideIdentification > &peid)
Loads the identifications from a MzIdentML file.
TPP protXML file (.protXML)
Definition: FileTypes.h:76
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1169
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:303
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:566
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
static void initializeLookup(SpectrumMetaDataLookup &lookup, const PeakMap &experiment, const String &scan_regex="")
Initializes a helper object for looking up spectrum meta data (RT, m/z)
bool ignore_missed_cleavages_
Definition: IDFilter.h:440
void setSearchEngine(const String &search_engine)
Sets the search engine type.
String toString(const T &i)
fallback template for general purpose using Boost::Karma; more specializations below
Definition: StringUtils.h:85
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:433
ConstIterator begin() const
Gives access to the underlying text buffer.
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
Generates theoretical spectra for peptides with various options.
Definition: TheoreticalSpectrumGenerator.h:67
ItemMap items
Definition: IDFilter.h:307
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:89
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
String getEnzymeName() const
Returns the enzyme for the digestion.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:557
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
static String absolutePath(const String &file)
Replaces the relative path in the argument with the absolute path.
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1131
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
Parse Error exception.
Definition: Exception.h:622
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:497
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
static std::vector< PeptideHit > getReferencingHits(const std::vector< PeptideHit > &, const std::set< String > &accession)
returns all peptide hits which reference to a given protein accession (i.e. filter by protein accessi...
Iterator end()
Definition: MSExperiment.h:167
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
double getScore() const
returns the PSM score
Definition: EnzymaticDigestion.h:71
Int max_cleavages_
Definition: IDFilter.h:392
void load(const String &filename, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const String &experiment_name, const SpectrumMetaDataLookup &lookup)
Loads peptide sequences with modifications out of a PepXML file.
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:277
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:231
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:241
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:771
static Type nameToType(const String &name)
Converts a file type name into a Type.
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
const char * getMessage() const noexcept
Returns the message.
ProteaseDigestion & digestion_
Definition: IDFilter.h:439
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:101
HitType argument_type
Definition: IDFilter.h:103
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:88
Used to load XTandemXML files.
Definition: XTandemXMLFile.h:56
void getAllSearchModifications(std::vector< String > &modifications) const
Collects all modifications that can be used for identification searches.
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:443
Management and storage of parameters / INI files.
Definition: Param.h:73
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:617
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:387
double list
Definition: DataValue.h:72
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1265
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1113
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:438
Used to load and store PepXML files.
Definition: PepXMLFile.h:62
String & ensureLastChar(char end)
Makes sure the string ends with the character end.
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:783
Int min_cleavages_
Definition: IDFilter.h:391
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
#define OPENMS_LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:465
void setSpecificity(Specificity spec)
Sets the specificity for the digestion (default is SPEC_FULL).
void setIdentifier(const String &id)
Sets the identifier.
const std::unordered_set< String > & accessions
Definition: IDFilter.h:235
static bool fileList(const String &dir, const String &file_pattern, StringList &output, bool full_path=false)
Retrieves a list of files matching file_pattern in directory dir (returns filenames without paths unl...
Specificity getSpecificity() const
Returns the specificity for the digestion.
Element could not be found exception.
Definition: Exception.h:662
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:874
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in a xQuest XML file.
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1063
Search parameters of the DB search.
Definition: ProteinIdentification.h:221
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, bool load_proteins=true, bool load_empty_hits=true)
loads data from an OMSSAXML file
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1182
bool isHigherScoreBetter() const
returns the peptide score orientation
Is this a decoy hit?
Definition: IDFilter.h:206
void load(const String &filename, bool trim_lines=false, Int first_n=-1, bool skip_empty_lines=false)
Loads data from a text file.
any XML format
Definition: FileTypes.h:98
Int toInt() const
Conversion to int.
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
Annotates spectra from identifications and theoretical spectra or identifications from spectra and th...
Definition: SpectrumAnnotator.h:60
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1125
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:576
Used to load and store idXML files.
Definition: IdXMLFile.h:63
Percolator tab-delimited output (PSM level)
Definition: FileTypes.h:107
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:129
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:686
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:396
Command line progress.
Definition: ProgressLogger.h:72
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:501
Representation of a peptide hit.
Definition: PeptideHit.h:54
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1138