OpenMS  2.6.0
ConsensusIDAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Andreas Bertsch, Marc Sturm, Sven Nahnsen, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 
40 #include <map>
41 #include <vector>
42 
43 namespace OpenMS
44 {
59  class OPENMS_DLLAPI ConsensusIDAlgorithm :
60  public DefaultParamHandler
61  {
62  public:
73  void apply(std::vector<PeptideIdentification>& ids,
74  const std::map<String, String>& se_info,
75  Size number_of_runs = 0);
76 
77  void apply(std::vector<PeptideIdentification>& ids,
78  Size number_of_runs = 0);
79 
81  ~ConsensusIDAlgorithm() override;
82 
83  protected:
84  struct HitInfo
85  {
87  std::vector<double> scores;
88  std::vector<String> types;
89  // in case too much information is stored, TD and evidence
90  // could be re-annotated with PeptideIndexer later
92  std::set<PeptideEvidence> evidence;
93  double final_score;
94  double support;
95  //TODO: we could gather spectrum_refs here as well,
96  // to support passing of spectrum_ref if ALL refs of a group are the same
97  // For now, we do it in the ConsensusID TOPP tool class in cases where we
98  // know that refs will be the same.
99  };
100 
102  typedef std::map<AASequence, HitInfo>
104 
107 
110 
112  double min_support_;
113 
116 
119 
122 
130  virtual void apply_(std::vector<PeptideIdentification>& ids,
131  const std::map<String, String>& se_info,
132  SequenceGrouping& results) = 0;
133 
135  void updateMembers_() override;
136 
138  void compareChargeStates_(Int& recorded_charge, Int new_charge,
139  const AASequence& peptide);
140 
141  private:
144 
146  ConsensusIDAlgorithm& operator=(const ConsensusIDAlgorithm&);
147 
148  };
149 
150 } // namespace OpenMS
151 
DefaultParamHandler.h
OpenMS::Param::merge
void merge(const Param &toMerge)
Adds missing parameters from the given param toMerge to this param. Existing parameters will not be m...
OpenMS::ProteinIdentification::SearchParameters::digestion_enzyme
Protease digestion_enzyme
The cleavage site information in details (from ProteaseDB)
Definition: ProteinIdentification.h:273
OpenMS::FileTypes::IDXML
OpenMS identification format (.idXML)
Definition: FileTypes.h:66
ConsensusXMLFile.h
ConsensusIDAlgorithmBest.h
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
FileHandler.h
FileTypes.h
OpenMS::EnzymaticDigestion::NamesOfSpecificity
static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY]
Names of the Specificity.
Definition: EnzymaticDigestion.h:77
OpenMS::ConsensusIDAlgorithm::HitInfo::types
std::vector< String > types
Definition: ConsensusIDAlgorithm.h:88
OpenMS::FeatureGroupingAlgorithmQT
A feature grouping algorithm for unlabeled data.
Definition: FeatureGroupingAlgorithmQT.h:52
VersionInfo.h
double
OpenMS::ConsensusXMLFile::store
void store(const String &filename, const ConsensusMap &consensus_map)
Stores a consensus map to file.
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::DigestionEnzymeDB< DigestionEnzymeProtein, ProteaseDB >::getInstance
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:69
OpenMS::ConsensusIDAlgorithm::HitInfo::evidence
std::set< PeptideEvidence > evidence
Definition: ConsensusIDAlgorithm.h:92
OpenMS::File::basename
static String basename(const String &file)
Returns the basename of the file (without the path).
OpenMS::Exception::InvalidValue
Invalid value exception.
Definition: Exception.h:335
OpenMS::String::substitute
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
OpenMS::ConsensusIDAlgorithm::count_empty_
bool count_empty_
Count empty runs in "min_support" calculation? (input parameter)
Definition: ConsensusIDAlgorithm.h:115
OpenMS::ProteinIdentification::getSearchEngine
const String & getSearchEngine() const
Returns the type of search engine used.
OpenMS::FeatureGroupingAlgorithmQT::group
void group(const std::vector< FeatureMap > &maps, ConsensusMap &out) override
Applies the algorithm to feature maps.
OpenMS::Param::setValue
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
OpenMS::MetaInfoInterface::getMetaValue
const DataValue & getMetaValue(const String &name, const DataValue &default_value=DataValue::EMPTY) const
Returns the value corresponding to a string, or a default value (default: DataValue::EMPTY) if not fo...
OpenMS::ProteinIdentification::setSearchEngineVersion
void setSearchEngineVersion(const String &search_engine_version)
Sets the search engine version.
OpenMS::ProteinIdentification::SearchParameters::missed_cleavages
UInt missed_cleavages
The number of allowed missed cleavages.
Definition: ProteinIdentification.h:268
OpenMS::ConsensusIDAlgorithm::HitInfo::support
double support
Definition: ConsensusIDAlgorithm.h:94
OpenMS::String
A more convenient string class.
Definition: String.h:59
OpenMS::MSExperiment::begin
Iterator begin()
Definition: MSExperiment.h:157
OpenMS::ConsensusIDAlgorithmAverage
Calculates a consensus from multiple ID runs by averaging the search scores.
Definition: ConsensusIDAlgorithmAverage.h:48
OpenMS::ConsensusIDAlgorithmRanks
Calculates a consensus from multiple ID runs based on the ranks of the search hits.
Definition: ConsensusIDAlgorithmRanks.h:48
OpenMS::PeptideIdentification::setRT
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
OpenMS::MSExperiment
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
OpenMS::FileTypes::CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition: FileTypes.h:67
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
FeatureXMLFile.h
ConsensusIDAlgorithmPEPMatrix.h
OpenMS::ProteinIdentification::getPrimaryMSRunPath
void getPrimaryMSRunPath(StringList &output, bool raw=false) const
OpenMS::ConsensusIDAlgorithm::HitInfo::target_decoy
String target_decoy
Definition: ConsensusIDAlgorithm.h:91
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
OpenMS::BaseFeature::getPeptideIdentifications
const std::vector< PeptideIdentification > & getPeptideIdentifications() const
returns a const reference to the PeptideIdentification vector
OpenMS::ConsensusIDAlgorithm::keep_old_scores_
bool keep_old_scores_
Keep old scores?
Definition: ConsensusIDAlgorithm.h:118
OpenMS::String::hasSubstring
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
OpenMS::ExperimentalSettings::getProteinIdentifications
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
OpenMS::EnzymaticDigestion::SPEC_FULL
fully enzyme specific, e.g., tryptic (ends with KR, AA-before is KR), or peptide is at protein termin...
Definition: EnzymaticDigestion.h:70
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
OpenMS::PeptideIdentification::getHits
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
OpenMS::ProteinIdentification::SearchParameters::fragment_mass_tolerance_ppm
bool fragment_mass_tolerance_ppm
Mass tolerance unit of fragment ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:270
OpenMS::DateTime::now
static DateTime now()
Returns the current date and time.
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::ConsensusIDAlgorithm::SequenceGrouping
std::map< AASequence, HitInfo > SequenceGrouping
Mapping: peptide sequence -> (charge, scores)
Definition: ConsensusIDAlgorithm.h:103
OpenMS::EnzymaticDigestion::SPEC_NONE
no requirements on start / end
Definition: EnzymaticDigestion.h:68
OPENMS_LOG_FATAL_ERROR
#define OPENMS_LOG_FATAL_ERROR
Macro to be used if fatal errors are reported (processing stops)
Definition: LogStream.h:450
OpenMS::ListUtils::concatenate
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:193
ConsensusIDAlgorithmRanks.h
OpenMS::ProteinIdentification::getIdentifier
const String & getIdentifier() const
Returns the identifier.
OpenMS::ProteinIdentification::SearchParameters::charges
String charges
The allowed charges for the search.
Definition: ProteinIdentification.h:264
OpenMS::DigestionEnzymeDB::getEnzyme
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition: DigestionEnzymeDB.h:98
OpenMS::Peak2D::setRT
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::MSExperiment::Iterator
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:111
OpenMS::DataValue::toBool
bool toBool() const
Conversion to bool.
OpenMS::ProteinIdentification::getOriginalSearchEngineName
const String getOriginalSearchEngineName() const
Return the type of search engine that was first applied (e.g., before percolator or consensusID) or "...
ConsensusIDAlgorithmPEPIons.h
OpenMS::ProteinIdentification::SearchParameters::variable_modifications
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:267
OpenMS::FileTypes::FEATUREXML
OpenMS feature file (.featureXML)
Definition: FileTypes.h:65
ProteaseDB.h
OpenMS::ProteinIdentification::setPrimaryMSRunPath
void setPrimaryMSRunPath(const StringList &s, bool raw=false)
OpenMS::MetaInfoInterface::setMetaValue
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
OpenMS::EnzymaticDigestion::SPEC_SEMI
semi specific, i.e., one of the two cleavage sites must fulfill requirements
Definition: EnzymaticDigestion.h:69
ConsensusIDAlgorithmWorst.h
OpenMS::ConsensusIDAlgorithm::min_support_
double min_support_
Fraction of required support by other ID runs (input parameter)
Definition: ConsensusIDAlgorithm.h:112
int
OpenMS::ProteinIdentification::getSearchEngineVersion
const String & getSearchEngineVersion() const
Returns the search engine version.
OpenMS::ProteinIdentification::SearchParameters::db
String db
The used database.
Definition: ProteinIdentification.h:261
OpenMS::ProteinIdentification::getHits
const std::vector< ProteinHit > & getHits() const
Returns the protein hits.
OpenMS::ConsensusIDAlgorithmWorst
Calculates a consensus from multiple ID runs by taking the worst search score (conservative approach)...
Definition: ConsensusIDAlgorithmWorst.h:48
OpenMS::FeatureXMLFile::load
void load(const String &filename, FeatureMap &feature_map)
loads the file with name filename into map and calls updateRanges().
OpenMS::ConsensusIDAlgorithm::HitInfo::final_score
double final_score
Definition: ConsensusIDAlgorithm.h:93
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:58
OpenMS::EnzymaticDigestion::SPEC_NONTERM
no requirements on NTerm (currently not supported in the class)
Definition: EnzymaticDigestion.h:73
OpenMS::EnzymaticDigestion::SPEC_NOCTERM
no requirements on CTerm (currently not supported in the class)
Definition: EnzymaticDigestion.h:72
OpenMS::DigestionEnzymeProtein
Representation of a digestion enzyme for proteins (protease)
Definition: DigestionEnzymeProtein.h:48
OpenMS::ConsensusIDAlgorithm::HitInfo
Definition: ConsensusIDAlgorithm.h:84
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
ConsensusIDAlgorithmAverage.h
OpenMS::String::split
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
OpenMS::Peak2D::setMZ
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
OpenMS::DefaultParamHandler::setParameters
void setParameters(const Param &param)
Sets the parameters.
OpenMS::DefaultParamHandler::getDefaults
const Param & getDefaults() const
Non-mutable access to the default parameters.
OpenMS::VersionInfo::getVersion
static String getVersion()
Return the version number of OpenMS.
OpenMS::ProteinIdentification::setSearchParameters
void setSearchParameters(const SearchParameters &search_parameters)
Sets the search parameters.
OpenMS::ConsensusIDAlgorithmPEPIons
Calculates a consensus from multiple ID runs based on PEPs and shared ions.
Definition: ConsensusIDAlgorithmPEPIons.h:48
OpenMS::ProteinIdentification::SearchParameters::fragment_mass_tolerance
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition: ProteinIdentification.h:269
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:80
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::FeatureXMLFile::store
void store(const String &filename, const FeatureMap &feature_map)
stores the map feature_map in file with name filename.
OpenMS::ConsensusIDAlgorithmBest
Calculates a consensus from multiple ID runs by taking the best search score.
Definition: ConsensusIDAlgorithmBest.h:48
OpenMS::ConsensusXMLFile::load
void load(const String &filename, ConsensusMap &map)
Loads a consensus map from file and calls updateRanges.
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::ProteinIdentification::getSearchParameters
const SearchParameters & getSearchParameters() const
Returns the search parameters.
OpenMS::ProteinIdentification::SearchParameters::fixed_modifications
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:266
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
OpenMS::ProteinIdentification::setSearchEngine
void setSearchEngine(const String &search_engine)
Sets the search engine type.
OpenMS::ProteinIdentification::SearchParameters::precursor_mass_tolerance
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition: ProteinIdentification.h:271
OpenMS::Param::update
bool update(const Param &p_outdated, const bool add_unknown=false)
Rescue parameter values from p_outdated to current param.
OpenMS::ConsensusIDAlgorithm::HitInfo::charge
Int charge
Definition: ConsensusIDAlgorithm.h:86
OpenMS::FeatureMap
A container for features.
Definition: FeatureMap.h:97
OpenMS::OpenMS_Log_debug
Logger::LogStream OpenMS_Log_debug
Global static instance of a LogStream to capture messages classified as debug output....
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
OpenMS::Param::remove
void remove(const String &key)
Remove the entry key or a section key (when suffix is ':')
OpenMS::String::substr
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
OpenMS::Feature
An LC-MS feature.
Definition: Feature.h:70
OpenMS::MSExperiment::end
Iterator end()
Definition: MSExperiment.h:167
OpenMS::ConsensusIDAlgorithmPEPMatrix
Calculates a consensus from multiple ID runs based on PEPs and sequence similarities.
Definition: ConsensusIDAlgorithmPEPMatrix.h:157
OpenMS::ConsensusMap::Iterator
std::vector< ConsensusFeature >::iterator Iterator
Mutable iterator.
Definition: ConsensusMap.h:170
FeatureGroupingAlgorithmQT.h
OpenMS::MetaInfoInterface::getKeys
void getKeys(std::vector< String > &keys) const
Fills the given vector with a list of all keys for which a value is set.
OpenMS::FeatureXMLFile
This class provides Input/Output functionality for feature maps.
Definition: FeatureXMLFile.h:68
ConsensusIDAlgorithm.h
OpenMS::FileHandler::stripExtension
static String stripExtension(const String &filename)
If filename contains an extension, it will be removed (including the '.'). Special extensions,...
OpenMS::ProteinIdentification::setDateTime
void setDateTime(const DateTime &date)
Sets the date of the protein identification run.
OpenMS::ProteinIdentification::SearchParameters::precursor_mass_tolerance_ppm
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:272
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:73
OpenMS::MSExperiment::clear
void clear(bool clear_meta_data)
Clears all data and meta data.
OpenMS::AASequence
Representation of a peptide/protein sequence.
Definition: AASequence.h:111
OpenMS::ConsensusIDAlgorithm::apply
void apply(std::vector< PeptideIdentification > &ids, const std::map< String, String > &se_info, Size number_of_runs=0)
Calculates the consensus ID for a set of peptide identifications of one spectrum or (consensus) featu...
OpenMS::ConsensusIDAlgorithm
Abstract base class for all ConsensusID algorithms (that calculate a consensus from multiple ID runs)...
Definition: ConsensusIDAlgorithm.h:59
OpenMS::ConsensusIDAlgorithm::number_of_runs_
Size number_of_runs_
Number of ID runs.
Definition: ConsensusIDAlgorithm.h:109
PeptideIdentification.h
OpenMS::ConsensusIDAlgorithm::considered_hits_
Size considered_hits_
Number of peptide hits considered per ID run (input parameter)
Definition: ConsensusIDAlgorithm.h:106
OpenMS::ProteinIdentification::SearchParameters
Search parameters of the DB search.
Definition: ProteinIdentification.h:258
OpenMS::PeptideIdentification::setMZ
void setMZ(double mz)
sets the MZ of the MS2 spectrum
OpenMS::ProteinIdentification::SearchParameters::enzyme_term_specificity
EnzymaticDigestion::Specificity enzyme_term_specificity
The number of required cutting-rule matching termini during search (none=0, semi=1,...
Definition: ProteinIdentification.h:274
OpenMS::Internal::ClassTest::infile
std::ifstream infile
Questionable file tested by TEST_FILE_EQUAL.
OpenMS::ConsensusIDAlgorithm::HitInfo::scores
std::vector< double > scores
Definition: ConsensusIDAlgorithm.h:87
TOPPBase.h
OpenMS::ConsensusXMLFile
This class provides Input functionality for ConsensusMaps and Output functionality for alignments and...
Definition: ConsensusXMLFile.h:62
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:63