29 class PeptideIdentification;
30 class PeptideIdentificationList;
31 class PeptideEvidence;
124 return float_data_arrays_;
151 auto it = std::find_if(integer_data_arrays_.begin(), integer_data_arrays_.end(),
153 if (it == integer_data_arrays_.end())
163 auto it = std::find_if(string_data_arrays_.begin(), string_data_arrays_.end(),
165 if (it == string_data_arrays_.end())
175 auto it = std::find_if(float_data_arrays_.begin(), float_data_arrays_.end(),
176 [&name](
const FloatDataArray& da) { return da.getName() == name; } );
177 if (it == float_data_arrays_.end())
187 auto it = std::find_if(integer_data_arrays_.begin(), integer_data_arrays_.end(),
189 if (it == integer_data_arrays_.end())
199 auto it = std::find_if(string_data_arrays_.begin(), string_data_arrays_.end(),
201 if (it == string_data_arrays_.end())
211 auto it = std::find_if(float_data_arrays_.begin(), float_data_arrays_.end(),
212 [&name](
const FloatDataArray& da) { return da.getName() == name; } );
213 if (it == float_data_arrays_.end())
240 static const std::string NamesOfPeakMassType[
static_cast<size_t>(PeakMassType::SIZE_OF_PEAKMASSTYPE)];
319 const std::vector<ProteinHit>&
getHits()
const;
332 void setHits(
const std::vector<ProteinHit>& hits);
335 std::vector<ProteinHit>::iterator
findHit(
const std::string& accession);
391 bool use_unassigned_ids);
501 std::unordered_map<std::string, std::set<std::pair<Size, ResidueModification>>>& prot2mod)
const;
512 struct hash<
OpenMS::ProteinIdentification::ProteinGroup>
517 for (
const auto& acc : pg.accessions)
527 struct hash<
OpenMS::ProteinIdentification::SearchParameters>
537 for (
const auto& mod : sp.fixed_modifications)
542 for (
const auto& mod : sp.variable_modifications)
562 struct hash<
OpenMS::ProteinIdentification>
571 OpenMS::hash_combine(seed, std::hash<OpenMS::ProteinIdentification::SearchParameters>{}(pi.getSearchParameters()));
575 for (
const auto& hit : pi.getHits())
584 for (
const auto& group : pi.getProteinGroups())
589 for (
const auto& group : pi.getIndistinguishableProteins())
A container for consensus elements.
Definition ConsensusMap.h:67
Float data array class.
Definition DataArrays.h:25
Integer data array class.
Definition DataArrays.h:75
std::string data array class
Definition DataArrays.h:125
DateTime Class.
Definition DateTime.h:31
Representation of a digestion enzyme for proteins (protease)
Definition DigestionEnzymeProtein.h:24
Specificity
when querying for valid digestion products, this determines if the specificity of the two peptide end...
Definition EnzymaticDigestion.h:42
Element could not be found exception.
Definition Exception.h:654
Two-way mapping from ms-run-path to protID|pepID-identifier.
Definition IdentifierMSRunMapper.h:51
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Representation of a protein hit.
Definition ProteinHit.h:35
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition ProteinIdentification.h:73
void setIntegerDataArrays(const IntegerDataArrays &ida)
Sets the integer meta data arrays.
const StringDataArray & getStringDataArrayByName(const std::string &name) const
Returns a const reference to the first string meta data array with the given name.
Definition ProteinIdentification.h:197
const IntegerDataArrays & getIntegerDataArrays() const
Returns a const reference to the integer meta data arrays.
double probability
Probability of this group.
Definition ProteinIdentification.h:86
bool operator<(const ProteinGroup &rhs) const
std::vector< StringDataArray > StringDataArrays
Definition ProteinIdentification.h:80
OpenMS::DataArrays::FloatDataArray FloatDataArray
Float data array vector type.
Definition ProteinIdentification.h:76
IntegerDataArray & getIntegerDataArrayByName(const std::string &name)
Returns a mutable reference to the first integer meta data array with the given name.
Definition ProteinIdentification.h:149
const StringDataArrays & getStringDataArrays() const
Returns a const reference to the string meta data arrays.
std::vector< std::string > accessions
Accessions of (indistinguishable) proteins that belong to the same group.
Definition ProteinIdentification.h:89
StringDataArrays string_data_arrays_
std::string data arrays
Definition ProteinIdentification.h:225
const FloatDataArray & getFloatDataArrayByName(const std::string &name) const
Returns a const reference to the first float meta data array with the given name.
Definition ProteinIdentification.h:209
FloatDataArrays & getFloatDataArrays()
Returns a mutable reference to the float meta data arrays.
Definition ProteinIdentification.h:122
IntegerDataArrays integer_data_arrays_
Integer data arrays.
Definition ProteinIdentification.h:228
OpenMS::DataArrays::StringDataArray StringDataArray
std::string data array vector type
Definition ProteinIdentification.h:79
IntegerDataArrays & getIntegerDataArrays()
Returns a mutable reference to the integer meta data arrays.
FloatDataArrays float_data_arrays_
Float data arrays.
Definition ProteinIdentification.h:222
void setStringDataArrays(const StringDataArrays &sda)
Sets the string meta data arrays.
const FloatDataArrays & getFloatDataArrays() const
Returns a const reference to the float meta data arrays.
FloatDataArray & getFloatDataArrayByName(const std::string &name)
Returns a mutable reference to the first float meta data array with the given name.
Definition ProteinIdentification.h:173
const IntegerDataArray & getIntegerDataArrayByName(const std::string &name) const
Returns a const reference to the first integer meta data array with the given name.
Definition ProteinIdentification.h:185
std::vector< FloatDataArray > FloatDataArrays
Definition ProteinIdentification.h:77
OpenMS::DataArrays::IntegerDataArray IntegerDataArray
Integer data array vector type.
Definition ProteinIdentification.h:82
bool operator==(const ProteinGroup &rhs) const
Equality operator.
std::vector< IntegerDataArray > IntegerDataArrays
Definition ProteinIdentification.h:83
StringDataArray & getStringDataArrayByName(const std::string &name)
Returns a mutable reference to the first string meta data array with the given name.
Definition ProteinIdentification.h:161
void setFloatDataArrays(const FloatDataArrays &fda)
Sets the float meta data arrays.
StringDataArrays & getStringDataArrays()
Returns a mutable reference to the string meta data arrays.
Representation of a protein identification run.
Definition ProteinIdentification.h:55
ProteinIdentification(const ProteinIdentification &)=default
Copy constructor.
void insertProteinGroup(const ProteinGroup &group)
Appends a new protein group.
const std::vector< ProteinGroup > & getProteinGroups() const
Returns the protein groups.
void insertHit(const ProteinHit &input)
Appends a protein hit.
const std::string getInferenceEngineVersion() const
Returns the inference engine version.
bool hasInferenceEngineAsSearchEngine() const
Checks if the search engine name matches an inference engine known to OpenMS.
const std::string & getSearchEngine() const
Returns the type of search engine used.
void setHigherScoreBetter(bool higher_is_better)
Sets the orientation of the score (is higher better?)
Size nrPrimaryMSRunPaths(bool raw=false) const
Gets the number of primary MS runs involved in this ID run.
double getSignificanceThreshold() const
Returns the protein significance threshold value.
void setIdentifier(const std::string &id)
Sets the identifier.
std::vector< ProteinGroup > & getProteinGroups()
Returns the protein groups (mutable)
void computeModifications(const PeptideIdentificationList &pep_ids, const StringList &skip_modifications)
Compute the modifications of all ProteinHits given PeptideHits.
void sort()
Sorts the protein hits according to their score.
void insertIndistinguishableProteins(const ProteinGroup &group)
Appends new indistinguishable proteins.
const std::vector< ProteinGroup > & getIndistinguishableProteins() const
Returns the indistinguishable proteins.
void setSearchEngineVersion(const std::string &search_engine_version)
Sets the search engine version.
std::string search_engine_
Definition ProteinIdentification.h:478
PeakMassType
Peak mass type.
Definition ProteinIdentification.h:233
ProteinIdentification()
Default constructor.
ProteinIdentification & operator=(ProteinIdentification &&)=default
Move assignment operator.
void setSignificanceThreshold(double value)
Sets the protein significance threshold value.
std::vector< ProteinHit > protein_hits_
Definition ProteinIdentification.h:488
std::string search_engine_version_
Definition ProteinIdentification.h:479
const SearchParameters & getSearchParameters() const
Returns the search parameters.
std::vector< ProteinGroup > & getIndistinguishableProteins()
Returns the indistinguishable proteins (mutable)
bool hasInferenceData() const
void setInferenceEngine(const std::string &search_engine)
Sets the inference engine type.
std::vector< ProteinHit > & getHits()
Returns the protein hits (mutable)
const std::string & getScoreType() const
Returns the protein score type.
bool operator!=(const ProteinIdentification &rhs) const
Inequality operator.
const std::string & getIdentifier() const
Returns the identifier.
ProteinHit HitType
Hit type definition.
Definition ProteinIdentification.h:58
void setHits(const std::vector< ProteinHit > &hits)
Sets the protein hits.
void computeCoverage(const ConsensusMap &cmap, bool use_unassigned_ids)
void getPrimaryMSRunPath(StringList &output, bool raw=false) const
double protein_significance_threshold_
Definition ProteinIdentification.h:492
SearchParameters search_parameters_
Definition ProteinIdentification.h:480
void fillIndistinguishableGroupsWithSingletons()
Appends singleton groups (with the current score) for every yet ungrouped protein hit.
std::vector< std::pair< std::string, std::string > > getSearchEngineSettingsAsPairs(const std::string &se="") const
void setScoreType(const std::string &type)
Sets the protein score type.
std::string protein_score_type_
Definition ProteinIdentification.h:486
static StringList getAllNamesOfPeakMassType()
Returns all peak mass type names known to OpenMS.
void setSearchEngine(const std::string &search_engine)
Sets the search engine type.
bool higher_score_better_
Definition ProteinIdentification.h:487
void setPrimaryMSRunPath(const StringList &s, bool raw=false)
void computeModifications(const ConsensusMap &cmap, const StringList &skip_modifications, bool use_unassigned_ids)
void setInferenceEngineVersion(const std::string &inference_engine_version)
Sets the inference engine version.
DateTime date_
Definition ProteinIdentification.h:481
bool peptideIDsMergeable(const ProteinIdentification &id_run, const std::string &experiment_type) const
const std::string getInferenceEngine() const
Returns the type of inference engine used.
void copyMetaDataOnly(const ProteinIdentification &)
Copies only metadata (no protein hits or protein groups)
void addPrimaryMSRunPath(const std::string &s, bool raw=false)
const std::vector< ProteinHit > & getHits() const
Returns the protein hits.
void fillModMapping_(const PeptideIdentificationList &pep_ids, const StringList &skip_modifications, std::unordered_map< std::string, std::set< std::pair< Size, ResidueModification > > > &prot2mod) const
bool isHigherScoreBetter() const
Returns true if a higher score represents a better score.
const std::string & getSearchEngineVersion() const
Returns the search engine version.
void computeCoverage(const PeptideIdentificationList &pep_ids)
Compute the coverage (in percent) of all ProteinHits given PeptideHits.
ProteinIdentification(ProteinIdentification &&)=default
Move constructor.
std::vector< ProteinGroup > indistinguishable_proteins_
Indistinguishable proteins: accessions[0] is "group leader", probability is meaningless.
Definition ProteinIdentification.h:491
const std::string getOriginalSearchEngineName() const
Return the type of search engine that was first applied (e.g., before percolator or consensusID) or "...
void computeCoverageFromEvidenceMapping_(const std::unordered_map< std::string, std::set< PeptideEvidence > > &map)
const DateTime & getDateTime() const
Returns the date of the protein identification run.
std::vector< ProteinGroup > protein_groups_
Definition ProteinIdentification.h:489
std::string id_
Definition ProteinIdentification.h:477
std::vector< ProteinHit >::iterator findHit(const std::string &accession)
Finds a protein hit by accession (returns past-the-end iterator if not found)
virtual ~ProteinIdentification()
Destructor.
void setDateTime(const DateTime &date)
Sets the date of the protein identification run.
void setSearchParameters(SearchParameters &&search_parameters)
Sets the search parameters (move)
bool operator==(const ProteinIdentification &rhs) const
Equality operator.
void insertHit(ProteinHit &&input)
Appends a protein hit.
void setPrimaryMSRunPath(const StringList &s, MSExperiment &e)
Sets the file path to the primary MS run, but tries to use the mzML annotated in the MSExperiment.
ProteinIdentification & operator=(const ProteinIdentification &)=default
Assignment operator.
void addPrimaryMSRunPath(const StringList &s, bool raw=false)
SearchParameters & getSearchParameters()
Returns the search parameters (mutable)
void setSearchParameters(const SearchParameters &search_parameters)
Sets the search parameters.
void fillEvidenceMapping_(std::unordered_map< std::string, std::set< PeptideEvidence > > &map_acc_2_evidence, const PeptideIdentificationList &pep_ids) const
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::size_t hash_int(T value) noexcept
Hash for an integer type.
Definition HashUtils.h:107
void hash_combine(std::size_t &seed, std::size_t value) noexcept
Combine a hash value with additional data using golden ratio mixing.
Definition HashUtils.h:87
std::size_t hash_float(T value) noexcept
Hash for a floating point type (float or double).
Definition HashUtils.h:142
std::size_t fnv1a_hash_string(const std::string &s) noexcept
FNV-1a hash for a string.
Definition HashUtils.h:70
Search parameters of the DB search.
Definition ProteinIdentification.h:248
bool operator!=(const SearchParameters &rhs) const
~SearchParameters()=default
Destructor.
bool fragment_mass_tolerance_ppm
Mass tolerance unit of fragment ions (true: ppm, false: Dalton)
Definition ProteinIdentification.h:258
SearchParameters & operator=(SearchParameters &&) &=default
Move assignment operator.
EnzymaticDigestion::Specificity enzyme_term_specificity
The number of required cutting-rule matching termini during search (none=0, semi=1,...
Definition ProteinIdentification.h:262
bool mergeable(const ProteinIdentification::SearchParameters &sp, const std::string &experiment_type) const
std::vector< std::string > fixed_modifications
Used fixed modifications.
Definition ProteinIdentification.h:254
SearchParameters(const SearchParameters &)=default
Copy constructor.
std::string taxonomy
The taxonomy restriction.
Definition ProteinIdentification.h:251
Protease digestion_enzyme
The cleavage site information in details (from ProteaseDB)
Definition ProteinIdentification.h:261
bool operator==(const SearchParameters &rhs) const
std::string charges
The allowed charges for the search.
Definition ProteinIdentification.h:252
SearchParameters & operator=(const SearchParameters &)=default
Assignment operator.
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition ProteinIdentification.h:257
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition ProteinIdentification.h:260
SearchParameters(SearchParameters &&)=default
Move constructor.
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition ProteinIdentification.h:259
int getChargeValue_(std::string &charge_str) const
std::vector< std::string > variable_modifications
Allowed variable modifications.
Definition ProteinIdentification.h:255
std::pair< int, int > getChargeRange() const
Returns the charge range from the search engine settings as a pair of ints.
PeakMassType mass_type
Mass type of the peaks.
Definition ProteinIdentification.h:253
std::string db
The used database.
Definition ProteinIdentification.h:249
std::string db_version
The database version.
Definition ProteinIdentification.h:250
UInt missed_cleavages
The number of allowed missed cleavages.
Definition ProteinIdentification.h:256
std::size_t operator()(const OpenMS::ProteinIdentification &pi) const noexcept
Definition ProteinIdentification.h:564
std::size_t operator()(const OpenMS::ProteinIdentification::ProteinGroup &pg) const noexcept
Definition ProteinIdentification.h:514
std::size_t operator()(const OpenMS::ProteinIdentification::SearchParameters &sp) const noexcept
Definition ProteinIdentification.h:529