31 class PeptideIdentification;
32 class PeptideEvidence;
125 return float_data_arrays_;
152 auto it = std::find_if(integer_data_arrays_.begin(), integer_data_arrays_.end(),
154 if (it == integer_data_arrays_.end())
164 auto it = std::find_if(string_data_arrays_.begin(), string_data_arrays_.end(),
166 if (it == string_data_arrays_.end())
176 auto it = std::find_if(float_data_arrays_.begin(), float_data_arrays_.end(),
177 [&name](
const FloatDataArray& da) { return da.getName() == name; } );
178 if (it == float_data_arrays_.end())
188 auto it = std::find_if(integer_data_arrays_.begin(), integer_data_arrays_.end(),
190 if (it == integer_data_arrays_.end())
200 auto it = std::find_if(string_data_arrays_.begin(), string_data_arrays_.end(),
202 if (it == string_data_arrays_.end())
212 auto it = std::find_if(float_data_arrays_.begin(), float_data_arrays_.end(),
213 [&name](
const FloatDataArray& da) { return da.getName() == name; } );
214 if (it == float_data_arrays_.end())
241 static const std::string NamesOfPeakMassType[
static_cast<size_t>(PeakMassType::SIZE_OF_PEAKMASSTYPE)];
320 const std::vector<ProteinHit>&
getHits()
const;
333 void setHits(
const std::vector<ProteinHit>& hits);
392 bool use_unassigned_ids);
502 std::unordered_map<
String, std::set<std::pair<Size, ResidueModification>>>& prot2mod)
const;
513 struct hash<
OpenMS::ProteinIdentification::ProteinGroup>
518 for (
const auto& acc : pg.accessions)
528 struct hash<
OpenMS::ProteinIdentification::SearchParameters>
538 for (
const auto& mod : sp.fixed_modifications)
543 for (
const auto& mod : sp.variable_modifications)
563 struct hash<
OpenMS::ProteinIdentification>
572 OpenMS::hash_combine(seed, std::hash<OpenMS::ProteinIdentification::SearchParameters>{}(pi.getSearchParameters()));
576 for (
const auto& hit : pi.getHits())
585 for (
const auto& group : pi.getProteinGroups())
590 for (
const auto& group : pi.getIndistinguishableProteins())
A container for consensus elements.
Definition ConsensusMap.h:69
Float data array class.
Definition DataArrays.h:25
Integer data array class.
Definition DataArrays.h:75
String data array class.
Definition DataArrays.h:125
DateTime Class.
Definition DateTime.h:35
Representation of a digestion enzyme for proteins (protease)
Definition DigestionEnzymeProtein.h:24
Specificity
when querying for valid digestion products, this determines if the specificity of the two peptide end...
Definition EnzymaticDigestion.h:42
Element could not be found exception.
Definition Exception.h:654
Two-way mapping from ms-run-path to protID|pepID-identifier.
Definition IdentifierMSRunMapper.h:51
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Representation of a protein hit.
Definition ProteinHit.h:35
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition ProteinIdentification.h:74
void setIntegerDataArrays(const IntegerDataArrays &ida)
Sets the integer meta data arrays.
FloatDataArray & getFloatDataArrayByName(const String &name)
Returns a mutable reference to the first float meta data array with the given name.
Definition ProteinIdentification.h:174
const IntegerDataArrays & getIntegerDataArrays() const
Returns a const reference to the integer meta data arrays.
double probability
Probability of this group.
Definition ProteinIdentification.h:87
bool operator<(const ProteinGroup &rhs) const
const StringDataArray & getStringDataArrayByName(const String &name) const
Returns a const reference to the first string meta data array with the given name.
Definition ProteinIdentification.h:198
std::vector< StringDataArray > StringDataArrays
Definition ProteinIdentification.h:81
OpenMS::DataArrays::FloatDataArray FloatDataArray
Float data array vector type.
Definition ProteinIdentification.h:77
const StringDataArrays & getStringDataArrays() const
Returns a const reference to the string meta data arrays.
std::vector< String > accessions
Accessions of (indistinguishable) proteins that belong to the same group.
Definition ProteinIdentification.h:90
StringDataArrays string_data_arrays_
String data arrays.
Definition ProteinIdentification.h:226
const IntegerDataArray & getIntegerDataArrayByName(const String &name) const
Returns a const reference to the first integer meta data array with the given name.
Definition ProteinIdentification.h:186
const FloatDataArray & getFloatDataArrayByName(const String &name) const
Returns a const reference to the first float meta data array with the given name.
Definition ProteinIdentification.h:210
FloatDataArrays & getFloatDataArrays()
Returns a mutable reference to the float meta data arrays.
Definition ProteinIdentification.h:123
IntegerDataArrays integer_data_arrays_
Integer data arrays.
Definition ProteinIdentification.h:229
OpenMS::DataArrays::StringDataArray StringDataArray
String data array vector type.
Definition ProteinIdentification.h:80
IntegerDataArrays & getIntegerDataArrays()
Returns a mutable reference to the integer meta data arrays.
FloatDataArrays float_data_arrays_
Float data arrays.
Definition ProteinIdentification.h:223
StringDataArray & getStringDataArrayByName(const String &name)
Returns a mutable reference to the first string meta data array with the given name.
Definition ProteinIdentification.h:162
void setStringDataArrays(const StringDataArrays &sda)
Sets the string meta data arrays.
const FloatDataArrays & getFloatDataArrays() const
Returns a const reference to the float meta data arrays.
std::vector< FloatDataArray > FloatDataArrays
Definition ProteinIdentification.h:78
IntegerDataArray & getIntegerDataArrayByName(const String &name)
Returns a mutable reference to the first integer meta data array with the given name.
Definition ProteinIdentification.h:150
OpenMS::DataArrays::IntegerDataArray IntegerDataArray
Integer data array vector type.
Definition ProteinIdentification.h:83
bool operator==(const ProteinGroup &rhs) const
Equality operator.
std::vector< IntegerDataArray > IntegerDataArrays
Definition ProteinIdentification.h:84
void setFloatDataArrays(const FloatDataArrays &fda)
Sets the float meta data arrays.
StringDataArrays & getStringDataArrays()
Returns a mutable reference to the string meta data arrays.
Representation of a protein identification run.
Definition ProteinIdentification.h:56
void setIdentifier(const String &id)
Sets the identifier.
ProteinIdentification(const ProteinIdentification &)=default
Copy constructor.
const String & getIdentifier() const
Returns the identifier.
void insertProteinGroup(const ProteinGroup &group)
Appends a new protein group.
const std::vector< ProteinGroup > & getProteinGroups() const
Returns the protein groups.
void insertHit(const ProteinHit &input)
Appends a protein hit.
const String getInferenceEngineVersion() const
Returns the inference engine version.
void setSearchEngine(const String &search_engine)
Sets the search engine type.
void addPrimaryMSRunPath(const String &s, bool raw=false)
const String & getSearchEngine() const
Returns the type of search engine used.
const String getOriginalSearchEngineName() const
Return the type of search engine that was first applied (e.g., before percolator or consensusID) or "...
bool hasInferenceEngineAsSearchEngine() const
Checks if the search engine name matches an inference engine known to OpenMS.
const String & getScoreType() const
Returns the protein score type.
void setHigherScoreBetter(bool higher_is_better)
Sets the orientation of the score (is higher better?)
Size nrPrimaryMSRunPaths(bool raw=false) const
Gets the number of primary MS runs involved in this ID run.
double getSignificanceThreshold() const
Returns the protein significance threshold value.
const String getInferenceEngine() const
Returns the type of inference engine used.
std::vector< ProteinGroup > & getProteinGroups()
Returns the protein groups (mutable)
void computeModifications(const PeptideIdentificationList &pep_ids, const StringList &skip_modifications)
Compute the modifications of all ProteinHits given PeptideHits.
void sort()
Sorts the protein hits according to their score.
void insertIndistinguishableProteins(const ProteinGroup &group)
Appends new indistinguishable proteins.
const std::vector< ProteinGroup > & getIndistinguishableProteins() const
Returns the indistinguishable proteins.
String search_engine_
Definition ProteinIdentification.h:479
PeakMassType
Peak mass type.
Definition ProteinIdentification.h:234
ProteinIdentification()
Default constructor.
ProteinIdentification & operator=(ProteinIdentification &&)=default
Move assignment operator.
void setSignificanceThreshold(double value)
Sets the protein significance threshold value.
void fillModMapping_(const PeptideIdentificationList &pep_ids, const StringList &skip_modifications, std::unordered_map< String, std::set< std::pair< Size, ResidueModification > > > &prot2mod) const
std::vector< ProteinHit > protein_hits_
Definition ProteinIdentification.h:489
const SearchParameters & getSearchParameters() const
Returns the search parameters.
std::vector< ProteinGroup > & getIndistinguishableProteins()
Returns the indistinguishable proteins (mutable)
bool hasInferenceData() const
std::vector< ProteinHit > & getHits()
Returns the protein hits (mutable)
void setInferenceEngineVersion(const String &inference_engine_version)
Sets the inference engine version.
bool operator!=(const ProteinIdentification &rhs) const
Inequality operator.
std::vector< std::pair< String, String > > getSearchEngineSettingsAsPairs(const String &se="") const
ProteinHit HitType
Hit type definition.
Definition ProteinIdentification.h:59
String search_engine_version_
Definition ProteinIdentification.h:480
void setSearchEngineVersion(const String &search_engine_version)
Sets the search engine version.
void setHits(const std::vector< ProteinHit > &hits)
Sets the protein hits.
void computeCoverage(const ConsensusMap &cmap, bool use_unassigned_ids)
void getPrimaryMSRunPath(StringList &output, bool raw=false) const
double protein_significance_threshold_
Definition ProteinIdentification.h:493
SearchParameters search_parameters_
Definition ProteinIdentification.h:481
void fillIndistinguishableGroupsWithSingletons()
Appends singleton groups (with the current score) for every yet ungrouped protein hit.
void setScoreType(const String &type)
Sets the protein score type.
String protein_score_type_
Definition ProteinIdentification.h:487
static StringList getAllNamesOfPeakMassType()
Returns all peak mass type names known to OpenMS.
std::vector< ProteinHit >::iterator findHit(const String &accession)
Finds a protein hit by accession (returns past-the-end iterator if not found)
bool higher_score_better_
Definition ProteinIdentification.h:488
void fillEvidenceMapping_(std::unordered_map< String, std::set< PeptideEvidence > > &map_acc_2_evidence, const PeptideIdentificationList &pep_ids) const
void setPrimaryMSRunPath(const StringList &s, bool raw=false)
void computeModifications(const ConsensusMap &cmap, const StringList &skip_modifications, bool use_unassigned_ids)
DateTime date_
Definition ProteinIdentification.h:482
void setInferenceEngine(const String &search_engine)
Sets the inference engine type.
void copyMetaDataOnly(const ProteinIdentification &)
Copies only metadata (no protein hits or protein groups)
const std::vector< ProteinHit > & getHits() const
Returns the protein hits.
bool peptideIDsMergeable(const ProteinIdentification &id_run, const String &experiment_type) const
bool isHigherScoreBetter() const
Returns true if a higher score represents a better score.
void computeCoverage(const PeptideIdentificationList &pep_ids)
Compute the coverage (in percent) of all ProteinHits given PeptideHits.
ProteinIdentification(ProteinIdentification &&)=default
Move constructor.
std::vector< ProteinGroup > indistinguishable_proteins_
Indistinguishable proteins: accessions[0] is "group leader", probability is meaningless.
Definition ProteinIdentification.h:492
const DateTime & getDateTime() const
Returns the date of the protein identification run.
std::vector< ProteinGroup > protein_groups_
Definition ProteinIdentification.h:490
virtual ~ProteinIdentification()
Destructor.
void setDateTime(const DateTime &date)
Sets the date of the protein identification run.
void setSearchParameters(SearchParameters &&search_parameters)
Sets the search parameters (move)
String id_
Definition ProteinIdentification.h:478
bool operator==(const ProteinIdentification &rhs) const
Equality operator.
void insertHit(ProteinHit &&input)
Appends a protein hit.
const String & getSearchEngineVersion() const
Returns the search engine version.
void setPrimaryMSRunPath(const StringList &s, MSExperiment &e)
Sets the file path to the primary MS run, but tries to use the mzML annotated in the MSExperiment.
ProteinIdentification & operator=(const ProteinIdentification &)=default
Assignment operator.
void computeCoverageFromEvidenceMapping_(const std::unordered_map< String, std::set< PeptideEvidence > > &map)
void addPrimaryMSRunPath(const StringList &s, bool raw=false)
SearchParameters & getSearchParameters()
Returns the search parameters (mutable)
void setSearchParameters(const SearchParameters &search_parameters)
Sets the search parameters.
A more convenient string class.
Definition String.h:34
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< String > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::size_t hash_int(T value) noexcept
Hash for an integer type.
Definition HashUtils.h:107
void hash_combine(std::size_t &seed, std::size_t value) noexcept
Combine a hash value with additional data using golden ratio mixing.
Definition HashUtils.h:87
std::size_t hash_float(T value) noexcept
Hash for a floating point type (float or double).
Definition HashUtils.h:142
std::size_t fnv1a_hash_string(const std::string &s) noexcept
FNV-1a hash for a string.
Definition HashUtils.h:70
Search parameters of the DB search.
Definition ProteinIdentification.h:249
String db_version
The database version.
Definition ProteinIdentification.h:251
bool operator!=(const SearchParameters &rhs) const
~SearchParameters()=default
Destructor.
bool fragment_mass_tolerance_ppm
Mass tolerance unit of fragment ions (true: ppm, false: Dalton)
Definition ProteinIdentification.h:259
SearchParameters & operator=(SearchParameters &&) &=default
Move assignment operator.
EnzymaticDigestion::Specificity enzyme_term_specificity
The number of required cutting-rule matching termini during search (none=0, semi=1,...
Definition ProteinIdentification.h:263
String taxonomy
The taxonomy restriction.
Definition ProteinIdentification.h:252
std::vector< String > fixed_modifications
Used fixed modifications.
Definition ProteinIdentification.h:255
SearchParameters(const SearchParameters &)=default
Copy constructor.
String charges
The allowed charges for the search.
Definition ProteinIdentification.h:253
Protease digestion_enzyme
The cleavage site information in details (from ProteaseDB)
Definition ProteinIdentification.h:262
bool operator==(const SearchParameters &rhs) const
SearchParameters & operator=(const SearchParameters &)=default
Assignment operator.
bool mergeable(const ProteinIdentification::SearchParameters &sp, const String &experiment_type) const
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition ProteinIdentification.h:258
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition ProteinIdentification.h:261
SearchParameters(SearchParameters &&)=default
Move constructor.
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition ProteinIdentification.h:260
std::pair< int, int > getChargeRange() const
Returns the charge range from the search engine settings as a pair of ints.
std::vector< String > variable_modifications
Allowed variable modifications.
Definition ProteinIdentification.h:256
PeakMassType mass_type
Mass type of the peaks.
Definition ProteinIdentification.h:254
String db
The used database.
Definition ProteinIdentification.h:250
int getChargeValue_(String &charge_str) const
UInt missed_cleavages
The number of allowed missed cleavages.
Definition ProteinIdentification.h:257
std::size_t operator()(const OpenMS::ProteinIdentification &pi) const noexcept
Definition ProteinIdentification.h:565
std::size_t operator()(const OpenMS::ProteinIdentification::ProteinGroup &pg) const noexcept
Definition ProteinIdentification.h:515
std::size_t operator()(const OpenMS::ProteinIdentification::SearchParameters &sp) const noexcept
Definition ProteinIdentification.h:530