OpenMS
Loading...
Searching...
No Matches
AccurateMassSearchEngine.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Erhan Kenar, Chris Bielow $
7// --------------------------------------------------------------------------
8
9#pragma once
10
16#include <OpenMS/FORMAT/MzTab.h>
22#include <OpenMS/SYSTEM/File.h>
24
25
26#include <iosfwd>
27#include <vector>
28
29namespace OpenMS
30{
/// One small-molecule hit produced by AccurateMassSearchEngine.
106 class OPENMS_DLLAPI AccurateMassSearchResult
107 {
108 public:
// NOTE(review): the declarations on the following four numbered lines are missing from this extract.
// The member index lists a default constructor, a destructor, a copy constructor and a copy-assignment
// operator for this class -- presumably these belong here; confirm against the real header.
112
115
119
123
/// Observed m/z of the source feature (Th).
125 double getObservedMZ() const;
126
/// Set the observed m/z of the source feature.
131 void setObservedMZ(const double& mz);
132
/// Theoretical m/z of the matched compound + adduct (Th).
134 double getCalculatedMZ() const;
135
/// Set the theoretical m/z of the matched compound + adduct.
140 void setCalculatedMZ(const double& mz);
141
/// Neutral mass used to query the database (Dalton).
145 double getQueryMass() const;
146
/// Set the neutral mass used to query the database.
151 void setQueryMass(const double& mass);
152
/// Neutral mass of the matched database compound (Dalton).
154 double getFoundMass() const;
155
/// Set the neutral mass of the matched database compound.
160 void setFoundMass(const double& mass);
161
/// Ion charge under which this match was attempted (positive for cations, negative for anions).
164 Int getCharge() const;
165
/// Set the ion charge under which this match was attempted.
170 void setCharge(const Int& ch);
171
/// Signed m/z error in ppm: (observed - theoretical) / theoretical * 1e6.
174 double getMZErrorPPM() const;
175
/// Set the signed m/z error in ppm (note: taken by value, unlike the other setters).
180 void setMZErrorPPM(const double ppm);
181
/// Retention time of the source feature (seconds).
183 double getObservedRT() const;
184
/// Set the retention time of the source feature (seconds).
189 void setObservedRT(const double& rt);
190
/// Intensity of the source feature (consensus-aggregated when the input was a ConsensusMap).
193 double getObservedIntensity() const;
194
/// Set the intensity of the source feature.
199 void setObservedIntensity(const double& intensity);
200
/// Per-sample intensities of the source feature when the input is a ConsensusMap; empty otherwise.
/// Returned by value (a copy), unlike the other vector getters which return const references.
203 std::vector<double> getIndividualIntensities() const;
204
/// Set the per-sample intensities of the source feature.
209 void setIndividualIntensities(const std::vector<double>& indiv_ints);
210
// NOTE(review): declaration missing from this extract; the member index lists
// `Size getMatchingIndex() const` -- presumably declared on the next numbered line.
215
/// Set the candidate-list index (index in the engine's internal candidate list).
220 void setMatchingIndex(const Size& idx);
221
// NOTE(review): declaration missing from this extract; the member index lists
// `Size getSourceFeatureIndex() const` -- presumably declared on the next numbered line.
225
/// Set the source feature index (index of the source feature in the input map).
230 void setSourceFeatureIndex(const Size& idx);
231
/// Adduct that produced the match, in AdductInfo notation (e.g. "M+H;1+", "M-H2O+H;1+").
234 const std::string& getFoundAdduct() const;
235
/// Set the adduct that produced the match.
240 void setFoundAdduct(const std::string& add);
241
/// Empirical formula of the matched database compound.
243 const std::string& getFormulaString() const;
244
/// Set the empirical formula of the matched database compound.
249 void setEmpiricalFormula(const std::string& ep);
250
/// All database identifiers (HMDB-style) that share the matched compound's formula and mass.
254 const std::vector<std::string>& getMatchingHMDBids() const;
255
/// Set the matched database identifiers.
260 void setMatchingHMDBids(const std::vector<std::string>& ids);
261
/// Per-isotopologue intensities of the underlying feature's mass traces.
265 const std::vector<double>& getMasstraceIntensities() const;
266
/// Set the underlying feature's per-isotopologue intensities.
271 void setMasstraceIntensities(const std::vector<double>& intensities);
272
/// Similarity between the observed isotope pattern (from getMasstraceIntensities) and the theoretical pattern.
276 double getIsotopesSimScore() const;
277
/// Set the isotope-pattern similarity score.
282 void setIsotopesSimScore(const double& score);
283
/// Diagnostic stream output of all fields (not a parseable serialization).
286 friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
287
288private:
// NOTE(review): several data members are missing from this extract (the numbering jumps from 288 to 293
// and from 293 to 298); the member index lists observed_mz_, theoretical_mz_, searched_mass_, charge_,
// mz_error_ppm_, observed_rt_, observed_intensity_, matching_index_, source_feature_index_ and
// isotopes_sim_score_ in addition to the members shown below.
/// neutral mass of the matched database compound (Da)
293 double db_mass_;
/// per-sample intensities (ConsensusMap input)
298 std::vector<double> individual_intensities_;
301
/// adduct in AdductInfo notation (e.g. "M+H;1+")
302 std::string found_adduct_;
/// empirical formula of matched compound
303 std::string empirical_formula_;
/// zero or more DB IDs (multiple isobars share a formula)
304 std::vector<std::string> matching_hmdb_ids_;
305
/// per-isotopologue intensities from the feature
306 std::vector<double> mass_trace_intensities_;
308 };
309
310 OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
311
343 class OPENMS_DLLAPI AccurateMassSearchEngine :
344 public DefaultParamHandler,
345 public ProgressLogger
346 {
347public:
348
350 static constexpr char search_engine_identifier[] = "AccurateMassSearchEngine";
351
354
357
/// Search for a specific observed mass by enumerating all possible adducts and searching M+X against
/// the database (the description in the member index is truncated -- TODO confirm full wording).
/// @p observed_adduct defaults to an empty EmpiricalFormula.
363 void queryByMZ(const double& observed_mz, const Int& observed_charge, const std::string& ion_mode, std::vector<AccurateMassSearchResult>& results, const EmpiricalFormula& observed_adduct = EmpiricalFormula()) const;
/// Query variant taking a Feature and its index; presumably fills @p results (non-const reference) -- verify in the .cpp.
364 void queryByFeature(const Feature& feature, const Size& feature_index, const std::string& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
/// Query variant taking a ConsensusFeature, its index and the number of maps; presumably fills @p results -- verify in the .cpp.
365 void queryByConsensusFeature(const ConsensusFeature& cfeat, const Size& cf_index, const Size& number_of_maps, const std::string& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
366
/// run() overload for a FeatureMap with MzTab output (both passed by non-const reference).
369 void run(FeatureMap&, MzTab&) const;
370
/// run() overload for a FeatureMap with MzTab-M output (both passed by non-const reference).
371 void run(FeatureMap&, MzTabM&) const;
372
/// run() overload for a ConsensusMap with MzTab output (both passed by non-const reference).
376 void run(ConsensusMap&, MzTab&) const;
377
/// Parse database and adduct files.
379 void init();
380
381protected:
382 void updateMembers_() override;
383
384private:
386
389 template <typename MAPTYPE> std::string resolveAutoMode_(const MAPTYPE& map) const
390 {
391 std::string ion_mode_internal;
392 std::string ion_mode_detect_msg;
393 if (map.size() > 0)
394 {
395 if (map[0].metaValueExists("scan_polarity"))
396 {
397 StringList pols = ListUtils::create<std::string>(StringUtils::toStr(map[0].getMetaValue("scan_polarity")), ';');
398 if (pols.size() == 1 && !pols[0].empty())
399 {
400 StringUtils::toLower(pols[0]);
401 if (pols[0] == "positive" || pols[0] == "negative")
402 {
403 ion_mode_internal = pols[0];
404 OPENMS_LOG_INFO << "Setting auto ion-mode to '" << ion_mode_internal << "' for file " << File::basename(map.getLoadedFilePath()) << std::endl;
405 }
406 else ion_mode_detect_msg ="Meta value 'scan_polarity' does not contain unknown ion mode" + StringUtils::toStr(map[0].getMetaValue("scan_polarity"));
407 }
408 else
409 {
410 ion_mode_detect_msg ="ambiguous ion mode: " + StringUtils::toStr(map[0].getMetaValue("scan_polarity"));
411 }
412 }
413 else
414 {
415 ion_mode_detect_msg =std::string("Meta value 'scan_polarity' not found in (Consensus-)Feature map");
416 }
417 }
418 else
419 { // do nothing, since map is
420 OPENMS_LOG_INFO << "Meta value 'scan_polarity' cannot be determined since (Consensus-)Feature map is empty!" << std::endl;
421 }
422
423 if (!ion_mode_detect_msg.empty())
424 {
425 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,std::string("Auto ionization mode could not resolve ion mode of data (") + ion_mode_detect_msg + "!");
426 }
427
428 return ion_mode_internal;
429 }
430
/// Parses the adducts file @p filename; presumably fills @p result (non-const reference) -- verify in the .cpp.
433 void parseAdductsFile_(const std::string& filename, std::vector<AdductInfo>& result);
/// Mass lookup for @p neutral_query_mass with @p diff_mass; presumably reports the matching
/// index range via @p hit_indices (non-const reference) -- verify in the .cpp.
434 void searchMass_(double neutral_query_mass, double diff_mass, std::pair<Size, Size>& hit_indices) const;
435
/// Add search results to a Consensus/Feature.
437 void annotate_(const std::vector<AccurateMassSearchResult>&, BaseFeature&) const;
438
/// Extract query results from feature.
440 std::vector<AccurateMassSearchResult> extractQueryResults_(const Feature& feature, const Size& feature_index, const std::string& ion_mode_internal, Size& dummy_count) const;
441
// NOTE(review): the head of this declaration is missing from the extract. The member index shows
// `void addMatchesToID_(IdentificationData& id, ...)` ("Add resulting matches to IdentificationData.")
// with a `const IdentificationData::ProcessingStepRef& step_ref` parameter before `BaseFeature& f`;
// the lines below are the surviving part of that parameter list.
445 const std::vector<AccurateMassSearchResult>& amr,
446 const IdentificationData::InputFileRef& file_ref,
447 const IdentificationData::ScoreTypeRef& mass_error_ppm_score_ref,
448 const IdentificationData::ScoreTypeRef& mass_error_Da_score_ref,
450 BaseFeature& f) const;
451
/// Presumably the cosine similarity of @p x and @p y (judging by the name only) -- TODO confirm.
454 double computeCosineSim_(const std::vector<double>& x, const std::vector<double>& y) const;
455
/// Presumably scores the isotope pattern of @p feat against formula @p form (judging by the name only) -- TODO confirm.
456 double computeIsotopePatternSimilarity_(const Feature& feat, const EmpiricalFormula& form) const;
457
/// Table of query results: a vector of result vectors.
458 typedef std::vector<std::vector<AccurateMassSearchResult> > QueryResultsTable;
459
/// MzTab export of @p overall_results; output presumably goes to @p mztab_out (non-const reference).
460 void exportMzTab_(const QueryResultsTable& overall_results, const Size number_of_maps, MzTab& mztab_out, const std::vector<std::string>& file_locations) const;
461
/// MzTab-M export of @p fmap; output presumably goes to @p mztabm_out (non-const reference).
462 void exportMzTabM_(const FeatureMap& fmap, MzTabM& mztabm_out) const;
463
465 typedef std::vector<std::vector<std::string> > MassIDMapping;
466 typedef std::map<std::string, std::vector<std::string> > HMDBPropsMapping;
467
469 {
470 double mass;
471 std::vector<std::string> massIDs;
472 std::string formula;
473 };
474 std::vector<MappingEntry_> mass_mappings_;
475
476 struct CompareEntryAndMass_ // defined here to allow for inlining by compiler
477 {
478 double asMass(const MappingEntry_& v) const
479 {
480 return v.mass;
481 }
482
483 double asMass(double t) const
484 {
485 return t;
486 }
487
488 template <typename T1, typename T2>
489 bool operator()(T1 const& t1, T2 const& t2) const
490 {
491 return asMass(t1) < asMass(t2);
492 }
493
494 };
495
497
499
500 bool legacyID_ = true;
501
504 std::string mass_error_unit_;
505 std::string ion_mode_;
507
510
513
514 std::vector<AdductInfo> pos_adducts_;
515 std::vector<AdductInfo> neg_adducts_;
516
517 std::string database_name_;
518 std::string database_version_;
520
522 };
523
524}
#define OPENMS_LOG_INFO
Macro for information/status messages.
Definition LogStream.h:587
An algorithm to search for exact mass matches from a spectrum against a database (e....
Definition AccurateMassSearchEngine.h:346
void run(FeatureMap &, MzTabM &) const
void init()
parse database and adduct files
void parseStructMappingFile_(const StringList &)
void queryByConsensusFeature(const ConsensusFeature &cfeat, const Size &cf_index, const Size &number_of_maps, const std::string &ion_mode, std::vector< AccurateMassSearchResult > &results) const
bool keep_unidentified_masses_
Definition AccurateMassSearchEngine.h:521
std::vector< AccurateMassSearchResult > extractQueryResults_(const Feature &feature, const Size &feature_index, const std::string &ion_mode_internal, Size &dummy_count) const
Extract query results from feature.
double mass
Definition AccurateMassSearchEngine.h:470
AccurateMassSearchEngine()
Default constructor.
std::string mass_error_unit_
Definition AccurateMassSearchEngine.h:504
StringList db_struct_file_
Definition AccurateMassSearchEngine.h:512
void exportMzTabM_(const FeatureMap &fmap, MzTabM &mztabm_out) const
std::map< std::string, std::vector< std::string > > HMDBPropsMapping
Definition AccurateMassSearchEngine.h:466
~AccurateMassSearchEngine() override
Default destructor.
void exportMzTab_(const QueryResultsTable &overall_results, const Size number_of_maps, MzTab &mztab_out, const std::vector< std::string > &file_locations) const
void addMatchesToID_(IdentificationData &id, const std::vector< AccurateMassSearchResult > &amr, const IdentificationData::InputFileRef &file_ref, const IdentificationData::ScoreTypeRef &mass_error_ppm_score_ref, const IdentificationData::ScoreTypeRef &mass_error_Da_score_ref, const IdentificationData::ProcessingStepRef &step_ref, BaseFeature &f) const
Add resulting matches to IdentificationData.
double mass_error_value_
parameter stuff
Definition AccurateMassSearchEngine.h:503
std::string database_location_
Definition AccurateMassSearchEngine.h:519
void run(ConsensusMap &, MzTab &) const
void searchMass_(double neutral_query_mass, double diff_mass, std::pair< Size, Size > &hit_indices) const
std::vector< std::vector< AccurateMassSearchResult > > QueryResultsTable
Definition AccurateMassSearchEngine.h:458
void annotate_(const std::vector< AccurateMassSearchResult > &, BaseFeature &) const
Add search results to a Consensus/Feature.
bool iso_similarity_
Definition AccurateMassSearchEngine.h:506
std::vector< AdductInfo > pos_adducts_
Definition AccurateMassSearchEngine.h:514
HMDBPropsMapping hmdb_properties_mapping_
Definition AccurateMassSearchEngine.h:496
std::string resolveAutoMode_(const MAPTYPE &map) const
private member functions
Definition AccurateMassSearchEngine.h:389
void queryByMZ(const double &observed_mz, const Int &observed_charge, const std::string &ion_mode, std::vector< AccurateMassSearchResult > &results, const EmpiricalFormula &observed_adduct=EmpiricalFormula()) const
search for a specific observed mass by enumerating all possible adducts and search M+X against databa...
std::string ion_mode_
Definition AccurateMassSearchEngine.h:505
double computeIsotopePatternSimilarity_(const Feature &feat, const EmpiricalFormula &form) const
void parseMappingFile_(const StringList &)
double computeCosineSim_(const std::vector< double > &x, const std::vector< double > &y) const
std::string database_version_
Definition AccurateMassSearchEngine.h:518
std::string pos_adducts_fname_
Definition AccurateMassSearchEngine.h:508
void parseAdductsFile_(const std::string &filename, std::vector< AdductInfo > &result)
bool is_initialized_
true if init() was called without any subsequent param changes
Definition AccurateMassSearchEngine.h:498
StringList db_mapping_file_
Definition AccurateMassSearchEngine.h:511
std::vector< AdductInfo > neg_adducts_
Definition AccurateMassSearchEngine.h:515
std::string neg_adducts_fname_
Definition AccurateMassSearchEngine.h:509
std::string formula
Definition AccurateMassSearchEngine.h:472
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
void run(FeatureMap &, MzTab &) const
void queryByFeature(const Feature &feature, const Size &feature_index, const std::string &ion_mode, std::vector< AccurateMassSearchResult > &results) const
std::vector< std::string > massIDs
Definition AccurateMassSearchEngine.h:471
std::vector< MappingEntry_ > mass_mappings_
Definition AccurateMassSearchEngine.h:474
std::vector< std::vector< std::string > > MassIDMapping
private member variables
Definition AccurateMassSearchEngine.h:465
std::string database_name_
Definition AccurateMassSearchEngine.h:517
Definition AccurateMassSearchEngine.h:469
One small-molecule hit produced by AccurateMassSearchEngine.
Definition AccurateMassSearchEngine.h:107
friend std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
Diagnostic stream output of all fields (not a parseable serialization).
AccurateMassSearchResult(const AccurateMassSearchResult &other)
Copy constructor.
Size getMatchingIndex() const
Index of this hit in the engine's internal candidate list for the source feature. Useful when reconst...
void setObservedIntensity(const double &intensity)
Set the intensity of the source feature.
std::vector< double > mass_trace_intensities_
per-isotopologue intensities from the feature
Definition AccurateMassSearchEngine.h:306
void setIndividualIntensities(const std::vector< double > &indiv_ints)
Set the per-sample intensities of the source feature.
double getObservedMZ() const
Observed m/z of the source feature (Th).
std::vector< double > individual_intensities_
per-sample intensities (ConsensusMap input)
Definition AccurateMassSearchEngine.h:298
double getFoundMass() const
Neutral mass of the matched database compound (Dalton).
double observed_intensity_
intensity of source feature
Definition AccurateMassSearchEngine.h:297
void setMatchingIndex(const Size &idx)
Set the candidate-list index.
void setCalculatedMZ(const double &mz)
Set the theoretical m/z of the matched compound + adduct.
double searched_mass_
neutral mass used to query the database (Da)
Definition AccurateMassSearchEngine.h:292
double theoretical_mz_
theoretical m/z of matched compound + adduct (Th)
Definition AccurateMassSearchEngine.h:291
AccurateMassSearchResult()
Default constructor; all numeric fields zero, strings and vectors empty.
void setObservedMZ(const double &mz)
Set the observed m/z of the source feature.
void setFoundAdduct(const std::string &add)
Set the adduct that produced the match.
double getMZErrorPPM() const
Signed m/z error in ppm: (observed − theoretical) / theoretical · 1e6.
const std::string & getFoundAdduct() const
Adduct that produced the match, in AdductInfo notation (e.g. "M+H;1+", "M-H2O+H;1+").
double observed_rt_
RT of source feature (s)
Definition AccurateMassSearchEngine.h:296
Int getCharge() const
Ion charge under which this match was attempted (positive for cations, negative for anions).
double isotopes_sim_score_
observed vs. theoretical isotope-pattern similarity
Definition AccurateMassSearchEngine.h:307
~AccurateMassSearchResult()
Default destructor.
void setEmpiricalFormula(const std::string &ep)
Set the empirical formula of the matched database compound.
double getQueryMass() const
Neutral mass used to query the database (Dalton), back- calculated from getObservedMZ assuming getFou...
void setMasstraceIntensities(const std::vector< double > &intensities)
Set the underlying feature's per-isotopologue intensities.
AccurateMassSearchResult & operator=(const AccurateMassSearchResult &other)
Copy-assignment operator.
std::vector< std::string > matching_hmdb_ids_
zero or more DB IDs (multiple isobars share a formula)
Definition AccurateMassSearchEngine.h:304
std::string empirical_formula_
empirical formula of matched compound
Definition AccurateMassSearchEngine.h:303
void setCharge(const Int &ch)
Set the ion charge under which this match was attempted.
Size getSourceFeatureIndex() const
Index of the source feature in the input map (back- reference to the raw observation).
void setMatchingHMDBids(const std::vector< std::string > &ids)
Set the matched database identifiers.
const std::vector< std::string > & getMatchingHMDBids() const
All database identifiers (HMDB-style) that share the matched compound's formula and mass; can be empt...
double getObservedRT() const
Retention time of the source feature (seconds).
double getObservedIntensity() const
Intensity of the source feature (consensus-aggregated when the input was a ConsensusMap).
void setSourceFeatureIndex(const Size &idx)
Set the source feature index.
double getIsotopesSimScore() const
Similarity between the observed isotope pattern (from getMasstraceIntensities) and the theoretical pa...
void setQueryMass(const double &mass)
Set the neutral mass used to query the database.
void setMZErrorPPM(const double ppm)
Set the signed m/z error in ppm.
double observed_mz_
Stored information/results of DB query.
Definition AccurateMassSearchEngine.h:290
double db_mass_
neutral mass of the matched database compound (Da)
Definition AccurateMassSearchEngine.h:293
Size source_feature_index_
index in the input feature/peak map
Definition AccurateMassSearchEngine.h:300
std::vector< double > getIndividualIntensities() const
Per-sample intensities of the source feature when the input is a ConsensusMap; empty otherwise.
Size matching_index_
index in the engine's internal candidate list
Definition AccurateMassSearchEngine.h:299
const std::vector< double > & getMasstraceIntensities() const
Per-isotopologue intensities of the underlying feature's mass traces; used as the empirical isotope p...
const std::string & getFormulaString() const
Empirical formula of the matched database compound.
void setFoundMass(const double &mass)
Set the neutral mass of the matched database compound.
void setIsotopesSimScore(const double &score)
Set the isotope-pattern similarity score.
double mz_error_ppm_
signed m/z error (observed − theoretical), ppm
Definition AccurateMassSearchEngine.h:295
void setObservedRT(const double &rt)
Set the retention time of the source feature (seconds).
std::string found_adduct_
adduct in AdductInfo notation (e.g. "M+H;1+")
Definition AccurateMassSearchEngine.h:302
Int charge_
charge assumed for this match (positive for cations)
Definition AccurateMassSearchEngine.h:294
double getCalculatedMZ() const
Theoretical m/z of the matched compound + adduct (Th).
A basic LC-MS feature.
Definition BaseFeature.h:34
A consensus feature spanning multiple LC-MS/MS experiments.
Definition ConsensusFeature.h:45
A container for consensus elements.
Definition ConsensusMap.h:67
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Representation of an empirical formula.
Definition EmpiricalFormula.h:62
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition Exception.h:317
A container for features.
Definition FeatureMap.h:78
An LC-MS feature.
Definition Feature.h:46
Definition IdentificationData.h:87
Data model of MzTab-M files Please see the MzTab-M specification at https://github....
Definition MzTabM.h:208
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition MzTab.h:455
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
Definition AccurateMassSearchEngine.h:477
bool operator()(T1 const &t1, T2 const &t2) const
Definition AccurateMassSearchEngine.h:489
double asMass(double t) const
Definition AccurateMassSearchEngine.h:483
double asMass(const MappingEntry_ &v) const
Definition AccurateMassSearchEngine.h:478