OpenMS
Loading...
Searching...
No Matches
AccurateMassSearchEngine.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Erhan Kenar, Chris Bielow $
7// --------------------------------------------------------------------------
8
9#pragma once
10
16#include <OpenMS/FORMAT/MzTab.h>
22#include <OpenMS/SYSTEM/File.h>
24
25
26#include <iosfwd>
27#include <vector>
28
29namespace OpenMS
30{
31 class OPENMS_DLLAPI AccurateMassSearchResult // holds the stored information/results of a DB query (presumably one hit per object -- confirm in the .cpp)
32 {
33 public:
36
39
42
45
47 double getObservedMZ() const; // get the m/z of the small molecule + adduct
48
50 void setObservedMZ(const double&); // set the m/z of the small molecule + adduct
51
53 double getCalculatedMZ() const; // get the theoretical m/z of the small molecule + adduct
54
56 void setCalculatedMZ(const double&); // set the theoretical m/z of the small molecule + adduct
57
59 double getQueryMass() const; // get the mass used to query the database (uncharged small molecule)
60
62 void setQueryMass(const double&); // set the mass used to query the database (uncharged small molecule)
63
65 double getFoundMass() const; // get the mass returned by the query (uncharged small molecule)
66
68 void setFoundMass(const double&); // set the mass returned by the query (uncharged small molecule)
69
71 Int getCharge() const; // get the charge
72
74 void setCharge(const Int&); // set the charge
75
77 double getMZErrorPPM() const; // get the error between observed and theoretical m/z in ppm
78
80 void setMZErrorPPM(const double); // set the error between observed and theoretical m/z in ppm
81
83 double getObservedRT() const; // get the observed rt
84
86 void setObservedRT(const double& rt); // set the observed rt
87
89 double getObservedIntensity() const; // get the observed intensity
90
92 void setObservedIntensity(const double&); // set the observed intensity
93
95 std::vector<double> getIndividualIntensities() const; // get the observed intensities (returned by value, i.e. as a copy)
96
98 void setIndividualIntensities(const std::vector<double>&); // set the observed intensities
99
101 void setMatchingIndex(const Size&); // NOTE(review): undocumented; presumably the index of the matching DB entry -- confirm
102
105
106 const String& getFoundAdduct() const; // NOTE(review): undocumented accessor pair for the adduct of this hit -- exact string format not visible here
107 void setFoundAdduct(const String&);
108
109 const String& getFormulaString() const; // NOTE(review): undocumented; presumably the empirical formula of the hit as text -- confirm
111
112 const std::vector<String>& getMatchingHMDBids() const; // NOTE(review): undocumented; IDs of the matching database entries, judging by the name -- confirm
113 void setMatchingHMDBids(const std::vector<String>&);
114
116 const std::vector<double>& getMasstraceIntensities() const; // return trace intensities of the underlying feature
117 void setMasstraceIntensities(const std::vector<double>&);
118
119 double getIsotopesSimScore() const; // NOTE(review): undocumented; score range and meaning are not visible in this header
120 void setIsotopesSimScore(const double&);
121
122 // debug/output functions
123 friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
124
125private:
130 double db_mass_; // NOTE(review): presumably the value behind get/setFoundMass() -- confirm in the .cpp
135 std::vector<double> individual_intensities_; // presumably backs get/setIndividualIntensities() -- confirm
138
141 std::vector<String> matching_hmdb_ids_; // presumably backs get/setMatchingHMDBids() -- confirm
142
143 std::vector<double> mass_trace_intensities_; // presumably backs get/setMasstraceIntensities() -- confirm
145 };
146
147 OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
148
180 class OPENMS_DLLAPI AccurateMassSearchEngine :
181 public DefaultParamHandler,
182 public ProgressLogger
183 {
184public:
185
187 static constexpr char search_engine_identifier[] = "AccurateMassSearchEngine";
188
191
194
200 void queryByMZ(const double& observed_mz, const Int& observed_charge, const String& ion_mode, std::vector<AccurateMassSearchResult>& results, const EmpiricalFormula& observed_adduct = EmpiricalFormula()) const;
201 void queryByFeature(const Feature& feature, const Size& feature_index, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
202 void queryByConsensusFeature(const ConsensusFeature& cfeat, const Size& cf_index, const Size& number_of_maps, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
203
206 void run(FeatureMap&, MzTab&) const;
207
208 void run(FeatureMap&, MzTabM&) const;
209
213 void run(ConsensusMap&, MzTab&) const;
214
216 void init();
217
218protected:
219 void updateMembers_() override;
220
221private:
223
224 /// Resolve the ion mode from the 'scan_polarity' meta value of the first element of @p map. Returns "positive" or "negative"; returns an empty String (after logging an info message) if @p map is empty.
225 /// @throws Exception::InvalidParameter if 'scan_polarity' is missing, does not hold exactly one non-empty ';'-separated entry, or holds an entry other than positive/negative.
226 template <typename MAPTYPE> String resolveAutoMode_(const MAPTYPE& map) const
227 {
228 String ion_mode_internal;
229 String ion_mode_detect_msg = ""; // stays empty on success; any text here becomes the exception message below
230 if (map.size() > 0)
231 {
232 if (map[0].metaValueExists("scan_polarity")) // only the first element of the map is inspected
233 {
234 StringList pols = ListUtils::create<String>(String(map[0].getMetaValue("scan_polarity")), ';');
235 if (pols.size() == 1 && !pols[0].empty()) // exactly one non-empty polarity entry is required
236 {
237 pols[0].toLower(); // compare case-insensitively
238 if (pols[0] == "positive" || pols[0] == "negative")
239 {
240 ion_mode_internal = pols[0];
241 OPENMS_LOG_INFO << "Setting auto ion-mode to '" << ion_mode_internal << "' for file " << File::basename(map.getLoadedFilePath()) << std::endl;
242 }
243 else ion_mode_detect_msg = String("Meta value 'scan_polarity' contains unknown ion mode: ") + String(map[0].getMetaValue("scan_polarity"));
244 }
245 else
246 {
247 ion_mode_detect_msg = String("ambiguous ion mode: ") + String(map[0].getMetaValue("scan_polarity"));
248 }
249 }
250 else
251 {
252 ion_mode_detect_msg = String("Meta value 'scan_polarity' not found in (Consensus-)Feature map");
253 }
254 }
255 else
256 { // nothing to resolve, since map is empty: log only and return an empty ion mode
257 OPENMS_LOG_INFO << "Meta value 'scan_polarity' cannot be determined since (Consensus-)Feature map is empty!" << std::endl;
258 }
259
260 if (!ion_mode_detect_msg.empty())
261 {
262 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Auto ionization mode could not resolve ion mode of data (") + ion_mode_detect_msg + ")!");
263 }
264
265 return ion_mode_internal;
266 }
267
270 void parseAdductsFile_(const String& filename, std::vector<AdductInfo>& result);
271 void searchMass_(double neutral_query_mass, double diff_mass, std::pair<Size, Size>& hit_indices) const;
272
274 void annotate_(const std::vector<AccurateMassSearchResult>&, BaseFeature&) const;
275
277 std::vector<AccurateMassSearchResult> extractQueryResults_(const Feature& feature, const Size& feature_index, const String& ion_mode_internal, Size& dummy_count) const;
278
282 const std::vector<AccurateMassSearchResult>& amr,
283 const IdentificationData::InputFileRef& file_ref,
284 const IdentificationData::ScoreTypeRef& mass_error_ppm_score_ref,
285 const IdentificationData::ScoreTypeRef& mass_error_Da_score_ref,
287 BaseFeature& f) const;
288
291 double computeCosineSim_(const std::vector<double>& x, const std::vector<double>& y) const;
292
293 double computeIsotopePatternSimilarity_(const Feature& feat, const EmpiricalFormula& form) const;
294
295 typedef std::vector<std::vector<AccurateMassSearchResult> > QueryResultsTable;
296
297 void exportMzTab_(const QueryResultsTable& overall_results, const Size number_of_maps, MzTab& mztab_out, const std::vector<String>& file_locations) const;
298
299 void exportMzTabM_(const FeatureMap& fmap, MzTabM& mztabm_out) const;
300
302 typedef std::vector<std::vector<String> > MassIDMapping;
303 typedef std::map<String, std::vector<String> > HMDBPropsMapping;
304
306 {
307 double mass;
308 std::vector<String> massIDs;
310 };
311 std::vector<MappingEntry_> mass_mappings_;
312
313 struct CompareEntryAndMass_ // kept in the header so the compiler can inline the comparison
314 {
315 double asMass(const MappingEntry_& entry) const // a mapping entry is keyed by its mass
316 {
317 return entry.mass;
318 }
319
320 double asMass(double mass_value) const // a plain double already is the key
321 {
322 return mass_value;
323 }
324
325 template <typename Lhs, typename Rhs>
326 bool operator()(Lhs const& lhs, Rhs const& rhs) const // "lhs before rhs" by mass; each operand may be an entry or a double
327 {
328 return asMass(rhs) > asMass(lhs);
329 }
330
331 };
332
334
336
337 bool legacyID_ = true;
338
344
347
350
351 std::vector<AdductInfo> pos_adducts_;
352 std::vector<AdductInfo> neg_adducts_;
353
357
359 };
360
361}
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition LogStream.h:452
An algorithm to search for exact mass matches from a spectrum against a database (e....
Definition AccurateMassSearchEngine.h:183
void run(FeatureMap &, MzTabM &) const
void init()
parse database and adduct files
void parseStructMappingFile_(const StringList &)
std::vector< std::vector< String > > MassIDMapping
private member variables
Definition AccurateMassSearchEngine.h:302
String pos_adducts_fname_
Definition AccurateMassSearchEngine.h:345
bool keep_unidentified_masses_
Definition AccurateMassSearchEngine.h:358
double mass
Definition AccurateMassSearchEngine.h:307
void queryByConsensusFeature(const ConsensusFeature &cfeat, const Size &cf_index, const Size &number_of_maps, const String &ion_mode, std::vector< AccurateMassSearchResult > &results) const
AccurateMassSearchEngine()
Default constructor.
String mass_error_unit_
Definition AccurateMassSearchEngine.h:341
StringList db_struct_file_
Definition AccurateMassSearchEngine.h:349
void exportMzTabM_(const FeatureMap &fmap, MzTabM &mztabm_out) const
~AccurateMassSearchEngine() override
Default destructor.
void addMatchesToID_(IdentificationData &id, const std::vector< AccurateMassSearchResult > &amr, const IdentificationData::InputFileRef &file_ref, const IdentificationData::ScoreTypeRef &mass_error_ppm_score_ref, const IdentificationData::ScoreTypeRef &mass_error_Da_score_ref, const IdentificationData::ProcessingStepRef &step_ref, BaseFeature &f) const
Add resulting matches to IdentificationData.
double mass_error_value_
parameter stuff
Definition AccurateMassSearchEngine.h:340
void run(ConsensusMap &, MzTab &) const
void searchMass_(double neutral_query_mass, double diff_mass, std::pair< Size, Size > &hit_indices) const
void queryByMZ(const double &observed_mz, const Int &observed_charge, const String &ion_mode, std::vector< AccurateMassSearchResult > &results, const EmpiricalFormula &observed_adduct=EmpiricalFormula()) const
search for a specific observed mass by enumerating all possible adducts and search M+X against databa...
std::vector< std::vector< AccurateMassSearchResult > > QueryResultsTable
Definition AccurateMassSearchEngine.h:295
void annotate_(const std::vector< AccurateMassSearchResult > &, BaseFeature &) const
Add search results to a Consensus/Feature.
bool iso_similarity_
Definition AccurateMassSearchEngine.h:343
std::vector< AdductInfo > pos_adducts_
Definition AccurateMassSearchEngine.h:351
String neg_adducts_fname_
Definition AccurateMassSearchEngine.h:346
HMDBPropsMapping hmdb_properties_mapping_
Definition AccurateMassSearchEngine.h:333
std::vector< String > massIDs
Definition AccurateMassSearchEngine.h:308
String ion_mode_
Definition AccurateMassSearchEngine.h:342
double computeIsotopePatternSimilarity_(const Feature &feat, const EmpiricalFormula &form) const
void parseMappingFile_(const StringList &)
String database_version_
Definition AccurateMassSearchEngine.h:355
double computeCosineSim_(const std::vector< double > &x, const std::vector< double > &y) const
String database_location_
Definition AccurateMassSearchEngine.h:356
bool is_initialized_
true if init() was called without any subsequent param changes
Definition AccurateMassSearchEngine.h:335
StringList db_mapping_file_
Definition AccurateMassSearchEngine.h:348
std::vector< AdductInfo > neg_adducts_
Definition AccurateMassSearchEngine.h:352
String database_name_
Definition AccurateMassSearchEngine.h:354
String formula
Definition AccurateMassSearchEngine.h:309
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
void run(FeatureMap &, MzTab &) const
String resolveAutoMode_(const MAPTYPE &map) const
private member functions
Definition AccurateMassSearchEngine.h:226
std::map< String, std::vector< String > > HMDBPropsMapping
Definition AccurateMassSearchEngine.h:303
void exportMzTab_(const QueryResultsTable &overall_results, const Size number_of_maps, MzTab &mztab_out, const std::vector< String > &file_locations) const
void queryByFeature(const Feature &feature, const Size &feature_index, const String &ion_mode, std::vector< AccurateMassSearchResult > &results) const
void parseAdductsFile_(const String &filename, std::vector< AdductInfo > &result)
std::vector< MappingEntry_ > mass_mappings_
Definition AccurateMassSearchEngine.h:311
std::vector< AccurateMassSearchResult > extractQueryResults_(const Feature &feature, const Size &feature_index, const String &ion_mode_internal, Size &dummy_count) const
Extract query results from feature.
Definition AccurateMassSearchEngine.h:306
Definition AccurateMassSearchEngine.h:32
friend std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
std::vector< double > mass_trace_intensities_
Definition AccurateMassSearchEngine.h:143
void setMasstraceIntensities(const std::vector< double > &)
std::vector< String > matching_hmdb_ids_
Definition AccurateMassSearchEngine.h:141
double getObservedMZ() const
get the m/z of the small molecule + adduct
std::vector< double > individual_intensities_
Definition AccurateMassSearchEngine.h:135
double getFoundMass() const
get the mass returned by the query (uncharged small molecule)
void setQueryMass(const double &)
set the mass used to query the database (uncharged small molecule)
double observed_intensity_
Definition AccurateMassSearchEngine.h:134
void setIsotopesSimScore(const double &)
void setSourceFeatureIndex(const Size &)
AccurateMassSearchResult & operator=(const AccurateMassSearchResult &)
assignment operator
double searched_mass_
Definition AccurateMassSearchEngine.h:129
void setFoundMass(const double &)
set the mass returned by the query (uncharged small molecule)
double theoretical_mz_
Definition AccurateMassSearchEngine.h:128
AccurateMassSearchResult()
Default constructor.
double getMZErrorPPM() const
get the error between observed and theoretical m/z in ppm
double observed_rt_
Definition AccurateMassSearchEngine.h:133
Int getCharge() const
get the charge
double isotopes_sim_score_
Definition AccurateMassSearchEngine.h:144
~AccurateMassSearchResult()
Default destructor.
String empirical_formula_
Definition AccurateMassSearchEngine.h:140
void setCharge(const Int &)
set the charge
void setFoundAdduct(const String &)
double getQueryMass() const
get the mass used to query the database (uncharged small molecule)
void setObservedMZ(const double &)
set the m/z of the small molecule + adduct
AccurateMassSearchResult(const AccurateMassSearchResult &)
copy constructor
void setEmpiricalFormula(const String &)
void setMZErrorPPM(const double)
set the error between observed and theoretical m/z in ppm
double getObservedRT() const
get the observed rt
const String & getFoundAdduct() const
double getObservedIntensity() const
get the observed intensity
String found_adduct_
Definition AccurateMassSearchEngine.h:139
const std::vector< String > & getMatchingHMDBids() const
double observed_mz_
Stored information/results of DB query.
Definition AccurateMassSearchEngine.h:127
double db_mass_
Definition AccurateMassSearchEngine.h:130
void setMatchingIndex(const Size &)
Size source_feature_index_
Definition AccurateMassSearchEngine.h:137
std::vector< double > getIndividualIntensities() const
get the observed intensities
Size matching_index_
Definition AccurateMassSearchEngine.h:136
const std::vector< double > & getMasstraceIntensities() const
return the mass trace intensities of the underlying feature
void setObservedIntensity(const double &)
set the observed intensity
void setMatchingHMDBids(const std::vector< String > &)
double mz_error_ppm_
Definition AccurateMassSearchEngine.h:132
const String & getFormulaString() const
void setObservedRT(const double &rt)
set the observed rt
Int charge_
Definition AccurateMassSearchEngine.h:131
void setCalculatedMZ(const double &)
set the theoretical m/z of the small molecule + adduct
void setIndividualIntensities(const std::vector< double > &)
set the observed intensities
double getCalculatedMZ() const
get the theoretical m/z of the small molecule + adduct
A basic LC-MS feature.
Definition BaseFeature.h:34
A consensus feature spanning multiple LC-MS/MS experiments.
Definition ConsensusFeature.h:45
A container for consensus elements.
Definition ConsensusMap.h:68
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Representation of an empirical formula.
Definition EmpiricalFormula.h:63
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition Exception.h:317
A container for features.
Definition FeatureMap.h:82
An LC-MS feature.
Definition Feature.h:46
Definition IdentificationData.h:87
Data model of MzTab-M files. Please see the MzTab-M specification at https://github....
Definition MzTabM.h:208
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition MzTab.h:455
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
A more convenient string class.
Definition String.h:34
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type, e.g. used for a variable that holds the result of size()
Definition Types.h:97
std::vector< String > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
Definition AccurateMassSearchEngine.h:314
bool operator()(T1 const &t1, T2 const &t2) const
Definition AccurateMassSearchEngine.h:326
double asMass(double t) const
Definition AccurateMassSearchEngine.h:320
double asMass(const MappingEntry_ &v) const
Definition AccurateMassSearchEngine.h:315