OpenMS
AccurateMassSearchEngine.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Erhan Kenar, Chris Bielow $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
12 #include <OpenMS/KERNEL/Feature.h>
16 #include <OpenMS/FORMAT/MzTab.h>
17 #include <OpenMS/FORMAT/MzTabM.h>
22 #include <OpenMS/SYSTEM/File.h>
24 
25 
26 #include <iosfwd>
27 #include <vector>
28 
29 namespace OpenMS
30 {
  /// Value class holding the information stored for one accurate mass search hit
  /// (observed/theoretical m/z, query/found mass, charge, errors, intensities, matched IDs).
  /// NOTE(review): this listing appears to have lost the constructor, copy constructor,
  /// destructor and assignment operator declarations (the empty numbered lines below) — confirm against the real header.
31  class OPENMS_DLLAPI AccurateMassSearchResult
32  {
33  public:
36 
39 
42 
45 
  /// get the m/z of the small molecule + adduct
47  double getObservedMZ() const;
48 
  /// set the m/z of the small molecule + adduct
50  void setObservedMZ(const double&);
51 
  /// get the theoretical m/z of the small molecule + adduct
53  double getCalculatedMZ() const;
54 
  /// set the theoretical m/z of the small molecule + adduct
56  void setCalculatedMZ(const double&);
57 
  /// get the mass used to query the database (uncharged small molecule)
59  double getQueryMass() const;
60 
  /// set the mass used to query the database (uncharged small molecule)
62  void setQueryMass(const double&);
63 
  /// get the mass returned by the query (uncharged small molecule)
65  double getFoundMass() const;
66 
  /// set the mass returned by the query (uncharged small molecule)
68  void setFoundMass(const double&);
69 
  /// get the charge
71  Int getCharge() const;
72 
  /// set the charge
74  void setCharge(const Int&);
75 
  /// get the error between observed and theoretical m/z in ppm
77  double getMZErrorPPM() const;
78 
  /// set the error between observed and theoretical m/z in ppm
80  void setMZErrorPPM(const double);
81 
  /// get the observed rt
83  double getObservedRT() const;
84 
  /// set the observed rt
86  void setObservedRT(const double& rt);
87 
  /// get the observed intensity
89  double getObservedIntensity() const;
90 
  /// set the observed intensity
92  void setObservedIntensity(const double&);
93 
  /// get the observed intensities (returned by value, i.e. as a copy)
95  std::vector<double> getIndividualIntensities() const;
96 
  /// set the observed intensities
98  void setIndividualIntensities(const std::vector<double>&);
99 
  /// set the matching index (semantics of the index are not visible here — TODO confirm in the .cpp)
101  void setMatchingIndex(const Size&);
102 
105 
  /// get/set the adduct found for this hit (stored as a String)
106  const String& getFoundAdduct() const;
107  void setFoundAdduct(const String&);
108 
  /// get the formula string of this hit
109  const String& getFormulaString() const;
111 
  /// get/set the list of matching HMDB identifiers
112  const std::vector<String>& getMatchingHMDBids() const;
113  void setMatchingHMDBids(const std::vector<String>&);
114 
  /// return trace intensities of the underlying feature
116  const std::vector<double>& getMasstraceIntensities() const;
117  void setMasstraceIntensities(const std::vector<double>&);
118 
  /// get/set the isotope similarity score
119  double getIsotopesSimScore() const;
120  void setIsotopesSimScore(const double&);
121 
122  // debug/output functions
123  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
124 
125 private:
  /// Stored information/results of DB query.
  /// NOTE(review): several members seem to be missing from this listing (the numbering skips lines) — confirm against the real header.
127  double observed_mz_;
130  double db_mass_;
133  double observed_rt_;
135  std::vector<double> individual_intensities_;
138 
141  std::vector<String> matching_hmdb_ids_;
142 
143  std::vector<double> mass_trace_intensities_;
145  };
146 
147  OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
148 
180  class OPENMS_DLLAPI AccurateMassSearchEngine :
181  public DefaultParamHandler,
182  public ProgressLogger
183  {
184 public:
185 
187  static constexpr char search_engine_identifier[] = "AccurateMassSearchEngine";
188 
191 
194 
200  void queryByMZ(const double& observed_mz, const Int& observed_charge, const String& ion_mode, std::vector<AccurateMassSearchResult>& results, const EmpiricalFormula& observed_adduct = EmpiricalFormula()) const;
201  void queryByFeature(const Feature& feature, const Size& feature_index, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
202  void queryByConsensusFeature(const ConsensusFeature& cfeat, const Size& cf_index, const Size& number_of_maps, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
203 
206  void run(FeatureMap&, MzTab&) const;
207 
208  void run(FeatureMap&, MzTabM&) const;
209 
213  void run(ConsensusMap&, MzTab&) const;
214 
216  void init();
217 
218 protected:
219  void updateMembers_() override;
220 
221 private:
223 
226  template <typename MAPTYPE> String resolveAutoMode_(const MAPTYPE& map) const
227  {
228  String ion_mode_internal;
229  String ion_mode_detect_msg = "";
230  if (map.size() > 0)
231  {
232  if (map[0].metaValueExists("scan_polarity"))
233  {
234  StringList pols = ListUtils::create<String>(String(map[0].getMetaValue("scan_polarity")), ';');
235  if (pols.size() == 1 && !pols[0].empty())
236  {
237  pols[0].toLower();
238  if (pols[0] == "positive" || pols[0] == "negative")
239  {
240  ion_mode_internal = pols[0];
241  OPENMS_LOG_INFO << "Setting auto ion-mode to '" << ion_mode_internal << "' for file " << File::basename(map.getLoadedFilePath()) << std::endl;
242  }
243  else ion_mode_detect_msg = String("Meta value 'scan_polarity' does not contain unknown ion mode") + String(map[0].getMetaValue("scan_polarity"));
244  }
245  else
246  {
247  ion_mode_detect_msg = String("ambiguous ion mode: ") + String(map[0].getMetaValue("scan_polarity"));
248  }
249  }
250  else
251  {
252  ion_mode_detect_msg = String("Meta value 'scan_polarity' not found in (Consensus-)Feature map");
253  }
254  }
255  else
256  { // do nothing, since map is
257  OPENMS_LOG_INFO << "Meta value 'scan_polarity' cannot be determined since (Consensus-)Feature map is empty!" << std::endl;
258  }
259 
260  if (!ion_mode_detect_msg.empty())
261  {
262  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Auto ionization mode could not resolve ion mode of data (") + ion_mode_detect_msg + "!");
263  }
264 
265  return ion_mode_internal;
266  }
267 
270  void parseAdductsFile_(const String& filename, std::vector<AdductInfo>& result);
271  void searchMass_(double neutral_query_mass, double diff_mass, std::pair<Size, Size>& hit_indices) const;
272 
274  void annotate_(const std::vector<AccurateMassSearchResult>&, BaseFeature&) const;
275 
277  std::vector<AccurateMassSearchResult> extractQueryResults_(const Feature& feature, const Size& feature_index, const String& ion_mode_internal, Size& dummy_count) const;
278 
281  IdentificationData& id,
282  const std::vector<AccurateMassSearchResult>& amr,
283  const IdentificationData::InputFileRef& file_ref,
284  const IdentificationData::ScoreTypeRef& mass_error_ppm_score_ref,
285  const IdentificationData::ScoreTypeRef& mass_error_Da_score_ref,
287  BaseFeature& f) const;
288 
291  double computeCosineSim_(const std::vector<double>& x, const std::vector<double>& y) const;
292 
293  double computeIsotopePatternSimilarity_(const Feature& feat, const EmpiricalFormula& form) const;
294 
295  typedef std::vector<std::vector<AccurateMassSearchResult> > QueryResultsTable;
296 
297  void exportMzTab_(const QueryResultsTable& overall_results, const Size number_of_maps, MzTab& mztab_out, const std::vector<String>& file_locations) const;
298 
299  void exportMzTabM_(const FeatureMap& fmap, MzTabM& mztabm_out) const;
300 
302  typedef std::vector<std::vector<String> > MassIDMapping;
303  typedef std::map<String, std::vector<String> > HMDBPropsMapping;
304 
306  {
307  double mass;
308  std::vector<String> massIDs;
310  };
311  std::vector<MappingEntry_> mass_mappings_;
312 
313  struct CompareEntryAndMass_ // defined here to allow for inlining by compiler
314  {
315  double asMass(const MappingEntry_& v) const
316  {
317  return v.mass;
318  }
319 
320  double asMass(double t) const
321  {
322  return t;
323  }
324 
325  template <typename T1, typename T2>
326  bool operator()(T1 const& t1, T2 const& t2) const
327  {
328  return asMass(t1) < asMass(t2);
329  }
330 
331  };
332 
334 
336 
337  bool legacyID_ = true;
338 
344 
347 
350 
351  std::vector<AdductInfo> pos_adducts_;
352  std::vector<AdductInfo> neg_adducts_;
353 
357 
359  };
360 
361 }
#define OPENMS_LOG_INFO
Macro to be used when information, e.g. a status, should be reported.
Definition: LogStream.h:449
An algorithm to search for exact mass matches from a spectrum against a database (e....
Definition: AccurateMassSearchEngine.h:183
void run(FeatureMap &, MzTabM &) const
void init()
parse database and adduct files
void parseStructMappingFile_(const StringList &)
std::vector< std::vector< String > > MassIDMapping
private member variables
Definition: AccurateMassSearchEngine.h:302
String pos_adducts_fname_
Definition: AccurateMassSearchEngine.h:345
bool keep_unidentified_masses_
Definition: AccurateMassSearchEngine.h:358
double mass
Definition: AccurateMassSearchEngine.h:307
void queryByConsensusFeature(const ConsensusFeature &cfeat, const Size &cf_index, const Size &number_of_maps, const String &ion_mode, std::vector< AccurateMassSearchResult > &results) const
AccurateMassSearchEngine()
Default constructor.
String mass_error_unit_
Definition: AccurateMassSearchEngine.h:341
StringList db_struct_file_
Definition: AccurateMassSearchEngine.h:349
void exportMzTabM_(const FeatureMap &fmap, MzTabM &mztabm_out) const
~AccurateMassSearchEngine() override
Default destructor.
void addMatchesToID_(IdentificationData &id, const std::vector< AccurateMassSearchResult > &amr, const IdentificationData::InputFileRef &file_ref, const IdentificationData::ScoreTypeRef &mass_error_ppm_score_ref, const IdentificationData::ScoreTypeRef &mass_error_Da_score_ref, const IdentificationData::ProcessingStepRef &step_ref, BaseFeature &f) const
Add resulting matches to IdentificationData.
double mass_error_value_
parameter stuff
Definition: AccurateMassSearchEngine.h:340
void run(ConsensusMap &, MzTab &) const
void searchMass_(double neutral_query_mass, double diff_mass, std::pair< Size, Size > &hit_indices) const
void queryByMZ(const double &observed_mz, const Int &observed_charge, const String &ion_mode, std::vector< AccurateMassSearchResult > &results, const EmpiricalFormula &observed_adduct=EmpiricalFormula()) const
search for a specific observed mass by enumerating all possible adducts and search M+X against databa...
std::vector< std::vector< AccurateMassSearchResult > > QueryResultsTable
Definition: AccurateMassSearchEngine.h:295
void annotate_(const std::vector< AccurateMassSearchResult > &, BaseFeature &) const
Add search results to a Consensus/Feature.
bool iso_similarity_
Definition: AccurateMassSearchEngine.h:343
std::vector< AdductInfo > pos_adducts_
Definition: AccurateMassSearchEngine.h:351
String neg_adducts_fname_
Definition: AccurateMassSearchEngine.h:346
HMDBPropsMapping hmdb_properties_mapping_
Definition: AccurateMassSearchEngine.h:333
std::vector< String > massIDs
Definition: AccurateMassSearchEngine.h:308
String ion_mode_
Definition: AccurateMassSearchEngine.h:342
double computeIsotopePatternSimilarity_(const Feature &feat, const EmpiricalFormula &form) const
void parseMappingFile_(const StringList &)
String database_version_
Definition: AccurateMassSearchEngine.h:355
double computeCosineSim_(const std::vector< double > &x, const std::vector< double > &y) const
String database_location_
Definition: AccurateMassSearchEngine.h:356
bool is_initialized_
true if init() was called without any subsequent parameter changes
Definition: AccurateMassSearchEngine.h:335
std::vector< AccurateMassSearchResult > extractQueryResults_(const Feature &feature, const Size &feature_index, const String &ion_mode_internal, Size &dummy_count) const
Extract query results from feature.
StringList db_mapping_file_
Definition: AccurateMassSearchEngine.h:348
std::vector< AdductInfo > neg_adducts_
Definition: AccurateMassSearchEngine.h:352
String database_name_
Definition: AccurateMassSearchEngine.h:354
String formula
Definition: AccurateMassSearchEngine.h:309
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
void run(FeatureMap &, MzTab &) const
String resolveAutoMode_(const MAPTYPE &map) const
private member functions
Definition: AccurateMassSearchEngine.h:226
std::map< String, std::vector< String > > HMDBPropsMapping
Definition: AccurateMassSearchEngine.h:303
void exportMzTab_(const QueryResultsTable &overall_results, const Size number_of_maps, MzTab &mztab_out, const std::vector< String > &file_locations) const
void queryByFeature(const Feature &feature, const Size &feature_index, const String &ion_mode, std::vector< AccurateMassSearchResult > &results) const
void parseAdductsFile_(const String &filename, std::vector< AdductInfo > &result)
std::vector< MappingEntry_ > mass_mappings_
Definition: AccurateMassSearchEngine.h:311
Definition: AccurateMassSearchEngine.h:306
Definition: AccurateMassSearchEngine.h:32
const std::vector< String > & getMatchingHMDBids() const
std::vector< double > mass_trace_intensities_
Definition: AccurateMassSearchEngine.h:143
std::vector< double > getIndividualIntensities() const
get the observed intensities
void setMasstraceIntensities(const std::vector< double > &)
std::vector< String > matching_hmdb_ids_
Definition: AccurateMassSearchEngine.h:141
double getObservedMZ() const
get the m/z of the small molecule + adduct
std::vector< double > individual_intensities_
Definition: AccurateMassSearchEngine.h:135
AccurateMassSearchResult & operator=(const AccurateMassSearchResult &)
assignment operator
double getFoundMass() const
get the mass returned by the query (uncharged small molecule)
void setQueryMass(const double &)
set the mass used to query the database (uncharged small molecule)
const String & getFoundAdduct() const
double observed_intensity_
Definition: AccurateMassSearchEngine.h:134
void setIsotopesSimScore(const double &)
void setSourceFeatureIndex(const Size &)
const String & getFormulaString() const
double searched_mass_
Definition: AccurateMassSearchEngine.h:129
void setFoundMass(const double &)
set the mass returned by the query (uncharged small molecule)
double theoretical_mz_
Definition: AccurateMassSearchEngine.h:128
AccurateMassSearchResult()
Default constructor.
double getMZErrorPPM() const
get the error between observed and theoretical m/z in ppm
double observed_rt_
Definition: AccurateMassSearchEngine.h:133
Int getCharge() const
get the charge
double isotopes_sim_score_
Definition: AccurateMassSearchEngine.h:144
~AccurateMassSearchResult()
Default destructor.
String empirical_formula_
Definition: AccurateMassSearchEngine.h:140
void setCharge(const Int &)
set the charge
friend std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
void setFoundAdduct(const String &)
double getQueryMass() const
get the mass used to query the database (uncharged small molecule)
void setObservedMZ(const double &)
set the m/z of the small molecule + adduct
AccurateMassSearchResult(const AccurateMassSearchResult &)
copy constructor
void setEmpiricalFormula(const String &)
void setMZErrorPPM(const double)
set the error between observed and theoretical m/z in ppm
double getObservedRT() const
get the observed rt
double getObservedIntensity() const
get the observed intensity
String found_adduct_
Definition: AccurateMassSearchEngine.h:139
double observed_mz_
Stored information/results of DB query.
Definition: AccurateMassSearchEngine.h:127
double db_mass_
Definition: AccurateMassSearchEngine.h:130
void setMatchingIndex(const Size &)
Size source_feature_index_
Definition: AccurateMassSearchEngine.h:137
Size matching_index_
Definition: AccurateMassSearchEngine.h:136
void setObservedIntensity(const double &)
set the observed intensity
void setMatchingHMDBids(const std::vector< String > &)
double mz_error_ppm_
Definition: AccurateMassSearchEngine.h:132
const std::vector< double > & getMasstraceIntensities() const
return trace intensities of the underlying feature;
void setObservedRT(const double &rt)
set the observed rt
Int charge_
Definition: AccurateMassSearchEngine.h:131
void setCalculatedMZ(const double &)
set the theoretical m/z of the small molecule + adduct
void setIndividualIntensities(const std::vector< double > &)
set the observed intensities
double getCalculatedMZ() const
get the theoretical m/z of the small molecule + adduct
A basic LC-MS feature.
Definition: BaseFeature.h:33
A consensus feature spanning multiple LC-MS/MS experiments.
Definition: ConsensusFeature.h:45
A container for consensus elements.
Definition: ConsensusMap.h:66
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Representation of an empirical formula.
Definition: EmpiricalFormula.h:59
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:315
A container for features.
Definition: FeatureMap.h:80
An LC-MS feature.
Definition: Feature.h:46
static String basename(const String &file)
Definition: IdentificationData.h:87
Data model of MzTab-M files. Please see the MzTab-M specification at https://github....
Definition: MzTabM.h:208
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition: MzTab.h:452
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:76
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:44
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
Definition: AccurateMassSearchEngine.h:314
bool operator()(T1 const &t1, T2 const &t2) const
Definition: AccurateMassSearchEngine.h:326
double asMass(double t) const
Definition: AccurateMassSearchEngine.h:320
double asMass(const MappingEntry_ &v) const
Definition: AccurateMassSearchEngine.h:315