Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
AccurateMassSearchEngine.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Erhan Kenar, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_ID_ACCURATEMASSSEARCHENGINE_H
36 #define OPENMS_ANALYSIS_ID_ACCURATEMASSSEARCHENGINE_H
37 
39 #include <OpenMS/KERNEL/Feature.h>
43 #include <OpenMS/FORMAT/MzTab.h>
48 #include <OpenMS/SYSTEM/File.h>
50 
51 #include <iosfwd>
52 #include <vector>
53 
54 namespace OpenMS
55 {
56  class EmpiricalFormula;
57 
58  class OPENMS_DLLAPI AdductInfo
59  {
60  // Describes a single adduct: a name, its empirical formula (with cached mass), a charge and a molecule multiplier -- see the private members.
61  public:
72  AdductInfo(const String& name, const EmpiricalFormula& adduct, int charge, UInt mol_multiplier = 1); // mol_multiplier defaults to 1
73  // presumably converts an observed m/z into the neutral mass implied by this adduct -- implementation not visible here, confirm
75  double getNeutralMass(double observed_mz) const;
76  // presumably the inverse of getNeutralMass(): the m/z expected for a given neutral mass -- confirm
78  double getMZ(double neutral_mass) const;
79  // NOTE(review): db_entry is taken by value, so every call copies the formula; the compatibility criterion is not visible here
82  bool isCompatible(EmpiricalFormula db_entry) const;
83  // presumably returns charge_ -- confirm against the implementation
85  int getCharge() const;
86  // presumably returns name_; the result is a reference, not a copy
88  const String& getName() const;
89  // static function returning an AdductInfo built from a textual adduct description (the member comments use '2M+H;+1' as an example)
93  static AdductInfo parseAdductString(const String& adduct);
94
95  private:
97  AdductInfo(); // private: outside code cannot default-construct an AdductInfo
98
100  String name_; ///< arbitrary name, only used for error reporting
101  EmpiricalFormula ef_; ///< EF for the actual adduct e.g. 'H' in 2M+H;+1
102  double mass_; ///< computed from ef_.getMonoWeight(), but stored explicitly for efficiency
103  int charge_; ///< negative or positive charge; must not be 0
104  UInt mol_multiplier_; ///< Mol multiplier, e.g. 2 in 2M+H;+1
105  };
106 
107  class OPENMS_DLLAPI AccurateMassSearchResult
108  {
109  public:
112 
115 
118 
121 
123  double getObservedMZ() const;
124 
126  void setObservedMZ(const double&);
127 
129  double getCalculatedMZ() const;
130 
132  void setCalculatedMZ(const double&);
133 
135  double getQueryMass() const;
136 
138  void setQueryMass(const double&);
139 
141  double getFoundMass() const;
142 
144  void setFoundMass(const double&);
145 
147  Int getCharge() const;
148 
150  void setCharge(const Int&);
151 
153  double getMZErrorPPM() const;
154 
156  void setMZErrorPPM(const double);
157 
159  double getObservedRT() const;
160 
162  void setObservedRT(const double& rt);
163 
165  double getObservedIntensity() const;
166 
168  void setObservedIntensity(const double&);
169 
171  std::vector<double> getIndividualIntensities() const;
172 
174  void setIndividualIntensities(const std::vector<double>&);
175 
176  Size getMatchingIndex() const;
177  void setMatchingIndex(const Size&);
178 
179  Size getSourceFeatureIndex() const;
180  void setSourceFeatureIndex(const Size&);
181 
182  const String& getFoundAdduct() const;
183  void setFoundAdduct(const String&);
184 
185  const String& getFormulaString() const;
186  void setEmpiricalFormula(const String&);
187 
188  const std::vector<String>& getMatchingHMDBids() const;
189  void setMatchingHMDBids(const std::vector<String>&);
190 
192  const std::vector<double>& getMasstraceIntensities() const;
193  void setMasstraceIntensities(const std::vector<double>&);
194 
195  double getIsotopesSimScore() const;
196  void setIsotopesSimScore(const double&);
197 
198  // debug/output functions
199  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
200 
201 private:
203  double observed_mz_;
206  double db_mass_;
209  double observed_rt_;
211  std::vector<double> individual_intensities_;
214 
217  std::vector<String> matching_hmdb_ids_;
218 
219  std::vector<double> mass_trace_intensities_;
221  };
222 
223  OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr); // namespace-scope declaration of the stream-output operator that AccurateMassSearchResult declares as friend
224 
256  class OPENMS_DLLAPI AccurateMassSearchEngine :
257  public DefaultParamHandler,
258  public ProgressLogger
259  {
260 public:
263 
265  virtual ~AccurateMassSearchEngine();
266  // Query functions: const members that report through the 'results' output vector; 'ion_mode' is a string (presumably "positive"/"negative", the values resolveAutoMode_() yields -- confirm)
271  void queryByMZ(const double& observed_mz, const Int& observed_charge, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
272  void queryByFeature(const Feature& feature, const Size& feature_index, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
273  void queryByConsensusFeature(const ConsensusFeature& cfeat, const Size& cf_index, const Size& number_of_maps, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
274  // Whole-map entry points: the map and the MzTab are both taken by non-const reference, so either may be modified (presumably the map is annotated and the MzTab filled -- confirm)
277  void run(FeatureMap&, MzTab&) const;
278
282  void run(ConsensusMap&, MzTab&) const;
283  // non-const set-up step; NOTE(review): the comment on is_initialized_ mentions 'init_()', presumably meaning this function
285  void init();
286
287 protected:
288  virtual void updateMembers_(); // presumably overrides the DefaultParamHandler hook called when parameters change -- confirm
289 
290 private:
292 
293  /// Resolves the ion mode from the 'scan_polarity' meta value of the first element of @p map: returns "positive" or "negative" (lower-cased), or an empty string (after an info log) if @p map has no elements.
294  /// @throws Exception::InvalidParameter if 'scan_polarity' is missing, does not hold exactly one non-empty entry (list built with separator ';'), or holds a value other than "positive"/"negative".
295  template <typename MAPTYPE> String resolveAutoMode_(const MAPTYPE& map) const
296  {
297  String ion_mode_internal;
298  String ion_mode_detect_msg = ""; // stays empty unless detection fails; a non-empty text triggers the exception below
299  if (map.size() > 0)
300  {
301  if (map[0].metaValueExists("scan_polarity")) // only the first element of the map is inspected
302  {
303  StringList pols = ListUtils::create<String>(String(map[0].getMetaValue("scan_polarity")), ';');
304  if (pols.size() == 1 && pols[0].size() > 0)
305  {
306  pols[0].toLower(); // makes the comparison below case-insensitive
307  if (pols[0] == "positive" || pols[0] == "negative")
308  {
309  ion_mode_internal = pols[0];
310  LOG_INFO << "Setting auto ion-mode to '" << ion_mode_internal << "' for file " << File::basename(map.getLoadedFilePath()) << std::endl;
311  }
312  else ion_mode_detect_msg = String("Meta value 'scan_polarity' contains unknown ion mode: ") + String(map[0].getMetaValue("scan_polarity"));
313  }
314  else
315  { // zero or several entries (or a single empty one): no unique polarity
316  ion_mode_detect_msg = String("ambiguous ion mode: ") + String(map[0].getMetaValue("scan_polarity"));
317  }
318  }
319  else
320  {
321  ion_mode_detect_msg = String("Meta value 'scan_polarity' not found in (Consensus-)Feature map");
322  }
323  }
324  else
325  { // nothing to resolve, since map is empty: log only, no error, empty string is returned
326  LOG_INFO << "Meta value 'scan_polarity' cannot be determined since (Consensus-)Feature map is empty!" << std::endl;
327  }
328  // any recorded detection problem is fatal
329  if (ion_mode_detect_msg.size() > 0)
330  {
331  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Auto ionization mode could not resolve ion mode of data (") + ion_mode_detect_msg + ")!");
332  }
333
334  return ion_mode_internal;
335  }
336 
337  void parseMappingFile_(const StringList&); // non-const member; presumably fills mass_mappings_ from the given file(s) -- confirm
338  void parseStructMappingFile_(const StringList&); // non-const member; presumably fills hmdb_properties_mapping_ -- confirm
339  void parseAdductsFile_(const String& filename, std::vector<AdductInfo>& result); // the adducts are handed back through the 'result' output parameter
340  void searchMass_(double neutral_query_mass, double diff_mass, std::pair<Size, Size>& hit_indices) const; // output via 'hit_indices'; presumably an index range into mass_mappings_ -- confirm
341
343  void annotate_(const std::vector<AccurateMassSearchResult>&, BaseFeature&) const; // the BaseFeature is taken by non-const reference, i.e. it may be modified
344
347  double computeCosineSim_(const std::vector<double>& x, const std::vector<double>& y) const;
348
349  double computeIsotopePatternSimilarity_(const Feature& feat, const EmpiricalFormula& form) const;
350
351  typedef std::vector<std::vector<AccurateMassSearchResult> > QueryResultsTable; // a list of result lists (presumably one inner list per queried feature -- confirm)
352
353  void exportMzTab_(const QueryResultsTable& overall_results, const Size number_of_maps, MzTab& mztab_out) const; // output goes to 'mztab_out'
354
356  typedef std::vector<std::vector<String> > MassIDMapping;
357  typedef std::map<String, std::vector<String> > HMDBPropsMapping; // String key -> list of Strings; type of hmdb_properties_mapping_
358
359  struct MappingEntry_ // element type of mass_mappings_: a mass together with its IDs and a formula string (head and 'formula' restored from the page's member index)
360  {
361  double mass; // the value CompareEntryAndMass_ compares entries by
362  std::vector<String> massIDs;
363  String formula;
364  };
365  std::vector<MappingEntry_> mass_mappings_; // NOTE(review): presumably kept sorted by mass so CompareEntryAndMass_ can be used for lookups -- confirm
366 
367  struct CompareEntryAndMass_ // defined here to allow for inlining by compiler
368  {
369  double asMass(const MappingEntry_& v) const // a mapping entry is represented by its 'mass' field
370  {
371  return v.mass;
372  }
373  // a plain double is taken as the mass itself
374  double asMass(double t) const
375  {
376  return t;
377  }
378  // strict less-than on the masses of both arguments; templated so that either side may be a MappingEntry_ or a double
379  template <typename T1, typename T2>
380  bool operator()(T1 const& t1, T2 const& t2) const
381  {
382  return asMass(t1) < asMass(t2);
383  }
384
385  };
386 
387  HMDBPropsMapping hmdb_properties_mapping_;
388
389  bool is_initialized_; ///< true if init() was called without any subsequent param changes
390
391  /// parameter stuff (members at header lines 392-409 that were missing from this listing are restored from the page's member index)
392  double mass_error_value_;
393  String mass_error_unit_;
394  String ion_mode_;
395  bool iso_similarity_;
396
397  String pos_adducts_fname_;
398  String neg_adducts_fname_;
399
400  StringList db_mapping_file_;
401  StringList db_struct_file_;
402
403  std::vector<AdductInfo> pos_adducts_;
404  std::vector<AdductInfo> neg_adducts_;
405
406  String database_name_;
407  String database_version_;
408
409  bool keep_unidentified_masses_;
410  };
411 
412 }
413 
414 #endif // OPENMS_ANALYSIS_ID_ACCURATEMASSSEARCHENGINE_H
String name_
members
Definition: AccurateMassSearchEngine.h:100
String formula
Definition: AccurateMassSearchEngine.h:363
std::vector< AdductInfo > pos_adducts_
Definition: AccurateMassSearchEngine.h:403
String ion_mode_
Definition: AccurateMassSearchEngine.h:394
String mass_error_unit_
Definition: AccurateMassSearchEngine.h:393
bool operator()(T1 const &t1, T2 const &t2) const
Definition: AccurateMassSearchEngine.h:380
Definition: AccurateMassSearchEngine.h:58
double searched_mass_
Definition: AccurateMassSearchEngine.h:205
A more convenient string class.
Definition: String.h:57
bool iso_similarity_
Definition: AccurateMassSearchEngine.h:395
bool keep_unidentified_masses_
Definition: AccurateMassSearchEngine.h:409
Definition: AccurateMassSearchEngine.h:367
#define LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:455
int charge_
Definition: AccurateMassSearchEngine.h:103
String empirical_formula_
Definition: AccurateMassSearchEngine.h:216
double db_mass_
Definition: AccurateMassSearchEngine.h:206
std::vector< std::vector< String > > MassIDMapping
private member variables
Definition: AccurateMassSearchEngine.h:356
A container for features.
Definition: FeatureMap.h:94
unsigned int UInt
Unsigned integer type.
Definition: Types.h:95
A container for consensus elements.
Definition: ConsensusMap.h:72
String database_name_
Definition: AccurateMassSearchEngine.h:406
std::vector< double > individual_intensities_
Definition: AccurateMassSearchEngine.h:211
double theoretical_mz_
Definition: AccurateMassSearchEngine.h:204
StringList db_mapping_file_
Definition: AccurateMassSearchEngine.h:400
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Definition: AccurateMassSearchEngine.h:359
std::vector< String > massIDs
Definition: AccurateMassSearchEngine.h:362
A basic LC-MS feature.
Definition: BaseFeature.h:56
std::vector< String > matching_hmdb_ids_
Definition: AccurateMassSearchEngine.h:217
String found_adduct_
Definition: AccurateMassSearchEngine.h:215
double observed_rt_
Definition: AccurateMassSearchEngine.h:209
UInt mol_multiplier_
Definition: AccurateMassSearchEngine.h:104
Definition: AccurateMassSearchEngine.h:107
HMDBPropsMapping hmdb_properties_mapping_
Definition: AccurateMassSearchEngine.h:387
std::vector< AdductInfo > neg_adducts_
Definition: AccurateMassSearchEngine.h:404
Representation of an empirical formula.
Definition: EmpiricalFormula.h:80
String resolveAutoMode_(const MAPTYPE &map) const
private member functions
Definition: AccurateMassSearchEngine.h:295
double mass
Definition: AccurateMassSearchEngine.h:361
String pos_adducts_fname_
Definition: AccurateMassSearchEngine.h:397
std::map< String, std::vector< String > > HMDBPropsMapping
Definition: AccurateMassSearchEngine.h:357
Int charge_
Definition: AccurateMassSearchEngine.h:207
String & toLower()
Converts the string to lowercase.
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
std::vector< std::vector< AccurateMassSearchResult > > QueryResultsTable
Definition: AccurateMassSearchEngine.h:351
double observed_mz_
Stored information/results of DB query.
Definition: AccurateMassSearchEngine.h:203
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:348
An LC-MS feature.
Definition: Feature.h:70
double mz_error_ppm_
Definition: AccurateMassSearchEngine.h:208
static String basename(const String &file)
Returns the basename of the file (without the path).
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:74
double asMass(double t) const
Definition: AccurateMassSearchEngine.h:374
StringList db_struct_file_
Definition: AccurateMassSearchEngine.h:401
double isotopes_sim_score_
Definition: AccurateMassSearchEngine.h:220
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
EmpiricalFormula ef_
Definition: AccurateMassSearchEngine.h:101
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
double mass_error_value_
parameter stuff
Definition: AccurateMassSearchEngine.h:392
double mass_
Definition: AccurateMassSearchEngine.h:102
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
Size source_feature_index_
Definition: AccurateMassSearchEngine.h:213
Size matching_index_
Definition: AccurateMassSearchEngine.h:212
String neg_adducts_fname_
Definition: AccurateMassSearchEngine.h:398
double observed_intensity_
Definition: AccurateMassSearchEngine.h:210
int Int
Signed integer type.
Definition: Types.h:103
std::vector< double > mass_trace_intensities_
Definition: AccurateMassSearchEngine.h:219
double asMass(const MappingEntry_ &v) const
Definition: AccurateMassSearchEngine.h:369
bool is_initialized_
Definition: AccurateMassSearchEngine.h:389
std::vector< MappingEntry_ > mass_mappings_
Definition: AccurateMassSearchEngine.h:365
String database_version_
Definition: AccurateMassSearchEngine.h:407
A 2-dimensional consensus feature.
Definition: ConsensusFeature.h:65
Data model of MzTab files. Please see the official MzTab specification at https://code.google.com/p/mztab/.
Definition: MzTab.h:700
An algorithm to search for exact mass matches from a spectrum against a database (e.g. HMDB).
Definition: AccurateMassSearchEngine.h:256

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:21:59 using doxygen 1.8.13