OpenMS
SpectrumLookup.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 
40 #include <boost/regex.hpp>
41 
42 namespace OpenMS
43 {
67  class OPENMS_DLLAPI SpectrumLookup
68  {
69  public:
70 
72  static const String& default_scan_regexp;
73 
75  std::vector<boost::regex> reference_formats;
76 
78  double rt_tolerance;
79 
82 
84  virtual ~SpectrumLookup();
85 
87  bool empty() const;
88 
102  template <typename SpectrumContainer>
103  void readSpectra(const SpectrumContainer& spectra,
104  const String& scan_regexp = default_scan_regexp)
105  {
106  rts_.clear();
107  ids_.clear();
108  scans_.clear();
109  n_spectra_ = spectra.size();
110  setScanRegExp_(scan_regexp);
111  for (Size i = 0; i < n_spectra_; ++i)
112  {
113  const MSSpectrum& spectrum = spectra[i];
114  const String& native_id = spectrum.getNativeID();
115  Int scan_no = -1;
116  if (!scan_regexp.empty())
117  {
118  scan_no = extractScanNumber(native_id, scan_regexp_, true);
119  if (scan_no < 0)
120  {
121  OPENMS_LOG_WARN << "Warning: Could not extract scan number from spectrum native ID '" + native_id + "' using regular expression '" + scan_regexp + "'. Look-up by scan number may not work properly." << std::endl;
122  }
123  }
124  addEntry_(i, spectrum.getRT(), scan_no, native_id);
125  }
126  }
127 
139  Size findByRT(double rt) const;
140 
150  Size findByNativeID(const String& native_id) const;
151 
162  Size findByIndex(Size index, bool count_from_one = false) const;
163 
173  Size findByScanNumber(Size scan_number) const;
174 
187  Size findByReference(const String& spectrum_ref) const;
188 
198  void addReferenceFormat(const String& regexp);
199 
211  static Int extractScanNumber(const String& native_id,
212  const boost::regex& scan_regexp,
213  bool no_error = false);
214 
215  static Int extractScanNumber(const String& native_id,
216  const String& native_id_type_accession);
222  static std::string getRegExFromNativeID(const String& id);
223 
227  static bool isNativeID(const String& id);
228 
229  protected:
230 
232  static const String& regexp_names_;
233 
235 
236  boost::regex scan_regexp_;
237 
238  std::vector<String> regexp_name_list_;
239 
240  std::map<double, Size> rts_;
241  std::map<String, Size> ids_;
242  std::map<Size, Size> scans_;
243 
252  void addEntry_(Size index, double rt, Int scan_number,
253  const String& native_id);
254 
266  Size findByRegExpMatch_(const String& spectrum_ref, const String& regexp,
267  const boost::smatch& match) const;
268 
274  void setScanRegExp_(const String& scan_regexp);
275 
276  private:
277 
280 
283 
284  };
285 
286 } //namespace OpenMS
287 
#define OPENMS_LOG_WARN
Macro for logging a warning, i.e. a piece of information that should be read by the user.
Definition: LogStream.h:470
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
double getRT() const
Helper class for looking up spectra based on different attributes.
Definition: SpectrumLookup.h:68
virtual ~SpectrumLookup()
Destructor.
std::map< Size, Size > scans_
Mapping: scan number -> spectrum index.
Definition: SpectrumLookup.h:242
void setScanRegExp_(const String &scan_regexp)
Set the regular expression for extracting scan numbers from spectrum native IDs.
Size findByIndex(Size index, bool count_from_one=false) const
Look up spectrum by index (position in the vector of spectra).
std::map< double, Size > rts_
Mapping: RT -> spectrum index.
Definition: SpectrumLookup.h:240
Size n_spectra_
Number of spectra.
Definition: SpectrumLookup.h:234
bool empty() const
Check if any spectra were set.
boost::regex scan_regexp_
Regular expression to extract scan numbers.
Definition: SpectrumLookup.h:236
Size findByRT(double rt) const
Look up spectrum by retention time (RT).
static const String & default_scan_regexp
Default regular expression for extracting scan numbers from spectrum native IDs.
Definition: SpectrumLookup.h:72
SpectrumLookup(const SpectrumLookup &)
Copy constructor (not implemented).
Size findByScanNumber(Size scan_number) const
Look up spectrum by scan number (extracted from the native ID).
void addReferenceFormat(const String &regexp)
Register a possible format for a spectrum reference.
Size findByReference(const String &spectrum_ref) const
Look up spectrum by reference.
std::vector< boost::regex > reference_formats
Possible formats of spectrum references, defined as regular expressions.
Definition: SpectrumLookup.h:75
Size findByRegExpMatch_(const String &spectrum_ref, const String &regexp, const boost::smatch &match) const
Look up spectrum by regular expression match.
SpectrumLookup()
Constructor.
Size findByNativeID(const String &native_id) const
Look up spectrum by native ID.
double rt_tolerance
Tolerance for look-up by retention time.
Definition: SpectrumLookup.h:78
void addEntry_(Size index, double rt, Int scan_number, const String &native_id)
Add a look-up entry for a spectrum.
std::vector< String > regexp_name_list_
Named groups in vector format.
Definition: SpectrumLookup.h:238
static Int extractScanNumber(const String &native_id, const boost::regex &scan_regexp, bool no_error=false)
Extract the scan number from the native ID of a spectrum.
static Int extractScanNumber(const String &native_id, const String &native_id_type_accession)
static std::string getRegExFromNativeID(const String &id)
Determine the RegEx string to extract scan/index number from native IDs. Can be used for extractScanNumber().
SpectrumLookup & operator=(const SpectrumLookup &)
Assignment operator (not implemented).
static bool isNativeID(const String &id)
Simple prefix check if a spectrum identifier id is a nativeID from a vendor file.
void readSpectra(const SpectrumContainer &spectra, const String &scan_regexp=default_scan_regexp)
Read and index spectra for later look-up.
Definition: SpectrumLookup.h:103
std::map< String, Size > ids_
Mapping: native ID -> spectrum index.
Definition: SpectrumLookup.h:241
static const String & regexp_names_
Named groups recognized in regular expression.
Definition: SpectrumLookup.h:232
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48