OpenMS  2.4.0
MascotGenericFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Andreas Bertsch, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
38 #include <OpenMS/SYSTEM/File.h>
42 
43 #include <vector>
44 #include <fstream>
45 
46 #ifdef _OPENMP
47 #include <omp.h>
48 #endif
49 
50 namespace OpenMS
51 {
61  class OPENMS_DLLAPI MascotGenericFile :
62  public ProgressLogger,
63  public DefaultParamHandler
64  {
65 public:
66 
69 
71  ~MascotGenericFile() override;
72 
74  void updateMembers_() override;
75 
77  void store(const String& filename, const PeakMap& experiment,
78  bool compact = false);
79 
81  void store(std::ostream& os, const String& filename,
82  const PeakMap& experiment, bool compact = false);
83 
91  template <typename MapType>
92  void load(const String& filename, MapType& exp)
93  {
94  if (!File::exists(filename))
95  {
96  throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, filename);
97  }
98 
99  exp.reset();
100 
101  std::ifstream is(filename.c_str());
102  // get size of file
103  is.seekg(0, std::ios::end);
104  startProgress(0, is.tellg(), "loading MGF");
105  is.seekg(0, std::ios::beg);
106 
107  UInt spectrum_number(0);
108  Size line_number(0); // carry line number for error messages within getNextSpectrum()
109 
110  typename MapType::SpectrumType spectrum;
111  spectrum.setMSLevel(2);
112  spectrum.getPrecursors().resize(1);
113  while (getNextSpectrum_(is, spectrum, line_number, spectrum_number))
114  {
115  exp.addSpectrum(spectrum);
116  setProgress(is.tellg());
117  ++spectrum_number;
118  } // next spectrum
119 
120 
121  endProgress();
122  }
123 
131  std::pair<String, String> getHTTPPeakListEnclosure(const String& filename) const;
132 
133 protected:
134 
137 
139  std::map<String, String> mod_group_map_;
140 
142  void writeParameterHeader_(const String& name, std::ostream& os);
143 
145  void writeModifications_(const std::vector<String>& mods, std::ostream& os,
146  bool variable_mods = false);
147 
149  void writeHeader_(std::ostream& os);
150 
152  void writeSpectrum_(std::ostream& os, const PeakSpectrum& spec, const String& filename, const String& native_id_type_accession);
153 
155  void writeMSExperiment_(std::ostream& os, const String& filename, const PeakMap& experiment);
156 
158  template <typename SpectrumType>
159  bool getNextSpectrum_(std::ifstream& is, SpectrumType& spectrum, Size& line_number, const Size& spectrum_number)
160  {
161  spectrum.resize(0);
162 
163  spectrum.setNativeID(String("index=") + (spectrum_number));
164  if (spectrum.metaValueExists("TITLE"))
165  {
166  spectrum.removeMetaValue("TITLE");
167  }
168  typename SpectrumType::PeakType p;
169 
170  String line;
171  // seek to next peak list block
172  while (getline(is, line, '\n'))
173  {
174  ++line_number;
175 
176  line.trim(); // remove whitespaces, line-endings etc
177 
178  // found peak list block?
179  if (line == "BEGIN IONS")
180  {
181  while (getline(is, line, '\n'))
182  {
183  ++line_number;
184  line.trim(); // remove whitespaces, line-endings etc
185 
186  if (line.empty()) continue;
187 
188  if (isdigit(line[0])) // actual data .. this comes first, since its the most common case
189  {
190  std::vector<String> split;
191  do
192  {
193  if (line.empty())
194  {
195  continue;
196  }
197 
198  line.simplify(); // merge double spaces (explicitly allowed by MGF), to prevent empty split() chunks and subsequent parse error
199  line.substitute('\t', ' '); // also accept Tab (strictly, only space(s) are allowed)
200  if (line.split(' ', split, false))
201  {
202  try
203  {
204  p.setPosition(split[0].toDouble());
205  p.setIntensity(split[1].toDouble());
206  }
207  catch (Exception::ConversionError& /*e*/)
208  {
209  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " could not be converted to a number! Expected two (m/z int) or three (m/z int charge) numbers separated by whitespace (space or tab).", "");
210  }
211  spectrum.push_back(p);
212  }
213  else
214  {
215  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " does not contain m/z and intensity values separated by whitespace (space or tab)!", "");
216  }
217  }
218  while (getline(is, line, '\n') && ++line_number && line.trim() != "END IONS"); // line.trim() is important here!
219 
220  if (line == "END IONS")
221  {
222  return true; // found end of spectrum
223  }
224  else
225  {
226  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Reached end of file. Found \"BEGIN IONS\" but not the corresponding \"END IONS\"!", "");
227  }
228  }
229  else if (line.hasPrefix("PEPMASS")) // parse precursor position
230  {
231  String tmp = line.substr(8); // copy since we might need the original line for error reporting later
232  tmp.substitute('\t', ' ');
233  std::vector<String> split;
234  tmp.split(' ', split);
235  if (split.size() == 1)
236  {
237  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
238  }
239  else if (split.size() == 2)
240  {
241  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
242  spectrum.getPrecursors()[0].setIntensity(split[1].trim().toDouble());
243  }
244  else
245  {
246  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot parse PEPMASS in '" + line + "' at line #" + String(line_number) + " (expected 1 or 2 entries, but " + String(split.size()) + " were present)!", "");
247  }
248  }
249  else if (line.hasPrefix("CHARGE"))
250  {
251  String tmp = line.substr(7);
252  tmp.remove('+');
253  spectrum.getPrecursors()[0].setCharge(tmp.toInt());
254  }
255  else if (line.hasPrefix("RTINSECONDS"))
256  {
257  String tmp = line.substr(12);
258  spectrum.setRT(tmp.toDouble());
259  }
260  else if (line.hasPrefix("TITLE"))
261  {
262  // test if we have a line like "TITLE= Cmpd 1, +MSn(595.3), 10.9 min"
263  if (line.hasSubstring("min"))
264  {
265  try
266  {
267  std::vector<String> split;
268  line.split(',', split);
269  if (!split.empty())
270  {
271  for (Size i = 0; i != split.size(); ++i)
272  {
273  if (split[i].hasSubstring("min"))
274  {
275  std::vector<String> split2;
276  split[i].trim().split(' ', split2);
277  if (!split2.empty())
278  {
279  spectrum.setRT(split2[0].trim().toDouble() * 60.0);
280  }
281  }
282  }
283  }
284  }
285  catch (Exception::BaseException& /*e*/)
286  {
287  // just do nothing and write the whole title to spec
288  std::vector<String> split;
289  if (line.split('=', split))
290  {
291  if (split[1] != "") spectrum.setMetaValue("TITLE", split[1]);
292  }
293  }
294  }
295  else // just write the title as metainfo to the spectrum
296  {
297  std::vector<String> split;
298  line.split('=', split);
299  if (split.size() == 2)
300  {
301  if (split[1] != "") spectrum.setMetaValue("TITLE", split[1]);
302  }
303  // TODO concatenate the other parts if the title contains additional '=' chars
304  }
305  }
306  }
307  }
308  }
309 
310  return false; // found end of file
311  }
312 
313  };
314 
315 } // namespace OpenMS
316 
String & simplify()
merges subsequent whitespaces to one blank character
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
bool getNextSpectrum_(std::ifstream &is, SpectrumType &spectrum, Size &line_number, const Size &spectrum_number)
reads a spectrum block, the section between 'BEGIN IONS' and 'END IONS' of a MGF file ...
Definition: MascotGenericFile.h:159
A more convenient string class.
Definition: String.h:57
void reset()
Resets all internal values.
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
void removeMetaValue(const String &name)
Removes the DataValue corresponding to name if it exists.
File not found exception.
Definition: Exception.h:523
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:110
Read/write Mascot generic files (MGF).
Definition: MascotGenericFile.h:61
String & remove(char what)
Remove all occurrences of the character what.
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
std::map< String, String > mod_group_map_
mapping of modifications with specificity groups, that have to be treated specially (e...
Definition: MascotGenericFile.h:139
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
static bool exists(const String &file)
Method used to test if a file exists.
Int toInt() const
Conversion to int.
double toDouble() const
Conversion to double.
bool metaValueExists(const String &name) const
Returns whether an entry with the given name exists.
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
void setPosition(PositionType const &position)
Mutable access to the position.
Definition: Peak1D.h:149
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setMSLevel(UInt ms_level)
Sets the MS level.
Exception base class.
Definition: Exception.h:89
Invalid conversion exception.
Definition: Exception.h:362
void setRT(double rt)
Sets the absolute retention time (in seconds)
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
void load(const String &filename, MapType &exp)
loads a Mascot Generic File into a PeakMap
Definition: MascotGenericFile.h:92
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
bool store_compact_
use a compact format for storing (no zero-intensity peaks, limited number of decimal places)...
Definition: MascotGenericFile.h:136
Parse Error exception.
Definition: Exception.h:622