Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MascotGenericFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Andreas Bertsch, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FORMAT_MASCOTGENERICFILE_H
36 #define OPENMS_FORMAT_MASCOTGENERICFILE_H
37 
39 #include <OpenMS/SYSTEM/File.h>
43 
44 #include <vector>
45 #include <fstream>
46 
47 #ifdef _OPENMP
48 #include <omp.h>
49 #endif
50 
51 namespace OpenMS
52 {
62  class OPENMS_DLLAPI MascotGenericFile :
63  public ProgressLogger,
64  public DefaultParamHandler
65  {
66 public:
67 
70 
72  virtual ~MascotGenericFile();
73 
75  virtual void updateMembers_();
76 
78  void store(const String& filename, const PeakMap& experiment,
79  bool compact = false);
80 
82  void store(std::ostream& os, const String& filename,
83  const PeakMap& experiment, bool compact = false);
84 
92  template <typename MapType>
93  void load(const String& filename, MapType& exp)
94  {
95  if (!File::exists(filename))
96  {
97  throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, filename);
98  }
99 
100  exp.reset();
101 
102  std::ifstream is(filename.c_str());
103  // get size of file
104  is.seekg(0, std::ios::end);
105  startProgress(0, is.tellg(), "loading MGF");
106  is.seekg(0, std::ios::beg);
107 
108  UInt spectrum_number(0);
109  Size line_number(0); // carry line number for error messages within getNextSpectrum()
110 
111  typename MapType::SpectrumType spectrum;
112  spectrum.setMSLevel(2);
113  spectrum.getPrecursors().resize(1);
114  while (getNextSpectrum_(is, spectrum, line_number, spectrum_number))
115  {
116  exp.addSpectrum(spectrum);
117  setProgress(is.tellg());
118  ++spectrum_number;
119  } // next spectrum
120 
121 
122  endProgress();
123  }
124 
132  std::pair<String, String> getHTTPPeakListEnclosure(const String& filename) const;
133 
134 protected:
135 
138 
140  std::map<String, String> mod_group_map_;
141 
143  void writeParameterHeader_(const String& name, std::ostream& os);
144 
146  void writeModifications_(const std::vector<String>& mods, std::ostream& os,
147  bool variable_mods = false);
148 
150  void writeHeader_(std::ostream& os);
151 
153  void writeSpectrum_(std::ostream& os, const PeakSpectrum& spec, const String& filename);
154 
156  void writeMSExperiment_(std::ostream& os, const String& filename, const PeakMap& experiment);
157 
159  template <typename SpectrumType>
160  bool getNextSpectrum_(std::ifstream& is, SpectrumType& spectrum, Size& line_number, const Size& spectrum_number)
161  {
162  spectrum.resize(0);
163 
164  spectrum.setNativeID(String("index=") + (spectrum_number));
165  if (spectrum.metaValueExists("TITLE"))
166  {
167  spectrum.removeMetaValue("TITLE");
168  }
169  typename SpectrumType::PeakType p;
170 
171  String line;
172  // seek to next peak list block
173  while (getline(is, line, '\n'))
174  {
175  ++line_number;
176 
177  line.trim(); // remove whitespaces, line-endings etc
178 
179  // found peak list block?
180  if (line == "BEGIN IONS")
181  {
182  while (getline(is, line, '\n'))
183  {
184  ++line_number;
185  line.trim(); // remove whitespaces, line-endings etc
186 
187  if (line.empty()) continue;
188 
189  if (isdigit(line[0])) // actual data .. this comes first, since its the most common case
190  {
191  std::vector<String> split;
192  do
193  {
194  if (line.empty())
195  {
196  continue;
197  }
198 
199  line.simplify(); // merge double spaces (explicitly allowed by MGF), to prevent empty split() chunks and subsequent parse error
200  line.substitute('\t', ' '); // also accept Tab (strictly, only space(s) are allowed)
201  if (line.split(' ', split, false))
202  {
203  try
204  {
205  p.setPosition(split[0].toDouble());
206  p.setIntensity(split[1].toDouble());
207  }
208  catch (Exception::ConversionError& /*e*/)
209  {
210  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " could not be converted to a number! Expected two (m/z int) or three (m/z int charge) numbers separated by whitespace (space or tab).", "");
211  }
212  spectrum.push_back(p);
213  }
214  else
215  {
216  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " does not contain m/z and intensity values separated by whitespace (space or tab)!", "");
217  }
218  }
219  while (getline(is, line, '\n') && ++line_number && line.trim() != "END IONS"); // line.trim() is important here!
220 
221  if (line == "END IONS")
222  {
223  return true; // found end of spectrum
224  }
225  else
226  {
227  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Reached end of file. Found \"BEGIN IONS\" but not the corresponding \"END IONS\"!", "");
228  }
229  }
230  else if (line.hasPrefix("PEPMASS")) // parse precursor position
231  {
232  String tmp = line.substr(8); // copy since we might need the original line for error reporting later
233  tmp.substitute('\t', ' ');
234  std::vector<String> split;
235  tmp.split(' ', split);
236  if (split.size() == 1)
237  {
238  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
239  }
240  else if (split.size() == 2)
241  {
242  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
243  spectrum.getPrecursors()[0].setIntensity(split[1].trim().toDouble());
244  }
245  else
246  {
247  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot parse PEPMASS in '" + line + "' at line #" + String(line_number) + " (expected 1 or 2 entries, but " + String(split.size()) + " were present)!", "");
248  }
249  }
250  else if (line.hasPrefix("CHARGE"))
251  {
252  String tmp = line.substr(7);
253  tmp.remove('+');
254  spectrum.getPrecursors()[0].setCharge(tmp.toInt());
255  }
256  else if (line.hasPrefix("RTINSECONDS"))
257  {
258  String tmp = line.substr(12);
259  spectrum.setRT(tmp.toDouble());
260  }
261  else if (line.hasPrefix("TITLE"))
262  {
263  // test if we have a line like "TITLE= Cmpd 1, +MSn(595.3), 10.9 min"
264  if (line.hasSubstring("min"))
265  {
266  try
267  {
268  std::vector<String> split;
269  line.split(',', split);
270  if (!split.empty())
271  {
272  for (Size i = 0; i != split.size(); ++i)
273  {
274  if (split[i].hasSubstring("min"))
275  {
276  std::vector<String> split2;
277  split[i].trim().split(' ', split2);
278  if (!split2.empty())
279  {
280  spectrum.setRT(split2[0].trim().toDouble() * 60.0);
281  }
282  }
283  }
284  }
285  }
286  catch (Exception::BaseException& /*e*/)
287  {
288  // just do nothing and write the whole title to spec
289  std::vector<String> split;
290  if (line.split('=', split))
291  {
292  if (split[1] != "") spectrum.setMetaValue("TITLE", split[1]);
293  }
294  }
295  }
296  else // just write the title as metainfo to the spectrum
297  {
298  std::vector<String> split;
299  line.split('=', split);
300  if (split.size() == 2)
301  {
302  if (split[1] != "") spectrum.setMetaValue("TITLE", split[1]);
303  }
304  // TODO concatenate the other parts if the title contains additional '=' chars
305  }
306  }
307  }
308  }
309  }
310 
311  return false; // found end of file
312  }
313 
314  };
315 
316 } // namespace OpenMS
317 
318 #endif // OPENMS_FORMAT_MASCOTGENERICFILE_H
String & simplify()
merges subsequent whitespaces to one blank character
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
bool getNextSpectrum_(std::ifstream &is, SpectrumType &spectrum, Size &line_number, const Size &spectrum_number)
reads a spectrum block, the section between 'BEGIN IONS' and 'END IONS' of an MGF file ...
Definition: MascotGenericFile.h:160
A more convenient string class.
Definition: String.h:57
void reset()
Resets all internal values.
Definition: MSExperiment.h:709
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Definition: MSExperiment.h:831
unsigned int UInt
Unsigned integer type.
Definition: Types.h:95
void removeMetaValue(const String &name)
Removes the DataValue corresponding to name if it exists.
File not found exception.
Definition: Exception.h:524
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:111
Read/write Mascot generic files (MGF).
Definition: MascotGenericFile.h:62
String & remove(char what)
Remove all occurrences of the character what.
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
std::map< String, String > mod_group_map_
mapping of modifications with specificity groups, that have to be treated specially (e...
Definition: MascotGenericFile.h:140
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
static bool exists(const String &file)
Method used to test if a file exists.
Int toInt() const
Conversion to int.
double toDouble() const
Conversion to double.
bool metaValueExists(const String &name) const
Returns whether an entry with the given name exists.
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
void setPosition(PositionType const &position)
Mutable access to the position.
Definition: Peak1D.h:150
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:55
void setMSLevel(UInt ms_level)
Sets the MS level.
Exception base class.
Definition: Exception.h:90
Invalid conversion exception.
Definition: Exception.h:363
void setRT(double rt)
Sets the absolute retention time (in seconds)
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void load(const String &filename, MapType &exp)
loads a Mascot Generic File into a PeakMap
Definition: MascotGenericFile.h:93
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
bool store_compact_
use a compact format for storing (no zero-intensity peaks, limited number of decimal places)...
Definition: MascotGenericFile.h:137
Parse Error exception.
Definition: Exception.h:623

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:01 using doxygen 1.8.13