OpenMS
Loading...
Searching...
No Matches
MascotGenericFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow $
6// $Authors: Andreas Bertsch, Chris Bielow $
7// --------------------------------------------------------------------------
8
9#pragma once
10
16#include <OpenMS/SYSTEM/File.h>
18
19#include <vector>
20#include <fstream>
21
22#ifdef _OPENMP
23#include <omp.h>
24#endif
25
26namespace OpenMS
27{
43 class OPENMS_DLLAPI MascotGenericFile :
44 public ProgressLogger,
46 {
47public:
48
51
54
/// Overrides the base-class hook of the same name (documented in the base class).
56 void updateMembers_() override;
57
/// Stores the experiment data in a MascotGenericFile that can be used as input for MASCOT shell execution.
/// @param filename    path of the MGF file to write
/// @param experiment  peak map whose spectra are written
/// @param compact     presumably selects the compact output format (no zero-intensity peaks,
///                    limited number of decimal places) -- NOTE(review): confirm against the implementation
59 void store(const std::string& filename, const PeakMap& experiment,
60 bool compact = false);
61
/// Stores the experiment data in a MascotGenericFile; the output is written to the given stream.
/// @param os          stream receiving the MGF text
/// @param filename    file name associated with the data -- NOTE(review): how it is used is not visible in this header
/// @param experiment  peak map whose spectra are written
/// @param compact     presumably selects the compact output format -- NOTE(review): confirm against the implementation
63 void store(std::ostream& os, const std::string& filename,
64 const PeakMap& experiment, bool compact = false);
65
73 template <typename MapType>
74 void load(const std::string& filename, MapType& exp)
75 {
76 if (!File::exists(filename))
77 {
78 throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, filename);
79 }
80
81 exp.reset();
82
83 std::ifstream is(filename.c_str());
84 // get size of file
85 is.seekg(0, std::ios::end);
86 startProgress(0, is.tellg(), "loading MGF");
87 is.seekg(0, std::ios::beg);
88
89 UInt spectrum_number(0);
90 Size line_number(0); // carry line number for error messages within getNextSpectrum()
91
92 typename MapType::SpectrumType spectrum;
93 spectrum.setMSLevel(2);
94 spectrum.getPrecursors().resize(1);
95 spectrum.setType(SpectrumSettings::SpectrumType::CENTROID); // MGF is always centroided, by definition
96 while (getNextSpectrum_(is, spectrum, line_number, spectrum_number))
97 {
98 exp.addSpectrum(spectrum);
99 setProgress(is.tellg());
100 ++spectrum_number;
101 } // next spectrum
102 exp.updateRanges();
103 endProgress();
104 }
105
/// Returns the enclosing strings of the peak list body for HTTP submission.
/// NOTE(review): presumably `first` precedes and `second` follows the peak list -- confirm against the implementation.
/// @param filename  file name used when building the enclosure -- NOTE(review): exact use is not visible in this header
113 std::pair<std::string, std::string> getHTTPPeakListEnclosure(const std::string& filename) const;
114
/// Writes a spectrum in MGF format to an ostream.
/// @param os                        stream receiving the MGF text
/// @param spec                      spectrum to write
/// @param filename                  file name associated with the spectrum -- NOTE(review): exact use is not visible in this header
/// @param native_id_type_accession  accession describing the native ID format -- NOTE(review): confirm the expected values
116 void writeSpectrum(std::ostream& os, const PeakSpectrum& spec, const std::string& filename, const std::string& native_id_type_accession);
117
118protected:
119
122
/// Mapping of modifications with specificity groups that have to be treated specially.
/// NOTE(review): the meaning of key and value is not visible in this header -- confirm against the implementation.
124 std::map<std::string, std::string> mod_group_map_;
125
/// Writes a parameter header.
/// @param name  name of the parameter
/// @param os    stream receiving the output
127 void writeParameterHeader_(const std::string& name, std::ostream& os);
128
/// Writes a list of (fixed or variable) modifications.
/// @param mods           modification names to write
/// @param os             stream receiving the output
/// @param variable_mods  presumably true for variable and false for fixed modifications -- NOTE(review): confirm
130 void writeModifications_(const std::vector<std::string>& mods, std::ostream& os,
131 bool variable_mods = false);
132
/// Writes the full header.
/// @param os  stream receiving the output
134 void writeHeader_(std::ostream& os);
135
/// Writes the MSExperiment.
/// @param os          stream receiving the output
/// @param filename    file name associated with the data -- NOTE(review): exact use is not visible in this header
/// @param experiment  peak map whose spectra are written
137 void writeMSExperiment_(std::ostream& os, const std::string& filename, const PeakMap& experiment);
138
140 template <typename SpectrumType>
141 bool getNextSpectrum_(std::ifstream& is, SpectrumType& spectrum, Size& line_number, const Size& spectrum_number)
142 {
143 spectrum.resize(0);
144 spectrum.setNativeID(std::string("index=") + (spectrum_number));
145
146 if (spectrum.metaValueExists("TITLE"))
147 {
148 spectrum.removeMetaValue("TITLE");
149 }
150 if (spectrum.metaValueExists("SEQ"))
151 {
152 // SEQ is a per-query field; do not let it bleed across spectra
153 spectrum.removeMetaValue("SEQ");
154 }
155 typename SpectrumType::PeakType p;
156
157 std::string line;
158 // seek to next peak list block
159 while (getline(is, line, '\n'))
160 {
161 ++line_number;
162
163 StringUtils::trim(line); // remove whitespaces, line-endings etc
164
165 // found peak list block?
166 if (line == "BEGIN IONS")
167 {
168 while (getline(is, line, '\n'))
169 {
170 ++line_number;
171 StringUtils::trim(line); // remove whitespaces, line-endings etc
172
173 if (line.empty()) continue;
174
175 if (isdigit(line[0])) // actual data .. this comes first, since its the most common case
176 {
177 std::vector<std::string> split;
178 do
179 {
180 if (line.empty())
181 {
182 continue;
183 }
184
185 StringUtils::simplify(line); // merge double spaces (explicitly allowed by MGF), to prevent empty split() chunks and subsequent parse error
186 StringUtils::substitute(line, '\t', ' '); // also accept Tab (strictly, only space(s) are allowed)
187 if (StringUtils::split(line, ' ', split, false))
188 {
189 try
190 {
191 p.setPosition(StringUtils::toDouble(split[0]));
192 p.setIntensity(StringUtils::toDouble(split[1]));
193 }
194 catch (Exception::ConversionError& /*e*/)
195 {
196 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + StringUtils::toStr(line_number) + " could not be converted to a number! Expected two (m/z int) or three (m/z int charge) numbers separated by whitespace (space or tab).", "");
197 }
198 spectrum.push_back(p);
199 }
200 else
201 {
202 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + StringUtils::toStr(line_number) + " does not contain m/z and intensity values separated by whitespace (space or tab)!", "");
203 }
204 }
205 while (getline(is, line, '\n') && ++line_number && StringUtils::trim(line) != "END IONS"); // StringUtils::trim(line) is important here!
206
207 if (line == "END IONS")
208 {
209 return true; // found end of spectrum
210 }
211 else
212 {
213 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, R"(Reached end of file. Found "BEGIN IONS" but not the corresponding "END IONS"!)", "");
214 }
215 }
216 else if (StringUtils::hasPrefix(line, "PEPMASS")) // parse precursor position
217 {
218 std::string tmp = StringUtils::substr(line, 8); // copy since we might need the original line for error reporting later
219 StringUtils::substitute(tmp, '\t', ' ');
220 std::vector<std::string> split;
221 StringUtils::split(tmp, ' ', split);
222 if (split.size() == 1)
223 {
224 spectrum.getPrecursors()[0].setMZ(StringUtils::toDouble(StringUtils::trim(split[0])));
225 }
226 else if (split.size() == 2)
227 {
228 spectrum.getPrecursors()[0].setMZ(StringUtils::toDouble(StringUtils::trim(split[0])));
229 spectrum.getPrecursors()[0].setIntensity(StringUtils::toDouble(StringUtils::trim(split[1])));
230 }
231 else
232 {
233 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot parse PEPMASS in '" + line + "' at line #" + StringUtils::toStr(line_number) + " (expected 1 or 2 entries, but " + StringUtils::toStr(split.size()) + " were present)!", "");
234 }
235 }
236 else if (StringUtils::hasPrefix(line, "CHARGE"))
237 {
238 std::string tmp = StringUtils::substr(line, 7);
239 StringUtils::remove(tmp, '+');
240 spectrum.getPrecursors()[0].setCharge(StringUtils::toInt32(tmp));
241 }
242 else if (StringUtils::hasPrefix(line, "RTINSECONDS"))
243 {
244 std::string tmp = StringUtils::substr(line, 12);
245 spectrum.setRT(StringUtils::toDouble(tmp));
246 }
247 else if (StringUtils::hasPrefix(line, "TITLE"))
248 {
249 // test if we have a line like "TITLE= Cmpd 1, +MSn(595.3), 10.9 min"
250 if (StringUtils::hasSubstring(line, "min"))
251 {
252 try
253 {
254 std::vector<std::string> split;
255 StringUtils::split(line, ',', split);
256 if (!split.empty())
257 {
258 for (Size i = 0; i != split.size(); ++i)
259 {
260 if (StringUtils::hasSubstring(split[i], "min"))
261 {
262 std::vector<std::string> split2;
263 StringUtils::trim(split[i]);
264 StringUtils::split(split[i], ' ', split2);
265 if (!split2.empty())
266 {
267 StringUtils::trim(split2[0]);
268 spectrum.setRT(StringUtils::toDouble(split2[0]) * 60.0);
269 }
270 }
271 }
272 }
273 }
274 catch (Exception::BaseException& /*e*/)
275 {
276 // just do nothing and write the whole title to spec
277 std::vector<std::string> split;
278 if (StringUtils::split(line, '=', split))
279 {
280 if (!split[1].empty()) spectrum.setMetaValue("TITLE", split[1]);
281 }
282 }
283 }
284 else // just write the title as metainfo to the spectrum and add native ID to make the titles unique
285 {
286 Size firstEqual = line.find('=', 4);
287 if (firstEqual != std::string::npos)
288 {
289 if (StringUtils::hasSubstring(StringUtils::toStr(spectrum.getMetaValue("TITLE")), spectrum.getNativeID()))
290 {
291 spectrum.setMetaValue("TITLE", StringUtils::substr(line, firstEqual + 1));
292 }
293 else
294 {
295 spectrum.setMetaValue("TITLE", StringUtils::substr(line, firstEqual + 1) + "_" + spectrum.getNativeID());
296 }
297 }
298 }
299 }
300 else if (StringUtils::hasPrefix(line, "NAME"))
301 {
302 std::string tmp = StringUtils::substr(line, 5);
303 spectrum.setMetaValue(Constants::UserParam::MSM_METABOLITE_NAME, tmp);
304 }
305 else if (StringUtils::hasPrefix(line, "COMPOUND_NAME"))
306 {
307 std::string tmp = StringUtils::substr(line, 14);
308 spectrum.setMetaValue(Constants::UserParam::MSM_METABOLITE_NAME, tmp);
309 }
310 else if (StringUtils::hasPrefix(line, "INCHI="))
311 {
312 std::string tmp = StringUtils::substr(line, 6);
313 spectrum.setMetaValue(Constants::UserParam::MSM_INCHI_STRING, tmp);
314 }
315 else if (StringUtils::hasPrefix(line, "SMILES"))
316 {
317 std::string tmp = StringUtils::substr(line, 7);
318 spectrum.setMetaValue(Constants::UserParam::MSM_SMILES_STRING, tmp);
319 }
320 else if (StringUtils::hasPrefix(line, "IONMODE"))
321 {
322 std::string tmp = StringUtils::substr(line, 8);
323 spectrum.setMetaValue("IONMODE", tmp);
324 }
325 else if (StringUtils::hasPrefix(line, "MSLEVEL"))
326 {
327 std::string tmp = StringUtils::substr(line, 8);
328 try
329 {
330 int ms_level = std::stoi(tmp);
331 spectrum.setMSLevel(ms_level);
332 }
333 catch (const std::invalid_argument& /*e*/)
334 {
335 // Default to MS2 if parsing fails
336 spectrum.setMSLevel(2);
337 spectrum.setMetaValue("MSLEVEL", "2");
338 }
339 catch (const std::out_of_range& /*e*/)
340 {
341 spectrum.setMSLevel(2);
342 }
343 }
344 else if (StringUtils::hasPrefix(line, "SOURCE_INSTRUMENT"))
345 {
346 std::string tmp = StringUtils::substr(line, 18);
347 spectrum.setMetaValue("SOURCE_INSTRUMENT", tmp);
348 }
349 else if (StringUtils::hasPrefix(line, "ORGANISM"))
350 {
351 std::string tmp = StringUtils::substr(line, 9);
352 spectrum.setMetaValue("ORGANISM", tmp);
353 }
354 else if (StringUtils::hasPrefix(line, "PI"))
355 {
356 std::string tmp = StringUtils::substr(line, 3);
357 spectrum.setMetaValue("PI", tmp);
358 }
359 else if (StringUtils::hasPrefix(line, "DATACOLLECTOR"))
360 {
361 std::string tmp = StringUtils::substr(line, 14);
362 spectrum.setMetaValue("DATACOLLECTOR", tmp);
363 }
364 else if (StringUtils::hasPrefix(line, "LIBRARYQUALITY"))
365 {
366 std::string tmp = StringUtils::substr(line, 15);
367 spectrum.setMetaValue("LIBRARYQUALITY", tmp);
368 }
369 else if (StringUtils::hasPrefix(line, "SPECTRUMID"))
370 {
371 std::string tmp = StringUtils::substr(line, 11);
372 spectrum.setMetaValue("GNPS_Spectrum_ID", tmp);
373 }
374 else if (StringUtils::hasPrefix(line, "SCANS="))
375 {
376 std::string tmp = StringUtils::substr(line, 6);
377 spectrum.setMetaValue("Scan_ID", tmp);
378 }
379 else if (StringUtils::hasPrefix(line, "SEQ="))
380 {
381 // Mascot sequence-query field: peptide sequence in one-letter code.
382 // Per spec, SEQ may appear multiple times per query (each entry is
383 // an independent sequence filter). Always stored as a StringList
384 // under the "SEQ" key for a stable interface regardless of how
385 // many SEQ lines were present.
386 std::string sequence = StringUtils::substr(line, 4);
387 StringList sequences;
388 if (spectrum.metaValueExists("SEQ"))
389 {
390 sequences = spectrum.getMetaValue("SEQ").toStringList();
391 }
392 sequences.push_back(sequence);
393 spectrum.setMetaValue("SEQ", sequences);
394 }
395 }
396 }
397 }
398
399 return false; // found end of file
400 }
401
402 };
403} // namespace OpenMS
StringList toStringList() const
Explicitly convert DataValue to StringList.
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Exception base class.
Definition Exception.h:63
Invalid conversion exception.
Definition Exception.h:331
File not found exception.
Definition Exception.h:475
Parse Error exception.
Definition Exception.h:593
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
void reset()
Clear all internal data (spectra, chromatograms, ranges, metadata)
void updateRanges()
Updates the m/z, intensity, mobility, and retention time ranges of all spectra and chromatograms.
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
void setMSLevel(UInt ms_level)
Sets the MS level.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Read/write Mascot generic files (MGF).
Definition MascotGenericFile.h:46
bool store_compact_
use a compact format for storing (no zero-intensity peaks, limited number of decimal places)?
Definition MascotGenericFile.h:121
void writeMSExperiment_(std::ostream &os, const std::string &filename, const PeakMap &experiment)
writes the MSExperiment
void writeHeader_(std::ostream &os)
writes the full header
void store(std::ostream &os, const std::string &filename, const PeakMap &experiment, bool compact=false)
store the experiment data in a MascotGenericFile; the output is written to the given stream,...
void writeModifications_(const std::vector< std::string > &mods, std::ostream &os, bool variable_mods=false)
write a list of (fixed or variable) modifications
void store(const std::string &filename, const PeakMap &experiment, bool compact=false)
stores the experiment data in a MascotGenericFile that can be used as input for MASCOT shell executio...
void writeParameterHeader_(const std::string &name, std::ostream &os)
writes a parameter header
~MascotGenericFile() override
destructor
void writeSpectrum(std::ostream &os, const PeakSpectrum &spec, const std::string &filename, const std::string &native_id_type_accession)
writes a spectrum in MGF format to an ostream
bool getNextSpectrum_(std::ifstream &is, SpectrumType &spectrum, Size &line_number, const Size &spectrum_number)
reads a spectrum block, the section between 'BEGIN IONS' and 'END IONS' of a MGF file
Definition MascotGenericFile.h:141
void updateMembers_() override
docu in base class
void load(const std::string &filename, MapType &exp)
loads a Mascot Generic File into a PeakMap
Definition MascotGenericFile.h:74
MascotGenericFile()
constructor
std::pair< std::string, std::string > getHTTPPeakListEnclosure(const std::string &filename) const
enclosing Strings of the peak list body for HTTP submission
std::map< std::string, std::string > mod_group_map_
mapping of modifications with specificity groups, that have to be treated specially (e....
Definition MascotGenericFile.h:124
const DataValue & getMetaValue(const std::string &name) const
Returns the value corresponding to a string, or DataValue::EMPTY if not found.
void setMetaValue(const std::string &name, const DataValue &value)
Sets the DataValue corresponding to a name.
void removeMetaValue(const std::string &name)
Removes the DataValue corresponding to name if it exists.
bool metaValueExists(const std::string &name) const
Returns whether an entry with the given name exists.
A 1-dimensional raw data point or peak.
Definition Peak1D.h:30
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition Peak1D.h:86
void setPosition(PositionType const &position)
Mutable access to the position.
Definition Peak1D.h:125
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setType(SpectrumType type)
sets the spectrum type
const std::string & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
void setNativeID(const std::string &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19