Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SwathFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FORMAT_SWATHFILE_H
36 #define OPENMS_FORMAT_SWATHFILE_H
37 
// Datastructures
#include <OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/SimpleOpenMSSpectraAccessFactory.h>

#include <OpenMS/FORMAT/MzMLFile.h>
48 #ifdef OPENMS_FORMAT_SWATHFILE_MZXMLSUPPORT
50 #endif
51 
53 
54 namespace OpenMS
55 {
56 
69  class OPENMS_DLLAPI SwathFile :
70  public ProgressLogger
71  {
72 public:
73 
75  std::vector<OpenSwath::SwathMap> loadSplit(StringList file_list, String tmp,
76  boost::shared_ptr<ExperimentalSettings>& exp_meta, String readoptions = "normal")
77  {
78  int progress = 0;
79  startProgress(0, file_list.size(), "Loading data");
80 
81  std::vector<OpenSwath::SwathMap> swath_maps(file_list.size());
82 #ifdef _OPENMP
83 #pragma omp parallel for
84 #endif
85  for (SignedSize i = 0; i < boost::numeric_cast<SignedSize>(file_list.size()); ++i)
86  {
87 
88 #ifdef _OPENMP
89 #pragma omp critical (OPENMS_SwathFile_loadSplit)
90 #endif
91  {
92  std::cout << "Loading file " << i << " with name " << file_list[i] << " using readoptions " << readoptions << std::endl;
93  }
94 
95  String tmp_fname = "openswath_tmpfile_" + String(i) + ".mzML";
96 
97  boost::shared_ptr<PeakMap > exp(new PeakMap);
98  OpenSwath::SpectrumAccessPtr spectra_ptr;
99 
100  // Populate meta-data
101  if (i == 0)
102  {
103  exp_meta = populateMetaData_(file_list[i]);
104  }
105 
106  if (readoptions == "normal")
107  {
108  MzMLFile().load(file_list[i], *exp.get());
110  }
111  else if (readoptions == "cache")
112  {
113  // Cache and load the exp (metadata only) file again
114  spectra_ptr = doCacheFile_(file_list[i], tmp, tmp_fname, exp);
115  }
116  else
117  {
118  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
119  "Unknown option " + readoptions);
120  }
121 
122  OpenSwath::SwathMap swath_map;
123 
124  bool ms1 = false;
125  double upper = -1, lower = -1;
126  if (exp->size() == 0)
127  {
128  std::cerr << "WARNING: File " << file_list[i] << "\n does not have any scans - I will skip it" << std::endl;
129  continue;
130  }
131  if (exp->getSpectra()[0].getPrecursors().size() == 0)
132  {
133  std::cout << "NOTE: File " << file_list[i] << "\n does not have any precursors - I will assume it is the MS1 scan." << std::endl;
134  ms1 = true;
135  }
136  else
137  {
138  // Checks that this is really a SWATH map and extracts upper/lower window
139  OpenSwathHelper::checkSwathMap(*exp.get(), lower, upper);
140  }
141 
142  swath_map.sptr = spectra_ptr;
143  swath_map.lower = lower;
144  swath_map.upper = upper;
145  swath_map.ms1 = ms1;
146 #ifdef _OPENMP
147 #pragma omp critical (OPENMS_SwathFile_loadSplit)
148 #endif
149  {
150  LOG_DEBUG << "Adding Swath file " << file_list[i] << " with " << swath_map.lower << " to " << swath_map.upper << std::endl;
151  swath_maps[i] = swath_map;
152  setProgress(progress++);
153  }
154  }
155  endProgress();
156  return swath_maps;
157  }
158 
160  std::vector<OpenSwath::SwathMap> loadMzML(String file, String tmp,
161  boost::shared_ptr<ExperimentalSettings>& exp_meta, String readoptions = "normal")
162  {
163  std::cout << "Loading mzML file " << file << " using readoptions " << readoptions << std::endl;
164  String tmp_fname = "openswath_tmpfile";
165 
166  startProgress(0, 1, "Loading metadata file " + file);
167  boost::shared_ptr<PeakMap> experiment_metadata = populateMetaData_(file);
168  exp_meta = experiment_metadata;
169 
170  // First pass through the file -> get the meta data
171  std::cout << "Will analyze the metadata first to determine the number of SWATH windows and the window sizes." << std::endl;
172  std::vector<int> swath_counter;
173  int nr_ms1_spectra;
174  std::vector<OpenSwath::SwathMap> known_window_boundaries;
175  countScansInSwath_(experiment_metadata->getSpectra(), swath_counter, nr_ms1_spectra, known_window_boundaries);
176  std::cout << "Determined there to be " << swath_counter.size() <<
177  " SWATH windows and in total " << nr_ms1_spectra << " MS1 spectra" << std::endl;
178  endProgress();
179 
180  FullSwathFileConsumer* dataConsumer;
181  boost::shared_ptr<PeakMap> exp(new PeakMap);
182  startProgress(0, 1, "Loading data file " + file);
183  if (readoptions == "normal")
184  {
185  dataConsumer = new RegularSwathFileConsumer(known_window_boundaries);
186  MzMLFile().transform(file, dataConsumer, *exp.get());
187  }
188  else if (readoptions == "cache")
189  {
190  dataConsumer = new CachedSwathFileConsumer(known_window_boundaries, tmp, tmp_fname, nr_ms1_spectra, swath_counter);
191  MzMLFile().transform(file, dataConsumer, *exp.get());
192  }
193  else if (readoptions == "split")
194  {
195  dataConsumer = new MzMLSwathFileConsumer(known_window_boundaries, tmp, tmp_fname, nr_ms1_spectra, swath_counter);
196  MzMLFile().transform(file, dataConsumer, *exp.get());
197  }
198  else
199  {
200  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
201  "Unknown or unsupported option " + readoptions);
202  }
203  LOG_DEBUG << "Finished parsing Swath file " << std::endl;
204  std::vector<OpenSwath::SwathMap> swath_maps;
205  dataConsumer->retrieveSwathMaps(swath_maps);
206  delete dataConsumer;
207 
208  endProgress();
209  return swath_maps;
210  }
211 
213  std::vector<OpenSwath::SwathMap> loadMzXML(String file, String tmp,
214  boost::shared_ptr<ExperimentalSettings>& exp_meta, String readoptions = "normal")
215  {
216  std::cout << "Loading mzXML file " << file << " using readoptions " << readoptions << std::endl;
217  String tmp_fname = "openswath_tmpfile";
218 
219  startProgress(0, 1, "Loading metadata file " + file);
220  boost::shared_ptr<PeakMap > experiment_metadata(new PeakMap);
221  MzXMLFile f;
223  f.getOptions().setFillData(false);
224  f.load(file, *experiment_metadata);
225  exp_meta = experiment_metadata;
226 
227  // First pass through the file -> get the meta data
228  std::cout << "Will analyze the metadata first to determine the number of SWATH windows and the window sizes." << std::endl;
229  std::vector<int> swath_counter;
230  int nr_ms1_spectra;
231  std::vector<OpenSwath::SwathMap> known_window_boundaries;
232  countScansInSwath_(experiment_metadata->getSpectra(), swath_counter, nr_ms1_spectra, known_window_boundaries);
233  std::cout << "Determined there to be " << swath_counter.size() <<
234  " SWATH windows and in total " << nr_ms1_spectra << " MS1 spectra" << std::endl;
235  endProgress();
236 
237  FullSwathFileConsumer* dataConsumer;
238  boost::shared_ptr<PeakMap > exp(new PeakMap);
239  startProgress(0, 1, "Loading data file " + file);
240  if (readoptions == "normal")
241  {
242  dataConsumer = new RegularSwathFileConsumer(known_window_boundaries);
243  MzXMLFile().transform(file, dataConsumer, *exp.get());
244  }
245  else if (readoptions == "cache")
246  {
247  dataConsumer = new CachedSwathFileConsumer(known_window_boundaries, tmp, tmp_fname, nr_ms1_spectra, swath_counter);
248  MzXMLFile().transform(file, dataConsumer, *exp.get());
249  }
250  else if (readoptions == "split")
251  {
252  dataConsumer = new MzMLSwathFileConsumer(known_window_boundaries, tmp, tmp_fname, nr_ms1_spectra, swath_counter);
253  MzXMLFile().transform(file, dataConsumer, *exp.get());
254  }
255  else
256  {
257  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
258  "Unknown or unsupported option " + readoptions);
259  }
260  LOG_DEBUG << "Finished parsing Swath file " << std::endl;
261  std::vector<OpenSwath::SwathMap> swath_maps;
262  dataConsumer->retrieveSwathMaps(swath_maps);
263  delete dataConsumer;
264 
265  endProgress();
266  return swath_maps;
267  }
268 
269 protected:
270 
273  boost::shared_ptr<PeakMap > experiment_metadata)
274  {
275  String cached_file = tmp + tmp_fname + ".cached";
276  String meta_file = tmp + tmp_fname;
277 
278  // Create new consumer, transform infile, write out metadata
279  MSDataCachedConsumer* cachedConsumer = new MSDataCachedConsumer(cached_file, true);
280  MzMLFile().transform(in, cachedConsumer, *experiment_metadata.get());
281  CachedmzML().writeMetadata(*experiment_metadata.get(), meta_file, true);
282  delete cachedConsumer; // ensure that filestream gets closed
283 
284  boost::shared_ptr<PeakMap > exp(new PeakMap);
285  MzMLFile().load(meta_file, *exp.get());
287  }
288 
290  boost::shared_ptr< PeakMap > populateMetaData_(String file)
291  {
292  boost::shared_ptr<PeakMap > experiment_metadata(new PeakMap);
293  MzMLFile f;
295  f.getOptions().setFillData(false);
296  f.load(file, *experiment_metadata);
297  return experiment_metadata;
298  }
299 
301  void countScansInSwath_(const std::vector<MSSpectrum> exp,
302  std::vector<int>& swath_counter, int& nr_ms1_spectra,
303  std::vector<OpenSwath::SwathMap>& known_window_boundaries)
304  {
305  int ms1_counter = 0;
306  for (Size i = 0; i < exp.size(); i++)
307  {
308  const MSSpectrum& s = exp[i];
309  {
310  if (s.getMSLevel() == 1)
311  {
312  ms1_counter++;
313  }
314  else
315  {
316  if (s.getPrecursors().empty())
317  {
318  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
319  "Found SWATH scan (MS level 2 scan) without a precursor. Cannot determine SWATH window.");
320  }
321  const std::vector<Precursor> prec = s.getPrecursors();
322  double center = prec[0].getMZ();
323  bool found = false;
324  for (Size j = 0; j < known_window_boundaries.size(); j++)
325  {
326  // We group by the precursor mz (center of the window) since this
327  // should be present
328  if (std::fabs(center - known_window_boundaries[j].center) < 1e-6)
329  {
330  found = true;
331  swath_counter[j]++;
332  }
333  }
334  if (!found)
335  {
336  // we found a new SWATH scan
337  swath_counter.push_back(1);
338  double lower = prec[0].getMZ() - prec[0].getIsolationWindowLowerOffset();
339  double upper = prec[0].getMZ() + prec[0].getIsolationWindowUpperOffset();
340  OpenSwath::SwathMap boundary;
341  boundary.lower = lower;
342  boundary.upper = upper;
343  boundary.center = center;
344  known_window_boundaries.push_back(boundary);
345 
346  LOG_DEBUG << "Adding Swath centered at " << center
347  << " m/z with an isolation window of " << lower << " to " << upper
348  << " m/z." << std::endl;
349  }
350  }
351  }
352  }
353  nr_ms1_spectra = ms1_counter;
354 
355  std::cout << "Determined there to be " << swath_counter.size() <<
356  " SWATH windows and in total " << nr_ms1_spectra << " MS1 spectra" << std::endl;
357  }
358 
359  };
360 }
361 
362 #endif
A more convenient string class.
Definition: String.h:57
File adapter for MzXML 3.1 files.
Definition: MzXMLFile.h:53
boost::shared_ptr< ISpectrumAccess > SpectrumAccessPtr
Definition: openswathalgo/include/OpenMS/ANALYSIS/OPENSWATH/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:90
static OpenSwath::SpectrumAccessPtr getSpectrumAccessOpenMSPtr(boost::shared_ptr< OpenMS::PeakMap > exp)
Simple Factory method to get a SpectrumAccess Ptr from an MSExperiment.
std::vector< OpenSwath::SwathMap > loadMzML(String file, String tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, String readoptions="normal")
Loads a Swath run from a single mzML file.
Definition: SwathFile.h:160
OpenSwath::SpectrumAccessPtr doCacheFile_(String in, String tmp, String tmp_fname, boost::shared_ptr< PeakMap > experiment_metadata)
Cache a file to disk.
Definition: SwathFile.h:272
On-disk mzML implementation of FullSwathFileConsumer.
Definition: SwathFileConsumer.h:546
void retrieveSwathMaps(std::vector< OpenSwath::SwathMap > &maps)
Populate the vector of swath maps after consuming all spectra.
Definition: SwathFileConsumer.h:152
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:135
Transforming and cached writing consumer of MS data.
Definition: MSDataCachedConsumer.h:55
std::vector< OpenSwath::SwathMap > loadMzXML(String file, String tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, String readoptions="normal")
Loads a Swath run from a single mzXML file.
Definition: SwathFile.h:213
boost::shared_ptr< PeakMap > populateMetaData_(String file)
Only read the meta data from a file and use it to populate exp_meta.
Definition: SwathFile.h:290
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:459
Abstract base class which can consume spectra coming from a SWATH experiment stored in a single file...
Definition: SwathFileConsumer.h:101
void load(const String &filename, MapType &map)
Loads a map from a MzXML file.
File adapter for MzML files.
Definition: MzMLFile.h:56
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
A method or algorithm argument contains illegal values.
Definition: Exception.h:649
File adapter for Swath files.
Definition: SwathFile.h:69
void load(const String &filename, PeakMap &map)
Loads a map from a MzML file. Spectra and chromatograms are sorted by default (this can be disabled u...
On-disk cached implementation of FullSwathFileConsumer.
Definition: SwathFileConsumer.h:392
Data structure to hold one SWATH map with information about upper / lower isolation window and whethe...
Definition: SwathMap.h:46
OpenSwath::SpectrumAccessPtr sptr
Definition: SwathMap.h:48
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:348
In-memory implementation of FullSwathFileConsumer.
Definition: SwathFileConsumer.h:334
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:74
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
std::vector< OpenSwath::SwathMap > loadSplit(StringList file_list, String tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, String readoptions="normal")
Loads a Swath run from a list of split mzML files.
Definition: SwathFile.h:75
A class that uses on-disk caching to read and write spectra and chromatograms.
Definition: CachedMzML.h:64
static void checkSwathMap(const OpenMS::PeakMap &swath_map, double &lower, double &upper)
Get the lower / upper offset for this SWATH map and do some sanity checks.
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
void transform(const String &filename_in, Interfaces::IMSDataConsumer *consumer, bool skip_full_count=false)
Transforms a map while loading using the supplied MSDataConsumer.
UInt getMSLevel() const
Returns the MS level.
void countScansInSwath_(const std::vector< MSSpectrum > exp, std::vector< int > &swath_counter, int &nr_ms1_spectra, std::vector< OpenSwath::SwathMap > &known_window_boundaries)
Counts the number of scans in a full Swath file (e.g. concatenated non-split file) ...
Definition: SwathFile.h:301
double center
Definition: SwathMap.h:51
void writeMetadata(MapType exp, String out_meta, bool addCacheMetaValue=false)
Write only the meta data of an MSExperiment.
void setAlwaysAppendData(bool only)
sets whether or not to always append the data to the given map (even if a consumer is given) ...
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
void transform(const String &filename_in, Interfaces::IMSDataConsumer *consumer, bool skip_full_count=false, bool skip_first_pass=false)
Transforms a map while loading using the supplied MSDataConsumer.
double upper
Definition: SwathMap.h:50
void setFillData(bool only)
sets whether to fill the actual data into the container (spectrum/chromatogram)
bool ms1
Definition: SwathMap.h:52
double lower
Definition: SwathMap.h:49

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:04 using doxygen 1.8.13