Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MzMLHandler.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Marc Sturm $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FORMAT_HANDLERS_MZMLHANDLER_H
36 #define OPENMS_FORMAT_HANDLERS_MZMLHANDLER_H
37 
41 
43 
46 
48 
53 #include <OpenMS/FORMAT/Base64.h>
58 #include <OpenMS/CONCEPT/Helpers.h>
59 
60 #include <OpenMS/SYSTEM/File.h>
61 
62 #include <sstream>
63 #include <boost/shared_ptr.hpp>
64 #include <iostream>
65 
66 #include <QRegExp>
67 
68 //MISSING:
69 // - more than one selected ion per precursor (warning if more than one)
70 // - scanWindowList for each acquisition separately (currently for the whole spectrum only)
71 // - instrumentConfigurationRef attribute for scan (why should the instrument change between scans? - warning if used)
72 // - scanSettingsRef attribute for instrumentConfiguration tag (currently no information there because of missing mapping file entry - warning if used)
73 
74 // xs:id/xs:idref prefix list
75 // - sf_ru : sourceFile (run)
76 // - sf_sp : sourceFile (spectrum)
77 // - sf_pr : sourceFile (precursor)
78 // - sf_ac : sourceFile (acquisition)
79 // - sa : sample
80 // - ic : instrumentConfiguration
81 // - so_dp : software (data processing)
82 // - so_in : software (instrument)
83 // - dp_sp : dataProcessing (spectrum)
84 // - dp_bi : dataProcessing (binary data array)
85 // - dp_ch : dataProcessing (chromatogram)
86 
87 namespace OpenMS
88 {
89  class ControlledVocabulary;
90  namespace Internal
91  {
92 
110  typedef PeakMap MapType;
111  typedef MSSpectrum SpectrumType;
112  typedef MSChromatogram ChromatogramType;
113 
114  class OPENMS_DLLAPI MzMLHandler :
115  public XMLHandler
116  {
117 public:
120 
122  MzMLHandler(MapType& exp, const String& filename, const String& version, ProgressLogger& logger);
123 
125  MzMLHandler(const MapType& exp, const String& filename, const String& version, const ProgressLogger& logger);
126 
128  virtual ~MzMLHandler();
130 
133 
134  // Docu in base class
135  virtual void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname);
136 
137  // Docu in base class
138  virtual void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes);
139 
140  // Docu in base class
141  virtual void characters(const XMLCh* const chars, const XMLSize_t length);
142 
143  //Docu in base class
144  virtual void writeTo(std::ostream& os);
145 
147 
158 
160  void setOptions(const PeakFileOptions& opt) // Set the peak file options; the handler keeps its own copy of opt.
161  {
162  options_ = opt; // copy-assign into the member, so later changes to the caller's object are not seen here
163  spectrum_data_.reserve(options_.getMaxDataPoolSize()); // reserve capacity for the buffered spectrum data up to the configured pool size
164  chromatogram_data_.reserve(options_.getMaxDataPoolSize()); // same capacity reservation for the buffered chromatogram data
165  }
166 
169  {
170  return options_;
171  }
172 
174 
176  void getCounts(Size& spectra_counts, Size& chromatogram_counts) // Get the spectra and chromatogram counts; both arguments are output parameters.
177  {
178  spectra_counts = scan_count; // overwrite the caller's variable with the handler's scan_count member
179  chromatogram_counts = chromatogram_count; // overwrite the caller's variable with the handler's chromatogram_count member
180  }
181 
184  {
185  consumer_ = consumer;
186  }
187 
188 protected:
189 
198 
200 
201  void writeSpectrum_(std::ostream& os, const SpectrumType& spec, Size s,
202  Internal::MzMLValidator& validator, bool renew_native_ids,
203  std::vector<std::vector< ConstDataProcessingPtr > >& dps);
204 
205  void writeChromatogram_(std::ostream& os, const ChromatogramType& chromatogram, Size c, Internal::MzMLValidator& validator);
206 
207  template <typename ContainerT>
208  void writeContainerData(std::ostream& os, const PeakFileOptions& pf_options_, const ContainerT& container, String array_type);
209 
216  void populateSpectraWithData();
217 
224  void populateChromatogramsWithData();
225 
226  void addSpectrumMetaData_(const std::vector<MzMLHandlerHelper::BinaryData>& input_data,
227  const Size n, SpectrumType& spectrum) const;
228 
239  void populateSpectraWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
240  Size& default_arr_length, const PeakFileOptions& peak_file_options,
241  SpectrumType& spectrum);
242 
250  void populateChromatogramsWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
251  Size& default_arr_length, const PeakFileOptions& peak_file_options,
252  ChromatogramType& inp_chromatogram);
253 
254  template <typename DataType>
255  void writeBinaryDataArray(std::ostream& os, const PeakFileOptions& pf_options_, std::vector<DataType> data_to_encode, bool is32bit, String array_type);
256 
257  void writeHeader_(std::ostream& os, const MapType& exp, std::vector<std::vector< ConstDataProcessingPtr > >& dps, Internal::MzMLValidator& validator);
258 
260  MapType* exp_;
262  const MapType* cexp_;
263 
266 
269  SpectrumType spec_;
272  ChromatogramType chromatogram_;
274  std::vector<BinaryData> data_;
295 
304  {
305  std::vector<BinaryData> data;
307  SpectrumType spectrum;
308  bool skip_data;
309  };
310 
312  std::vector<SpectrumData> spectrum_data_;
313 
322  {
323  std::vector<BinaryData> data;
325  ChromatogramType chromatogram;
326  };
327 
329  std::vector<ChromatogramData> chromatogram_data_;
330 
332 
334  std::vector<std::pair<std::string, long> > spectra_offsets;
335  std::vector<std::pair<std::string, long> > chromatograms_offsets;
337 
340 
343 
346 
350 
354 
355  // Remember whether the RT of the spectrum was set or not
356  bool rt_set_;
357 
361  //~ Internal::MzMLValidator validator_;
362 
365 
366  /*
368  void fillData_();
369  */
370 
372  void fillChromatogramData_();
373 
375  void handleCVParam_(const String& parent_parent_tag, const String& parent_tag, /* const String & cvref, */ const String& accession, const String& name, const String& value, const String& unit_accession = "");
376 
378  void handleUserParam_(const String& parent_parent_tag, const String& parent_tag, const String& name, const String& type, const String& value);
379 
381  void writeUserParam_(std::ostream& os, const MetaInfoInterface& meta, UInt indent, String path, Internal::MzMLValidator& validator) const;
382 
384  ControlledVocabulary::CVTerm getChildWithName_(const String& parent_accession, const String& name) const;
385 
387  void writeSoftware_(std::ostream& os, const String& id, const Software& software, Internal::MzMLValidator& validator);
388 
390  void writeSourceFile_(std::ostream& os, const String& id, const SourceFile& software, Internal::MzMLValidator& validator);
391 
393  void writeDataProcessing_(std::ostream& os, const String& id, const std::vector< ConstDataProcessingPtr >& dps, Internal::MzMLValidator& validator);
394 
396  void writePrecursor_(std::ostream& os, const Precursor& precursor, Internal::MzMLValidator& validator);
397 
399  void writeProduct_(std::ostream& os, const Product& product, Internal::MzMLValidator& validator);
400 
402  String writeCV_(const ControlledVocabulary::CVTerm& c, const DataValue& metaValue) const;
403 
405  bool validateCV_(const ControlledVocabulary::CVTerm& c, const String& path, const Internal::MzMLValidator& validator) const;
406  };
407 
408  //--------------------------------------------------------------------------------
409 
410  } // namespace Internal
411 } // namespace OpenMS
412 
413 #endif
PeakFileOptions options_
Options that can be set for loading/storing.
Definition: MzMLHandler.h:265
Representation of a CV term.
Definition: ControlledVocabulary.h:61
std::vector< SpectrumData > spectrum_data_
Vector of spectrum data stored for later parallel processing.
Definition: MzMLHandler.h:312
A more convenient string class.
Definition: String.h:57
Precursor meta information.
Definition: Precursor.h:58
Class to encode and decode Base64.
Definition: Base64.h:64
ControlledVocabulary cv_
Controlled vocabulary (psi-ms from OpenMS/share/OpenMS/CV/psi-ms.obo)
Definition: MzMLHandler.h:359
Product meta information.
Definition: Product.h:49
The representation of a chromatogram.
Definition: MSChromatogram.h:55
bool rt_set_
Definition: MzMLHandler.h:356
Interfaces::IMSDataConsumer * consumer_
Consumer class to work on spectra.
Definition: MzMLHandler.h:345
Data necessary to generate a single spectrum.
Definition: MzMLHandler.h:303
Semantically validates MzML files.
Definition: MzMLValidator.h:49
unsigned int UInt
Unsigned integer type.
Definition: Types.h:95
Map< String, std::vector< DataProcessingPtr > > processing_
The data processing list: id => list of DataProcessing steps.
Definition: MzMLHandler.h:292
std::vector< BinaryData > data
Definition: MzMLHandler.h:323
Base class for XML handlers.
Definition: XMLHandler.h:110
Description of a file location, used to store the origin of (meta) data.
Definition: SourceFile.h:47
Binary data representation.
Definition: MzMLHandlerHelper.h:58
Description of the software used for processing.
Definition: Software.h:49
const double c
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
MSChromatogram ChromatogramType
Definition: MzDataHandler.h:63
std::vector< std::pair< std::string, long > > spectra_offsets
Definition: MzMLHandler.h:334
ChromatogramType chromatogram_
The current chromatogram.
Definition: MzMLHandler.h:272
Map< String, SourceFile > source_files_
The source files: id => SourceFile.
Definition: MzMLHandler.h:284
const ProgressLogger & logger_
Progress logger.
Definition: MzMLHandler.h:342
Map< String, Software > software_
The software list: id => Software.
Definition: MzMLHandler.h:288
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:57
Representation of a controlled vocabulary.
Definition: ControlledVocabulary.h:55
Size default_array_length
Definition: MzMLHandler.h:306
Size default_array_length_
The default number of peaks in the current spectrum.
Definition: MzMLHandler.h:276
Data necessary to generate a single chromatogram.
Definition: MzMLHandler.h:321
SpectrumType spectrum
Definition: MzMLHandler.h:307
Definition: MzMLHandler.h:114
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
MapType * exp_
map pointer for reading
Definition: MzMLHandler.h:260
bool skip_spectrum_
Definition: MzMLHandler.h:353
Base64 decoder_
Decoder/Encoder for Base64-data in MzML.
Definition: MzMLHandler.h:339
Map< String, Sample > samples_
The sample list: id => Sample.
Definition: MzMLHandler.h:286
MapType::ChromatogramPeakType ChromatogramPeakType
Chromatogram peak type.
Definition: MzMLHandler.h:193
String default_processing_
id of the default data processing (used when no processing is defined)
Definition: MzMLHandler.h:294
PeakFileOptions & getOptions()
Get the peak file options.
Definition: MzMLHandler.h:168
MSExperiment PeakMap
Two-dimensional map of raw data points or peaks.
Definition: StandardTypes.h:59
CVMappings mapping_
Definition: MzMLHandler.h:360
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:55
void setMSDataConsumer(Interfaces::IMSDataConsumer *consumer)
Set the IMSDataConsumer consumer which will consume the read data.
Definition: MzMLHandler.h:183
std::vector< BinaryData > data_
The spectrum data (or chromatogram data)
Definition: MzMLHandler.h:274
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:56
bool in_spectrum_list_
Flag that indicates that we're inside a spectrum (in contrast to a chromatogram)
Definition: MzMLHandler.h:278
void getCounts(Size &spectra_counts, Size &chromatogram_counts)
Get the spectra and chromatogram counts of a file.
Definition: MzMLHandler.h:176
std::vector< ChromatogramData > chromatogram_data_
Vector of chromatogram data stored for later parallel processing.
Definition: MzMLHandler.h:329
PeakMap MapType
XML handler for MzDataFile.
Definition: MzDataHandler.h:61
UInt scan_count
Counting spectra and chromatograms.
Definition: MzMLHandler.h:348
bool skip_chromatogram_
Flag that indicates whether this chromatogram should be skipped (due to options)
Definition: MzMLHandler.h:352
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
The interface of a consumer of spectra and chromatograms.
Definition: IMSDataConsumer.h:68
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
UInt selected_ion_count_
Count of selected ions.
Definition: MzMLHandler.h:364
MapType::PeakType PeakType
Peak type.
Definition: MzMLHandler.h:191
void setOptions(const PeakFileOptions &opt)
Set the peak file options.
Definition: MzMLHandler.h:160
A 1-dimensional raw data point or peak for chromatograms.
Definition: ChromatogramPeak.h:55
std::vector< std::pair< std::string, long > > chromatograms_offsets
Definition: MzMLHandler.h:335
MzMLHandlerHelper::BinaryData BinaryData
Definition: MzMLHandler.h:199
Map< String, Instrument > instruments_
The instrument list: id => Instrument.
Definition: MzMLHandler.h:290
String current_id_
Id of the current list. Used for referencing param group, source file, sample, software, ...
Definition: MzMLHandler.h:280
std::vector< BinaryData > data
Definition: MzMLHandler.h:305
Representation of controlled vocabulary mapping rules (for PSI formats)
Definition: CVMappings.h:57
Options for loading files containing peak data.
Definition: PeakFileOptions.h:48
Map< String, std::vector< SemanticValidator::CVTerm > > ref_param_
The referencing param groups: id => array (accession, value)
Definition: MzMLHandler.h:282
MSSpectrum SpectrumType
Spectrum type.
Definition: MzMLHandler.h:195
bool skip_data
Definition: MzMLHandler.h:308
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:51
const MapType * cexp_
map pointer for writing
Definition: MzMLHandler.h:262
MSChromatogram ChromatogramType
Chromatogram type.
Definition: MzMLHandler.h:197
ChromatogramType chromatogram
Definition: MzMLHandler.h:325
UInt chromatogram_count
Definition: MzMLHandler.h:349
Size default_array_length
Definition: MzMLHandler.h:324
MSSpectrum SpectrumType
Definition: MzDataHandler.h:62

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:02 using doxygen 1.8.13