OpenMS  2.8.0
MzMLHandler.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Marc Sturm, Chris Bielow, Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Helpers.h>
39 
41 
44 
47 
51 
52 
53 //MISSING:
54 // - more than one selected ion per precursor (warning if more than one)
55 // - scanWindowList for each acquisition separately (currently for the whole spectrum only)
56 // - instrumentConfigurationRef attribute for scan (why should the instrument change between scans? - warning if used)
57 // - scanSettingsRef attribute for instrumentConfiguration tag (currently no information there because of missing mapping file entry - warning if used)
58 
59 // xs:id/xs:idref prefix list
60 // - sf_ru : sourceFile (run)
61 // - sf_sp : sourceFile (spectrum)
62 // - sf_pr : sourceFile (precursor)
63 // - sf_ac : sourceFile (acquisition)
64 // - sa : sample
65 // - ic : instrumentConfiguration
66 // - so_dp : software (data processing)
67 // - so_in : software (instrument)
68 // - dp_sp : dataProcessing (spectrum)
69 // - dp_bi : dataProcessing (binary data array)
70 // - dp_ch : dataProcessing (chromatogram)
71 
72 namespace OpenMS
73 {
74  namespace Interfaces
75  {
76  class IMSDataConsumer;
77  }
78 
79  namespace Internal
80  {
81  class MzMLValidator;
82 
83  typedef PeakMap MapType;
84  typedef MSSpectrum SpectrumType;
86 
// Handler for the mzML file format; derives from XMLHandler.
// NOTE(review): this listing has gaps in its line numbering, so some declarations
// of the real header are not visible here (see the notes further down).
116  class OPENMS_DLLAPI MzMLHandler :
117  public XMLHandler
118  {
119 public:
120 
123 
    // Constructor for a read-only handler (takes a non-const map).
125  MzMLHandler(MapType& exp, const String& filename, const String& version, const ProgressLogger& logger);
126 
    // Constructor for a write-only handler (takes a const map).
128  MzMLHandler(const MapType& exp, const String& filename, const String& version, const ProgressLogger& logger);
129 
    // Destructor.
131  ~MzMLHandler() override;
133 
139 
    // Overrides XMLHandler::endElement; documented in the base class.
141  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
142 
    // Overrides XMLHandler::startElement; documented in the base class.
144  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
145 
    // Overrides XMLHandler::characters; documented in the base class.
147  void characters(const XMLCh* const chars, const XMLSize_t length) override;
148 
    // Overrides XMLHandler::writeTo; documented in the base class.
150  void writeTo(std::ostream& os) override;
151 
153 
164 
    // Set the peak file options.
166  void setOptions(const PeakFileOptions& opt);
167 
170 
172 
    // Get the spectra and chromatogram counts of a file.
    // NOTE(review): presumably the counts are written to the two reference parameters - confirm.
174  void getCounts(Size& spectra_counts, Size& chromatogram_counts);
175 
185 
189 
    // Handlers which support partial loading implement this method.
191  LOADDETAIL getLoadDetail() const override;
192 
    // Handlers which support partial loading implement this method.
194  void setLoadDetail(const LOADDETAIL d) override;
195 
196 protected:
197 
    // Delegated constructor for the two public versions.
199  MzMLHandler(const String& filename, const String& version, const ProgressLogger& logger);
200 
209 
211 
216 
224 
232 
    // Add extra data arrays to a spectrum.
238  void addSpectrumMetaData_(const std::vector<MzMLHandlerHelper::BinaryData>& input_data,
239  const Size n,
240  SpectrumType& spectrum) const;
241 
    // Fill a single spectrum with data from input.
257  void populateSpectraWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
258  Size& length,
259  const PeakFileOptions& peak_file_options,
260  SpectrumType& spectrum);
261 
    // Fill a single chromatogram with data from input.
274  void populateChromatogramsWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
275  Size& length,
276  const PeakFileOptions& peak_file_options,
277  ChromatogramType& chromatogram);
278 
281 
    // Handles CV terms.
283  void handleCVParam_(const String& parent_parent_tag,
284  const String& parent_tag,
285  const String& accession,
286  const String& name,
287  const String& value,
288  const String& unit_accession = "");
289 
    // Handles user terms.
291  void handleUserParam_(const String& parent_parent_tag,
292  const String& parent_tag,
293  const String& name,
294  const String& type,
295  const String& value,
296  const String& unit_accession = "");
298 
304 
    // Write out the XML header (everything up to spectrumList / chromatogramList).
306  void writeHeader_(std::ostream& os,
307  const MapType& exp,
308  std::vector<std::vector< ConstDataProcessingPtr > >& dps,
309  const Internal::MzMLValidator& validator);
310 
311 
    // Write out a single spectrum.
313  void writeSpectrum_(std::ostream& os,
314  const SpectrumType& spec,
315  Size spec_idx,
316  const Internal::MzMLValidator& validator,
317  bool renew_native_ids,
318  std::vector<std::vector< ConstDataProcessingPtr > >& dps);
319 
    // Write out a single chromatogram.
321  void writeChromatogram_(std::ostream& os,
322  const ChromatogramType& chromatogram,
323  Size chrom_idx,
324  const Internal::MzMLValidator& validator);
325 
326  template <typename ContainerT>
327  void writeContainerData_(std::ostream& os, const PeakFileOptions& pf_options_, const ContainerT& container, String array_type);
328 
    // Write a single <binaryDataArray> element to the output.
341  template <typename DataType>
342  void writeBinaryDataArray_(std::ostream& os,
343  const PeakFileOptions& options,
344  std::vector<DataType>& data,
345  bool is32bit,
346  String array_type);
347 
    // Write a single <binaryDataArray> element for a float data array to the output.
    // NOTE(review): the listing jumps from line 363 to 365 below, so one parameter line
    // (presumably the float data array itself) is not visible here - confirm against the real header.
362  void writeBinaryFloatDataArray_(std::ostream& os,
363  const PeakFileOptions& options,
365  const Size spec_chrom_idx,
366  const Size array_idx,
367  bool is_spectrum,
368  const Internal::MzMLValidator& validator);
369 
    // Writes user terms.
371  void writeUserParam_(std::ostream& os, const MetaInfoInterface& meta, UInt indent, const String& path, const Internal::MzMLValidator& validator, const std::set<String>& exclude = {}) const;
372 
    // Helper method that writes a software.
374  void writeSoftware_(std::ostream& os, const String& id, const Software& software, const Internal::MzMLValidator& validator);
375 
    // Helper method that writes a source file.
377  void writeSourceFile_(std::ostream& os, const String& id, const SourceFile& software, const Internal::MzMLValidator& validator);
378 
    // Helper method that writes a data processing list.
380  void writeDataProcessing_(std::ostream& os, const String& id, const std::vector< ConstDataProcessingPtr >& dps, const Internal::MzMLValidator& validator);
381 
    // Helper method that writes precursor information from spectra and chromatograms.
383  void writePrecursor_(std::ostream& os, const Precursor& precursor, const Internal::MzMLValidator& validator);
384 
    // Helper method that writes the given Product.
386  void writeProduct_(std::ostream& os, const Product& product, const Internal::MzMLValidator& validator);
387 
    // Helper method to write a CV term based on a meta value.
389  String writeCV_(const ControlledVocabulary::CVTerm& c, const DataValue& metaValue) const;
390 
    // Helper method to validate if the given CV term is allowed in the current location (path).
392  bool validateCV_(const ControlledVocabulary::CVTerm& c, const String& path, const Internal::MzMLValidator& validator) const;
393 
    // Helper method to look up a child CV term of parent_accession with the given name.
395  ControlledVocabulary::CVTerm getChildWithName_(const String& parent_accession, const String& name) const;
396 
398 
399  // MEMBERS
400 
    // Pointer to a non-const map; null by default.
402  MapType* exp_{ nullptr };
403 
    // Pointer to a const map; null by default.
405  const MapType* cexp_{ nullptr };
406 
409 
    // The spectrum data (or chromatogram data).
417  std::vector<BinaryData> bin_data_;
421  bool in_spectrum_list_{ false };
423  bool skip_spectrum_{ false };
425  bool skip_chromatogram_{ false };
427  bool rt_set_{ false };
447  UInt selected_ion_count_{ 0 };
448 
    // NOTE(review): the header line of this struct is not visible in the listing; given the
    // vector declared after it, it is presumably SpectrumData (data necessary to generate a
    // single spectrum). Listing lines 459-460 are also not shown - confirm against the real header.
457  {
458  std::vector<BinaryData> data;
461  };
462 
    // Vector of spectrum data stored for later parallel processing.
464  std::vector<SpectrumData> spectrum_data_;
465 
    // NOTE(review): struct header not visible in the listing; presumably ChromatogramData
    // (data necessary to generate a single chromatogram). Listing lines 476-477 are not shown - confirm.
474  {
475  std::vector<BinaryData> data;
478  };
479 
    // Vector of chromatogram data stored for later parallel processing.
481  std::vector<ChromatogramData> chromatogram_data_;
482 
484 
    // Stores binary offsets for each <spectrum> tag.
492  std::vector<std::pair<std::string, Int64> > spectra_offsets_;
    // Stores binary offsets for each <chromatogram> tag.
493  std::vector<std::pair<std::string, Int64> > chromatograms_offsets_;
495 
498 
    // Pointer to an IMSDataConsumer; null by default.
500  Interfaces::IMSDataConsumer* consumer_{ nullptr };
501 
    // Counters start at 0; the *_total_ values start at -1.
    // NOTE(review): -1 presumably means "not yet known" - confirm.
503  UInt scan_count_{ 0 };
504  UInt chromatogram_count_{ 0 };
505  Int scan_count_total_{ -1 };
506  Int chrom_count_total_{ -1 };
508 
512 
513  };
514 
515  //--------------------------------------------------------------------------------
516 
517  } // namespace Internal
518 } // namespace OpenMS
519 
Representation of controlled vocabulary mapping rules (for PSI formats)
Definition: CVMappings.h:57
A 1-dimensional raw data point or peak for chromatograms.
Definition: ChromatogramPeak.h:54
Representation of a controlled vocabulary.
Definition: ControlledVocabulary.h:55
Float data array class.
Definition: DataArrays.h:48
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
The interface of a consumer of spectra and chromatograms.
Definition: IMSDataConsumer.h:70
Handler for mzML file format.
Definition: MzMLHandler.h:118
ControlledVocabulary::CVTerm getChildWithName_(const String &parent_accession, const String &name) const
Helper method to look up a child CV term of parent_accession with the name name. If no such term is f...
Map< String, std::vector< DataProcessingPtr > > processing_
The data processing list: id => list of DataProcessing.
Definition: MzMLHandler.h:443
MzMLHandler(const String &filename, const String &version, const ProgressLogger &logger)
delegated constructor for the two public versions
Size default_array_length_
The default number of peaks in the current spectrum.
Definition: MzMLHandler.h:419
ChromatogramType chromatogram_
The current chromatogram.
Definition: MzMLHandler.h:415
MzMLHandler(const MapType &exp, const String &filename, const String &version, const ProgressLogger &logger)
Constructor for a write-only handler.
void writeProduct_(std::ostream &os, const Product &product, const Internal::MzMLValidator &validator)
Helper method that writes product information from spectra and chromatograms.
ChromatogramType chromatogram
Definition: MzMLHandler.h:477
MSChromatogram ChromatogramType
Chromatogram type.
Definition: MzMLHandler.h:208
LOADDETAIL getLoadDetail() const override
Handlers which support partial loading implement this method.
Map< String, Software > software_
The software list: id => Software.
Definition: MzMLHandler.h:437
void writeBinaryDataArray_(std::ostream &os, const PeakFileOptions &options, std::vector< DataType > &data, bool is32bit, String array_type)
Write a single <binaryDataArray> element to the output.
std::vector< ChromatogramData > chromatogram_data_
Vector of chromatogram data stored for later parallel processing.
Definition: MzMLHandler.h:481
Map< String, Sample > samples_
The sample list: id => Sample.
Definition: MzMLHandler.h:435
const ControlledVocabulary & cv_
Controlled vocabulary (psi-ms from OpenMS/share/OpenMS/CV/psi-ms.obo)
Definition: MzMLHandler.h:510
~MzMLHandler() override
Destructor.
void writeTo(std::ostream &os) override
Docu in base class XMLHandler::writeTo.
MzMLHandler(MapType &exp, const String &filename, const String &version, const ProgressLogger &logger)
Constructor for a read-only handler.
std::vector< std::pair< std::string, Int64 > > chromatograms_offsets_
Stores binary offsets for each <chromatogram> tag.
Definition: MzMLHandler.h:493
Map< String, SourceFile > source_files_
The source files: id => SourceFile.
Definition: MzMLHandler.h:433
void writeHeader_(std::ostream &os, const MapType &exp, std::vector< std::vector< ConstDataProcessingPtr > > &dps, const Internal::MzMLValidator &validator)
Write out the XML header (everything up to spectrumList / chromatogramList).
void populateChromatogramsWithData_()
Populate all chromatograms on the stack with data from input.
std::vector< BinaryData > data
Definition: MzMLHandler.h:458
std::vector< std::pair< std::string, Int64 > > spectra_offsets_
Stores binary offsets for each <spectrum> tag.
Definition: MzMLHandler.h:492
CVMappings mapping_
Definition: MzMLHandler.h:511
const ProgressLogger & logger_
Progress logger.
Definition: MzMLHandler.h:497
SpectrumType spec_
The current spectrum.
Definition: MzMLHandler.h:413
Map< String, std::vector< SemanticValidator::CVTerm > > ref_param_
The referencing param groups: id => array (accession, value)
Definition: MzMLHandler.h:431
String current_id_
Id of the current list. Used for referencing param group, source file, sample, software,...
Definition: MzMLHandler.h:429
void setLoadDetail(const LOADDETAIL d) override
Handlers which support partial loading implement this method.
MSSpectrum SpectrumType
Spectrum type.
Definition: MzMLHandler.h:206
void setMSDataConsumer(Interfaces::IMSDataConsumer *consumer)
Set the IMSDataConsumer consumer which will consume the read data.
PeakFileOptions options_
Options that can be set for loading/storing.
Definition: MzMLHandler.h:408
void getCounts(Size &spectra_counts, Size &chromatogram_counts)
Get the spectra and chromatogram counts of a file.
Size default_array_length
Definition: MzMLHandler.h:459
void handleUserParam_(const String &parent_parent_tag, const String &parent_tag, const String &name, const String &type, const String &value, const String &unit_accession="")
Handles user terms.
MapType::PeakType PeakType
Peak type.
Definition: MzMLHandler.h:202
std::vector< SpectrumData > spectrum_data_
Vector of spectrum data stored for later parallel processing.
Definition: MzMLHandler.h:464
void populateSpectraWithData_()
Populate all spectra on the stack with data from input.
Map< std::pair< String, String >, bool > cached_terms_
CV terms-path-combinations that have been checked in validateCV_()
Definition: MzMLHandler.h:441
MzMLHandlerHelper::BinaryData BinaryData
Definition: MzMLHandler.h:210
void populateSpectraWithData_(std::vector< MzMLHandlerHelper::BinaryData > &input_data, Size &length, const PeakFileOptions &peak_file_options, SpectrumType &spectrum)
Fill a single spectrum with data from input.
PeakFileOptions & getOptions()
Get the peak file options.
void writeContainerData_(std::ostream &os, const PeakFileOptions &pf_options_, const ContainerT &container, String array_type)
void writeBinaryFloatDataArray_(std::ostream &os, const PeakFileOptions &options, const OpenMS::DataArrays::FloatDataArray &array, const Size spec_chrom_idx, const Size array_idx, bool is_spectrum, const Internal::MzMLValidator &validator)
Write a single <binaryDataArray> element for a float data array to the output.
void writeSpectrum_(std::ostream &os, const SpectrumType &spec, Size spec_idx, const Internal::MzMLValidator &validator, bool renew_native_ids, std::vector< std::vector< ConstDataProcessingPtr > > &dps)
Write out a single spectrum.
void writeSoftware_(std::ostream &os, const String &id, const Software &software, const Internal::MzMLValidator &validator)
Helper method that writes a software.
void fillChromatogramData_()
Fills the current chromatogram with data points and meta data.
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
Docu in base class XMLHandler::startElement.
void writeChromatogram_(std::ostream &os, const ChromatogramType &chromatogram, Size chrom_idx, const Internal::MzMLValidator &validator)
Write out a single chromatogram.
void handleCVParam_(const String &parent_parent_tag, const String &parent_tag, const String &accession, const String &name, const String &value, const String &unit_accession="")
Handles CV terms.
void writeUserParam_(std::ostream &os, const MetaInfoInterface &meta, UInt indent, const String &path, const Internal::MzMLValidator &validator, const std::set< String > &exclude={}) const
Writes user terms.
std::vector< BinaryData > bin_data_
The spectrum data (or chromatogram data)
Definition: MzMLHandler.h:417
String writeCV_(const ControlledVocabulary::CVTerm &c, const DataValue &metaValue) const
Helper method to write a CV term based on a meta value.
bool validateCV_(const ControlledVocabulary::CVTerm &c, const String &path, const Internal::MzMLValidator &validator) const
Helper method to validate if the given CV is allowed in the current location (path)
String default_processing_
id of the default data processing (used when no processing is defined)
Definition: MzMLHandler.h:445
void writeSourceFile_(std::ostream &os, const String &id, const SourceFile &software, const Internal::MzMLValidator &validator)
Helper method that writes a source file.
void characters(const XMLCh *const chars, const XMLSize_t length) override
Docu in base class XMLHandler::characters.
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
Docu in base class XMLHandler::endElement.
void setOptions(const PeakFileOptions &opt)
Set the peak file options.
Map< String, Instrument > instruments_
The instrument list: id => Instrument.
Definition: MzMLHandler.h:439
void addSpectrumMetaData_(const std::vector< MzMLHandlerHelper::BinaryData > &input_data, const Size n, SpectrumType &spectrum) const
Add extra data arrays to a spectrum.
void populateChromatogramsWithData_(std::vector< MzMLHandlerHelper::BinaryData > &input_data, Size &length, const PeakFileOptions &peak_file_options, ChromatogramType &chromatogram)
Fill a single chromatogram with data from input.
MapType::ChromatogramPeakType ChromatogramPeakType
Chromatogram peak type.
Definition: MzMLHandler.h:204
void writePrecursor_(std::ostream &os, const Precursor &precursor, const Internal::MzMLValidator &validator)
Helper method that writes precursor information from spectra and chromatograms.
SpectrumType spectrum
Definition: MzMLHandler.h:460
void writeDataProcessing_(std::ostream &os, const String &id, const std::vector< ConstDataProcessingPtr > &dps, const Internal::MzMLValidator &validator)
Helper method that writes a data processing list.
Data necessary to generate a single chromatogram.
Definition: MzMLHandler.h:474
Data necessary to generate a single spectrum.
Definition: MzMLHandler.h:457
Semantically validates mzML files.
Definition: MzMLValidator.h:50
Base class for XML handlers.
Definition: XMLHandler.h:325
LOADDETAIL
Definition: XMLHandler.h:348
The representation of a chromatogram.
Definition: MSChromatogram.h:57
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:73
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:52
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:61
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
Options for loading files containing peak data.
Definition: PeakFileOptions.h:48
Precursor meta information.
Definition: Precursor.h:61
Product meta information.
Definition: Product.h:50
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
Description of the software used for processing.
Definition: Software.h:50
Description of a file location, used to store the origin of (meta) data.
Definition: SourceFile.h:48
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
const double c
Definition: Constants.h:209
MSChromatogram ChromatogramType
Definition: MzDataHandler.h:61
MSSpectrum SpectrumType
Definition: MzDataHandler.h:60
PeakMap MapType
XML handler for MzDataFile.
Definition: MzDataHandler.h:59
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Representation of a CV term.
Definition: ControlledVocabulary.h:61
Representation for binary data in mzML.
Definition: MzMLHandlerHelper.h:70