OpenMS  2.4.0
MzMLHandler.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Marc Sturm, Chris Bielow, Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Helpers.h>
39 
41 
44 
47 
51 
52 
53 //MISSING:
54 // - more than one selected ion per precursor (warning if more than one)
55 // - scanWindowList for each acquisition separately (currently for the whole spectrum only)
56 // - instrumentConfigurationRef attribute for scan (why should the instrument change between scans? - warning if used)
57 // - scanSettingsRef attribute for instrumentConfiguration tag (currently no information there because of missing mapping file entry - warning if used)
58 
59 // xs:id/xs:idref prefix list
60 // - sf_ru : sourceFile (run)
61 // - sf_sp : sourceFile (spectrum)
62 // - sf_pr : sourceFile (precursor)
63 // - sf_ac : sourceFile (acquisition)
64 // - sa : sample
65 // - ic : instrumentConfiguration
66 // - so_dp : software (data processing)
67 // - so_in : software (instrument)
68 // - dp_sp : dataProcessing (spectrum)
69 // - dp_bi : dataProcessing (binary data array)
70 // - dp_ch : dataProcessing (chromatogram)
71 
72 namespace OpenMS
73 {
74  namespace Interfaces
75  {
     // Forward declaration only: this header refers to the consumer interface
     // solely through pointers (setMSDataConsumer() argument and the consumer_ member).
76  class IMSDataConsumer;
77  }
78  namespace Internal
79  {
80  class MzMLValidator; // forward declaration; only used by reference in the signatures of this header
81 
99  typedef PeakMap MapType; ///< Experiment type handed to the MzMLHandler constructors
100  typedef MSSpectrum SpectrumType; ///< Spectrum type (MSSpectrum, the representation of a 1D spectrum)
102 
/// XML handler for mzML files (per the class name), derived from XMLHandler,
/// the base class for XML handlers. It overrides the parser callbacks
/// startElement() / endElement() / characters() as well as writeTo().
///
/// NOTE(review): this listing is an extract. Several numbered lines (type
/// aliases such as ChromatogramType and BinaryData, the headers of two nested
/// structs, and a number of data members) are not present, so some names used
/// below are declared on lines that are not shown here.
103  class OPENMS_DLLAPI MzMLHandler :
104  public XMLHandler
105  {
106 public:
109 
     /// Constructs a handler bound to a mutable experiment @p exp.
     /// NOTE(review): presumably the loading path (exp gets filled) - confirm in the implementation.
111  MzMLHandler(MapType& exp, const String& filename, const String& version, const ProgressLogger& logger);
112 
     /// Constructs a handler bound to a read-only experiment @p exp.
     /// NOTE(review): presumably the storing path (exp is only read) - confirm in the implementation.
114  MzMLHandler(const MapType& exp, const String& filename, const String& version, const ProgressLogger& logger);
115 
     /// Destructor; overrides the base-class destructor.
117  ~MzMLHandler() override;
119 
122 
123  // Documented in the base class
124  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
125 
126  // Documented in the base class
127  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
128 
129  // Documented in the base class
130  void characters(const XMLCh* const chars, const XMLSize_t length) override;
131 
132  // Documented in the base class
133  void writeTo(std::ostream& os) override;
134 
136 
147 
     /// Passes in the PeakFileOptions (options for loading files containing peak data).
149  void setOptions(const PeakFileOptions& opt);
150 
     /// Returns a non-const reference to PeakFileOptions, so callers can modify the options in place.
152  PeakFileOptions& getOptions();
153 
155 
     /// Reports two counts through the output references.
     /// NOTE(review): presumably the number of spectra / chromatograms that passed
     /// the options filter (scan_count_ / chromatogram_count_) - confirm.
157  void getCounts(Size& spectra_counts, Size& chromatogram_counts);
158 
     /// Registers a consumer of spectra and chromatograms.
     /// NOTE(review): raw pointer; ownership and lifetime expectations are not visible here - confirm.
160  void setMSDataConsumer(Interfaces::IMSDataConsumer* consumer);
161 
     /// Overrides the base-class getter for the LOADDETAIL setting.
163  virtual LOADDETAIL getLoadDetail() const override;
164 
     /// Overrides the base-class setter for the LOADDETAIL setting.
166  virtual void setLoadDetail(const LOADDETAIL d) override;
167 
168 protected:
     /// Constructor without an experiment argument; being protected it is only
     /// reachable from this class and derived classes. exp_ and cexp_ have
     /// nullptr in-class initializers.
170  MzMLHandler(const String& filename, const String& version, const ProgressLogger& logger);
171 
180 
182 
     /// Writes a single spectrum to @p os.
     /// NOTE(review): @p s looks like the index of the spectrum - confirm.
183  void writeSpectrum_(std::ostream& os, const SpectrumType& spec, Size s,
184  Internal::MzMLValidator& validator, bool renew_native_ids,
185  std::vector<std::vector< ConstDataProcessingPtr > >& dps);
186 
     /// Writes a single chromatogram to @p os.
     /// NOTE(review): @p c looks like the index of the chromatogram - confirm.
187  void writeChromatogram_(std::ostream& os, const ChromatogramType& chromatogram, Size c, Internal::MzMLValidator& validator);
188 
     /// Writes the data of @p container (a spectrum- or chromatogram-like type) to @p os.
     /// NOTE(review): @p array_type presumably selects which array is written - confirm.
189  template <typename ContainerT>
190  void writeContainerData_(std::ostream& os, const PeakFileOptions& pf_options_, const ContainerT& container, String array_type);
191 
     /// NOTE(review): presumably processes the entries buffered in spectrum_data_ - confirm.
198  void populateSpectraWithData_();
199 
     /// NOTE(review): presumably processes the entries buffered in chromatogram_data_ - confirm.
206  void populateChromatogramsWithData_();
207 
     /// Const member taking binary data arrays, a count @p n and a spectrum to modify.
     /// NOTE(review): the meaning of @p n is not visible in this header - confirm.
208  void addSpectrumMetaData_(const std::vector<MzMLHandlerHelper::BinaryData>& input_data,
209  const Size n, SpectrumType& spectrum) const;
210 
     /// Overload working on one spectrum and its binary data arrays.
     /// Both @p input_data and @p default_arr_length are non-const references, so the callee may modify them.
221  void populateSpectraWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
222  Size& default_arr_length, const PeakFileOptions& peak_file_options,
223  SpectrumType& spectrum);
224 
     /// Overload working on one chromatogram and its binary data arrays.
     /// Both @p input_data and @p default_arr_length are non-const references, so the callee may modify them.
232  void populateChromatogramsWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
233  Size& default_arr_length, const PeakFileOptions& peak_file_options,
234  ChromatogramType& inp_chromatogram);
235 
     /// Writes one binary data array to @p os.
     /// NOTE(review): @p data_to_encode is taken by value, i.e. copied on every call;
     /// check whether the implementation really needs a private copy.
236  template <typename DataType>
237  void writeBinaryDataArray_(std::ostream& os, const PeakFileOptions& pf_options_, std::vector<DataType> data_to_encode, bool is32bit, String array_type);
238 
     /// Writes the header part for @p exp to @p os; @p dps is a non-const reference and may be filled here.
     /// NOTE(review): confirm what exactly is collected in @p dps.
239  void writeHeader_(std::ostream& os, const MapType& exp, std::vector<std::vector< ConstDataProcessingPtr > >& dps, Internal::MzMLValidator& validator);
240 
     /// NOTE(review): presumably fills the current chromatogram from bin_data_ - confirm.
242  void fillChromatogramData_();
243 
     /// Handles a CV parameter given its enclosing tags, accession, name, value and optional unit accession.
245  void handleCVParam_(const String& parent_parent_tag, const String& parent_tag, /* const String & cvref, */ const String& accession, const String& name, const String& value, const String& unit_accession = "");
246 
     /// Handles a user parameter given its enclosing tags, name, type and value.
248  void handleUserParam_(const String& parent_parent_tag, const String& parent_tag, const String& name, const String& type, const String& value);
249 
     /// Writes user parameters of @p meta to @p os (const member).
     /// NOTE(review): @p indent and @p path semantics are not visible here - confirm.
251  void writeUserParam_(std::ostream& os, const MetaInfoInterface& meta, UInt indent, String path, Internal::MzMLValidator& validator) const;
252 
     /// Looks up a CV term by parent accession and name; the term is returned by value.
     /// NOTE(review): behaviour when no matching child exists is not visible here - confirm.
254  ControlledVocabulary::CVTerm getChildWithName_(const String& parent_accession, const String& name) const;
255 
     /// Writes a software entry with the given @p id to @p os.
257  void writeSoftware_(std::ostream& os, const String& id, const Software& software, Internal::MzMLValidator& validator);
258 
     /// Writes a source file entry with the given @p id to @p os.
     /// NOTE(review): the SourceFile parameter is named `software`, which looks like a copy-paste leftover.
260  void writeSourceFile_(std::ostream& os, const String& id, const SourceFile& software, Internal::MzMLValidator& validator);
261 
     /// Writes a data processing entry (a list of processing steps) with the given @p id to @p os.
263  void writeDataProcessing_(std::ostream& os, const String& id, const std::vector< ConstDataProcessingPtr >& dps, Internal::MzMLValidator& validator);
264 
     /// Writes precursor meta information to @p os.
266  void writePrecursor_(std::ostream& os, const Precursor& precursor, Internal::MzMLValidator& validator);
267 
     /// Writes product meta information to @p os.
269  void writeProduct_(std::ostream& os, const Product& product, Internal::MzMLValidator& validator);
270 
     /// Builds a String from a CV term and a value; despite the name, no stream is passed in.
272  String writeCV_(const ControlledVocabulary::CVTerm& c, const DataValue& metaValue) const;
273 
     /// Checks CV term @p c at location @p path using @p validator and returns the result as bool.
     /// NOTE(review): the exact meaning of true/false is not visible here - confirm.
275  bool validateCV_(const ControlledVocabulary::CVTerm& c, const String& path, const Internal::MzMLValidator& validator) const;
276 
277 
278  // MEMBERS
279 
     /// Pointer to a mutable experiment; nullptr unless set by a constructor.
281  MapType* exp_{ nullptr };
     /// Pointer to a read-only experiment; nullptr unless set by a constructor.
283  const MapType* cexp_{ nullptr };
284 
287 
     /// NOTE(review): presumably the spectrum currently being processed - confirm.
290  SpectrumType spec_;
     /// The spectrum data (or chromatogram data).
295  std::vector<BinaryData> bin_data_;
     /// State flags, all false initially.
     /// NOTE(review): meanings inferred from the names only - confirm against the implementation.
299  bool in_spectrum_list_{ false };
301  bool skip_spectrum_{ false };
303  bool skip_chromatogram_{ false };
305  bool rt_set_{ false };
322 
     // NOTE(review): the header line of this nested struct (listing line 330) is
     // missing from the extract. Judging by spectrum_data_ below it is
     // `struct SpectrumData`, the data necessary to generate a single spectrum.
     // Members on listing lines 333-334 are missing as well.
331  {
332  std::vector<BinaryData> data;
     // NOTE(review): skip_data has no in-class initializer, unlike the bool flags above.
335  bool skip_data;
336  };
337 
     /// Vector of spectrum data stored for later parallel processing.
339  std::vector<SpectrumData> spectrum_data_;
340 
     // NOTE(review): the header line of this nested struct (listing line 348) is
     // missing from the extract. Judging by chromatogram_data_ below it is
     // `struct ChromatogramData`, the data necessary to generate a single chromatogram.
     // Members on listing lines 351-352 are missing as well.
349  {
350  std::vector<BinaryData> data;
353  };
354 
     /// Vector of chromatogram data stored for later parallel processing.
356  std::vector<ChromatogramData> chromatogram_data_;
357 
359 
     /// Lists of (string, long) pairs for spectra and chromatograms.
     /// NOTE(review): presumably (native id, file offset) as used for an index - confirm.
361  std::vector<std::pair<std::string, long> > spectra_offsets_;
362  std::vector<std::pair<std::string, long> > chromatograms_offsets_;
364 
367 
     /// Consumer of spectra and chromatograms; nullptr when none has been set.
369  Interfaces::IMSDataConsumer* consumer_{ nullptr };
370 
372  UInt scan_count_{ 0 }; ///< number of scans which pass the options-filter
373  UInt chromatogram_count_{ 0 }; ///< number of chromatograms which pass the options-filter
374  Int scan_count_total_{ -1 }; ///< total number of scans in mzML file (according to 'count' attribute)
375  Int chrom_count_total_{ -1 }; ///< total number of chromatograms in mzML file (according to 'count' attribute)
376 
380 
     /// Counter starting at 0.
     /// NOTE(review): presumably counts the selected ions seen for the current precursor - confirm.
382  UInt selected_ion_count_{ 0 };
383  };
384 
385  //--------------------------------------------------------------------------------
386 
387  } // namespace Internal
388 } // namespace OpenMS
389 
PeakFileOptions options_
Options that can be set for loading/storing.
Definition: MzMLHandler.h:286
Representation of a CV term.
Definition: ControlledVocabulary.h:60
std::vector< SpectrumData > spectrum_data_
Vector of spectrum data stored for later parallel processing.
Definition: MzMLHandler.h:339
A more convenient string class.
Definition: String.h:57
Precursor meta information.
Definition: Precursor.h:57
ControlledVocabulary cv_
Controlled vocabulary (psi-ms from OpenMS/share/OpenMS/CV/psi-ms.obo)
Definition: MzMLHandler.h:378
Product meta information.
Definition: Product.h:48
The representation of a chromatogram.
Definition: MSChromatogram.h:54
Data necessary to generate a single spectrum.
Definition: MzMLHandler.h:330
Semantically validates mzML files.
Definition: MzMLValidator.h:48
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
Map< String, std::vector< DataProcessingPtr > > processing_
The data processing list: id => list of data processing steps.
Definition: MzMLHandler.h:319
std::vector< std::pair< std::string, long > > spectra_offsets_
Definition: MzMLHandler.h:361
std::vector< BinaryData > data
Definition: MzMLHandler.h:350
std::vector< std::pair< std::string, long > > chromatograms_offsets_
Definition: MzMLHandler.h:362
Base class for XML handlers.
Definition: XMLHandler.h:148
Description of a file location, used to store the origin of (meta) data.
Definition: SourceFile.h:46
Binary data representation.
Definition: MzMLHandlerHelper.h:57
Description of the software used for processing.
Definition: Software.h:48
const double c
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
MSChromatogram ChromatogramType
Definition: MzDataHandler.h:61
ChromatogramType chromatogram_
The current chromatogram.
Definition: MzMLHandler.h:293
Map< String, SourceFile > source_files_
The source files: id => SourceFile.
Definition: MzMLHandler.h:311
const ProgressLogger & logger_
Progress logger.
Definition: MzMLHandler.h:366
Map< String, Software > software_
The software list: id => Software.
Definition: MzMLHandler.h:315
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
Representation of a controlled vocabulary.
Definition: ControlledVocabulary.h:54
Size default_array_length
Definition: MzMLHandler.h:333
Size default_array_length_
The default number of peaks in the current spectrum.
Definition: MzMLHandler.h:297
Data necessary to generate a single chromatogram.
Definition: MzMLHandler.h:348
SpectrumType spectrum
Definition: MzMLHandler.h:334
Definition: MzMLHandler.h:103
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
Map< String, Sample > samples_
The sample list: id => Sample.
Definition: MzMLHandler.h:313
MapType::ChromatogramPeakType ChromatogramPeakType
Chromatogram peak type.
Definition: MzMLHandler.h:175
String default_processing_
id of the default data processing (used when no processing is defined)
Definition: MzMLHandler.h:321
CVMappings mapping_
Definition: MzMLHandler.h:379
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:55
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
std::vector< ChromatogramData > chromatogram_data_
Vector of chromatogram data stored for later parallel processing.
Definition: MzMLHandler.h:356
PeakMap MapType
XML handler for MzDataFile.
Definition: MzDataHandler.h:59
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
The interface of a consumer of spectra and chromatograms.
Definition: IMSDataConsumer.h:67
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
MapType::PeakType PeakType
Peak type.
Definition: MzMLHandler.h:173
A 1-dimensional raw data point or peak for chromatograms.
Definition: ChromatogramPeak.h:54
MzMLHandlerHelper::BinaryData BinaryData
Definition: MzMLHandler.h:181
Map< String, Instrument > instruments_
The instrument list: id => Instrument.
Definition: MzMLHandler.h:317
String current_id_
Id of the current list. Used for referencing param group, source file, sample, software, ...
Definition: MzMLHandler.h:307
std::vector< BinaryData > data
Definition: MzMLHandler.h:332
Representation of controlled vocabulary mapping rules (for PSI formats)
Definition: CVMappings.h:56
std::vector< BinaryData > bin_data_
The spectrum data (or chromatogram data)
Definition: MzMLHandler.h:295
Options for loading files containing peak data.
Definition: PeakFileOptions.h:47
Map< String, std::vector< SemanticValidator::CVTerm > > ref_param_
The referencing param groups: id => array (accession, value)
Definition: MzMLHandler.h:309
MSSpectrum SpectrumType
Spectrum type.
Definition: MzMLHandler.h:177
bool skip_data
Definition: MzMLHandler.h:335
LOADDETAIL
Definition: XMLHandler.h:172
int Int
Signed integer type.
Definition: Types.h:102
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
MSChromatogram ChromatogramType
Chromatogram type.
Definition: MzMLHandler.h:179
ChromatogramType chromatogram
Definition: MzMLHandler.h:352
Size default_array_length
Definition: MzMLHandler.h:351
MSSpectrum SpectrumType
Definition: MzDataHandler.h:60