OpenMS
MzIdentMLHandler.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Mathias Walzer $
6 // $Authors: Mathias Walzer, Andreas Bertsch $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
12 
20 
21 #include <vector>
22 #include <map>
23 
24 namespace OpenMS
25 {
26  class ProgressLogger;
27 
28  namespace Internal
29  {
30 
36  class OPENMS_DLLAPI IdentificationHit :
37  public MetaInfoInterface
38  {
39  public:
41 
42  IdentificationHit() = default;
44 
47 
49  virtual ~IdentificationHit() = default;
50 
52  IdentificationHit(IdentificationHit&&) noexcept = default;
53 
55  IdentificationHit& operator=(const IdentificationHit&) = default;
56 
58  IdentificationHit& operator=(IdentificationHit&&) noexcept = default;
60 
62 
63  bool operator==(const IdentificationHit& rhs) const noexcept;
65 
67  bool operator!=(const IdentificationHit& rhs) const noexcept;
69 
71 
72  void setId(const std::string& id) noexcept;
74 
76  const std::string& getId() const noexcept;
77 
79  void setCharge(int charge) noexcept;
80 
82  int getCharge() const noexcept;
83 
85  void setCalculatedMassToCharge(double mz) noexcept;
86 
88  double getCalculatedMassToCharge() const noexcept;
89 
91  void setExperimentalMassToCharge(double mz) noexcept;
92 
94  double getExperimentalMassToCharge() const noexcept;
95 
97  void setName(const std::string& name) noexcept;
98 
100  const std::string& getName() const noexcept;
101 
103  void setPassThreshold(bool pass) noexcept;
104 
106  bool getPassThreshold() const noexcept;
107 
109  void setRank(int rank) noexcept;
110 
112  int getRank() const noexcept;
114 
115  private:
116  std::string id_;
117  int charge_ = 0;
118  double calculated_mass_to_charge_ = 0.0;
119  double experimental_mass_to_charge_ = 0.0;
120  std::string name_;
121  bool pass_threshold_ = true;
122  int rank_ = 0;
123  };
124 
132  class OPENMS_DLLAPI SpectrumIdentification :
133  public MetaInfoInterface
134  {
135 public:
136 
138 
139  SpectrumIdentification() = default;
152  bool operator==(const SpectrumIdentification & rhs) const;
154  bool operator!=(const SpectrumIdentification & rhs) const;
156 
157  // @name Accessors
159  void setHits(const std::vector<IdentificationHit> & hits);
161 
163  void addHit(const IdentificationHit & hit);
164 
166  const std::vector<IdentificationHit> & getHits() const;
168 
169 protected:
170 
172  std::vector<IdentificationHit> hits_;
173  };
174 
182  class OPENMS_DLLAPI Identification :
183  public MetaInfoInterface
184  {
185  public:
186 
188 
189 
191  Identification() = default;
193  Identification(const Identification & source) = default;
197  virtual ~Identification();
198 
200  Identification & operator=(const Identification & source) = default;
203 
205  bool operator==(const Identification & rhs) const;
207  bool operator!=(const Identification & rhs) const;
209 
211 
212  void setCreationDate(const DateTime & date);
214 
216  const DateTime & getCreationDate() const;
217 
219  void setSpectrumIdentifications(const std::vector<SpectrumIdentification> & ids);
220 
223 
225  const std::vector<SpectrumIdentification> & getSpectrumIdentifications() const;
227  protected:
230  std::vector<SpectrumIdentification> spectrum_identifications_;
231  };
232 
242  class OPENMS_DLLAPI MzIdentMLHandler :
243  public XMLHandler
244  {
245 public:
249  MzIdentMLHandler(const std::vector<ProteinIdentification>& pro_id, const std::vector<PeptideIdentification>& pep_id, const String& filename, const String& version, const ProgressLogger& logger);
250 
252  MzIdentMLHandler(std::vector<ProteinIdentification>& pro_id, std::vector<PeptideIdentification>& pep_id, const String& filename, const String& version, const ProgressLogger& logger);
253 
255  ~MzIdentMLHandler() override;
257 
258 
259  // Docu in base class
260  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
261 
262  // Docu in base class
263  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
264 
265  // Docu in base class
266  void characters(const XMLCh* const chars, const XMLSize_t length) override;
267 
268  //Docu in base class
269  void writeTo(std::ostream& os) override;
270 
271 protected:
274 
279 
280  //~ PeakMap* ms_exp_;
281 
284 
288  std::vector<ProteinIdentification>* pro_id_;
290  std::vector<PeptideIdentification>* pep_id_;
291 
293  const std::vector<ProteinIdentification>* cpro_id_;
294  const std::vector<PeptideIdentification>* cpep_id_;
295 
298 
301 
303  void handleCVParam_(const String& parent_parent_tag, const String& parent_tag, const String& accession, /* const String& name, */ /* const String& value, */ const xercesc::Attributes& attributes, const String& cv_ref /* , const String& unit_accession="" */);
304 
306  void handleUserParam_(const String& parent_parent_tag, const String& parent_tag, const String& name, const String& type, const String& value);
307 
309  void writeMetaInfos_(String& s, const MetaInfoInterface& meta, UInt indent) const;
310 
312  ControlledVocabulary::CVTerm getChildWithName_(const String& parent_accession, const String& name) const;
313 
315  //void writeSourceFile_(std::ostream& os, const String& id, const SourceFile& software);
316 
318  void writeEnzyme_(String& s, const DigestionEnzymeProtein& enzy, UInt miss, UInt indent) const;
319 
321  void writeModParam_(String& s, const std::vector<String>& mod_names, bool fixed, UInt indent) const;
322 
324  void writeFragmentAnnotations_(String& s, const std::vector<PeptideHit::PeakAnnotation>& annotations, UInt indent, bool is_ppxl) const;
325 
327  String trimOpenMSfileURI(const String& file) const;
328 
330  void writePeptideHit(const PeptideHit& hit,
331  std::vector<PeptideIdentification>::const_iterator& it,
332  std::map<String, String>& pep_ids,
333  const String& cv_ns, std::set<String>& sen_set,
334  std::map<String, String>& sen_ids,
335  std::map<String, std::vector<String> >& pep_evis,
336  std::map<String, double>& pp_identifier_2_thresh,
337  String& sidres);
338 
341  std::vector<PeptideIdentification>::const_iterator& it,
342  const String& ppxl_linkid, std::map<String, String>& pep_ids,
343  const String& cv_ns, std::set<String>& sen_set,
344  std::map<String, String>& sen_ids,
345  std::map<String, std::vector<String> >& pep_evis,
346  std::map<String, double>& pp_identifier_2_thresh,
347  double ppxl_crosslink_mass,
348  std::map<String, String>& ppxl_specref_2_element,
349  String& sid, bool alpha_peptide);
350 
351 private:
355  std::map<String, AASequence> pep_sequences_;
356  std::map<String, String> pp_identifier_2_sil_;
357  std::map<String, String> sil_2_sdb_;
358  std::map<String, String> sil_2_sdat_;
359  std::map<String, String> ph_2_sdat_;
360  std::map<String, String> sil_2_sip_;
364 
365  };
366  } // namespace Internal
367 } // namespace OpenMS
Representation of a peptide/protein sequence.
Definition: AASequence.h:86
Definition: ControlledVocabulary.h:29
DateTime Class.
Definition: DateTime.h:33
Representation of a digestion enzyme for proteins (protease)
Definition: DigestionEnzymeProtein.h:24
Represents an object which can store the information of an analysisXML instance.
Definition: MzIdentMLHandler.h:38
virtual ~IdentificationHit()=default
Virtual destructor.
IdentificationHit(const IdentificationHit &)=default
Copy constructor.
IdentificationHit(IdentificationHit &&) noexcept=default
Move constructor.
Definition: MzIdentMLHandler.h:184
void addSpectrumIdentification(const SpectrumIdentification &id)
adds a spectrum identification
DateTime creation_date_
Date and time the search was performed.
Definition: MzIdentMLHandler.h:229
Identification(const Identification &source)=default
Copy constructor.
bool operator==(const Identification &rhs) const
Equality operator.
Identification(Identification &&)=default
Move constructor.
virtual ~Identification()
Destructor.
void setSpectrumIdentifications(const std::vector< SpectrumIdentification > &ids)
sets the spectrum identifications
Identification()=default
Default constructor.
const DateTime & getCreationDate() const
returns the date and time the file was created
std::vector< SpectrumIdentification > spectrum_identifications_
Definition: MzIdentMLHandler.h:230
Identification & operator=(Identification &&) &=default
Move assignment operator.
const std::vector< SpectrumIdentification > & getSpectrumIdentifications() const
returns the spectrum identifications stored
Identification & operator=(const Identification &source)=default
Assignment operator.
String id_
Identifier.
Definition: MzIdentMLHandler.h:228
bool operator!=(const Identification &rhs) const
Inequality operator.
XML STREAM handler for MzIdentMLFile.
Definition: MzIdentMLHandler.h:244
ControlledVocabulary::CVTerm getChildWithName_(const String &parent_accession, const String &name) const
Looks up a child CV term of parent_accession with the name name. If no such term is found,...
const Identification * cid_
Definition: MzIdentMLHandler.h:292
ProteinHit actual_protein_
Definition: MzIdentMLHandler.h:363
String trimOpenMSfileURI(const String &file) const
Convenience method to remove the [] from OpenMS internal file uri representation.
std::map< String, String > sil_2_sdb_
mapping spectrumidentificationlist to the search databases
Definition: MzIdentMLHandler.h:357
void writeTo(std::ostream &os) override
Writes the contents to a stream.
std::map< String, String > sil_2_sip_
mapping spectrumidentificationlist to the search protocol (where the params are at)
Definition: MzIdentMLHandler.h:360
MzIdentMLHandler(const MzIdentMLHandler &rhs)
IdentificationHit current_id_hit_
IdentificationHit Item.
Definition: MzIdentMLHandler.h:300
const std::vector< ProteinIdentification > * cpro_id_
Definition: MzIdentMLHandler.h:293
const ProgressLogger & logger_
Progress logger.
Definition: MzIdentMLHandler.h:273
ControlledVocabulary cv_
Controlled vocabulary (psi-ms from OpenMS/share/OpenMS/CV/psi-ms.obo)
Definition: MzIdentMLHandler.h:276
String tag_
XML tag parse element.
Definition: MzIdentMLHandler.h:283
std::map< String, String > ph_2_sdat_
mapping identification runs (mapping PeptideIdentifications and ProteinIdentifications via ....
Definition: MzIdentMLHandler.h:359
void writeFragmentAnnotations_(String &s, const std::vector< PeptideHit::PeakAnnotation > &annotations, UInt indent, bool is_ppxl) const
Helper method that writes the FragmentAnnotations section of a spectrum identification.
SpectrumIdentification current_spectrum_id_
SpectrumIdentification Item.
Definition: MzIdentMLHandler.h:297
MzIdentMLHandler & operator=(const MzIdentMLHandler &rhs)
std::vector< ProteinIdentification > * pro_id_
internal Identification Item for proteins
Definition: MzIdentMLHandler.h:288
std::map< String, AASequence > pep_sequences_
Definition: MzIdentMLHandler.h:355
Identification * id_
Identification Item.
Definition: MzIdentMLHandler.h:286
std::vector< PeptideIdentification > * pep_id_
Identification Item for peptides.
Definition: MzIdentMLHandler.h:290
MzIdentMLHandler(const std::vector< ProteinIdentification > &pro_id, const std::vector< PeptideIdentification > &pep_id, const String &filename, const String &version, const ProgressLogger &logger)
Constructor for a write-only handler for internal identification structures.
ControlledVocabulary unimod_
Controlled vocabulary for modifications (unimod from OpenMS/share/OpenMS/CV/unimod....
Definition: MzIdentMLHandler.h:278
void writeXLMSPeptideHit(const PeptideHit &hit, std::vector< PeptideIdentification >::const_iterator &it, const String &ppxl_linkid, std::map< String, String > &pep_ids, const String &cv_ns, std::set< String > &sen_set, std::map< String, String > &sen_ids, std::map< String, std::vector< String > > &pep_evis, std::map< String, double > &pp_identifier_2_thresh, double ppxl_crosslink_mass, std::map< String, String > &ppxl_specref_2_element, String &sid, bool alpha_peptide)
Abstraction of PeptideHit loop for XL-MS data from OpenPepXL.
std::map< String, String > pp_identifier_2_sil_
mapping peptide/proteinidentification identifier_ to spectrumidentificationlist
Definition: MzIdentMLHandler.h:356
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
std::map< String, String > sil_2_sdat_
mapping spectrumidentificationlist to the search input
Definition: MzIdentMLHandler.h:358
AASequence actual_peptide_
Definition: MzIdentMLHandler.h:361
~MzIdentMLHandler() override
Destructor.
const std::vector< PeptideIdentification > * cpep_id_
Definition: MzIdentMLHandler.h:294
void writeMetaInfos_(String &s, const MetaInfoInterface &meta, UInt indent) const
Writes user terms.
Int current_mod_location_
Definition: MzIdentMLHandler.h:362
void characters(const XMLCh *const chars, const XMLSize_t length) override
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
void writeEnzyme_(String &s, const DigestionEnzymeProtein &enzy, UInt miss, UInt indent) const
Helper method that writes a source file.
void handleUserParam_(const String &parent_parent_tag, const String &parent_tag, const String &name, const String &type, const String &value)
Handles user terms.
void writePeptideHit(const PeptideHit &hit, std::vector< PeptideIdentification >::const_iterator &it, std::map< String, String > &pep_ids, const String &cv_ns, std::set< String > &sen_set, std::map< String, String > &sen_ids, std::map< String, std::vector< String > > &pep_evis, std::map< String, double > &pp_identifier_2_thresh, String &sidres)
Abstraction of PeptideHit loop for most PeptideHits.
void handleCVParam_(const String &parent_parent_tag, const String &parent_tag, const String &accession, const xercesc::Attributes &attributes, const String &cv_ref)
Handles CV terms.
void writeModParam_(String &s, const std::vector< String > &mod_names, bool fixed, UInt indent) const
Helper method that writes the modification search params (fixed or variable)
MzIdentMLHandler(std::vector< ProteinIdentification > &pro_id, std::vector< PeptideIdentification > &pep_id, const String &filename, const String &version, const ProgressLogger &logger)
Constructor for a read-only handler for internal identification structures.
Definition: MzIdentMLHandler.h:134
bool operator==(const SpectrumIdentification &rhs) const
Equality operator.
SpectrumIdentification & operator=(const SpectrumIdentification &)=default
Assignment operator.
bool operator!=(const SpectrumIdentification &rhs) const
Inequality operator.
const std::vector< IdentificationHit > & getHits() const
returns the identification hits of this spectrum identification
SpectrumIdentification & operator=(SpectrumIdentification &&) &=default
Move assignment operator.
virtual ~SpectrumIdentification()
Destructor.
std::vector< IdentificationHit > hits_
Single peptide hits.
Definition: MzIdentMLHandler.h:172
void addHit(const IdentificationHit &hit)
adds a single identification hit to the hits
SpectrumIdentification(SpectrumIdentification &&)=default
Move constructor.
String id_
Identifier.
Definition: MzIdentMLHandler.h:171
SpectrumIdentification(const SpectrumIdentification &)=default
Copy constructor.
Base class for XML handlers.
Definition: XMLHandler.h:302
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:35
Representation of a peptide hit.
Definition: PeptideHit.h:31
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
Representation of a protein hit.
Definition: ProteinHit.h:34
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Representation of a CV term.
Definition: ControlledVocabulary.h:35