OpenMS
Loading...
Searching...
No Matches
MzIdentMLHandler.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Mathias Walzer $
6// $Authors: Mathias Walzer, Andreas Bertsch $
7// --------------------------------------------------------------------------
8
9#pragma once
10
12
21
22#include <vector>
23#include <map>
24#include <set>
25
26namespace OpenMS
27{
28 class ProgressLogger;
29
30 namespace Internal
31 {
32
/// One identification hit: stores an id, a charge, a calculated and an experimental
/// mass-to-charge value, a name, a pass-threshold flag and a rank (see the private members).
// NOTE(review): the line that should follow the ':' below (listing line 39, where a
// base-class specifier would be) is absent from this view.
38 class OPENMS_DLLAPI IdentificationHit :
40 {
41 public:
43
44
/// Default constructor; members take the in-class initializers declared below.
45 IdentificationHit() = default;
46
// NOTE(review): listing lines 47-48 are absent from this view.
49
/// Virtual destructor.
51 virtual ~IdentificationHit() = default;
52
/// Move constructor (defaulted, noexcept).
54 IdentificationHit(IdentificationHit&&) noexcept = default;
55
/// Copy assignment operator (defaulted).
57 IdentificationHit& operator=(const IdentificationHit&) = default;
58
/// Move assignment operator (defaulted, noexcept).
60 IdentificationHit& operator=(IdentificationHit&&) noexcept = default;
62
64
65
/// Equality operator; only declared here, so the compared fields are not visible in this header.
66 bool operator==(const IdentificationHit& rhs) const noexcept;
67
/// Inequality operator; only declared here.
69 bool operator!=(const IdentificationHit& rhs) const noexcept;
71
73
74
// Accessors. Only the declarations are visible; each setter/getter pair presumably
// maps onto the equally named private member below - confirm against the definitions.
// NOTE(review): setId() and setName() are declared noexcept but take a std::string by
// const reference. If the definition copies it into the member, an allocation failure
// would end in std::terminate - confirm this is intended.
/// Sets the identifier.
75 void setId(const std::string& id) noexcept;
76
/// Returns the identifier by const reference.
78 const std::string& getId() const noexcept;
79
/// Sets the charge.
81 void setCharge(int charge) noexcept;
82
/// Returns the charge.
84 int getCharge() const noexcept;
85
/// Sets the calculated mass-to-charge value.
87 void setCalculatedMassToCharge(double mz) noexcept;
88
/// Returns the calculated mass-to-charge value.
90 double getCalculatedMassToCharge() const noexcept;
91
/// Sets the experimental mass-to-charge value.
93 void setExperimentalMassToCharge(double mz) noexcept;
94
/// Returns the experimental mass-to-charge value.
96 double getExperimentalMassToCharge() const noexcept;
97
/// Sets the name.
99 void setName(const std::string& name) noexcept;
100
/// Returns the name by const reference.
102 const std::string& getName() const noexcept;
103
/// Sets the pass-threshold flag.
105 void setPassThreshold(bool pass) noexcept;
106
/// Returns the pass-threshold flag.
108 bool getPassThreshold() const noexcept;
109
/// Sets the rank.
111 void setRank(int rank) noexcept;
112
/// Returns the rank.
114 int getRank() const noexcept;
116
117 private:
// Data members with their defaults: empty strings, zero charge/mz/rank, and a
// pass-threshold flag that starts out true.
118 std::string id_;
119 int charge_ = 0;
120 double calculated_mass_to_charge_ = 0.0;
121 double experimental_mass_to_charge_ = 0.0;
122 std::string name_;
123 bool pass_threshold_ = true;
124 int rank_ = 0;
125 };
126
/// Identification result for one spectrum: an identifier plus a list of
/// IdentificationHit entries, with meta information inherited from MetaInfoInterface.
134 class OPENMS_DLLAPI SpectrumIdentification :
135 public MetaInfoInterface
136 {
137public:
138
140
141
// NOTE(review): listing lines 142-153 are absent from this view; the symbol index of
// this page lists defaulted constructors, assignment operators and a virtual
// destructor for this class.
/// Equality operator.
154 bool operator==(const SpectrumIdentification & rhs) const;
/// Inequality operator.
156 bool operator!=(const SpectrumIdentification & rhs) const;
158
159 // @name Accessors
161
/// Sets the identification hits of this spectrum identification.
162 void setHits(const std::vector<IdentificationHit> & hits);
163
/// Adds a single identification hit to the hits.
165 void addHit(const IdentificationHit & hit);
166
/// Returns the identification hits of this spectrum identification (by const reference).
168 const std::vector<IdentificationHit> & getHits() const;
170
171protected:
172
/// Identifier.
173 std::string id_;
/// Single peptide hits.
174 std::vector<IdentificationHit> hits_;
175 };
176
/// Identification container: an identifier plus a list of SpectrumIdentification
/// entries, with meta information inherited from MetaInfoInterface.
184 class OPENMS_DLLAPI Identification :
185 public MetaInfoInterface
186 {
187 public:
188
190
191
/// Default constructor.
193 Identification() = default;
/// Copy constructor.
195 Identification(const Identification & source) = default;
// NOTE(review): listing lines 196-199 are absent from this view.
200
/// Copy assignment operator.
202 Identification & operator=(const Identification & source) = default;
205
/// Equality operator.
207 bool operator==(const Identification & rhs) const;
/// Inequality operator.
209 bool operator!=(const Identification & rhs) const;
211
213
214
/// Sets the creation date (a DateTime).
215 void setCreationDate(const DateTime & date);
216
/// Returns the creation date by const reference.
218 const DateTime & getCreationDate() const;
219
/// Sets the spectrum identifications.
221 void setSpectrumIdentifications(const std::vector<SpectrumIdentification> & ids);
222
// NOTE(review): listing lines 223-226 are absent from this view.
225
/// Returns the spectrum identifications stored (by const reference).
227 const std::vector<SpectrumIdentification> & getSpectrumIdentifications() const;
229 protected:
/// Identifier.
230 std::string id_;
// NOTE(review): listing line 231 is absent from this view; the member backing the
// creation-date accessors above is therefore not shown here.
/// The stored spectrum identifications.
232 std::vector<SpectrumIdentification> spectrum_identifications_;
233 };
234
/// XML stream handler for MzIdentMLFile. Derives from XMLHandler and overrides its
/// startElement/endElement/characters parsing callbacks as well as writeTo.
244 class OPENMS_DLLAPI MzIdentMLHandler :
245 public XMLHandler
246 {
247public:
/// Constructor for a write-only handler: the protein and peptide identifications
/// are taken by const reference.
251 MzIdentMLHandler(const std::vector<ProteinIdentification>& pro_id, const PeptideIdentificationList& pep_id, const std::string& filename, const std::string& version, const ProgressLogger& logger);
252
/// Constructor for a read-only handler: the protein and peptide identifications
/// are taken by non-const reference.
254 MzIdentMLHandler(std::vector<ProteinIdentification>& pro_id, PeptideIdentificationList& pep_id, const std::string& filename, const std::string& version, const ProgressLogger& logger);
255
// NOTE(review): listing lines 256-258 are absent from this view.
259
260
261 // Docu in base class
262 void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
263
264 // Docu in base class
265 void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
266
267 // Docu in base class
268 void characters(const XMLCh* const chars, const XMLSize_t length) override;
269
270 //Docu in base class
/// Writes the contents to a stream.
271 void writeTo(std::ostream& os) override;
272
273protected:
// NOTE(review): several protected member declarations (listing lines 274-281,
// 287-289, 291-292, 294, 296-304) are absent from this view.
276
281
282 //~ PeakMap* ms_exp_;
283
/// XML tag parse element.
285 std::string tag_;
286
/// Internal identification item for proteins (pointer to mutable data).
290 std::vector<ProteinIdentification>* pro_id_;
293
/// Pointer-to-const counterpart of pro_id_; presumably used by the const-reference
/// constructor - confirm against the definition.
295 const std::vector<ProteinIdentification>* cpro_id_;
297
300
303
/// Handles CV terms. The name, value and unit_accession parameters are commented out
/// of the signature.
305 void handleCVParam_(const std::string& parent_parent_tag, const std::string& parent_tag, const std::string& accession, /* const std::string& name, */ /* const std::string& value, */ const xercesc::Attributes& attributes, const std::string& cv_ref /* , const std::string& unit_accession="" */);
306
/// Handles user terms.
308 void handleUserParam_(const std::string& parent_parent_tag, const std::string& parent_tag, const std::string& name, const std::string& type, const std::string& value);
309
/// Writes user terms; takes the output string s, the meta information and an indent.
311 void writeMetaInfos_(std::string& s, const MetaInfoInterface& meta, UInt indent) const;
312
/// Looks up a child CV term of parent_accession with the given name.
/// The not-found behaviour is defined in the implementation, which is not visible here.
314 ControlledVocabulary::CVTerm getChildWithName_(const std::string& parent_accession, const std::string& name) const;
315
// Disabled declaration of a source-file writer, kept as a comment.
317 //void writeSourceFile_(std::ostream& os, const std::string& id, const SourceFile& software);
318
/// Helper method that writes a digestion enzyme (enzy); miss is presumably the
/// number of missed cleavages - confirm against the definition.
320 void writeEnzyme_(std::string& s, const DigestionEnzymeProtein& enzy, UInt miss, UInt indent) const;
321
/// Helper method that writes the modification search params (fixed or variable).
323 void writeModParam_(std::string& s, const std::vector<std::string>& mod_names, bool fixed, UInt indent) const;
324
/// Helper method that writes the FragmentAnnotations section of a spectrum identification.
326 void writeFragmentAnnotations_(std::string& s, const std::vector<PeptideHit::PeakAnnotation>& annotations, UInt indent, bool is_ppxl) const;
327
/// Convenience method to remove the [] from the OpenMS internal file URI representation.
329 std::string trimOpenMSfileURI(const std::string& file) const;
330
// The next six lines are the tail of the writePeptideHit declaration (abstraction of the
// PeptideHit loop for most PeptideHits); its first line (listing line 333) is absent
// from this view.
334 std::map<std::string, std::string>& pep_ids,
335 const std::string& cv_ns, std::set<std::string>& sen_set,
336 std::map<std::string, std::string>& sen_ids,
337 std::map<std::string, std::vector<std::string> >& pep_evis,
338 std::map<std::string, double>& pp_identifier_2_thresh,
339 std::string& sidres);
340
// The next eight lines are the tail of the writeXLMSPeptideHit declaration (abstraction of
// the PeptideHit loop for XL-MS data from OpenPepXL); its first line (listing line 343)
// is absent from this view.
344 const std::string& ppxl_linkid, std::map<std::string, std::string>& pep_ids,
345 const std::string& cv_ns, std::set<std::string>& sen_set,
346 std::map<std::string, std::string>& sen_ids,
347 std::map<std::string, std::vector<std::string> >& pep_evis,
348 std::map<std::string, double>& pp_identifier_2_thresh,
349 double ppxl_crosslink_mass,
350 std::map<std::string, std::string>& ppxl_specref_2_element,
351 std::string& sid, bool alpha_peptide);
352
353private:
// NOTE(review): listing lines 354-359 and 366-368 are absent from this view.
355
/// Peptide sequences keyed by a string id (which id is not evident from this header).
360 std::map<std::string, AASequence> pep_sequences_;
/// Mapping peptide/protein identification identifier_ to spectrumidentificationlist.
361 std::map<std::string, std::string> pp_identifier_2_sil_;
/// Mapping spectrumidentificationlist to the search databases.
362 std::map<std::string, std::string> sil_2_sdb_;
/// Mapping spectrumidentificationlist to the search input.
363 std::map<std::string, std::string> sil_2_sdat_;
/// Mapping identification runs to the search input.
364 std::map<std::string, std::string> ph_2_sdat_;
/// Mapping spectrumidentificationlist to the search protocol (where the params are at).
365 std::map<std::string, std::string> sil_2_sip_;
369
/// Cached CV child terms for "MS:1001143".
371 std::set<std::string> peptide_result_details_;
372
373 };
374 } // namespace Internal
375} // namespace OpenMS
char16_t XMLCh
Definition ClassTest.h:30
Representation of a peptide/protein sequence.
Definition AASequence.h:88
Definition ControlledVocabulary.h:29
DateTime Class.
Definition DateTime.h:31
Representation of a digestion enzyme for proteins (protease)
Definition DigestionEnzymeProtein.h:24
typename VecMember::const_iterator const_iterator
Definition ExposedVector.h:69
Represents an object which can store the information of an analysisXML instance.
Definition MzIdentMLHandler.h:40
virtual ~IdentificationHit()=default
Virtual destructor.
IdentificationHit(const IdentificationHit &)=default
Copy constructor.
IdentificationHit(IdentificationHit &&) noexcept=default
Move constructor.
IdentificationHit()=default
Default constructor.
Definition MzIdentMLHandler.h:186
void addSpectrumIdentification(const SpectrumIdentification &id)
adds a spectrum identification
DateTime creation_date_
Date and time the search was performed.
Definition MzIdentMLHandler.h:231
const DateTime & getCreationDate() const
returns the date and time the file was created
Identification(const Identification &source)=default
Copy constructor.
bool operator==(const Identification &rhs) const
Equality operator.
Identification(Identification &&)=default
Move constructor.
const std::vector< SpectrumIdentification > & getSpectrumIdentifications() const
returns the spectrum identifications stored
virtual ~Identification()
Destructor.
void setSpectrumIdentifications(const std::vector< SpectrumIdentification > &ids)
sets the spectrum identifications
Identification()=default
Default constructor.
Identification & operator=(const Identification &source)=default
Assignment operator.
Identification & operator=(Identification &&) &=default
Move assignment operator.
std::vector< SpectrumIdentification > spectrum_identifications_
Definition MzIdentMLHandler.h:232
void setCreationDate(const DateTime &date)
sets the date and time the file was written
std::string id_
Identifier.
Definition MzIdentMLHandler.h:230
bool operator!=(const Identification &rhs) const
Inequality operator.
XML STREAM handler for MzIdentMLFile.
Definition MzIdentMLHandler.h:246
ControlledVocabulary::CVTerm getChildWithName_(const std::string &parent_accession, const std::string &name) const
Looks up a child CV term of parent_accession with the name name. If no such term is found,...
const Identification * cid_
Definition MzIdentMLHandler.h:294
PeptideIdentificationList * pep_id_
Identification Item for peptides.
Definition MzIdentMLHandler.h:292
void handleCVParam_(const std::string &parent_parent_tag, const std::string &parent_tag, const std::string &accession, const xercesc::Attributes &attributes, const std::string &cv_ref)
Handles CV terms.
std::set< std::string > peptide_result_details_
cached CV child terms for "MS:1001143"
Definition MzIdentMLHandler.h:371
ProteinHit actual_protein_
Definition MzIdentMLHandler.h:368
void writeTo(std::ostream &os) override
Writes the contents to a stream.
std::map< std::string, std::string > sil_2_sdb_
mapping spectrumidentificationlist to the search data bases
Definition MzIdentMLHandler.h:362
MzIdentMLHandler(const MzIdentMLHandler &rhs)
IdentificationHit current_id_hit_
IdentificationHit Item.
Definition MzIdentMLHandler.h:302
const std::vector< ProteinIdentification > * cpro_id_
Definition MzIdentMLHandler.h:295
const ProgressLogger & logger_
Progress logger.
Definition MzIdentMLHandler.h:275
ControlledVocabulary cv_
Controlled vocabulary (psi-ms from OpenMS/share/OpenMS/CV/psi-ms.obo)
Definition MzIdentMLHandler.h:278
std::map< std::string, std::string > sil_2_sip_
mapping spectrumidentificationlist to the search protocol (where the params are at)
Definition MzIdentMLHandler.h:365
std::map< std::string, AASequence > pep_sequences_
Definition MzIdentMLHandler.h:360
SpectrumIdentification current_spectrum_id_
SpectrumIdentification Item.
Definition MzIdentMLHandler.h:299
std::string trimOpenMSfileURI(const std::string &file) const
Convenience method to remove the [] from OpenMS internal file uri representation.
std::vector< ProteinIdentification > * pro_id_
internal Identification Item for proteins
Definition MzIdentMLHandler.h:290
Identification * id_
Identification Item.
Definition MzIdentMLHandler.h:288
void writePeptideHit(const PeptideHit &hit, PeptideIdentificationList::const_iterator &it, std::map< std::string, std::string > &pep_ids, const std::string &cv_ns, std::set< std::string > &sen_set, std::map< std::string, std::string > &sen_ids, std::map< std::string, std::vector< std::string > > &pep_evis, std::map< std::string, double > &pp_identifier_2_thresh, std::string &sidres)
Abstraction of PeptideHit loop for most PeptideHits.
ControlledVocabulary unimod_
Controlled vocabulary for modifications (unimod from OpenMS/share/OpenMS/CV/unimod....
Definition MzIdentMLHandler.h:280
std::string tag_
XML tag parse element.
Definition MzIdentMLHandler.h:285
MzIdentMLHandler & operator=(const MzIdentMLHandler &rhs)
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
AASequence actual_peptide_
Definition MzIdentMLHandler.h:366
std::map< std::string, std::string > ph_2_sdat_
mapping identification runs (mapping PeptideIdentifications and ProteinIdentifications via ....
Definition MzIdentMLHandler.h:364
void writeEnzyme_(std::string &s, const DigestionEnzymeProtein &enzy, UInt miss, UInt indent) const
Helper method that writes the digestion enzyme.
void writeXLMSPeptideHit(const PeptideHit &hit, PeptideIdentificationList::const_iterator &it, const std::string &ppxl_linkid, std::map< std::string, std::string > &pep_ids, const std::string &cv_ns, std::set< std::string > &sen_set, std::map< std::string, std::string > &sen_ids, std::map< std::string, std::vector< std::string > > &pep_evis, std::map< std::string, double > &pp_identifier_2_thresh, double ppxl_crosslink_mass, std::map< std::string, std::string > &ppxl_specref_2_element, std::string &sid, bool alpha_peptide)
Abstraction of PeptideHit loop for XL-MS data from OpenPepXL.
~MzIdentMLHandler() override
Destructor.
void initCvCaches_()
Load CVs and precompute the cached CV child-term set; shared by both constructors.
void writeModParam_(std::string &s, const std::vector< std::string > &mod_names, bool fixed, UInt indent) const
Helper method that writes the modification search params (fixed or variable)
void handleUserParam_(const std::string &parent_parent_tag, const std::string &parent_tag, const std::string &name, const std::string &type, const std::string &value)
Handles user terms.
void writeMetaInfos_(std::string &s, const MetaInfoInterface &meta, UInt indent) const
Writes user terms.
MzIdentMLHandler(const std::vector< ProteinIdentification > &pro_id, const PeptideIdentificationList &pep_id, const std::string &filename, const std::string &version, const ProgressLogger &logger)
Constructor for a write-only handler for internal identification structures.
std::map< std::string, std::string > pp_identifier_2_sil_
mapping peptide/proteinidentification identifier_ to spectrumidentificationlist
Definition MzIdentMLHandler.h:361
Int current_mod_location_
Definition MzIdentMLHandler.h:367
void characters(const XMLCh *const chars, const XMLSize_t length) override
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
MzIdentMLHandler(std::vector< ProteinIdentification > &pro_id, PeptideIdentificationList &pep_id, const std::string &filename, const std::string &version, const ProgressLogger &logger)
Constructor for a read-only handler for internal identification structures.
std::map< std::string, std::string > sil_2_sdat_
mapping spectrumidentificationlist to the search input
Definition MzIdentMLHandler.h:363
void writeFragmentAnnotations_(std::string &s, const std::vector< PeptideHit::PeakAnnotation > &annotations, UInt indent, bool is_ppxl) const
Helper method that writes the FragmentAnnotations section of a spectrum identification.
const PeptideIdentificationList * cpep_id_
Definition MzIdentMLHandler.h:296
Definition MzIdentMLHandler.h:136
bool operator==(const SpectrumIdentification &rhs) const
Equality operator.
bool operator!=(const SpectrumIdentification &rhs) const
Inequality operator.
virtual ~SpectrumIdentification()
Destructor.
const std::vector< IdentificationHit > & getHits() const
returns the identification hits of this spectrum identification
SpectrumIdentification & operator=(const SpectrumIdentification &)=default
Assignment operator.
SpectrumIdentification & operator=(SpectrumIdentification &&) &=default
Move assignment operator.
std::vector< IdentificationHit > hits_
Single peptide hits.
Definition MzIdentMLHandler.h:174
void setHits(const std::vector< IdentificationHit > &hits)
sets the identification hits of this spectrum identification (corresponds to single peptide hit in th...
SpectrumIdentification()=default
Default constructor.
void addHit(const IdentificationHit &hit)
adds a single identification hit to the hits
SpectrumIdentification(SpectrumIdentification &&)=default
Move constructor.
std::string id_
Identifier.
Definition MzIdentMLHandler.h:173
SpectrumIdentification(const SpectrumIdentification &)=default
Copy constructor.
Base class for XML handlers.
Definition XMLHandler.h:316
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition MetaInfoInterface.h:35
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Representation of a protein hit.
Definition ProteinHit.h:35
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
STL namespace.
Representation of a CV term.
Definition ControlledVocabulary.h:50