Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MzIdentMLDOMHandler.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Mathias Walzer$
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FORMAT_HANDLERS_MZIDENTMLDOMHANDLER_H
36 #define OPENMS_FORMAT_HANDLERS_MZIDENTMLDOMHANDLER_H
37 
39 
48 
49 #include <xercesc/dom/DOM.hpp>
50 #include <xercesc/dom/DOMDocument.hpp>
51 #include <xercesc/dom/DOMDocumentType.hpp>
52 #include <xercesc/dom/DOMElement.hpp>
53 #include <xercesc/dom/DOMImplementation.hpp>
54 #include <xercesc/dom/DOMImplementationLS.hpp>
55 #include <xercesc/dom/DOMNodeIterator.hpp>
56 #include <xercesc/dom/DOMNodeList.hpp>
57 #include <xercesc/dom/DOMText.hpp>
58 #include <xercesc/util/OutOfMemoryException.hpp>
59 #include <xercesc/util/XMLString.hpp>
60 #include <xercesc/util/PlatformUtils.hpp>
61 #include <xercesc/framework/LocalFileFormatTarget.hpp>
62 
63 #include <xercesc/parsers/XercesDOMParser.hpp>
64 #include <xercesc/util/XMLUni.hpp>
65 #include <xercesc//framework/psvi/XSValue.hpp>
66 
67 #include <string>
68 #include <stdexcept>
69 #include <vector>
70 #include <map>
71 
72 // Error codes
73 //enum {
74 // ERROR_ARGS = 1,
75 // ERROR_XERCES_INIT,
76 // ERROR_PARSE,
77 // ERROR_EMPTY_DOCUMENT
78 //};
79 
80 namespace OpenMS
81 {
82  class ProgressLogger;
83 
84  namespace Internal
85  {
97  class OPENMS_DLLAPI MzIdentMLDOMHandler
98  {
99 public:
102  MzIdentMLDOMHandler(const std::vector<ProteinIdentification>& pro_id, const std::vector<PeptideIdentification>& pep_id, const String& version, const ProgressLogger& logger);
104 
106  MzIdentMLDOMHandler(std::vector<ProteinIdentification>& pro_id, std::vector<PeptideIdentification>& pep_id, const String& version, const ProgressLogger& logger);
107 
109  virtual ~MzIdentMLDOMHandler();
111 
113  void readMzIdentMLFile(const std::string& mzid_file);
115  void writeMzIdentMLFile(const std::string& mzid_file);
116 
117 protected:
120 
125 
127  std::vector<ProteinIdentification>* pro_id_;
129  std::vector<PeptideIdentification>* pep_id_;
130 
132  const std::vector<ProteinIdentification>* cpro_id_;
134  const std::vector<PeptideIdentification>* cpep_id_;
135 
138 
140  ControlledVocabulary::CVTerm getChildWithName_(const String& parent_accession, const String& name) const;
141 
144  std::pair<CVTermList, std::map<String, DataValue> > parseParamGroup_(xercesc::DOMNodeList* paramGroup);
145  CVTerm parseCvParam_(xercesc::DOMElement* param);
146  std::pair<String, DataValue> parseUserParam_(xercesc::DOMElement* param);
147  void parseAnalysisSoftwareList_(xercesc::DOMNodeList* analysisSoftwareElements);
148  void parseDBSequenceElements_(xercesc::DOMNodeList* dbSequenceElements);
149  void parsePeptideElements_(xercesc::DOMNodeList* peptideElements);
150  //AASequence parsePeptideSiblings_(xercesc::DOMNodeList* peptideSiblings);
151  AASequence parsePeptideSiblings_(xercesc::DOMElement* peptide);
152  void parsePeptideEvidenceElements_(xercesc::DOMNodeList* peptideEvidenceElements);
153  void parseSpectrumIdentificationElements_(xercesc::DOMNodeList* spectrumIdentificationElements);
154  void parseSpectrumIdentificationProtocolElements_(xercesc::DOMNodeList* spectrumIdentificationProtocolElements);
155  void parseInputElements_(xercesc::DOMNodeList* inputElements);
156  void parseSpectrumIdentificationListElements_(xercesc::DOMNodeList* spectrumIdentificationListElements);
157  void parseSpectrumIdentificationItemSetXLMS(std::set<String>::const_iterator set_it, std::multimap<String, int> xl_val_map, xercesc::DOMElement* element_res, String spectrumID);
158  void parseSpectrumIdentificationItemElement_(xercesc::DOMElement* spectrumIdentificationItemElement, PeptideIdentification& spectrum_identification, String& spectrumIdentificationList_ref);
159  void parseProteinDetectionHypothesisElement_(xercesc::DOMElement* proteinDetectionHypothesisElement, ProteinIdentification& protein_identification);
160  void parseProteinAmbiguityGroupElement_(xercesc::DOMElement* proteinAmbiguityGroupElement, ProteinIdentification& protein_identification);
161  void parseProteinDetectionListElements_(xercesc::DOMNodeList* proteinDetectionListElements);
162  static ProteinIdentification::SearchParameters findSearchParameters_(std::pair<CVTermList, std::map<String, DataValue> > as_params);
164 
166  void buildCvList_(xercesc::DOMElement* cvElements);
167  void buildAnalysisSoftwareList_(xercesc::DOMElement* analysisSoftwareElements);
168  void buildSequenceCollection_(xercesc::DOMElement* sequenceCollectionElements);
169  void buildAnalysisCollection_(xercesc::DOMElement* analysisCollectionElements);
170  void buildAnalysisProtocolCollection_(xercesc::DOMElement* protocolElements);
171  void buildInputDataCollection_(xercesc::DOMElement* inputElements);
172  void buildEnclosedCV_(xercesc::DOMElement* parentElement, String encel, String acc, String name, String cvref);
173  void buildAnalysisDataCollection_(xercesc::DOMElement* analysisElements);
175 
176 
177 private:
180  MzIdentMLDOMHandler& operator=(const MzIdentMLDOMHandler& rhs);
181 
184  {
187  };
190  {
191  int start;
192  int stop;
193  char pre;
194  char post;
195  bool idec;
196  };
198  struct DBSequence
199  {
204  };
207  {
212  };
215  {
217  long double mass_delta;
221  };
224  {
228  std::map<String, DataValue> parameter_ups;
229 // std::vector<ModificationParam> modification_parameter;
231  long double precursor_tolerance;
232  long double fragment_tolerance;
234  std::map<String, DataValue> threshold_ups;
235  };
238  {
243  };
244 
248 
249  xercesc::XercesDOMParser mzid_parser_;
250 
251  //from AnalysisSoftware
254  //mapping from AnalysisSoftware
255  std::map<String, AnalysisSoftware> as_map_; //mapping AnalysisSoftware id -> AnalysisSoftware
256 
257  //mapping from DataCollection Inputs
258  std::map<String, String> sr_map_; //mapping sourcefile id -> sourcefile location
259  std::map<String, String> sd_map_; //mapping spectradata id -> spectradata location
260  std::map<String, DatabaseInput> db_map_; //mapping database id -> DatabaseInput
261 
262  //mapping from SpectrumIdentification - SpectrumIdentification will be the new IdentificationRuns
263  std::map<String, SpectrumIdentification> si_map_; //mapping SpectrumIdentification id -> SpectrumIdentification (id refs)
264  std::map<String, size_t> si_pro_map_; //mapping SpectrumIdentificationList id -> index to ProteinIdentification in pro_id_
265 
266  //mapping from SpectrumIdentificationProtocol
267  std::map<String, SpectrumIdentificationProtocol> sp_map_; //mapping SpectrumIdentificationProtocol id -> SpectrumIdentificationProtocol
268 
269  //mapping from SequenceCollection
270  std::map<String, AASequence> pep_map_; //mapping Peptide id -> Sequence
271  std::map<String, PeptideEvidence> pe_ev_map_; //mapping PeptideEvidence id -> PeptideEvidence
272  std::map<String, String> pv_db_map_; //mapping PeptideEvidence id -> DBSequence id
273  std::multimap<String, String> p_pv_map_; //mapping Peptide id -> PeptideEvidence id, multiple PeptideEvidences can have equivalent Peptides.
274  std::map<String, DBSequence> db_sq_map_; //mapping DBSequence id -> Sequence
275 
276  std::list<std::list<String> > hit_pev_; //writing help only
277 
278  bool xl_ms_search_; //is true when reading a file containing Cross-Linking MS search results
279  std::map<String, String> xl_id_donor_map_; //mapping Peptide id -> cross-link donor value
280  //std::map<String, String> xl_id_acceptor_map_; //mapping Peptide id -> cross-link acceptor value
281  std::map<String, String> xl_id_acceptor_map_; //mapping peptide id of acceptor peptide -> cross-link acceptor value
282  std::map<String, SignedSize> xl_donor_pos_map_; //mapping donor value -> cross-link modification location
283  std::map<String, SignedSize> xl_acceptor_pos_map_; //mapping acceptor value -> cross-link modification location
284  std::map<String, double> xl_mass_map_; //mapping Peptide id -> crosslink mass
285  std::map<String, String> xl_mod_map_; //mapping peptide id -> cross-linking reagent name
286 
287  };
288  } // namespace Internal
289 } // namespace OpenMS
290 
291 #endif
Representation of a protein identification run.
Definition: ProteinIdentification.h:62
std::map< String, DBSequence > db_sq_map_
Definition: MzIdentMLDOMHandler.h:274
std::map< String, DatabaseInput > db_map_
Definition: MzIdentMLDOMHandler.h:260
XMLCh * xml_name_attr_ptr_
Definition: MzIdentMLDOMHandler.h:247
String search_engine_version_
Definition: MzIdentMLDOMHandler.h:253
Struct to hold the PeptideEvidence information.
Definition: MzIdentMLDOMHandler.h:189
Struct to hold the information from the DatabaseInput xml tag.
Definition: MzIdentMLDOMHandler.h:237
std::vector< ProteinIdentification > * pro_id_
Internal writable (+w) identification item for proteins.
Definition: MzIdentMLDOMHandler.h:127
Representation of a CV term.
Definition: ControlledVocabulary.h:61
A more convenient string class.
Definition: String.h:57
const String schema_version_
Internal version keeping.
Definition: MzIdentMLDOMHandler.h:137
std::map< String, AnalysisSoftware > as_map_
Definition: MzIdentMLDOMHandler.h:255
Representation of controlled vocabulary term list.
Definition: CVTermList.h:53
int start
Definition: MzIdentMLDOMHandler.h:191
bool idec
Definition: MzIdentMLDOMHandler.h:195
std::map< String, SignedSize > xl_donor_pos_map_
Definition: MzIdentMLDOMHandler.h:282
std::vector< PeptideIdentification > * pep_id_
Internal writable (+w) identification item for peptides.
Definition: MzIdentMLDOMHandler.h:129
Struct to hold the information from the SpectrumIdentificationProtocol xml tag.
Definition: MzIdentMLDOMHandler.h:223
const ProgressLogger & logger_
Progress logger.
Definition: MzIdentMLDOMHandler.h:119
String name
Definition: MzIdentMLDOMHandler.h:239
std::map< String, DataValue > threshold_ups
Definition: MzIdentMLDOMHandler.h:234
String accession
Definition: MzIdentMLDOMHandler.h:202
Representation of a peptide/protein sequence.
Definition: AASequence.h:108
String sequence
Definition: MzIdentMLDOMHandler.h:200
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
int stop
Definition: MzIdentMLDOMHandler.h:192
Search parameters of the DB search.
Definition: ProteinIdentification.h:104
char post
Definition: MzIdentMLDOMHandler.h:194
XMLCh * xml_cvparam_tag_ptr_
Definition: MzIdentMLDOMHandler.h:246
long double mass_delta
Definition: MzIdentMLDOMHandler.h:217
char pre
Definition: MzIdentMLDOMHandler.h:193
CVTermList cvs
Definition: MzIdentMLDOMHandler.h:203
CVTermList modification_parameter
Definition: MzIdentMLDOMHandler.h:230
Representation of a controlled vocabulary.
Definition: ControlledVocabulary.h:55
std::map< String, String > pv_db_map_
Definition: MzIdentMLDOMHandler.h:272
String version
Definition: MzIdentMLDOMHandler.h:186
std::map< String, String > sd_map_
Definition: MzIdentMLDOMHandler.h:259
CVTermList parameter_cvs
Definition: MzIdentMLDOMHandler.h:227
std::map< String, String > xl_id_donor_map_
Definition: MzIdentMLDOMHandler.h:279
bool xl_ms_search_
Definition: MzIdentMLDOMHandler.h:278
std::map< String, String > sr_map_
Definition: MzIdentMLDOMHandler.h:258
std::multimap< String, String > p_pv_map_
Definition: MzIdentMLDOMHandler.h:273
String version
Definition: MzIdentMLDOMHandler.h:241
std::map< String, String > xl_mod_map_
Definition: MzIdentMLDOMHandler.h:285
String residues
Definition: MzIdentMLDOMHandler.h:218
std::map< String, DataValue > parameter_ups
Definition: MzIdentMLDOMHandler.h:228
Struct to hold the used analysis software for that file.
Definition: MzIdentMLDOMHandler.h:183
String database_ref
Definition: MzIdentMLDOMHandler.h:201
String location
Definition: MzIdentMLDOMHandler.h:240
std::map< String, SpectrumIdentification > si_map_
Definition: MzIdentMLDOMHandler.h:263
std::map< String, SignedSize > xl_acceptor_pos_map_
Definition: MzIdentMLDOMHandler.h:283
Struct to hold the information from the SpectrumIdentification xml tag.
Definition: MzIdentMLDOMHandler.h:206
Representation of controlled vocabulary term.
Definition: CVTerm.h:51
CVTermList specificities
Definition: MzIdentMLDOMHandler.h:220
const std::vector< PeptideIdentification > * cpep_id_
Internal read-only (-w) identification item for peptides.
Definition: MzIdentMLDOMHandler.h:134
std::map< String, size_t > si_pro_map_
Definition: MzIdentMLDOMHandler.h:264
long double precursor_tolerance
Definition: MzIdentMLDOMHandler.h:231
long double fragment_tolerance
Definition: MzIdentMLDOMHandler.h:232
const std::vector< ProteinIdentification > * cpro_id_
Internal read-only (-w) identification item for proteins.
Definition: MzIdentMLDOMHandler.h:132
String search_engine_
Definition: MzIdentMLDOMHandler.h:252
DateTime date
Definition: MzIdentMLDOMHandler.h:242
std::map< String, String > xl_id_acceptor_map_
Definition: MzIdentMLDOMHandler.h:281
XML DOM handler for MzIdentMLFile.
Definition: MzIdentMLDOMHandler.h:97
xercesc::XercesDOMParser mzid_parser_
Definition: MzIdentMLDOMHandler.h:249
String spectrum_identification_list_ref
Definition: MzIdentMLDOMHandler.h:211
String spectrum_identification_protocol_ref
Definition: MzIdentMLDOMHandler.h:210
String fixed_mod
Definition: MzIdentMLDOMHandler.h:216
Struct to hold the information from the ModificationParam xml tag.
Definition: MzIdentMLDOMHandler.h:214
String name
Definition: MzIdentMLDOMHandler.h:185
String search_database_ref
Definition: MzIdentMLDOMHandler.h:209
CVTermList threshold_cvs
Definition: MzIdentMLDOMHandler.h:233
DateTime Class.
Definition: DateTime.h:55
std::map< String, AASequence > pep_map_
Definition: MzIdentMLDOMHandler.h:270
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
std::list< std::list< String > > hit_pev_
Definition: MzIdentMLDOMHandler.h:276
std::map< String, double > xl_mass_map_
Definition: MzIdentMLDOMHandler.h:284
ControlledVocabulary cv_
Controlled vocabulary (psi-ms from OpenMS/share/OpenMS/CV/psi-ms.obo)
Definition: MzIdentMLDOMHandler.h:122
XMLCh * xml_root_tag_ptr_
Definition: MzIdentMLDOMHandler.h:245
CVTerm searchtype
Definition: MzIdentMLDOMHandler.h:225
ControlledVocabulary unimod_
Controlled vocabulary for modifications (unimod from OpenMS/share/OpenMS/CV/unimod.obo)
Definition: MzIdentMLDOMHandler.h:124
Struct to hold the information from the DBSequence xml tag.
Definition: MzIdentMLDOMHandler.h:198
String spectra_data_ref
Definition: MzIdentMLDOMHandler.h:208
std::map< String, PeptideEvidence > pe_ev_map_
Definition: MzIdentMLDOMHandler.h:271
CVTermList modification_param_cvs
Definition: MzIdentMLDOMHandler.h:219
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
std::map< String, SpectrumIdentificationProtocol > sp_map_
Definition: MzIdentMLDOMHandler.h:267

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:02 using doxygen 1.8.13