OpenMS
Loading...
Searching...
No Matches
PepXMLFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow, Hendrik Weisser $
6// $Authors: Chris Bielow, Hendrik Weisser $
7// --------------------------------------------------------------------------
8
9#pragma once
10
20
21#include <vector>
22#include <map>
23#include <set>
24
25
26namespace OpenMS
27{
/// Used to load and store PepXML files.
///
/// NOTE(review): this listing looks like an extracted documentation view of the header:
/// each line carries a leading listing number and some declaration lines are absent.
/// The notes below mark the gaps that the cross-reference entries make visible.
38 class OPENMS_DLLAPI PepXMLFile :
39 protected Internal::XMLHandler,
// NOTE(review): the base-class list ends with a trailing comma and listing line 40 is absent;
// presumably a second base (the cross-reference describes an XMLFile base class) belongs here — confirm.
41 {
42public:
43

// NOTE(review): the cross-reference documents a constructor PepXMLFile(), but no declaration
// line for it is present in this listing.
46

/// Destructor.
48 ~PepXMLFile() override;
49

/// Loads peptide sequences with modifications out of a PepXML file.
// NOTE(review): the cross-reference signature additionally lists
// `PeptideIdentificationList& peptides` between `proteins` and `experiment_name`
// (listing line 64 is absent here) — confirm against the real header.
62 void load(const String& filename,
63 std::vector<ProteinIdentification>& proteins,
65 const String& experiment_name,
66 const SpectrumMetaDataLookup& lookup);
67

/// Overload of load() without a `lookup` argument; `experiment_name` defaults to "".
// NOTE(review): as above, the cross-reference signature also lists
// `PeptideIdentificationList& peptides` (listing line 76 is absent here).
74 void load(const String& filename,
75 std::vector<ProteinIdentification>& proteins,
77 const String& experiment_name = "");
78

/// Stores idXML as PepXML file.
84 void store(const String& filename, std::vector<ProteinIdentification>& protein_ids,
85 PeptideIdentificationList& peptide_ids, const String& mz_file = "",
86 const String& mz_name = "", bool peptideprophet_analyzed = false, double rt_tolerance = 0.01);
87

/// Whether we should keep the native spectrum name of the pepXML.
/// Stores @p keep in keep_native_name_.
95 void keepNativeSpectrumName(bool keep)
96 {
97 keep_native_name_ = keep;
98 }
99

/// Sets the preferred fixed modifications.
101 void setPreferredFixedModifications(const std::vector<const ResidueModification*>& mods);
102

/// Sets the preferred variable modifications.
104 void setPreferredVariableModifications(const std::vector<const ResidueModification*>& mods);
105

/// Sets whether unknown scores should be parsed during load.
107 void setParseUnknownScores(bool parse_unknown_scores);
108
109protected:
110

/// Docu in base class.
112 void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
113

/// Docu in base class.
115 void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
116
117private:
118

// NOTE(review): the cross-reference documents `void makeScanMap_()` ("Fill scan_map_."),
// but no declaration line for it is present in this listing.
121

/// Read RT, m/z, charge information from attributes of "spectrum_query".
123 void readRTMZCharge_(const xercesc::Attributes& attributes);
124

// NOTE(review): the header line of this nested type is absent from the listing; the
// destructor below names it AminoAcidModification.
126 {
127 private:
128

// NOTE(review): the cross-reference lists further members of this type (aminoacid_,
// is_variable_, description_, terminus_, term_spec_, registered_mod_) whose declaration
// lines are absent from this listing.
130 double massdiff_;
131 double mass_;
135 bool is_protein_terminus_; // "true" if protein terminus, "false" if peptide terminus
137 std::vector<String> errors_;
139

// NOTE(review): the cross-reference signature of this helper additionally lists
// `const ResidueModification::TermSpecificity term_spec` between `description` and
// `tolerance` (listing line 144 is absent here).
140 const ResidueModification* lookupModInPreferredMods_(const std::vector<const ResidueModification*>& preferred_fixed_mods,
141 const String& aminoacid,
142 double massdiff,
143 const String& description,
145 double tolerance);
146
147 public:
149

// NOTE(review): the next lines are a parameter list whose opening line is absent; the
// cross-reference shows an AminoAcidModification(...) constructor with exactly these
// parameters, so this is presumably that constructor — confirm.
155 const String& aminoacid, const String& massdiff, const String& mass,
156 String variable, const String& description, String terminus, const String& protein_terminus,
157 const std::vector<const ResidueModification*>& preferred_fixed_mods,
158 const std::vector<const ResidueModification*>& preferred_var_mods,
159 double tolerance);
160

// NOTE(review): the cross-reference lists a defaulted copy constructor and a defaulted copy
// assignment operator for this type; their declaration lines are absent from this listing.
162

// Virtual, compiler-generated destructor.
163 virtual ~AminoAcidModification() = default;
164
166
168

// Const accessor declarations follow; their definitions are not part of this listing.
169 const String& getDescription() const;
170
171 bool isVariable() const;
172

// NOTE(review): the cross-reference lists `getRegisteredMod() const`; its declaration line is
// absent from this listing.
174
175 double getMassDiff() const;
176
177 double getMass() const;
178
179 const String& getTerminus() const;
180
181 const String& getAminoAcid() const;
182
183 const std::vector<String>& getErrors() const;
184 };
185

/// Pointer to the list of identified proteins.
187 std::vector<ProteinIdentification>* proteins_;
188

// NOTE(review): the cross-reference documents members at listing lines 190-205 (peptides_,
// lookup_, exp_name_, search_engine_, native_spectrum_name_, experiment_label_,
// swath_assay_, status_) whose declaration lines are absent from this listing.
191
194
197
200
206
208 bool use_precursor_data_{};
209

/// Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
211 std::map<Size, Size> scan_map_;
212

// NOTE(review): the cross-reference documents members at listing lines 214-223 (hydrogen_,
// analysis_summary_, keep_native_name_, search_score_summary_) whose declaration lines are
// absent from this listing.
215
218
221
224
226 bool search_summary_{};
227
229 bool wrong_experiment_{};
230
232 bool seen_experiment_{};
233
235 bool checked_base_name_{};
236
238 bool has_decoys_{};
239
241 bool parse_unknown_scores_{};
242

// NOTE(review): the cross-reference documents decoy_prefix_ (line 244) and
// current_base_name_ (line 247); their declaration lines are absent from this listing.
245
248

/// References to currently active ProteinIdentifications.
250 std::vector<std::vector<ProteinIdentification>::iterator> current_proteins_;
251

// NOTE(review): the cross-reference documents members at listing lines 253-269 (params_,
// enzyme_, enzyme_cuttingsite_, current_peptide_, current_analysis_result_, peptide_hit_,
// current_sequence_) whose declaration lines are absent from this listing.
254
258
261
264
267
270
272 double rt_{}, mz_{};
273
275 Size scannr_{};
276
278 Int charge_{};
279
281 UInt search_id_{};
282

// NOTE(review): the cross-reference documents prot_id_ (line 284) and date_ (line 287);
// their declaration lines are absent from this listing.
285
288
290 double hydrogen_mass_{};
291

/// The modifications of the current peptide hit (position is 1-based)
293 std::vector<std::pair<const ResidueModification*, Size> > current_modifications_;
294

/// Fixed aminoacid modifications as parsed from the header.
296 std::vector<AminoAcidModification> fixed_modifications_;
297

/// Variable aminoacid modifications as parsed from the header.
299 std::vector<AminoAcidModification> variable_modifications_;
300

// The two vectors below hold non-owning-looking pointers to ResidueModification objects;
// NOTE(review): presumably filled by setPreferredFixedModifications() and
// setPreferredVariableModifications() — their definitions are not visible here, confirm.
303 std::vector<const ResidueModification*> preferred_fixed_modifications_;
304
307 std::vector<const ResidueModification*> preferred_variable_modifications_;
308
310

// Static constants; their values are defined outside this listing.
311 static const double mod_tol_;
312 static const double xtandem_artificial_mod_tol_;
313

// Takes a modification mass, a position and a list of header modifications and returns a bool.
// NOTE(review): no description is available in this listing — document the exact contract
// from the implementation.
316 bool lookupAddFromHeader_(double modification_mass,
317 Size modification_position,
318 std::vector<AminoAcidModification> const& header_mods);
319
320 //static std::vector<int> getIsotopeErrorsFromIntSetting_(int intSetting);
321 };
322} // namespace OpenMS
char16_t XMLCh
Definition ClassTest.h:28
DateTime Class.
Definition DateTime.h:35
Representation of an element.
Definition Element.h:34
Base class for loading/storing XML files that have a handler derived from XMLHandler.
Definition XMLFile.h:23
Base class for XML handlers.
Definition XMLHandler.h:328
Used to load and store PepXML files.
Definition PepXMLFile.h:41
void store(const String &filename, std::vector< ProteinIdentification > &protein_ids, PeptideIdentificationList &peptide_ids, const String &mz_file="", const String &mz_name="", bool peptideprophet_analyzed=false, double rt_tolerance=0.01)
Stores idXML as PepXML file.
static const double xtandem_artificial_mod_tol_
Definition PepXMLFile.h:312
void load(const String &filename, std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, const String &experiment_name="")
load function with empty defaults for some parameters (see above)
bool search_score_summary_
Are we currently in a "search_score_summary" element (should be skipped)?
Definition PepXMLFile.h:223
String exp_name_
Name of the associated experiment (filename of the data file, extension will be removed)
Definition PepXMLFile.h:196
void setParseUnknownScores(bool parse_unknown_scores)
Sets whether unknown scores should be parsed during load.
String current_base_name_
current base name
Definition PepXMLFile.h:247
static const double mod_tol_
Definition PepXMLFile.h:311
String current_sequence_
Sequence of the current peptide hit.
Definition PepXMLFile.h:269
PeptideHit peptide_hit_
PeptideHit instance currently being processed.
Definition PepXMLFile.h:266
std::vector< AminoAcidModification > variable_modifications_
Variable aminoacid modifications as parsed from the header.
Definition PepXMLFile.h:299
void keepNativeSpectrumName(bool keep)
Whether we should keep the native spectrum name of the pepXML.
Definition PepXMLFile.h:95
PeptideIdentification current_peptide_
PeptideIdentification instance currently being processed.
Definition PepXMLFile.h:260
String search_engine_
Set name of search engine.
Definition PepXMLFile.h:199
std::vector< std::pair< const ResidueModification *, Size > > current_modifications_
The modifications of the current peptide hit (position is 1-based)
Definition PepXMLFile.h:293
String enzyme_
Enzyme name associated with the current identification run.
Definition PepXMLFile.h:256
String enzyme_cuttingsite_
Definition PepXMLFile.h:257
std::vector< std::vector< ProteinIdentification >::iterator > current_proteins_
References to currently active ProteinIdentifications.
Definition PepXMLFile.h:250
std::vector< const ResidueModification * > preferred_fixed_modifications_
Definition PepXMLFile.h:303
PeptideIdentificationList * peptides_
Pointer to the list of identified peptides.
Definition PepXMLFile.h:190
~PepXMLFile() override
Destructor.
void load(const String &filename, std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, const String &experiment_name, const SpectrumMetaDataLookup &lookup)
Loads peptide sequences with modifications out of a PepXML file.
PeptideHit::PepXMLAnalysisResult current_analysis_result_
Analysis result instance currently being processed.
Definition PepXMLFile.h:263
std::vector< const ResidueModification * > preferred_variable_modifications_
Definition PepXMLFile.h:307
bool analysis_summary_
Are we currently in an "analysis_summary" element (should be skipped)?
Definition PepXMLFile.h:217
const SpectrumMetaDataLookup * lookup_
Pointer to wrapper for looking up spectrum meta data.
Definition PepXMLFile.h:193
String native_spectrum_name_
Several optional attributes of spectrum_query.
Definition PepXMLFile.h:202
PepXMLFile()
Constructor.
String prot_id_
Identifier linking PeptideIdentifications and ProteinIdentifications.
Definition PepXMLFile.h:284
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
Docu in base class.
bool keep_native_name_
Whether we should keep the native spectrum name of the pepXML.
Definition PepXMLFile.h:220
std::vector< ProteinIdentification > * proteins_
Pointer to the list of identified proteins.
Definition PepXMLFile.h:187
DateTime date_
Date the pepXML file was generated.
Definition PepXMLFile.h:287
std::vector< AminoAcidModification > fixed_modifications_
Fixed aminoacid modifications as parsed from the header.
Definition PepXMLFile.h:296
String decoy_prefix_
In case it has decoys, what is the prefix?
Definition PepXMLFile.h:244
String swath_assay_
Definition PepXMLFile.h:204
bool lookupAddFromHeader_(double modification_mass, Size modification_position, std::vector< AminoAcidModification > const &header_mods)
Element hydrogen_
Hydrogen data (for mass types)
Definition PepXMLFile.h:214
String experiment_label_
Definition PepXMLFile.h:203
ProteinIdentification::SearchParameters params_
Search parameters of the current identification run.
Definition PepXMLFile.h:253
String status_
Definition PepXMLFile.h:205
void readRTMZCharge_(const xercesc::Attributes &attributes)
Read RT, m/z, charge information from attributes of "spectrum_query".
void setPreferredFixedModifications(const std::vector< const ResidueModification * > &mods)
sets the preferred fixed modifications
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
Docu in base class.
std::map< Size, Size > scan_map_
Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
Definition PepXMLFile.h:211
void setPreferredVariableModifications(const std::vector< const ResidueModification * > &mods)
sets the preferred variable modifications
void makeScanMap_()
Fill scan_map_.
Analysis Result (containing search engine / prophet results)
Definition PeptideHit.h:201
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition PeptideIdentification.h:64
Representation of a modification on an amino acid residue.
Definition ResidueModification.h:55
TermSpecificity
Position where the modification is allowed to occur.
Definition ResidueModification.h:74
Helper class for looking up spectrum meta data.
Definition SpectrumMetaDataLookup.h:118
A more convenient string class.
Definition String.h:34
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::vector< String > errors_
Definition PepXMLFile.h:137
double massdiff_
Definition PepXMLFile.h:130
double mass_
Definition PepXMLFile.h:131
String aminoacid_
Definition PepXMLFile.h:129
const std::vector< String > & getErrors() const
bool is_protein_terminus_
Definition PepXMLFile.h:135
const ResidueModification * getRegisteredMod() const
String description_
Definition PepXMLFile.h:133
const ResidueModification * registered_mod_
Definition PepXMLFile.h:138
AminoAcidModification & operator=(const AminoAcidModification &rhs)=default
AminoAcidModification(const String &aminoacid, const String &massdiff, const String &mass, String variable, const String &description, String terminus, const String &protein_terminus, const std::vector< const ResidueModification * > &preferred_fixed_mods, const std::vector< const ResidueModification * > &preferred_var_mods, double tolerance)
AminoAcidModification(const AminoAcidModification &rhs)=default
const ResidueModification * lookupModInPreferredMods_(const std::vector< const ResidueModification * > &preferred_fixed_mods, const String &aminoacid, double massdiff, const String &description, const ResidueModification::TermSpecificity term_spec, double tolerance)
ResidueModification::TermSpecificity term_spec_
Definition PepXMLFile.h:136
bool is_variable_
Definition PepXMLFile.h:132
String terminus_
Definition PepXMLFile.h:134
Search parameters of the DB search.
Definition ProteinIdentification.h:254