OpenMS
PepXMLFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow, Hendrik Weisser $
6 // $Authors: Chris Bielow, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
15 #include <OpenMS/FORMAT/XMLFile.h>
19 
20 #include <vector>
21 #include <map>
22 #include <set>
23 
24 
25 namespace OpenMS
26 {
/// Used to load and store PepXML files.
///
/// Inherits Internal::XMLHandler (protected) and Internal::XMLFile (public);
/// startElement()/endElement() below override the handler callbacks.
//
// NOTE(review): this listing is incomplete. The definition index that follows
// the code names members whose declarations do not appear here:
//   PepXMLFile() (constructor), lookup_, exp_name_, search_engine_,
//   native_spectrum_name_, experiment_label_, swath_assay_, status_, hydrogen_,
//   analysis_summary_, keep_native_name_, search_score_summary_, decoy_prefix_,
//   current_base_name_, params_, enzyme_, enzyme_cuttingsite_, current_peptide_,
//   current_analysis_result_, peptide_hit_, current_sequence_, prot_id_, date_.
// Restore them from the original header before compiling.
37  class OPENMS_DLLAPI PepXMLFile :
38  protected Internal::XMLHandler,
39  public Internal::XMLFile
40  {
41 public:
42 
45 
    /// Destructor.
47  ~PepXMLFile() override;
48 
    /// Loads peptide sequences with modifications out of a PepXML file.
    ///
    /// @param filename        PepXML file to read
    /// @param proteins        output: protein identifications
    /// @param peptides        output: peptide identifications
    /// @param experiment_name name of the associated experiment
    ///                        (presumably used to select one run of the file — TODO confirm)
    /// @param lookup          helper for looking up spectrum meta data
61  void load(const String& filename,
62  std::vector<ProteinIdentification>& proteins,
63  std::vector<PeptideIdentification>& peptides,
64  const String& experiment_name,
65  const SpectrumMetaDataLookup& lookup);
66 
    /// Overload of load() without a spectrum meta data lookup;
    /// @p experiment_name defaults to the empty string.
73  void load(const String& filename,
74  std::vector<ProteinIdentification>& proteins,
75  std::vector<PeptideIdentification>& peptides,
76  const String& experiment_name = "");
77 
    /// Stores idXML as PepXML file.
    ///
    /// Defaults: @p mz_file and @p mz_name are empty, @p peptideprophet_analyzed
    /// is false and @p rt_tolerance is 0.01.
    // NOTE(review): unit of rt_tolerance is not visible here — TODO confirm.
83  void store(const String& filename, std::vector<ProteinIdentification>& protein_ids,
84  std::vector<PeptideIdentification>& peptide_ids, const String& mz_file = "",
85  const String& mz_name = "", bool peptideprophet_analyzed = false, double rt_tolerance = 0.01);
86 
    /// Sets whether we should keep the native spectrum name of the pepXML.
    /// Simply stores @p keep in keep_native_name_.
94  void keepNativeSpectrumName(bool keep)
95  {
96  keep_native_name_ = keep;
97  }
98 
    /// Sets the preferred fixed modifications.
100  void setPreferredFixedModifications(const std::vector<const ResidueModification*>& mods);
101 
    /// Sets the preferred variable modifications.
103  void setPreferredVariableModifications(const std::vector<const ResidueModification*>& mods);
104 
    /// Sets whether unknown scores should be parsed during load.
106  void setParseUnknownScores(bool parse_unknown_scores);
107 
108 protected:
109 
    /// XML handler callback for a closing element; documented in the base class.
111  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
112 
    /// XML handler callback for an opening element; documented in the base class.
114  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
115 
116 private:
117 
    /// Fill scan_map_.
119  void makeScanMap_();
120 
    /// Read RT, m/z, charge information from attributes of "spectrum_query".
122  void readRTMZCharge_(const xercesc::Attributes& attributes);
123 
    // NOTE(review): the class-head line of the nested helper class is missing
    // from this listing; the virtual destructor below names it
    // AminoAcidModification. Per the definition index, the members aminoacid_,
    // is_variable_, description_, terminus_, term_spec_ and registered_mod_,
    // the defaulted copy constructor / copy assignment and getRegisteredMod()
    // are also absent here.
125  {
126  private:
127 
129  double massdiff_;
130  double mass_;
134  bool is_protein_terminus_; // "true" if protein terminus, "false" if peptide terminus
    // presumably problems collected while interpreting the modification;
    // exposed read-only via getErrors() — TODO confirm
136  std::vector<String> errors_;
138 
    // Helper that searches a list of preferred modifications.
    // NOTE(review): matching criteria (residue, mass difference within
    // `tolerance`, description, terminal specificity) are inferred from the
    // parameter names only — confirm against the implementation.
139  const ResidueModification* lookupModInPreferredMods_(const std::vector<const ResidueModification*>& preferred_fixed_mods,
140  const String& aminoacid,
141  double massdiff,
142  const String& description,
143  const ResidueModification::TermSpecificity term_spec,
144  double tolerance);
145 
146  public:
148 
    // NOTE(review): the opening line of this declaration is missing from the
    // listing. Per the definition index, the lines below are the parameter
    // list of the AminoAcidModification constructor.
154  const String& aminoacid, const String& massdiff, const String& mass,
155  String variable, const String& description, String terminus, const String& protein_terminus,
156  const std::vector<const ResidueModification*>& preferred_fixed_mods,
157  const std::vector<const ResidueModification*>& preferred_var_mods,
158  double tolerance);
159 
161 
    /// Virtual destructor (defaulted).
162  virtual ~AminoAcidModification() = default;
163 
165 
167 
    // Read-only accessors (all const member functions).
168  const String& getDescription() const;
169 
170  bool isVariable() const;
171 
173 
174  double getMassDiff() const;
175 
176  double getMass() const;
177 
178  const String& getTerminus() const;
179 
180  const String& getAminoAcid() const;
181 
182  const std::vector<String>& getErrors() const;
183  };
184 
    /// Pointer to the list of identified proteins.
186  std::vector<ProteinIdentification>* proteins_;
187 
    /// Pointer to the list of identified peptides.
189  std::vector<PeptideIdentification>* peptides_;
190 
193 
196 
199 
205 
207  bool use_precursor_data_{};
208 
    /// Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
210  std::map<Size, Size> scan_map_;
211 
214 
217 
220 
223 
    // The boolean members below are value-initialized (false).
    // NOTE(review): their exact meaning is not documented in this listing;
    // the names suggest parser state flags — TODO confirm.
225  bool search_summary_{};
226 
228  bool wrong_experiment_{};
229 
231  bool seen_experiment_{};
232 
234  bool checked_base_name_{};
235 
237  bool has_decoys_{};
238 
    // presumably the flag set by setParseUnknownScores() — TODO confirm
240  bool parse_unknown_scores_{};
241 
244 
247 
    /// References to currently active ProteinIdentifications.
249  std::vector<std::vector<ProteinIdentification>::iterator> current_proteins_;
250 
253 
257 
260 
263 
266 
269 
    // presumably the RT and m/z read by readRTMZCharge_() — TODO confirm
271  double rt_{}, mz_{};
272 
274  Size scannr_{};
275 
    // presumably the charge read by readRTMZCharge_() — TODO confirm
277  Int charge_{};
278 
280  UInt search_id_{};
281 
284 
287 
289  double hydrogen_mass_{};
290 
    /// The modifications of the current peptide hit (position is 1-based).
292  std::vector<std::pair<const ResidueModification*, Size> > current_modifications_;
293 
    /// Fixed aminoacid modifications as parsed from the header.
295  std::vector<AminoAcidModification> fixed_modifications_;
296 
    /// Variable aminoacid modifications as parsed from the header.
298  std::vector<AminoAcidModification> variable_modifications_;
299 
    // presumably the list stored by setPreferredFixedModifications() — TODO confirm
302  std::vector<const ResidueModification*> preferred_fixed_modifications_;
303 
    // presumably the list stored by setPreferredVariableModifications() — TODO confirm
306  std::vector<const ResidueModification*> preferred_variable_modifications_;
307 
309 
    // Static tolerance constants; their values are defined outside this header.
310  static const double mod_tol_;
311  static const double xtandem_artificial_mod_tol_;
312 
    // NOTE(review): no description is available in this listing. From the
    // signature: takes a modification mass, a position and the modifications
    // parsed from the header, and reports success as a bool — confirm the
    // exact matching rule against the implementation.
315  bool lookupAddFromHeader_(double modification_mass,
316  Size modification_position,
317  std::vector<AminoAcidModification> const& header_mods);
318 
319  //static std::vector<int> getIsotopeErrorsFromIntSetting_(int intSetting);
320  };
321 } // namespace OpenMS
DateTime Class.
Definition: DateTime.h:33
Representation of an element.
Definition: Element.h:32
Base class for loading/storing XML files that have a handler derived from XMLHandler.
Definition: XMLFile.h:23
Base class for XML handlers.
Definition: XMLHandler.h:300
Used to load and store PepXML files.
Definition: PepXMLFile.h:40
static const double xtandem_artificial_mod_tol_
Definition: PepXMLFile.h:311
bool search_score_summary_
Are we currently in a "search_score_summary" element (should be skipped)?
Definition: PepXMLFile.h:222
String exp_name_
Name of the associated experiment (filename of the data file, extension will be removed)
Definition: PepXMLFile.h:195
void setParseUnknownScores(bool parse_unknown_scores)
Sets whether unknown scores should be parsed during load.
String current_base_name_
current base name
Definition: PepXMLFile.h:246
void store(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids, const String &mz_file="", const String &mz_name="", bool peptideprophet_analyzed=false, double rt_tolerance=0.01)
Stores idXML as PepXML file.
static const double mod_tol_
Definition: PepXMLFile.h:310
void load(const String &filename, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const String &experiment_name="")
Load function with empty defaults for some parameters (see the load() overload that takes a SpectrumMetaDataLookup).
String current_sequence_
Sequence of the current peptide hit.
Definition: PepXMLFile.h:268
PeptideHit peptide_hit_
PeptideHit instance currently being processed.
Definition: PepXMLFile.h:265
std::vector< AminoAcidModification > variable_modifications_
Variable aminoacid modifications as parsed from the header.
Definition: PepXMLFile.h:298
void keepNativeSpectrumName(bool keep)
Whether we should keep the native spectrum name of the pepXML.
Definition: PepXMLFile.h:94
PeptideIdentification current_peptide_
PeptideIdentification instance currently being processed.
Definition: PepXMLFile.h:259
String search_engine_
Set name of search engine.
Definition: PepXMLFile.h:198
std::vector< std::pair< const ResidueModification *, Size > > current_modifications_
The modifications of the current peptide hit (position is 1-based)
Definition: PepXMLFile.h:292
String enzyme_
Enzyme name associated with the current identification run.
Definition: PepXMLFile.h:255
String enzyme_cuttingsite_
Definition: PepXMLFile.h:256
std::vector< std::vector< ProteinIdentification >::iterator > current_proteins_
References to currently active ProteinIdentifications.
Definition: PepXMLFile.h:249
std::vector< PeptideIdentification > * peptides_
Pointer to the list of identified peptides.
Definition: PepXMLFile.h:189
std::vector< const ResidueModification * > preferred_fixed_modifications_
Definition: PepXMLFile.h:302
~PepXMLFile() override
Destructor.
PeptideHit::PepXMLAnalysisResult current_analysis_result_
Analysis result instance currently being processed.
Definition: PepXMLFile.h:262
std::vector< const ResidueModification * > preferred_variable_modifications_
Definition: PepXMLFile.h:306
bool analysis_summary_
Are we currently in an "analysis_summary" element (should be skipped)?
Definition: PepXMLFile.h:216
const SpectrumMetaDataLookup * lookup_
Pointer to wrapper for looking up spectrum meta data.
Definition: PepXMLFile.h:192
String native_spectrum_name_
Several optional attributes of spectrum_query.
Definition: PepXMLFile.h:201
PepXMLFile()
Constructor.
String prot_id_
Identifier linking PeptideIdentifications and ProteinIdentifications.
Definition: PepXMLFile.h:283
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
Documented in the base class.
bool keep_native_name_
Whether we should keep the native spectrum name of the pepXML.
Definition: PepXMLFile.h:219
std::vector< ProteinIdentification > * proteins_
Pointer to the list of identified proteins.
Definition: PepXMLFile.h:186
DateTime date_
Date the pepXML file was generated.
Definition: PepXMLFile.h:286
std::vector< AminoAcidModification > fixed_modifications_
Fixed aminoacid modifications as parsed from the header.
Definition: PepXMLFile.h:295
String decoy_prefix_
In case it has decoys, what is the prefix?
Definition: PepXMLFile.h:243
String swath_assay_
Definition: PepXMLFile.h:203
bool lookupAddFromHeader_(double modification_mass, Size modification_position, std::vector< AminoAcidModification > const &header_mods)
Element hydrogen_
Hydrogen data (for mass types)
Definition: PepXMLFile.h:213
String experiment_label_
Definition: PepXMLFile.h:202
void load(const String &filename, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const String &experiment_name, const SpectrumMetaDataLookup &lookup)
Loads peptide sequences with modifications out of a PepXML file.
ProteinIdentification::SearchParameters params_
Search parameters of the current identification run.
Definition: PepXMLFile.h:252
String status_
Definition: PepXMLFile.h:204
void readRTMZCharge_(const xercesc::Attributes &attributes)
Read RT, m/z, charge information from attributes of "spectrum_query".
void setPreferredFixedModifications(const std::vector< const ResidueModification * > &mods)
Sets the preferred fixed modifications.
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
Documented in the base class.
std::map< Size, Size > scan_map_
Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
Definition: PepXMLFile.h:210
void setPreferredVariableModifications(const std::vector< const ResidueModification * > &mods)
Sets the preferred variable modifications.
void makeScanMap_()
Fill scan_map_.
Analysis Result (containing search engine / prophet results)
Definition: PeptideHit.h:150
Representation of a peptide hit.
Definition: PeptideHit.h:31
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:39
Representation of a modification on an amino acid residue.
Definition: ResidueModification.h:53
TermSpecificity
Position where the modification is allowed to occur.
Definition: ResidueModification.h:72
Helper class for looking up spectrum meta data.
Definition: SpectrumMetaDataLookup.h:117
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:76
unsigned int UInt
Unsigned integer type.
Definition: Types.h:68
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
Definition: PepXMLFile.h:125
std::vector< String > errors_
Definition: PepXMLFile.h:136
double massdiff_
Definition: PepXMLFile.h:129
double mass_
Definition: PepXMLFile.h:130
String aminoacid_
Definition: PepXMLFile.h:128
bool is_protein_terminus_
Definition: PepXMLFile.h:134
String description_
Definition: PepXMLFile.h:132
const ResidueModification * registered_mod_
Definition: PepXMLFile.h:137
AminoAcidModification(const String &aminoacid, const String &massdiff, const String &mass, String variable, const String &description, String terminus, const String &protein_terminus, const std::vector< const ResidueModification * > &preferred_fixed_mods, const std::vector< const ResidueModification * > &preferred_var_mods, double tolerance)
AminoAcidModification(const AminoAcidModification &rhs)=default
ResidueModification::TermSpecificity term_spec_
Definition: PepXMLFile.h:135
const ResidueModification * lookupModInPreferredMods_(const std::vector< const ResidueModification * > &preferred_fixed_mods, const String &aminoacid, double massdiff, const String &description, const ResidueModification::TermSpecificity term_spec, double tolerance)
const std::vector< String > & getErrors() const
const ResidueModification * getRegisteredMod() const
AminoAcidModification & operator=(const AminoAcidModification &rhs)=default
bool is_variable_
Definition: PepXMLFile.h:131
String terminus_
Definition: PepXMLFile.h:133
Search parameters of the DB search.
Definition: ProteinIdentification.h:247