OpenMS  2.5.0
PepXMLFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow, Hendrik Weisser $
32 // $Authors: Chris Bielow, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 #include <OpenMS/FORMAT/XMLFile.h>
44 
45 #include <vector>
46 #include <map>
47 #include <set>
48 
49 
50 namespace OpenMS
51 {
62  class OPENMS_DLLAPI PepXMLFile :
63  protected Internal::XMLHandler,
64  public Internal::XMLFile
65  {
66 public:
67 
69  PepXMLFile();
70 
72  ~PepXMLFile() override;
73 
86  void load(const String& filename,
87  std::vector<ProteinIdentification>& proteins,
88  std::vector<PeptideIdentification>& peptides,
89  const String& experiment_name,
90  const SpectrumMetaDataLookup& lookup);
91 
98  void load(const String& filename,
99  std::vector<ProteinIdentification>& proteins,
100  std::vector<PeptideIdentification>& peptides,
101  const String& experiment_name = "");
102 
108  void store(const String& filename, std::vector<ProteinIdentification>& protein_ids,
109  std::vector<PeptideIdentification>& peptide_ids, const String& mz_file = "",
110  const String& mz_name = "", bool peptideprophet_analyzed = false, double rt_tolerance = 0.01);
111 
119  void keepNativeSpectrumName(bool keep)
120  {
121  keep_native_name_ = keep;
122  }
123 
124 protected:
125 
127  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
128 
130  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
131 
132 private:
133 
135  void makeScanMap_();
136 
138  void readRTMZCharge_(const xercesc::Attributes& attributes);
139 
152  void matchModification_(const double mass, const String& origin, String& modification_description);
153 
155  {
158  double mass;
159  bool variable;
162  bool protein_terminus; // "true" if protein terminus, "false" if peptide terminus
163 
165  mass(0),
166  variable(false)
167  {
168  }
169 
171  aminoacid(rhs.aminoacid),
172  massdiff(rhs.massdiff),
173  mass(rhs.mass),
174  variable(rhs.variable),
175  description(rhs.description),
176  terminus(rhs.terminus),
177  protein_terminus(rhs.protein_terminus)
178  {
179  }
180 
182  {
183  }
184 
186  {
187  if (this != &rhs)
188  {
189  aminoacid = rhs.aminoacid;
190  massdiff = rhs.massdiff;
191  mass = rhs.mass;
192  variable = rhs.variable;
193  description = rhs.description;
194  terminus = rhs.terminus;
195  protein_terminus = rhs.protein_terminus;
196  }
197  return *this;
198  }
199 
200  };
201 
203  std::vector<ProteinIdentification>* proteins_;
204 
206  std::vector<PeptideIdentification>* peptides_;
207 
210 
213 
216 
222 
225 
227  std::map<Size, Size> scan_map_;
228 
231 
234 
237 
240 
243 
246 
249 
252 
255 
257  std::vector<std::vector<ProteinIdentification>::iterator> current_proteins_;
258 
261 
264 
267 
270 
273 
276 
278  double rt_, mz_;
279 
282 
285 
288 
291 
294 
296  std::vector<std::pair<String, Size> > current_modifications_;
297 
299  std::vector<AminoAcidModification> fixed_modifications_;
300 
302  std::vector<AminoAcidModification> variable_modifications_;
303 
305 
306  static const double mod_tol_;
307  static const double xtandem_artificial_mod_tol_;
308  };
309 
310 } // namespace OpenMS
OpenMS::PepXMLFile::AminoAcidModification::description
String description
Definition: PepXMLFile.h:160
OpenMS::PepXMLFile::native_spectrum_name_
String native_spectrum_name_
Several optional attributes of spectrum_query.
Definition: PepXMLFile.h:218
OpenMS::PepXMLFile::swath_assay_
String swath_assay_
Definition: PepXMLFile.h:220
OpenMS::PepXMLFile::AminoAcidModification::~AminoAcidModification
virtual ~AminoAcidModification()
Definition: PepXMLFile.h:181
XMLFile.h
OpenMS::PepXMLFile::analysis_summary_
bool analysis_summary_
Are we currently in an "analysis_summary" element (should be skipped)?
Definition: PepXMLFile.h:233
OpenMS::PepXMLFile::search_summary_
bool search_summary_
Are we currently in a "search_summary" element (should be skipped)?
Definition: PepXMLFile.h:242
OpenMS::PepXMLFile::prot_id_
String prot_id_
Identifier linking PeptideIdentifications and ProteinIdentifications.
Definition: PepXMLFile.h:287
OpenMS::Internal::XMLHandler
Base class for XML handlers.
Definition: XMLHandler.h:151
OpenMS::Element
Representation of an element.
Definition: Element.h:53
OpenMS::PeptideHit::PepXMLAnalysisResult
Analysis Result (containing search engine / prophet results)
Definition: PeptideHit.h:210
OpenMS::PepXMLFile::scan_map_
std::map< Size, Size > scan_map_
Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
Definition: PepXMLFile.h:227
OpenMS::PepXMLFile::current_proteins_
std::vector< std::vector< ProteinIdentification >::iterator > current_proteins_
References to currently active ProteinIdentifications.
Definition: PepXMLFile.h:257
OpenMS::PepXMLFile::mod_tol_
static const double mod_tol_
Definition: PepXMLFile.h:306
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::PepXMLFile::hydrogen_
Element hydrogen_
Hydrogen data (for mass types)
Definition: PepXMLFile.h:230
OpenMS::PepXMLFile::hydrogen_mass_
double hydrogen_mass_
Mass of a hydrogen atom (monoisotopic/average depending on case)
Definition: PepXMLFile.h:293
OpenMS::PepXMLFile::keepNativeSpectrumName
void keepNativeSpectrumName(bool keep)
Whether we should keep the native spectrum name of the pepXML.
Definition: PepXMLFile.h:119
OpenMS::PepXMLFile::enzyme_
String enzyme_
Enzyme name associated with the current identification run.
Definition: PepXMLFile.h:263
OpenMS::PepXMLFile::peptide_hit_
PeptideHit peptide_hit_
PeptideHit instance currently being processed.
Definition: PepXMLFile.h:272
OpenMS::PepXMLFile::xtandem_artificial_mod_tol_
static const double xtandem_artificial_mod_tol_
Definition: PepXMLFile.h:307
OpenMS::PepXMLFile::AminoAcidModification
Definition: PepXMLFile.h:154
OpenMS::PepXMLFile::current_peptide_
PeptideIdentification current_peptide_
PeptideIdentification instance currently being processed.
Definition: PepXMLFile.h:266
OpenMS::PepXMLFile::wrong_experiment_
bool wrong_experiment_
Do current entries belong to the experiment of interest (for pepXML files that bundle results from different experiments)?
Definition: PepXMLFile.h:245
XMLHandler.h
SpectrumMetaDataLookup.h
OpenMS::PepXMLFile::AminoAcidModification::mass
double mass
Definition: PepXMLFile.h:158
OpenMS::PepXMLFile::date_
DateTime date_
Date the pepXML file was generated.
Definition: PepXMLFile.h:290
Element.h
OpenMS::PepXMLFile::params_
ProteinIdentification::SearchParameters params_
Search parameters of the current identification run.
Definition: PepXMLFile.h:260
OpenMS::PepXMLFile::proteins_
std::vector< ProteinIdentification > * proteins_
Pointer to the list of identified proteins.
Definition: PepXMLFile.h:203
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::PepXMLFile::search_engine_
String search_engine_
Set name of search engine.
Definition: PepXMLFile.h:215
OpenMS::PepXMLFile::lookup_
const SpectrumMetaDataLookup * lookup_
Pointer to wrapper for looking up spectrum meta data.
Definition: PepXMLFile.h:209
OpenMS::PepXMLFile::variable_modifications_
std::vector< AminoAcidModification > variable_modifications_
Variable aminoacid modifications.
Definition: PepXMLFile.h:302
int
OpenMS::PepXMLFile::AminoAcidModification::AminoAcidModification
AminoAcidModification(const AminoAcidModification &rhs)
Definition: PepXMLFile.h:170
OpenMS::PepXMLFile::AminoAcidModification::aminoacid
String aminoacid
Definition: PepXMLFile.h:156
OpenMS::PepXMLFile::checked_base_name_
bool checked_base_name_
Have we checked the "base_name" attribute in the "msms_run_summary" element?
Definition: PepXMLFile.h:251
ProteinIdentification.h
OpenMS::PepXMLFile::AminoAcidModification::AminoAcidModification
AminoAcidModification()
Definition: PepXMLFile.h:164
OpenMS::PepXMLFile::search_score_summary_
bool search_score_summary_
Are we currently in a "search_score_summary" element (should be skipped)?
Definition: PepXMLFile.h:239
OpenMS::PepXMLFile::status_
String status_
Definition: PepXMLFile.h:221
OpenMS::PepXMLFile::charge_
Int charge_
Precursor ion charge.
Definition: PepXMLFile.h:281
OpenMS::PepXMLFile::keep_native_name_
bool keep_native_name_
Whether we should keep the native spectrum name of the pepXML.
Definition: PepXMLFile.h:236
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::PepXMLFile::rt_
double rt_
RT and m/z of current PeptideIdentification.
Definition: PepXMLFile.h:278
OpenMS::PepXMLFile::fixed_modifications_
std::vector< AminoAcidModification > fixed_modifications_
Fixed aminoacid modifications.
Definition: PepXMLFile.h:299
OpenMS::PepXMLFile::use_precursor_data_
bool use_precursor_data_
Get RT and m/z for peptide ID from precursor scan (should only matter for RT)?
Definition: PepXMLFile.h:224
OpenMS::PepXMLFile::AminoAcidModification::operator=
AminoAcidModification & operator=(const AminoAcidModification &rhs)
Definition: PepXMLFile.h:185
OpenMS::SpectrumMetaDataLookup
Helper class for looking up spectrum meta data.
Definition: SpectrumMetaDataLookup.h:142
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
OpenMS::PepXMLFile::exp_name_
String exp_name_
Name of the associated experiment (filename of the data file, extension will be removed)
Definition: PepXMLFile.h:212
AASequence.h
OpenMS::PepXMLFile::AminoAcidModification::variable
bool variable
Definition: PepXMLFile.h:159
OpenMS::PepXMLFile::current_base_name_
String current_base_name_
current base name
Definition: PepXMLFile.h:254
OpenMS::PepXMLFile::current_analysis_result_
PeptideHit::PepXMLAnalysisResult current_analysis_result_
Analysis result instance currently being processed.
Definition: PepXMLFile.h:269
OpenMS::PepXMLFile::experiment_label_
String experiment_label_
Definition: PepXMLFile.h:219
OpenMS::Internal::XMLFile
Base class for loading/storing XML files that have a handler derived from XMLHandler.
Definition: XMLFile.h:48
OpenMS::PepXMLFile::current_sequence_
String current_sequence_
Sequence of the current peptide hit.
Definition: PepXMLFile.h:275
OpenMS::PepXMLFile::seen_experiment_
bool seen_experiment_
Have we seen the experiment of interest at all?
Definition: PepXMLFile.h:248
OpenMS::PepXMLFile
Used to load and store PepXML files.
Definition: PepXMLFile.h:62
OpenMS::PepXMLFile::AminoAcidModification::massdiff
String massdiff
Definition: PepXMLFile.h:157
PeptideIdentification.h
OpenMS::ProteinIdentification::SearchParameters
Search parameters of the DB search.
Definition: ProteinIdentification.h:221
OpenMS::PepXMLFile::search_id_
UInt search_id_
ID of current search result.
Definition: PepXMLFile.h:284
OpenMS::PepXMLFile::current_modifications_
std::vector< std::pair< String, Size > > current_modifications_
The modifications of the current peptide hit (position is 1-based)
Definition: PepXMLFile.h:296
OpenMS::DateTime
DateTime Class.
Definition: DateTime.h:54
OpenMS::PepXMLFile::AminoAcidModification::terminus
String terminus
Definition: PepXMLFile.h:161
OpenMS::PepXMLFile::peptides_
std::vector< PeptideIdentification > * peptides_
Pointer to the list of identified peptides.
Definition: PepXMLFile.h:206
OpenMS::PepXMLFile::AminoAcidModification::protein_terminus
bool protein_terminus
Definition: PepXMLFile.h:162
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:54