OpenMS  2.4.0
XQuestResultXMLHandler.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Eugen Netz $
32 // $Authors: Lukas Zimmermann $
33 // --------------------------------------------------------------------------
34 #pragma once
35 
42 
43 namespace OpenMS
44 {
45  namespace Internal
46  {
49  class OPENMS_DLLAPI XQuestResultXMLHandler :
50  public XMLHandler
51  {
52  public:
53 
54  // Maps enzyme_num in xQuest result file to the enzyme name used by OpenMS
55  static std::map< Size, String > enzymes;
56 
57  // Maps String encoding month to the numeric value
58  static std::map<String, UInt> months;
59 
61  XQuestResultXMLHandler(const String & filename,
62  std::vector< PeptideIdentification > & pep_ids,
63  std::vector< ProteinIdentification > & prot_ids
64  );
65 
67  XQuestResultXMLHandler(const std::vector<ProteinIdentification>& pro_id,
68  const std::vector<PeptideIdentification>& pep_id,
69  const String& filename,
70  const String& version
71  );
72 
73  ~XQuestResultXMLHandler() override;
74 
75  // Docu in base class
76  void endElement(const XMLCh * const uri, const XMLCh * const local_name, const XMLCh * const qname) override;
77 
78  // Docu in base class
79  void startElement(const XMLCh * const uri, const XMLCh * const local_name, const XMLCh * const qname, const xercesc::Attributes & attributes) override;
80 
85  double getMinScore() const;
86 
91  double getMaxScore() const;
92 
97  UInt getNumberOfHits() const;
98 
99  //Docu in base class
100  virtual void writeTo(std::ostream& os) override;
101 
102  // TODO move these to StringUtils?
111  static StringList splitByNth(const String& input, const char separator, const Size n);
112 
125  static StringList splitByMiddle(const String& input, const char separator);
126 
127  private:
128 
129 
130  // Decoy string used by xQuest
131  String decoy_string_;
132  int spectrum_index_light_;
133  int spectrum_index_heavy_;
134 
135  // Main data structures that are populated during loading the file
136  std::vector< PeptideIdentification >* pep_ids_;
137  std::vector< ProteinIdentification >* prot_ids_;
138 
139  // internal ID items for writing files
140  const std::vector<ProteinIdentification>* cpro_id_;
141  const std::vector<PeptideIdentification>* cpep_id_;
142 
143  UInt n_hits_; // Total no. of hits found in the result XML file
144 
145  // Keeps track of the minscore and maxscore encountered
146  double min_score_;
147  double max_score_;
148 
149  // Whether or not current xquest result tag comes from OpenPepXL (xQuest otherwise)
150  bool is_openpepxl_;
151 
152  // Set of all protein accessions that are within the ProteinHits.
153  std::set< String > accessions_;
154 
155  // The enzyme database for enzyme lookup
156  ProteaseDB* enzymes_db_;
157 
158  // Keeps track of the charges of the hits
159  std::set< UInt > charges_;
160  UInt min_precursor_charge_;
161  UInt max_precursor_charge_;
162 
163  // Current Retention time of spectrum pair
164  double rt_light_;
165  double rt_heavy_;
166 
167  // Current experimental m/z of spectrum pair
168  double mz_light_;
169  double mz_heavy_;
170 
171  // primary MS run path
172  StringList ms_run_path_;
173  String spectrum_input_file_;
174 
175  // The current spectrum search
176  std::vector< PeptideIdentification > current_spectrum_search_;
177 
178  // Stores the attributes of a record (peptide identification)
179  std::map<String, DataValue> peptide_id_meta_values_;
180 
186  inline void extractDateTime_(const String & xquest_datetime_string, DateTime & date_time);
187 
194  void addMetaValues_(MetaInfoInterface & meta_info_interface);
195 
201  void getLinkPosition_(const xercesc::Attributes & attributes, std::pair<SignedSize, SignedSize> & pair);
202 
208  void setPeptideEvidence_(const String & prot_string, PeptideHit & pep_hit);
209 
210  };
211  } // namespace Internal
212 } // namespace OpenMS
UInt n_hits_
Definition: XQuestResultXMLHandler.h:143
A more convenient string class.
Definition: String.h:57
double mz_light_
Definition: XQuestResultXMLHandler.h:168
double rt_light_
Definition: XQuestResultXMLHandler.h:164
String spectrum_input_file_
Definition: XQuestResultXMLHandler.h:173
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
XMLHandler for the result files of XQuest.
Definition: XQuestResultXMLHandler.h:49
int spectrum_index_heavy_
Definition: XQuestResultXMLHandler.h:133
double min_score_
Definition: XQuestResultXMLHandler.h:146
Base class for XML handlers.
Definition: XMLHandler.h:148
std::vector< PeptideIdentification > current_spectrum_search_
Definition: XQuestResultXMLHandler.h:176
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
int spectrum_index_light_
Definition: XQuestResultXMLHandler.h:132
String decoy_string_
Definition: XQuestResultXMLHandler.h:131
std::vector< PeptideIdentification > * pep_ids_
Definition: XQuestResultXMLHandler.h:136
ProteaseDB * enzymes_db_
Definition: XQuestResultXMLHandler.h:156
UInt max_precursor_charge_
Definition: XQuestResultXMLHandler.h:161
double mz_heavy_
Definition: XQuestResultXMLHandler.h:169
std::vector< ProteinIdentification > * prot_ids_
Definition: XQuestResultXMLHandler.h:137
Representation of a peptide hit.
Definition: PeptideHit.h:54
double rt_heavy_
Definition: XQuestResultXMLHandler.h:165
const std::vector< ProteinIdentification > * cpro_id_
Definition: XQuestResultXMLHandler.h:140
UInt min_precursor_charge_
Definition: XQuestResultXMLHandler.h:160
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:55
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
bool is_openpepxl_
Definition: XQuestResultXMLHandler.h:150
double max_score_
Definition: XQuestResultXMLHandler.h:147
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
DateTime Class.
Definition: DateTime.h:54
Database for enzymes that digest proteins (proteases)
Definition: ProteaseDB.h:52
StringList ms_run_path_
Definition: XQuestResultXMLHandler.h:172
static std::map< String, UInt > months
Definition: XQuestResultXMLHandler.h:58
std::set< String > accessions_
Definition: XQuestResultXMLHandler.h:153
const std::vector< PeptideIdentification > * cpep_id_
Definition: XQuestResultXMLHandler.h:141
std::map< String, DataValue > peptide_id_meta_values_
Definition: XQuestResultXMLHandler.h:179
static std::map< Size, String > enzymes
Definition: XQuestResultXMLHandler.h:55
std::set< UInt > charges_
Definition: XQuestResultXMLHandler.h:159