OpenMS
Loading...
Searching...
No Matches
UniProtXMLHandler.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
13
14#include <functional>
15#include <string>
16
17namespace OpenMS::Internal
18{
// SAX handler for UniProtKB XML entry documents. Derives from XMLHandler (the
// base class for XML handlers) and hands each parsed UniProtEntry to a
// user-supplied callback.
// NOTE(review): this listing is incomplete. The page's cross-reference section
// also names a destructor, resetEntry_(), resetFeature_(), callback_,
// current_entry_ and current_feature_, which are not shown here.
31 class OPENMS_DLLAPI UniProtXMLHandler :
32 public XMLHandler
33 {
34 public:
// Callable that receives each parsed entry as an rvalue (UniProtEntry&&),
// so the receiver may move from it.
38 using EntryCallback = std::function<void(UniProtEntry&&)>;
39
// Type of the capture_ member below.
// NOTE(review): presumably names the field that buffered character data is
// destined for -- confirm against the implementation.
42 enum class CaptureTarget
43 {
44 None,
45 Accession,
46 EntryName,
47 FullName,
48 PrimaryGene,
49 TaxName,
50 Sequence,
51 FeatureOriginal,
52 FeatureVariation
53 };
54
// Build a handler that delivers each parsed UniProtEntry to callback.
// NOTE(review): how filename is used is not visible here; presumably it is
// forwarded to the XMLHandler base -- confirm.
56 UniProtXMLHandler(const std::string& filename, EntryCallback callback);
57
60
// SAX event overrides of the XMLHandler base class: element start, element
// end, and character data.
61 void startElement(const XMLCh* const uri, const XMLCh* const local_name,
62 const XMLCh* const qname, const xercesc::Attributes& attrs) override;
63 void endElement(const XMLCh* const uri, const XMLCh* const local_name,
64 const XMLCh* const qname) override;
65 void characters(const XMLCh* const chars, const XMLSize_t length) override;
66
67 private:
70
// NOTE(review): presumably the current element nesting depth -- confirm.
72 int depth_{0};
73
// One depth marker per element kind, all initialised to 0.
// NOTE(review): presumably each records the depth at which that element was
// opened, with 0 meaning "not inside one" -- confirm against the implementation.
77 int entry_depth_{0};
78 int recommended_name_depth_{0};
79 int gene_depth_{0};
80 int organism_depth_{0};
81 int alt_products_depth_{0};
82 int feature_depth_{0};
83 int sequence_depth_{0};
84
// NOTE(review): presumably set once a full name has been stored for the
// current entry so later ones are ignored -- confirm.
87 bool full_name_captured_{false};
88
// Buffer for character data; appended to in characters(), consumed in
// endElement().
90 std::string char_buf_;
91
// Currently active capture target; starts as CaptureTarget::None.
93 CaptureTarget capture_{CaptureTarget::None};
94
97
// NOTE(review): presumably flags derived from attributes of the enclosing
// name element (primary gene name / scientific organism name) -- confirm.
99 bool gene_name_is_primary_{false};
101 bool organism_name_is_scientific_{false};
102
107
// Static helper taking an attribute string and returning an int.
// NOTE(review): presumably parses a sequence position; the result for empty or
// non-numeric input is not visible here -- confirm.
110 static int parsePosition_(const std::string& attr);
111 };
112
113} // namespace OpenMS::Internal
char16_t XMLCh
Definition ClassTest.h:30
SAX handler for UniProtKB XML <entry> documents.
Definition UniProtXMLHandler.h:33
std::string char_buf_
Buffer for character data; appended to in characters() and consumed in endElement().
Definition UniProtXMLHandler.h:90
void endElement(const XMLCh *const uri, const XMLCh *const local_name, const XMLCh *const qname) override
~UniProtXMLHandler() override
Destructor.
UniProtXMLHandler(const std::string &filename, EntryCallback callback)
Build a handler that delivers each parsed UniProtEntry to callback.
void resetFeature_()
Clear all per-feature state so the next <feature> starts fresh.
std::function< void(UniProtEntry &&)> EntryCallback
Definition UniProtXMLHandler.h:38
static int parsePosition_(const std::string &attr)
CaptureTarget
Definition UniProtXMLHandler.h:43
EntryCallback callback_
Definition UniProtXMLHandler.h:68
void resetEntry_()
Clear all per-entry state so the next <entry> starts fresh.
UniProtEntry current_entry_
Definition UniProtXMLHandler.h:69
UniProtFeature current_feature_
Per-feature working state (used between startElement("feature") and endElement("feature")).
Definition UniProtXMLHandler.h:96
void characters(const XMLCh *const chars, const XMLSize_t length) override
void startElement(const XMLCh *const uri, const XMLCh *const local_name, const XMLCh *const qname, const xercesc::Attributes &attrs) override
Base class for XML handlers.
Definition XMLHandler.h:315
Namespace used to hide implementation details from users.
Definition BayesianProteinInferenceAlgorithm.h:26
A single <entry> from a UniProtKB XML file, in a parser-neutral form.
Definition UniProtXMLFile.h:48
A single <feature> element from a UniProtKB XML entry.
Definition UniProtXMLFile.h:28