OpenMS
Loading...
Searching...
No Matches
UniProtXMLFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
12
13#include <functional>
14#include <string>
15#include <vector>
16
17namespace OpenMS
18{
/// A single <feature> element from a UniProtKB XML entry.
27 struct OPENMS_DLLAPI UniProtFeature
28 {
29 std::string type; ///< feature/@type, e.g. "modified residue", "disulfide bond", "sequence variant"
30 std::string description; ///< feature/@description (raw, before any cleanup)
31 bool has_position{false}; ///< defaults to false; NOTE(review): presumably set when `position` was read from the XML -- confirm against the parser
32 bool has_range{false}; ///< defaults to false; NOTE(review): presumably set when `begin`/`end` were read from the XML -- confirm against the parser
33 int position{0}; ///< defaults to 0; NOTE(review): presumably only meaningful when has_position is true -- confirm
34 int begin{0}; ///< defaults to 0; NOTE(review): presumably only meaningful when has_range is true -- confirm
35 int end{0}; ///< defaults to 0; NOTE(review): presumably only meaningful when has_range is true -- confirm
36 std::string original; ///< <original> text (sequence variant)
37 std::string variation; ///< <variation> text (sequence variant; first occurrence only)
38 };
39
/// A single <entry> from a UniProtKB XML file, in a parser-neutral form.
47 struct OPENMS_DLLAPI UniProtEntry
48 {
49 std::string dataset; ///< entry/@dataset, e.g. "Swiss-Prot" or "TrEMBL"
50 std::string accession; ///< first <accession> (primary id)
51 std::vector<std::string> alt_accessions; ///< second and subsequent <accession> entries
52 std::string name; ///< first <name> under <entry> (mnemonic id, e.g. "KSINK_HUMAN")
53 std::string full_name; ///< <protein>/<recommendedName>/<fullName> (first occurrence)
54 std::string primary_gene; ///< <gene>/<name type="primary"> (first occurrence)
55 std::string ncbi_tax_id; ///< <organism>/<dbReference type="NCBI Taxonomy" id="...">
56 std::string tax_name; ///< <organism>/<name type="scientific">
57 std::string protein_existence; ///< <proteinExistence type="..."> (raw type string, e.g. "evidence at protein level")
58 std::string sequence_version; ///< <sequence version="...">
59 std::string entry_version; ///< <entry version="...">
60 std::string sequence; ///< canonical <sequence> text (whitespace stripped); isoform sequences are skipped
61 std::vector<UniProtFeature> features; ///< <feature> elements in document order
62 };
63
/// Reads UniProtKB XML protein databases (.xml or transparently .xml.gz).
// NOTE(review): the gutter numbering skips source lines 77, 80-81 and 113-114, so this
// listing is incomplete. Line 77 is presumably the base-class specifier (the page's
// cross-references point at XMLFile), line 81 presumably the default constructor, and
// lines 113-114 presumably the deleted copy constructor / copy assignment that the
// cross-references list -- confirm against the real header before editing this class.
76 class OPENMS_DLLAPI UniProtXMLFile :
78 {
79 public:
82
/// Destructor.
84 ~UniProtXMLFile() override;
85
/// Load all entries from a UniProtKB XML file into memory.
/// @param filename path of the file to read
/// @param entries output vector receiving the loaded entries
///        (NOTE(review): whether existing content is cleared or appended to is not visible here -- confirm)
95 void load(const std::string& filename, std::vector<UniProtEntry>& entries);
96
/// Stream entries one at a time via a user-supplied callback.
/// @param filename path of the file to read
/// @param callback invoked with each entry as an rvalue, so the callee may move from it
110 void loadStreaming(const std::string& filename, const std::function<void(UniProtEntry&&)>& callback);
111
112 private:
115 };
116
117} // namespace OpenMS
Base class for loading and storing XML files via Xerces, with optional schema validation and transpar...
Definition XMLFile.h:40
Reads UniProtKB XML protein databases (.xml or transparently .xml.gz).
Definition UniProtXMLFile.h:78
UniProtXMLFile()
Default constructor.
void loadStreaming(const std::string &filename, const std::function< void(UniProtEntry &&)> &callback)
Stream entries one at a time via a user-supplied callback.
UniProtXMLFile(const UniProtXMLFile &)=delete
UniProtXMLFile & operator=(const UniProtXMLFile &)=delete
void load(const std::string &filename, std::vector< UniProtEntry > &entries)
Load all entries from a UniProtKB XML file into memory.
~UniProtXMLFile() override
Destructor.
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::string protein_existence
<proteinExistence type="..."> (raw type string, e.g. "evidence at protein level")
Definition UniProtXMLFile.h:57
std::string variation
<variation> text (sequence variant; first occurrence only)
Definition UniProtXMLFile.h:37
std::string entry_version
<entry version="...">
Definition UniProtXMLFile.h:59
std::string description
feature/@description (raw, before any cleanup)
Definition UniProtXMLFile.h:30
std::string tax_name
<organism>/<name type="scientific">
Definition UniProtXMLFile.h:56
std::string primary_gene
<gene>/<name type="primary"> (first occurrence)
Definition UniProtXMLFile.h:54
std::vector< std::string > alt_accessions
second and subsequent <accession> entries
Definition UniProtXMLFile.h:51
std::string type
feature/@type, e.g. "modified residue", "disulfide bond", "sequence variant"
Definition UniProtXMLFile.h:29
std::string dataset
entry/@dataset, e.g. "Swiss-Prot" or "TrEMBL"
Definition UniProtXMLFile.h:49
std::string ncbi_tax_id
<organism>/<dbReference type="NCBI Taxonomy" id="...">
Definition UniProtXMLFile.h:55
std::string sequence
canonical <sequence> text (whitespace stripped); isoform sequences are skipped
Definition UniProtXMLFile.h:60
std::string name
first <name> under <entry> (mnemonic id, e.g. "KSINK_HUMAN")
Definition UniProtXMLFile.h:52
std::string original
<original> text (sequence variant)
Definition UniProtXMLFile.h:36
std::string sequence_version
<sequence version="...">
Definition UniProtXMLFile.h:58
std::string full_name
<protein>/<recommendedName>/<fullName> (first occurrence)
Definition UniProtXMLFile.h:53
std::string accession
first <accession> (primary id)
Definition UniProtXMLFile.h:50
std::vector< UniProtFeature > features
<feature> elements in document order
Definition UniProtXMLFile.h:61
A single <entry> from a UniProtKB XML file, in a parser-neutral form.
Definition UniProtXMLFile.h:48
A single <feature> element from a UniProtKB XML entry.
Definition UniProtXMLFile.h:28