OpenMS
Loading...
Searching...
No Matches
DigestionEnzymeDB.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Xiao Liang $
6// $Authors: Xiao Liang, Chris Bielow $
7// --------------------------------------------------------------------------
8
9#pragma once
10
#include <OpenMS/SYSTEM/File.h>

#include <map>
#include <memory>
#include <set>
20
21namespace OpenMS
22{
32 template<typename DigestionEnzymeType, typename InstanceType> class DigestionEnzymeDB
33 {
34 public:
35
39 typedef typename std::set<const DigestionEnzymeType*>::const_iterator ConstEnzymeIterator;
40 typedef typename std::set<const DigestionEnzymeType*>::iterator EnzymeIterator;
42
44 static InstanceType* getInstance()
45 {
46 static InstanceType* db_ = nullptr;
47 if (db_ == nullptr)
48 {
49 db_ = new InstanceType;
50 }
51 return db_;
52 }
53
59 {
60 for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
61 {
62 delete *it;
63 }
64 }
66
73 const DigestionEnzymeType* getEnzyme(const String& name) const
74 {
75 auto pos = enzyme_names_.find(name);
76 if (pos == enzyme_names_.end())
77 {
78 throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, name);
79 }
80 return pos->second;
81 }
82
85 const DigestionEnzymeType* getEnzymeByRegEx(const String& cleavage_regex) const
86 {
87 if (!hasRegEx(cleavage_regex))
88 {
89 // @TODO: why does this use a different exception than "getEnzyme"?
90 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
91 String("Enzyme with regex " + cleavage_regex + " was not registered in Enzyme DB, register first!").c_str());
92 }
93 return enzyme_regex_.at(cleavage_regex);
94 }
95
97 void getAllNames(std::vector<String>& all_names) const
98 {
99 all_names.clear();
100 for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
101 {
102 all_names.push_back((*it)->getName());
103 }
104 }
106
111 bool hasEnzyme(const String& name) const
112 {
113 return (enzyme_names_.find(name) != enzyme_names_.end());
114 }
115
117 bool hasRegEx(const String& cleavage_regex) const
118 {
119 return (enzyme_regex_.find(cleavage_regex) != enzyme_regex_.end());
120 }
121
123 bool hasEnzyme(const DigestionEnzymeType* enzyme) const
124 {
125 return (const_enzymes_.find(enzyme) != const_enzymes_.end() );
126 }
128
132 inline ConstEnzymeIterator beginEnzyme() const { return const_enzymes_.begin(); } // we only allow constant iterators -- this DB is not meant to be modifiable
133 inline ConstEnzymeIterator endEnzyme() const { return const_enzymes_.end(); }
134
136 protected:
137 DigestionEnzymeDB(const String& db_file = "")
138 {
139 if (!db_file.empty())
140 {
141 readEnzymesFromFile_(db_file);
142 }
143 }
144
146 DigestionEnzymeDB(const DigestionEnzymeDB& enzymes_db) = delete;
148
153 DigestionEnzymeDB& operator=(const DigestionEnzymeDB& enzymes_db) = delete;
155
164 {
// Body of readEnzymesFromFileIfPresent_(const String& filename):
// returns true once readEnzymesFromFile_() has finished without throwing.
// NOTE(review): the signature (line 163) and the catch clause (line 170) are not
// part of this listing -- confirm the caught exception type against the original header.
165 try
166 {
167 readEnzymesFromFile_(filename);
168 return true;
169 }
171 {
172 // file not found - that's OK, we will use built-in enzymes
// false tells the caller that nothing was loaded from 'filename'
173 return false;
174 }
175 }
176
178 void readEnzymesFromFile_(const String& filename)
179 {
180 String file = File::find(filename);
181
182 Param param;
183 ParamXMLFile().load(file, param);
184 if (param.empty()) return;
185
186 std::vector<String> split;
187 String(param.begin().getName()).split(':', split);
188 if (split[0] != "Enzymes")
189 {
190 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, split[0], "name 'Enzymes' expected");
191 }
192
193 try
194 {
195 std::map<String, String> values;
196 String previous_enzyme = split[1];
197 // this iterates over all the "ITEM" elements in the XML file:
198 for (Param::ParamIterator it = param.begin(); it != param.end(); ++it)
199 {
200 String(it.getName()).split(':', split);
201 if (split[0] != "Enzymes") break; // unexpected content in the XML file
202 if (split[1] != previous_enzyme)
203 {
204 // add enzyme and reset:
205 addEnzyme_(parseEnzyme_(values));
206 previous_enzyme = split[1];
207 values.clear();
208 }
209 values[it.getName()] = String(it->value.toString());
210 }
211 // add last enzyme
212 addEnzyme_(parseEnzyme_(values));
213 }
214 catch (Exception::BaseException& e)
215 {
216 throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, e.what(), "");
217 }
218 }
219
221 const DigestionEnzymeType* parseEnzyme_(std::map<String, String>& values) const
222 {
223 DigestionEnzymeType* enzy_ptr = new DigestionEnzymeType();
224
225 for (std::map<String, String>::iterator it = values.begin(); it != values.end(); ++it)
226 {
227 const String& key = it->first;
228 const String& value = it->second;
229 if (!enzy_ptr->setValueFromFile(key, value))
230 {
231 OPENMS_LOG_ERROR << "Error while parsing enzymes file: unknown key '" << key << "' with value '" << value << "'" << std::endl;
232 }
233 }
234 return enzy_ptr;
235 }
236
239 void addEnzyme_(const DigestionEnzymeType* enzyme)
240 {
241 String name = enzyme->getName();
242
243 // if an enzyme with the same name exists, remove the old one first
244 auto existing = enzyme_names_.find(name);
245 if (existing != enzyme_names_.end())
246 {
247 const DigestionEnzymeType* old = existing->second;
248 const_enzymes_.erase(old);
249 // remove old name/synonym entries
250 String old_name = old->getName();
251 enzyme_names_.erase(old_name);
252 enzyme_names_.erase(old_name.toLower());
253 for (const auto& syn : old->getSynonyms())
254 {
255 enzyme_names_.erase(syn);
256 }
257 // remove old regex entry
258 if (!old->getRegEx().empty())
259 {
260 enzyme_regex_.erase(old->getRegEx());
261 }
262 delete old;
263 }
264
265 // add to internal storage
266 const_enzymes_.insert(enzyme);
267 // add to internal indices (by name and its synonyms)
268 enzyme_names_[name] = enzyme;
269 enzyme_names_[name.toLower()] = enzyme;
270 for (std::set<String>::const_iterator it = enzyme->getSynonyms().begin(); it != enzyme->getSynonyms().end(); ++it)
271 {
272 enzyme_names_[*it] = enzyme;
273 }
274 // ... and by regex
275 if (enzyme->getRegEx() != "")
276 {
277 enzyme_regex_[enzyme->getRegEx()] = enzyme;
278 }
279 return;
280 }
281
/// index by names: addEnzyme_() stores the name, the lower-cased name and every synonym of an enzyme as keys
282 std::map<String, const DigestionEnzymeType*> enzyme_names_;
283
/// index by regex: addEnzyme_() stores the cleavage regex of an enzyme as key (only if the regex is non-empty)
284 std::map<String, const DigestionEnzymeType*> enzyme_regex_;
285
/// set of enzymes; the objects pointed to are deleted by the destructor
286 std::set<const DigestionEnzymeType*> const_enzymes_;
287
288 };
289}
290
#define OPENMS_LOG_ERROR
Macro for non-fatal errors (processing continues)
Definition LogStream.h:546
subpage TOPP_TargetedFileConverter Converts targeted feature or consensus feature files subpage TOPP_FileInfo Shows basic information about the file
Definition TOPP.doxygen:44
Digestion enzyme database (base class)
Definition DigestionEnzymeDB.h:33
const DigestionEnzymeType * parseEnzyme_(std::map< String, String > &values) const
parses an enzyme, given the key/value pairs from an XML file
Definition DigestionEnzymeDB.h:221
static InstanceType * getInstance()
this member function serves as a replacement of the constructor
Definition DigestionEnzymeDB.h:44
DigestionEnzymeDB(const String &db_file="")
Definition DigestionEnzymeDB.h:137
DigestionEnzymeDB(const DigestionEnzymeDB &enzymes_db)=delete
copy constructor
std::map< String, const DigestionEnzymeType * > enzyme_regex_
index by regex
Definition DigestionEnzymeDB.h:284
ConstEnzymeIterator endEnzyme() const
Definition DigestionEnzymeDB.h:133
virtual ~DigestionEnzymeDB()
destructor
Definition DigestionEnzymeDB.h:58
ConstEnzymeIterator beginEnzyme() const
Definition DigestionEnzymeDB.h:132
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition DigestionEnzymeDB.h:73
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition DigestionEnzymeDB.h:97
void readEnzymesFromFile_(const String &filename)
reads enzymes from the given file
Definition DigestionEnzymeDB.h:178
DigestionEnzymeDB & operator=(const DigestionEnzymeDB &enzymes_db)=delete
assignment operator
std::set< const DigestionEnzymeType * >::const_iterator ConstEnzymeIterator
Definition DigestionEnzymeDB.h:39
std::set< const DigestionEnzymeType * >::iterator EnzymeIterator
Definition DigestionEnzymeDB.h:40
bool hasEnzyme(const DigestionEnzymeType *enzyme) const
returns true if the db contains the enzyme of the given pointer
Definition DigestionEnzymeDB.h:123
bool hasEnzyme(const String &name) const
returns true if the db contains an enzyme with the given name (supports synonym names)
Definition DigestionEnzymeDB.h:111
std::set< const DigestionEnzymeType * > const_enzymes_
set of enzymes
Definition DigestionEnzymeDB.h:286
std::map< String, const DigestionEnzymeType * > enzyme_names_
index by names
Definition DigestionEnzymeDB.h:282
const DigestionEnzymeType * getEnzymeByRegEx(const String &cleavage_regex) const
Definition DigestionEnzymeDB.h:85
bool readEnzymesFromFileIfPresent_(const String &filename)
Reads enzymes from the given file if it exists.
Definition DigestionEnzymeDB.h:163
bool hasRegEx(const String &cleavage_regex) const
returns true if the db contains an enzyme with the given regex
Definition DigestionEnzymeDB.h:117
void addEnzyme_(const DigestionEnzymeType *enzyme)
Definition DigestionEnzymeDB.h:239
Exception base class.
Definition Exception.h:63
Element could not be found exception.
Definition Exception.h:654
File not found exception.
Definition Exception.h:475
A method or algorithm argument contains illegal values.
Definition Exception.h:630
Parse Error exception.
Definition Exception.h:593
static String find(const String &filename, StringList directories=StringList())
Looks up the location of the file filename.
The file counterpart of the Param class, used to load and store the param data structure as paramXML (i....
Definition ParamXMLFile.h:25
void load(const String &filename, Param &param)
Read XML file.
Forward const iterator for the Param class.
Definition Param.h:170
std::string getName() const
Returns the absolute path of the current element (including all sections)
Management and storage of parameters / INI files.
Definition Param.h:46
ParamIterator begin() const
Begin iterator for the internal tree.
bool empty() const
Returns if there are no entries.
ParamIterator end() const
End iterator for the internal tree.
A more convenient string class.
Definition String.h:34
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & toLower()
Converts the string to lowercase.
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19