OpenMS
DigestionEnzymeDB.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Xiao Liang $
32 // $Authors: Xiao Liang, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
41 #include <OpenMS/SYSTEM/File.h>
42 
43 #include <set>
44 #include <map>
45 
46 namespace OpenMS
47 {
57  template<typename DigestionEnzymeType, typename InstanceType> class DigestionEnzymeDB
58  {
59  public:
60 
64  typedef typename std::set<const DigestionEnzymeType*>::const_iterator ConstEnzymeIterator;
65  typedef typename std::set<const DigestionEnzymeType*>::iterator EnzymeIterator;
67 
69  static InstanceType* getInstance()
70  {
71  static InstanceType* db_ = nullptr;
72  if (db_ == nullptr)
73  {
74  db_ = new InstanceType;
75  }
76  return db_;
77  }
78 
84  {
85  for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
86  {
87  delete *it;
88  }
89  }
91 
98  const DigestionEnzymeType* getEnzyme(const String& name) const
99  {
100  auto pos = enzyme_names_.find(name);
101  if (pos == enzyme_names_.end())
102  {
103  throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, name);
104  }
105  return pos->second;
106  }
107 
110  const DigestionEnzymeType* getEnzymeByRegEx(const String& cleavage_regex) const
111  {
112  if (!hasRegEx(cleavage_regex))
113  {
114  // @TODO: why does this use a different exception than "getEnzyme"?
115  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
116  String("Enzyme with regex " + cleavage_regex + " was not registered in Enzyme DB, register first!").c_str());
117  }
118  return enzyme_regex_.at(cleavage_regex);
119  }
120 
122  void getAllNames(std::vector<String>& all_names) const
123  {
124  all_names.clear();
125  for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
126  {
127  all_names.push_back((*it)->getName());
128  }
129  }
131 
136  bool hasEnzyme(const String& name) const
137  {
138  return (enzyme_names_.find(name) != enzyme_names_.end());
139  }
140 
142  bool hasRegEx(const String& cleavage_regex) const
143  {
144  return (enzyme_regex_.find(cleavage_regex) != enzyme_regex_.end());
145  }
146 
148  bool hasEnzyme(const DigestionEnzymeType* enzyme) const
149  {
150  return (const_enzymes_.find(enzyme) != const_enzymes_.end() );
151  }
153 
157  inline ConstEnzymeIterator beginEnzyme() const { return const_enzymes_.begin(); } // we only allow constant iterators -- this DB is not meant to be modifiable
158  inline ConstEnzymeIterator endEnzyme() const { return const_enzymes_.end(); }
159 
161  protected:
162  DigestionEnzymeDB(const String& db_file = "")
163  {
164  if (!db_file.empty())
165  {
166  readEnzymesFromFile_(db_file);
167  }
168  }
169 
171  DigestionEnzymeDB(const DigestionEnzymeDB& enzymes_db) = delete;
173 
178  DigestionEnzymeDB& operator=(const DigestionEnzymeDB& enzymes_db) = delete;
180 
182  void readEnzymesFromFile_(const String& filename)
183  {
184  String file = File::find(filename);
185 
186  Param param;
187  ParamXMLFile().load(file, param);
188  if (param.empty()) return;
189 
190  std::vector<String> split;
191  String(param.begin().getName()).split(':', split);
192  if (split[0] != "Enzymes")
193  {
194  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, split[0], "name 'Enzymes' expected");
195  }
196 
197  try
198  {
199  std::map<String, String> values;
200  String previous_enzyme = split[1];
201  // this iterates over all the "ITEM" elements in the XML file:
202  for (Param::ParamIterator it = param.begin(); it != param.end(); ++it)
203  {
204  String(it.getName()).split(':', split);
205  if (split[0] != "Enzymes") break; // unexpected content in the XML file
206  if (split[1] != previous_enzyme)
207  {
208  // add enzyme and reset:
209  addEnzyme_(parseEnzyme_(values));
210  previous_enzyme = split[1];
211  values.clear();
212  }
213  values[it.getName()] = String(it->value.toString());
214  }
215  // add last enzyme
216  addEnzyme_(parseEnzyme_(values));
217  }
218  catch (Exception::BaseException& e)
219  {
220  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, e.what(), "");
221  }
222  }
223 
225  const DigestionEnzymeType* parseEnzyme_(std::map<String, String>& values) const
226  {
227  DigestionEnzymeType* enzy_ptr = new DigestionEnzymeType();
228 
229  for (std::map<String, String>::iterator it = values.begin(); it != values.end(); ++it)
230  {
231  const String& key = it->first;
232  const String& value = it->second;
233  if (!enzy_ptr->setValueFromFile(key, value))
234  {
235  OPENMS_LOG_ERROR << "Error while parsing enzymes file: unknown key '" << key << "' with value '" << value << "'" << std::endl;
236  }
237  }
238  return enzy_ptr;
239  }
240 
242  void addEnzyme_(const DigestionEnzymeType* enzyme)
243  {
244  // add to internal storage
245  const_enzymes_.insert(enzyme);
246  // add to internal indices (by name and its synonyms)
247  String name = enzyme->getName();
248  enzyme_names_[name] = enzyme;
249  enzyme_names_[name.toLower()] = enzyme;
250  for (std::set<String>::const_iterator it = enzyme->getSynonyms().begin(); it != enzyme->getSynonyms().end(); ++it)
251  {
252  enzyme_names_[*it] = enzyme;
253  }
254  // ... and by regex
255  if (enzyme->getRegEx() != "")
256  {
257  enzyme_regex_[enzyme->getRegEx()] = enzyme;
258  }
259  return;
260  }
261 
262  std::map<String, const DigestionEnzymeType*> enzyme_names_;
263 
264  std::map<String, const DigestionEnzymeType*> enzyme_regex_;
265 
266  std::set<const DigestionEnzymeType*> const_enzymes_;
267 
268  };
269 }
270 
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:465
Digestion enzyme database (base class)
Definition: DigestionEnzymeDB.h:58
std::set< const DigestionEnzymeType * >::iterator EnzymeIterator
Definition: DigestionEnzymeDB.h:65
const DigestionEnzymeType * parseEnzyme_(std::map< String, String > &values) const
parses an enzyme, given the key/value pairs from an XML file
Definition: DigestionEnzymeDB.h:225
DigestionEnzymeDB(const String &db_file="")
Definition: DigestionEnzymeDB.h:162
DigestionEnzymeDB(const DigestionEnzymeDB &enzymes_db)=delete
copy constructor
std::map< String, const DigestionEnzymeType * > enzyme_regex_
index by regex
Definition: DigestionEnzymeDB.h:264
static InstanceType * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:69
const DigestionEnzymeType * getEnzymeByRegEx(const String &cleavage_regex) const
Definition: DigestionEnzymeDB.h:110
ConstEnzymeIterator endEnzyme() const
Definition: DigestionEnzymeDB.h:158
virtual ~DigestionEnzymeDB()
destructor
Definition: DigestionEnzymeDB.h:83
ConstEnzymeIterator beginEnzyme() const
Definition: DigestionEnzymeDB.h:157
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
std::set< const DigestionEnzymeType * >::const_iterator ConstEnzymeIterator
Definition: DigestionEnzymeDB.h:64
void readEnzymesFromFile_(const String &filename)
reads enzymes from the given file
Definition: DigestionEnzymeDB.h:182
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition: DigestionEnzymeDB.h:98
bool hasEnzyme(const DigestionEnzymeType *enzyme) const
returns true if the db contains the enzyme of the given pointer
Definition: DigestionEnzymeDB.h:148
bool hasEnzyme(const String &name) const
returns true if the db contains an enzyme with the given name (supports synonym names)
Definition: DigestionEnzymeDB.h:136
std::set< const DigestionEnzymeType * > const_enzymes_
set of enzymes
Definition: DigestionEnzymeDB.h:266
DigestionEnzymeDB & operator=(const DigestionEnzymeDB &enzymes_db)=delete
assignment operator
std::map< String, const DigestionEnzymeType * > enzyme_names_
index by names
Definition: DigestionEnzymeDB.h:262
bool hasRegEx(const String &cleavage_regex) const
returns true if the db contains an enzyme with the given regex
Definition: DigestionEnzymeDB.h:142
void addEnzyme_(const DigestionEnzymeType *enzyme)
add to internal data; also update indices for search by name and regex
Definition: DigestionEnzymeDB.h:242
Exception base class.
Definition: Exception.h:91
Element could not be found exception.
Definition: Exception.h:676
A method or algorithm argument contains illegal values.
Definition: Exception.h:650
Parse Error exception.
Definition: Exception.h:624
static String find(const String &filename, StringList directories=StringList())
Looks up the location of the file filename.
The file counterpart of the Param class, used to load and store the Param data structure as ParamXML.
Definition: ParamXMLFile.h:51
void load(const String &filename, Param &param)
Read XML file.
Forward const iterator for the Param class.
Definition: Param.h:194
std::string getName() const
Returns the absolute path of the current element (including all sections)
Management and storage of parameters / INI files.
Definition: Param.h:70
ParamIterator begin() const
Begin iterator for the internal tree.
bool empty() const
Returns if there are no entries.
ParamIterator end() const
End iterator for the internal tree.
A more convenient string class.
Definition: String.h:60
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & toLower()
Converts the string to lowercase.
static bool split(const String &this_s, const char splitter, std::vector< String > &substrings, bool quote_protect)
Definition: StringUtilsSimple.h:365
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48