OpenMS  2.4.0
DigestionEnzymeDB.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Xiao Liang $
32 // $Authors: Xiao Liang, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
42 #include <OpenMS/SYSTEM/File.h>
43 
44 #include <boost/unordered_map.hpp>
45 #include <set>
46 
47 namespace OpenMS
48 {
58  template<typename DigestionEnzymeType, typename InstanceType> class DigestionEnzymeDB
59  {
60  public:
61 
65  typedef typename std::set<const DigestionEnzymeType*>::const_iterator ConstEnzymeIterator;
66  typedef typename std::set<const DigestionEnzymeType*>::iterator EnzymeIterator;
68 
70  static InstanceType* getInstance()
71  {
72  static InstanceType* db_ = nullptr;
73  if (db_ == nullptr)
74  {
75  db_ = new InstanceType;
76  }
77  return db_;
78  }
79 
83  virtual ~DigestionEnzymeDB()
85  {
86  for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
87  {
88  delete *it;
89  }
90  }
92 
96  const DigestionEnzymeType* getEnzyme(const String& name) const
100  {
101  if (enzyme_names_.find(name) == enzyme_names_.end())
102  {
103  throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, name);
104  }
105  return enzyme_names_.at(name);
106  }
107 
110  const DigestionEnzymeType* getEnzymeByRegEx(const String& cleavage_regex) const
111  {
112  if (!enzyme_regex_.has(cleavage_regex))
113  {
114  // @TODO: why does this use a different exception than "getEnzyme"?
115  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
116  String("Enzyme with regex " + cleavage_regex + " was not registered in Enzyme DB, register first!").c_str());
117  }
118  return enzyme_regex_[cleavage_regex];
119  }
120 
122  void getAllNames(std::vector<String>& all_names) const
123  {
124  all_names.clear();
125  for (ConstEnzymeIterator it = const_enzymes_.begin(); it != const_enzymes_.end(); ++it)
126  {
127  all_names.push_back((*it)->getName());
128  }
129  }
131 
135  bool hasEnzyme(const String& name) const
137  {
138  return (enzyme_names_.find(name) != enzyme_names_.end());
139  }
140 
142  bool hasRegEx(const String& cleavage_regex) const
143  {
144  return enzyme_regex_.has(cleavage_regex);
145  }
146 
148  bool hasEnzyme(const DigestionEnzymeType* enzyme) const
149  {
150  return (const_enzymes_.find(enzyme) != const_enzymes_.end() );
151  }
153 
157  inline ConstEnzymeIterator beginEnzyme() const { return const_enzymes_.begin(); } // we only allow constant iterators -- this DB is not meant to be modifiable
158  inline ConstEnzymeIterator endEnzyme() const { return const_enzymes_.end(); }
159 
161  protected:
162  DigestionEnzymeDB(const String& db_file = "")
163  {
164  if (!db_file.empty())
165  {
166  readEnzymesFromFile_(db_file);
167  }
168  }
169 
171  DigestionEnzymeDB(const DigestionEnzymeDB& enzymes_db) = delete;
173 
177  DigestionEnzymeDB& operator=(const DigestionEnzymeDB& enzymes_db) = delete;
180 
182  void readEnzymesFromFile_(const String& filename)
183  {
184  String file = File::find(filename);
185 
186  Param param;
187  ParamXMLFile().load(file, param);
188  if (param.empty()) return;
189 
190  std::vector<String> split;
191  param.begin().getName().split(':', split);
192  if (split[0] != "Enzymes")
193  {
194  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, split[0], "name 'Enzymes' expected");
195  }
196 
197  try
198  {
199  Map<String, String> values;
200  String previous_enzyme = split[1];
201  // this iterates over all the "ITEM" elements in the XML file:
202  for (Param::ParamIterator it = param.begin(); it != param.end(); ++it)
203  {
204  it.getName().split(':', split);
205  if (split[0] != "Enzymes") break; // unexpected content in the XML file
206  if (split[1] != previous_enzyme)
207  {
208  // add enzyme and reset:
209  addEnzyme_(parseEnzyme_(values));
210  previous_enzyme = split[1];
211  values.clear();
212  }
213  values[it.getName()] = it->value;
214  }
215  // add last enzyme
216  addEnzyme_(parseEnzyme_(values));
217  }
218  catch (Exception::BaseException& e)
219  {
220  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, e.what(), "");
221  }
222  }
223 
225  const DigestionEnzymeType* parseEnzyme_(Map<String, String>& values) const
226  {
227  DigestionEnzymeType* enzy_ptr = new DigestionEnzymeType();
228 
229  for (Map<String, String>::iterator it = values.begin(); it != values.end(); ++it)
230  {
231  const String& key = it->first;
232  const String& value = it->second;
233  if (!enzy_ptr->setValueFromFile(key, value))
234  {
235  LOG_ERROR << "Error while parsing enzymes file: unknown key '" << key << "' with value '" << value << "'" << std::endl;
236  }
237  }
238  return enzy_ptr;
239  }
240 
241  // add to internal data; also update indices for search by name and regex
242  void addEnzyme_(const DigestionEnzymeType* enzyme)
243  {
244  // add to internal storage
245  const_enzymes_.insert(enzyme);
246  // add to internal indices (by name and its synonyms)
247  String name = enzyme->getName();
248  enzyme_names_[name] = enzyme;
249  enzyme_names_[name.toLower()] = enzyme;
250  for (std::set<String>::const_iterator it = enzyme->getSynonyms().begin(); it != enzyme->getSynonyms().end(); ++it)
251  {
252  enzyme_names_[*it] = enzyme;
253  }
254  // ... and by regex
255  if (enzyme->getRegEx() != "")
256  {
257  enzyme_regex_[enzyme->getRegEx()] = enzyme;
258  }
259  return;
260  }
261 
262  boost::unordered_map<String, const DigestionEnzymeType*> enzyme_names_; // index by names
263 
265 
266  std::set<const DigestionEnzymeType*> const_enzymes_; // set of enzymes
267 
268  };
269 }
270 
bool hasEnzyme(const DigestionEnzymeType *enzyme) const
returns true if the db contains the enzyme of the given pointer
Definition: DigestionEnzymeDB.h:148
A more convenient string class.
Definition: String.h:57
std::set< const DigestionEnzymeType * >::const_iterator ConstEnzymeIterator
Definition: DigestionEnzymeDB.h:65
bool empty() const
Returns if there are no entries.
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
bool hasEnzyme(const String &name) const
returns true if the db contains an enzyme with the given name (supports synonym names) ...
Definition: DigestionEnzymeDB.h:136
bool hasRegEx(const String &cleavage_regex) const
returns true if the db contains an enzyme with the given regex
Definition: DigestionEnzymeDB.h:142
virtual ~DigestionEnzymeDB()
destructor
Definition: DigestionEnzymeDB.h:84
std::set< const DigestionEnzymeType * > const_enzymes_
Definition: DigestionEnzymeDB.h:266
Map< String, const DigestionEnzymeType * > enzyme_regex_
Definition: DigestionEnzymeDB.h:264
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
static String find(const String &filename, StringList directories=StringList())
Looks up the location of the file filename.
const DigestionEnzymeType * getEnzymeByRegEx(const String &cleavage_regex) const
Definition: DigestionEnzymeDB.h:110
#define LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:446
ParamIterator begin() const
Begin iterator for the internal tree.
void load(const String &filename, Param &param)
Read XML file.
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
String & toLower()
Converts the string to lowercase.
String getName() const
Returns the absolute path of the current element (including all sections)
void addEnzyme_(const DigestionEnzymeType *enzyme)
Definition: DigestionEnzymeDB.h:242
ConstEnzymeIterator beginEnzyme() const
Definition: DigestionEnzymeDB.h:157
Exception base class.
Definition: Exception.h:89
DigestionEnzymeDB & operator=(const DigestionEnzymeDB &enzymes_db)=delete
assignment operator
boost::unordered_map< String, const DigestionEnzymeType * > enzyme_names_
Definition: DigestionEnzymeDB.h:262
Management and storage of parameters / INI files.
Definition: Param.h:74
ConstEnzymeIterator endEnzyme() const
Definition: DigestionEnzymeDB.h:158
void readEnzymesFromFile_(const String &filename)
reads enzymes from the given file
Definition: DigestionEnzymeDB.h:182
const char * what() const noexcept override
Returns the error message of the exception.
The file counterpart of the Param class, used to load and store the Param data structure as paramXML...
Definition: ParamXMLFile.h:49
Digestion enzyme database (base class)
Definition: DigestionEnzymeDB.h:58
Forward const iterator for the Param class.
Definition: Param.h:181
const DigestionEnzymeType * parseEnzyme_(Map< String, String > &values) const
parses an enzyme, given the key/value pairs from an XML file
Definition: DigestionEnzymeDB.h:225
static InstanceType * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:70
DigestionEnzymeDB(const String &db_file="")
Definition: DigestionEnzymeDB.h:162
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition: DigestionEnzymeDB.h:99
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
std::set< const DigestionEnzymeType * >::iterator EnzymeIterator
Definition: DigestionEnzymeDB.h:66
ParamIterator end() const
End iterator for the internal tree.
Parse Error exception.
Definition: Exception.h:622