OpenMS  2.5.0
EnzymaticDigestion.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow, Xiao Liang $
32 // $Authors: Marc Sturm, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
39 
40 #include <boost/regex.hpp>
41 #include <string>
42 #include <vector>
43 
44 #include <functional> // for std::function
45 
46 namespace OpenMS
47 {
/// Class for the enzymatic digestion of sequences.
///
/// NOTE(review): this is a documentation listing of the header, not the verbatim
/// file. The number at the start of each line is the line number in the original
/// header, and some lines are absent. According to the symbol index that follows
/// the listing, the absent lines include `enum Specificity` (66), its enumerators
/// SPEC_FULL / SPEC_SEMI / SPEC_NONE (68-70) and the data members
/// `missed_cleavages_` (213), `enzyme_` (216) and `specificity_` (221).
62  class OPENMS_DLLAPI EnzymaticDigestion
63  {
64 public:
    /// Body of `enum Specificity` (the enum header line itself is missing from this listing).
    /// Per the symbol index: SPEC_FULL = fully enzyme specific, SPEC_SEMI = one of the
    /// two cleavage sites must fulfill the requirements, SPEC_NONE = no requirements on
    /// start / end.
67  {
    /// Last enumerator; used below as the element count of NamesOfSpecificity.
71  SIZE_OF_SPECIFICITY
72  };
    /// One string per Specificity value (array length is SIZE_OF_SPECIFICITY).
    /// NOTE(review): presumably the textual names accepted by getSpecificityByName() - confirm.
74  static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY];
75 
    /// Name for no cleavage.
77  static const std::string NoCleavage;
78 
    /// Name for unspecific cleavage.
80  static const std::string UnspecificCleavage;
81 
84 
    /// Virtual destructor, so the class can be destroyed through a base pointer
    /// (setEnzyme() is virtual as well, i.e. the class is meant to be derived from).
86  virtual ~EnzymaticDigestion();
87 
    /// Returns the number of missed cleavages (presumably the `missed_cleavages_` member).
89  Size getMissedCleavages() const;
90 
    /// Sets the number of missed cleavages (presumably the `missed_cleavages_` member).
92  void setMissedCleavages(Size missed_cleavages);
93 
    /// Returns the name of the enzyme (presumably of the `enzyme_` member, the "used enzyme").
95  String getEnzymeName() const;
96 
    /// Sets the enzyme used for digestion; virtual, so subclasses may override it.
    /// @param enzyme Pointer to a DigestionEnzyme (abstract base class for digestion enzymes).
    ///               NOTE(review): ownership/lifetime of the pointee is not visible here - confirm.
98  virtual void setEnzyme(const DigestionEnzyme* enzyme);
99 
    /// Returns the specificity (presumably the `specificity_` member, "specificity of enzyme").
101  Specificity getSpecificity() const;
102 
    /// Sets the specificity (presumably the `specificity_` member, "specificity of enzyme").
104  void setSpecificity(Specificity spec);
105 
    /// Maps a name to a Specificity value.
    /// NOTE(review): the return value for an unknown name is not visible here - confirm.
108  static Specificity getSpecificityByName(const String& name);
109 
    /// Digests @p sequence and writes the products to @p output as StringView objects.
    /// @param min_length Defaults to 1.
    /// @param max_length Defaults to 0. NOTE(review): presumably 0 means "no upper limit" - confirm.
    /// @return A Size; NOTE(review): its meaning (e.g. a count) is not visible here - confirm.
121  Size digestUnmodified(const StringView& sequence, std::vector<StringView>& output, Size min_length = 1, Size max_length = 0) const;
122 
    /// Same parameters and defaults as the overload above, but each product is written to
    /// @p output as a pair of Size values instead of a StringView.
    /// NOTE(review): presumably (start position, length) within @p sequence - confirm.
136  Size digestUnmodified(const StringView& sequence, std::vector<std::pair<Size,Size>>& output, Size min_length = 1, Size max_length = 0) const;
137 
    /// Queries whether a part of @p sequence is a valid digestion product.
    /// NOTE(review): presumably the part starts at @p pos and spans @p length residues, and
    /// the configured Specificity decides how strictly its two ends are checked - confirm.
    /// @param ignore_missed_cleavages Defaults to true.
149  bool isValidProduct(const String& sequence, int pos, int length, bool ignore_missed_cleavages = true) const;
150 
    /// Applies the caller-supplied predicate @p filter (Int -> bool) in the context of @p sequence.
    /// NOTE(review): presumably @p filter receives the number of missed cleavages in
    /// @p sequence and its result is returned - confirm.
158  bool filterByMissedCleavages(const String& sequence, std::function<bool(const Int)> filter) const;
159 
160 protected:
161 
    /// Protected variant of isValidProduct() without default arguments and with two extra flags.
    /// NOTE(review): the exact effect of @p allow_nterm_protein_cleavage and
    /// @p allow_random_asp_pro_cleavage is defined in the implementation - confirm there.
168  bool isValidProduct_(const String& sequence,
169  int pos,
170  int length,
171  bool ignore_missed_cleavages,
172  bool allow_nterm_protein_cleavage,
173  bool allow_random_asp_pro_cleavage) const;
    /// Tokenizes @p sequence (using the `re_` regex, per that member's description) and
    /// returns a vector of int. NOTE(review): presumably fragment/cleavage positions - confirm.
    /// @param start Defaults to 0.
    /// @param end   Defaults to -1. NOTE(review): presumably "until the end of the sequence" - confirm.
189  std::vector<int> tokenize_(const String& sequence, int start = 0, int end = -1) const;
190 
    /// Produces digestion products of @p sequence from precomputed @p fragment_positions;
    /// the two overloads differ only in the element type of @p output.
    /// Note the defaults differ from digestUnmodified(): min_length is 0, and max_length is -1,
    /// which converts to the largest representable Size because Size is an unsigned size_t.
199  Size digestAfterTokenize_(const std::vector<int>& fragment_positions, const StringView& sequence, std::vector<StringView>& output, Size min_length = 0, Size max_length = -1) const;
200  Size digestAfterTokenize_(const std::vector<int>& fragment_positions, const StringView& sequence, std::vector<std::pair<Size,Size>>& output, Size min_length = 0, Size max_length = -1) const;
201 
    /// Counts missed cleavages for the range described by @p seq_start and @p seq_end,
    /// given @p cleavage_positions.
    /// NOTE(review): whether @p seq_end is inclusive is not visible here - confirm.
210  Size countMissedCleavages_(const std::vector<int>& cleavage_positions, Size seq_start, Size seq_end) const;
211 
214 
    /// Regex for tokenizing (huge speedup by making this a member instead of a stack object in tokenize_()).
218  boost::regex re_;
219 
222  };
223 
224 } // namespace OpenMS
225 
226 
OpenMS::EnzymaticDigestion::Specificity
Specificity
when querying for valid digestion products, this determines if the specificity of the two peptide ends is to be considered important
Definition: EnzymaticDigestion.h:66
Types.h
OpenMS::EnzymaticDigestion::specificity_
Specificity specificity_
specificity of enzyme
Definition: EnzymaticDigestion.h:221
OpenMS::EnzymaticDigestion::missed_cleavages_
Size missed_cleavages_
Number of missed cleavages.
Definition: EnzymaticDigestion.h:213
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::EnzymaticDigestion
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:62
OpenMS::EnzymaticDigestion::UnspecificCleavage
static const std::string UnspecificCleavage
Name for unspecific cleavage.
Definition: EnzymaticDigestion.h:80
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::EnzymaticDigestion::SPEC_FULL
fully enzyme specific, e.g., tryptic (ends with KR, AA-before is KR), or peptide is at protein terminal ends
Definition: EnzymaticDigestion.h:68
OpenMS::EnzymaticDigestion::SPEC_NONE
no requirements on start / end
Definition: EnzymaticDigestion.h:70
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::EnzymaticDigestion::SPEC_SEMI
semi specific, i.e., one of the two cleavage sites must fulfill requirements
Definition: EnzymaticDigestion.h:69
int
OpenMS::EnzymaticDigestion::re_
boost::regex re_
Regex for tokenizing (huge speedup by making this a member instead of stack object in tokenize_())
Definition: EnzymaticDigestion.h:218
OpenMS::StringView
StringView provides a non-owning view on an existing string.
Definition: String.h:488
OpenMS::DigestionEnzyme
Abstract base class for digestion enzymes.
Definition: DigestionEnzyme.h:52
OpenMS::EnzymaticDigestion::NoCleavage
static const std::string NoCleavage
Name for no cleavage.
Definition: EnzymaticDigestion.h:77
DigestionEnzyme.h
OpenMS::EnzymaticDigestion::enzyme_
const DigestionEnzyme * enzyme_
Used enzyme.
Definition: EnzymaticDigestion.h:216