OpenMS  2.4.0
LibSVMEncoder.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Nico Pfeifer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 #include <svm.h>
40 
41 #include <vector>
42 #include <utility>
43 
44 namespace OpenMS
45 {
// Serves for encoding sequences into feature vectors.
//
// The member functions declared here take sequences (String / AASequence)
// and fill vectors of (Int, double) pairs, or return the svm_node /
// svm_problem structures declared in <svm.h>. Only declarations are visible
// in this header; the behaviour itself is defined in the implementation file.
//
// NOTE(review): several functions return raw svm_node* / svm_problem*
// pointers. Who owns and releases them cannot be determined from this header;
// destroyProblem() below is presumably the matching release routine for
// svm_problem -- confirm against the implementation.
54  class OPENMS_DLLAPI LibSVMEncoder
55  {
56 public:
    // Default constructor.
58  LibSVMEncoder();
    // Destructor (declared non-virtual).
60  ~LibSVMEncoder();
61 
    // Encodes a single sequence as a composition vector.
    //   sequence            input sequence (read-only)
    //   encoded_vector      output parameter receiving the (Int, double) pairs
    //   allowed_characters  characters taken into account; defaults to the
    //                       20-letter string "ACDEFGHIKLMNPQRSTVWY"
    // NOTE(review): the meaning of each pair (presumably character index and
    // its frequency) is not visible here -- confirm.
71  void encodeCompositionVector(const String & sequence, std::vector<std::pair<Int, double> > & encoded_vector, const String & allowed_characters = "ACDEFGHIKLMNPQRSTVWY");
72 
    // Multi-sequence counterpart of encodeCompositionVector().
    //   sequences            input sequences (read-only)
    //   allowed_characters   characters taken into account (no default here)
    //   composition_vectors  output parameter, a vector of pair vectors
    // NOTE(review): presumably one entry per input sequence -- confirm.
82  void encodeCompositionVectors(const std::vector<String> & sequences, const String & allowed_characters, std::vector<std::vector<std::pair<Int, double> > > & composition_vectors);
    // Converts one feature vector of (Int, double) pairs into an svm_node
    // array and returns a pointer to it.
    // NOTE(review): allocation and ownership of the result are not visible.
84  svm_node * encodeLibSVMVector(const std::vector<std::pair<Int, double> > & feature_vector);
85 
    // Converts several feature vectors; the resulting svm_node pointers are
    // written to the output parameter libsvm_vectors.
87  void encodeLibSVMVectors(const std::vector<std::vector<std::pair<Int, double> > > & feature_vectors, std::vector<svm_node *> & libsvm_vectors);
88 
    // Builds an svm_problem from already encoded svm_node vectors and labels.
    // labels is taken by non-const reference.
    // NOTE(review): whether labels is modified is not visible here.
90  svm_problem * encodeLibSVMProblem(const std::vector<svm_node *> & vectors,
91  std::vector<double> & labels);
92 
    // Builds an svm_problem directly from sequences, labels and the allowed
    // characters. Judging by the name, it uses composition vectors as the
    // features -- confirm.
94  svm_problem * encodeLibSVMProblemWithCompositionVectors(const std::vector<String> & sequences,
95  std::vector<double> & labels,
96  const String & allowed_characters);
97 
    // Like encodeLibSVMProblemWithCompositionVectors(), with the additional
    // parameter maximum_sequence_length.
    // NOTE(review): presumably used to scale a sequence-length feature --
    // confirm.
99  svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector<String> & sequences,
100  std::vector<double> & labels,
101  const String & allowed_characters,
102  UInt maximum_sequence_length);
103 
    // Same parameter list as encodeLibSVMProblemWithCompositionVectors().
    // Judging by the name, length and weight features are added to the
    // composition features -- confirm.
105  svm_problem * encodeLibSVMProblemWithCompositionLengthAndWeightVectors(const std::vector<String> & sequences,
106  std::vector<double> & labels,
107  const String & allowed_characters);
108 
    // Stores the given svm_problem under filename. Const member function;
    // the problem is only read (pointer to const).
    // NOTE(review): the bool result presumably signals success -- confirm.
110  bool storeLibSVMProblem(const String & filename, const svm_problem * problem) const;
111 
    // Loads an svm_problem from filename and returns a pointer to it.
    // NOTE(review): the result on failure (e.g. a null pointer) is not
    // visible here.
113  svm_problem * loadLibSVMProblem(const String & filename);
114 
    // Encodes the borders of one sequence as k-mer oligos (per the name and
    // the k_mer_length / border_length parameters) into libsvm_vector.
    //   sequence            taken by value, i.e. the function gets its own copy
    //   k_mer_length        length of the oligos
    //   allowed_characters  characters taken into account
    //   border_length       size of the border region
    //   libsvm_vector       output parameter receiving the (Int, double) pairs
    //   strict, unpaired, length_encoding
    //                       optional flags, all defaulting to false
    // NOTE(review): the effect of the three flags is not visible here.
116  void encodeOligoBorders(String sequence,
117  UInt k_mer_length,
118  const String & allowed_characters,
119  UInt border_length,
120  std::vector<std::pair<Int, double> > & libsvm_vector,
121  bool strict = false,
122  bool unpaired = false,
123  bool length_encoding = false);
124 
    // Multi-sequence variant of encodeOligoBorders() that returns an
    // svm_problem built from sequences and labels. It takes the same three
    // optional flags, all defaulting to false.
126  svm_problem * encodeLibSVMProblemWithOligoBorderVectors(const std::vector<String> & sequences,
127  std::vector<double> & labels,
128  UInt k_mer_length,
129  const String & allowed_characters,
130  UInt border_length,
131  bool strict = false,
132  bool unpaired = false,
133  bool length_encoding = false);
134 
    // Oligo-border encoding for AASequence inputs. The results are written to
    // the output parameter vectors instead of being wrapped in an svm_problem.
136  void encodeProblemWithOligoBorderVectors(const std::vector<AASequence> & sequences,
137  UInt k_mer_length,
138  const String & allowed_characters,
139  UInt border_length,
140  std::vector<std::vector<std::pair<Int, double> > > & vectors);
141 
    // Writes a string representation of a single svm_node vector to output.
148  void libSVMVectorToString(svm_node * vector, String & output);
149 
    // Writes a string representation of an svm_problem to output.
    // Note that the parameter named 'vector' is an svm_problem*, not a
    // single vector.
156  void libSVMVectorsToString(svm_problem * vector, String & output);
157 
    // Encodes an AASequence as oligos of length k_mer_length; the resulting
    // (Int, double) pairs are written to the output parameter values.
    //   is_right_border  optional flag, defaults to false
    // NOTE(review): presumably selects how a right-hand border is encoded --
    // confirm.
164  void encodeOligo(const AASequence & sequence,
165  UInt k_mer_length,
166  const String & allowed_characters,
167  std::vector<std::pair<Int, double> > & values,
168  bool is_right_border = false);
169 
    // Static helper that disposes of an svm_problem.
    // NOTE(review): which members are freed, and whether a null pointer is
    // accepted, is not visible here.
175  static void destroyProblem(svm_problem * problem);
176 
177 private:
    // Private static helper taking two (Int, double) pairs by value and
    // returning bool.
    // NOTE(review): presumably an ordering predicate used when sorting
    // encoded oligos -- confirm.
179  static bool cmpOligos_(std::pair<Int, double> a,
180  std::pair<Int, double> b);
181 
182  };
183 
184 } // namespace OpenMS
185 
OpenMS::String
A more convenient string class.
Definition: String.h:57
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::LibSVMEncoder
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:54
OpenMS::AASequence
Representation of a peptide/protein sequence.
Definition: AASequence.h:107
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46