OpenMS  2.5.0
LibSVMEncoder.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Nico Pfeifer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 #include <svm.h>
41 
42 #include <vector>
43 #include <utility>
44 
45 namespace OpenMS
46 {
55  class OPENMS_DLLAPI LibSVMEncoder
56  {
57 public:
59  LibSVMEncoder();
61  ~LibSVMEncoder();
62 
72  void encodeCompositionVector(const String & sequence, std::vector<std::pair<Int, double> > & encoded_vector, const String & allowed_characters = "ACDEFGHIKLMNPQRSTVWY");
73 
83  void encodeCompositionVectors(const std::vector<String> & sequences, const String & allowed_characters, std::vector<std::vector<std::pair<Int, double> > > & composition_vectors);
85  svm_node * encodeLibSVMVector(const std::vector<std::pair<Int, double> > & feature_vector);
86 
88  void encodeLibSVMVectors(const std::vector<std::vector<std::pair<Int, double> > > & feature_vectors, std::vector<svm_node *> & libsvm_vectors);
89 
91  svm_problem * encodeLibSVMProblem(const std::vector<svm_node *> & vectors,
92  std::vector<double> & labels);
93 
95  svm_problem * encodeLibSVMProblemWithCompositionVectors(const std::vector<String> & sequences,
96  std::vector<double> & labels,
97  const String & allowed_characters);
98 
104  svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector<String> & sequences,
105  std::vector<double> & labels,
106  const String & allowed_characters,
107  UInt maximum_sequence_length);
108 
114  svm_problem * encodeLibSVMProblemWithCompositionLengthAndWeightVectors(const std::vector<String> & sequences,
115  std::vector<double> & labels,
116  const String & allowed_characters);
117 
119  bool storeLibSVMProblem(const String & filename, const svm_problem * problem) const;
120 
122  svm_problem * loadLibSVMProblem(const String & filename);
123 
125  void encodeOligoBorders(String sequence,
126  UInt k_mer_length,
127  const String & allowed_characters,
128  UInt border_length,
129  std::vector<std::pair<Int, double> > & libsvm_vector,
130  bool strict = false,
131  bool unpaired = false,
132  bool length_encoding = false);
133 
135  svm_problem * encodeLibSVMProblemWithOligoBorderVectors(const std::vector<String> & sequences,
136  std::vector<double> & labels,
137  UInt k_mer_length,
138  const String & allowed_characters,
139  UInt border_length,
140  bool strict = false,
141  bool unpaired = false,
142  bool length_encoding = false);
143 
145  void encodeProblemWithOligoBorderVectors(const std::vector<AASequence> & sequences,
146  UInt k_mer_length,
147  const String & allowed_characters,
148  UInt border_length,
149  std::vector<std::vector<std::pair<Int, double> > > & vectors);
150 
157  void libSVMVectorToString(svm_node * vector, String & output);
158 
165  void libSVMVectorsToString(svm_problem * vector, String & output);
166 
173  void encodeOligo(const AASequence & sequence,
174  UInt k_mer_length,
175  const String & allowed_characters,
176  std::vector<std::pair<Int, double> > & values,
177  bool is_right_border = false);
178 
184  static void destroyProblem(svm_problem * problem);
185 
186  static std::vector<double> predictPeptideRT(const std::vector<String> & sequences,
187  SVMWrapper& svm,
188  const String & allowed_characters = "ACDEFGHIKLMNPQRSTVWY",
189  UInt maximum_sequence_length = 50)
190  {
191  std::vector<double> predicted_retention_times;
192 
193  LibSVMEncoder encoder;
194  std::vector<double> temp_rts;
195  temp_rts.resize(sequences.size(), 0);
196  svm_problem * prediction_data =
198  temp_rts,
199  allowed_characters,
200  maximum_sequence_length);
201  svm.predict(prediction_data, predicted_retention_times);
202  LibSVMEncoder::destroyProblem(prediction_data);
203  return predicted_retention_times;
204  }
205 
206 private:
208  static bool cmpOligos_(std::pair<Int, double> a,
209  std::pair<Int, double> b);
210 
211  };
212 
213 } // namespace OpenMS
214 
OpenMS::String
A more convenient string class.
Definition: String.h:58
SVMWrapper.h
OpenMS::LibSVMEncoder
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:55
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::LibSVMEncoder::encodeLibSVMProblemWithCompositionAndLengthVectors
svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length)
creates composition vectors with additional length information for 'sequences' and stores them in Lib...
OpenMS::LibSVMEncoder::destroyProblem
static void destroyProblem(svm_problem *problem)
frees all the memory of the svm_problem instance
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::SVMWrapper
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:79
AASequence.h
OpenMS::SVMWrapper::predict
void predict(struct svm_problem *problem, std::vector< double > &predicted_labels)
predicts the labels using the trained model
String.h
OpenMS::AASequence
Representation of a peptide/protein sequence.
Definition: AASequence.h:113
OpenMS::LibSVMEncoder::predictPeptideRT
static std::vector< double > predictPeptideRT(const std::vector< String > &sequences, SVMWrapper &svm, const String &allowed_characters="ACDEFGHIKLMNPQRSTVWY", UInt maximum_sequence_length=50)
Definition: LibSVMEncoder.h:186