OpenMS  2.4.0
SVMWrapper.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Nico Pfeifer, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <svm.h>
38 
39 #include <OpenMS/CONCEPT/Types.h>
42 #include <OpenMS/FORMAT/TextFile.h>
43 #include <OpenMS/SYSTEM/File.h>
44 
45 #include <string>
46 #include <vector>
47 #include <map>
48 #include <cmath>
49 
50 namespace OpenMS
51 {
52 
54  struct OPENMS_DLLAPI SVMData // Data structure used in SVMWrapper.
55  {
    // Nested list of (Int, double) pairs; presumably one inner vector per sample holding
    // sparse (feature index, value) entries — TODO confirm against the implementation.
56  std::vector<std::vector<std::pair<Int, double> > > sequences;
    // One double per label; presumably labels[i] belongs to sequences[i] — verify.
57  std::vector<double> labels;
58 
    // Default constructor.
59  SVMData();
60 
    // Constructs from a list of sequences and a list of labels. Both arguments are taken by
    // non-const reference; whether they are copied or modified is not visible in this header.
61  SVMData(std::vector<std::vector<std::pair<Int, double> > >& seqs, std::vector<double>& lbls);
62 
    // Equality comparison against another SVMData instance.
63  bool operator==(const SVMData& rhs) const;
64 
    // Const member taking a file name; presumably writes this object to `filename` and
    // returns whether that succeeded — TODO confirm.
65  bool store(const String& filename) const;
66 
    // Non-const counterpart of store(); presumably reads this object's content from
    // `filename` and returns whether that succeeded — TODO confirm.
67  bool load(const String& filename);
68 
69  };
70 
79  class OPENMS_DLLAPI SVMWrapper : // Serves as a wrapper for the libsvm.
80  public ProgressLogger // base class for all classes that want to report their progress
81  {
82 public:
83 
    // Parameters for the svm to be set from outside (enum SVM_parameter_type).
    // NOTE(review): this listing omits the enum's header line (90) and the enumerators on
    // lines 92-94 and 98-100; only C, NU, P and BORDER_LENGTH are visible here.
91  {
95  C, // the C parameter of the svm
96  NU, // the nu parameter for nu-SVR
97  P, // the epsilon parameter for epsilon-SVR
101  BORDER_LENGTH // NOTE(review): undocumented here; presumably the border length used for the gauss table — confirm
102  };
103 
    // Kernel type (enum SVM_kernel_type).
    // NOTE(review): the enum's header line (105) is missing from this listing.
106  {
107  OLIGO = 19, // explicit value 19; presumably chosen to stay clear of libsvm's own kernel ids — TODO confirm
108  OLIGO_COMBINED // implicitly 20 (previous enumerator + 1)
109  };
110 
    // Default constructor.
112  SVMWrapper();
113 
    // Virtual destructor.
115  virtual ~SVMWrapper();
116 
    // Sets the parameter selected by `type`; overload for integer-valued parameters.
158  void setParameter(SVM_parameter_type type, Int value);
159 
    // Sets the parameter selected by `type`; overload for real-valued parameters.
166  void setParameter(SVM_parameter_type type, double value);
167 
    // Training entry point taking a libsvm `svm_problem`. The meaning of the Int return
    // value is not visible in this header.
173  Int train(struct svm_problem* problem);
174 
    // Training entry point taking the SVMData encoding instead of a libsvm `svm_problem`.
180  Int train(SVMData& problem);
181 
    // Presumably writes the current model to `modelFilename` (name-based; confirm in the .cpp).
192  void saveModel(std::string modelFilename) const;
193 
    // Presumably loads a model from `modelFilename` (name-based; confirm in the .cpp).
202  void loadModel(std::string modelFilename);
203 
    // Prediction overload for a libsvm `svm_problem`; results are delivered through the
    // `predicted_labels` out-parameter.
209  void predict(struct svm_problem* problem, std::vector<double>& predicted_labels);
210 
    // Prediction overload for the SVMData encoding; results are delivered through `results`.
216  void predict(const SVMData& problem, std::vector<double>& results);
217 
    // Getter for an integer-valued parameter selected by `type`.
257  Int getIntParameter(SVM_parameter_type type);
258 
    // Getter for a real-valued parameter selected by `type`.
286  double getDoubleParameter(SVM_parameter_type type);
287 
    // Static helper; presumably splits `problem` into `number` random partitions that are
    // written to `partitions` — TODO confirm. Ownership of the returned pointers is not
    // visible in this header.
293  static void createRandomPartitions(svm_problem* problem, Size number, std::vector<svm_problem*>& partitions);
294 
    // Same as above for the SVMData encoding; output goes to `problems`.
300  static void createRandomPartitions(const SVMData& problem,
301  Size number,
302  std::vector<SVMData>& problems);
    // Static helper returning a single `svm_problem` built from `problems`; presumably the
    // partition at index `except` is left out — TODO confirm. Ownership of the returned
    // pointer is not visible in this header.
306  static svm_problem* mergePartitions(const std::vector<svm_problem*>& problems, Size except);
307 
    // SVMData variant of mergePartitions(); the result is written to `merged_problem`.
311  static void mergePartitions(const std::vector<SVMData>& problems,
312  Size except,
313  SVMData& merged_problem);
314 
    // Prediction overload for raw libsvm node vectors; results are delivered through
    // `predicted_rts`.
321  void predict(const std::vector<svm_node*>& vectors, std::vector<double>& predicted_rts);
322 
    // Static helper; presumably copies the labels stored in `problem` into `labels` — confirm.
327  static void getLabels(svm_problem* problem, std::vector<double>& labels);
328 
    // Cross-validation entry point. Takes both encodings of the data (`problem_ul` as a libsvm
    // problem, `problem_l` as SVMData); presumably `is_labeled` selects which one is used.
    // The three maps give start value, step size and end value per parameter type;
    // `best_parameters` is an out-parameter. Defaults: additive step sizes, no output,
    // performances file "performances.txt", MCC not used as performance measure.
    // NOTE(review): the meaning of the returned double is not visible in this header.
333  double performCrossValidation(svm_problem* problem_ul,
334  const SVMData& problem_l,
335  const bool is_labeled,
336  const std::map<SVM_parameter_type, double>& start_values_map,
337  const std::map<SVM_parameter_type, double>& step_sizes_map,
338  const std::map<SVM_parameter_type, double>& end_values_map,
339  Size number_of_partitions,
340  Size number_of_runs,
341  std::map<SVM_parameter_type, double>& best_parameters,
342  bool additive_step_sizes = true,
343  bool output = false,
344  String performances_file_name = "performances.txt",
345  bool mcc_as_performance_measure = false);
346 
347 
    // Returns a double; semantics are not visible in this header (name suggests a
    // probability estimate for support vector regression — TODO confirm).
357  double getSVRProbability();
358 
    // Static oligo-kernel evaluation for two sequences given as (int, double) pair lists,
    // using a precomputed `gauss_table`. `max_distance` defaults to -1; presumably that
    // means "no limit" — TODO confirm.
374  static double kernelOligo(const std::vector<std::pair<int, double> >& x,
375  const std::vector<std::pair<int, double> >& y,
376  const std::vector<double>& gauss_table,
377  int max_distance = -1);
378 
    // Static oligo-kernel evaluation for two libsvm node arrays. Defaults: sigma_square = 0,
    // max_distance = 50.
386  static double kernelOligo(const svm_node* x, const svm_node* y, const std::vector<double>& gauss_table, double sigma_square = 0, Size max_distance = 50);
387 
    // Writes a pair of doubles to `borders` for the given libsvm problem. Defaults:
    // confidence 0.95, 5 runs, 5 partitions, step size 0.01, at most 1000000 iterations.
391  void getSignificanceBorders(svm_problem* data, std::pair<double, double>& borders, double confidence = 0.95, Size number_of_runs = 5, Size number_of_partitions = 5, double step_size = 0.01, Size max_iterations = 1000000);
392 
    // SVMData variant with the same defaults; the output pair is named `sigmas` here.
396  void getSignificanceBorders(const SVMData& data,
397  std::pair<double, double>& sigmas,
398  double confidence = 0.95,
399  Size number_of_runs = 5,
400  Size number_of_partitions = 5,
401  double step_size = 0.01,
402  Size max_iterations = 1000000);
403 
    // Returns a double computed from two sigma values and a point; presumably a p-value for
    // `point` — TODO confirm.
410  double getPValue(double sigma1, double sigma2, std::pair<double, double> point);
411 
    // Delivers one double per result through the `decision_values` out-parameter.
421  void getDecisionValues(svm_problem* data, std::vector<double>& decision_values);
422 
    // Takes `data` by non-const pointer, so presumably scales it in place — confirm.
    // `max_scale_value` defaults to -1; its meaning is not visible in this header.
429  void scaleData(svm_problem* data, Int max_scale_value = -1);
430 
    // Static helper filling `gauss_table` from `border_length` and `sigma`.
431  static void calculateGaussTable(Size border_length, double sigma, std::vector<double>& gauss_table);
432 
    // Returns a `svm_problem` built from two libsvm problems; ownership of the returned
    // pointer is not visible in this header.
440  svm_problem* computeKernelMatrix(svm_problem* problem1, svm_problem* problem2);
441 
    // SVMData variant of computeKernelMatrix().
449  svm_problem* computeKernelMatrix(const SVMData& problem1, const SVMData& problem2);
450 
    // Setter for the training sample (libsvm encoding).
455  void setTrainingSample(svm_problem* training_sample);
456 
    // Setter for the training sample (SVMData encoding).
460  void setTrainingSample(SVMData& training_sample);
461 
    // Delivers results through two out-parameters: `probabilities` and `prediction_labels`.
471  void getSVCProbabilities(struct svm_problem* problem, std::vector<double>& probabilities, std::vector<double>& prediction_labels);
472 
    // Takes a list of integer labels and a list of weights; presumably weights[i] applies to
    // weight_labels[i] — TODO confirm.
476  void setWeights(const std::vector<Int>& weight_labels, const std::vector<double>& weights);
477 
478 private:
    // Grid helper taking start values, step sizes and end values; `actual_values` is
    // modified in place. Presumably advances to the next grid point and returns whether one
    // exists — TODO confirm.
485  bool nextGrid_(const std::vector<double>& start_values,
486  const std::vector<double>& step_sizes,
487  const std::vector<double>& end_values,
488  const bool additive_step_sizes,
489  std::vector<double>& actual_values);
490 
    // Returns a count derived from `points` and the two values m1, m2; the exact enclosure
    // criterion is not visible in this header.
491  Size getNumberOfEnclosedPoints_(double m1, double m2, const std::vector<std::pair<double, double> >& points);
492 
    // Parameter initialisation helper (no arguments, no return value).
496  void initParameters_();
497 
    // Static function taking a C string that is deliberately left unnamed; presumably a no-op
    // print callback used to silence libsvm output — TODO confirm.
503  static void printToVoid_(const char* /*s*/);
504 
505  svm_parameter* param_; // the parameters for the svm
506  svm_model* model_; // the learned svm discriminant
507  double sigma_; // for the oligo kernel (amount of positional smearing)
508  std::vector<double> sigmas_; // for the combined oligo kernel (amount of positional smearing)
509  std::vector<double> gauss_table_; // lookup table for fast computation of the oligo kernel
510  std::vector<std::vector<double> > gauss_tables_; // lookup table for fast computation of the combined oligo kernel
511  Size kernel_type_; // the actual kernel type
512  Size border_length_; // the border length; presumably the value of the BORDER_LENGTH parameter — TODO confirm
513  svm_problem* training_set_; // the training set
514  svm_problem* training_problem_; // the training set; NOTE(review): how this differs from training_set_ is not visible here
515  SVMData training_data_; // the training set (different encoding)
516  };
517 
518 } // namespace OpenMS
519 
the C parameter of the svm
Definition: SVMWrapper.h:95
A more convenient string class.
Definition: String.h:57
SVMData training_data_
Definition: SVMWrapper.h:515
svm_model * model_
Definition: SVMWrapper.h:506
svm_problem * training_problem_
Definition: SVMWrapper.h:514
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:79
svm_problem * training_set_
Definition: SVMWrapper.h:513
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:97
SVM_kernel_type
Kernel type.
Definition: SVMWrapper.h:105
bool operator==(_Iterator< _Val, _Ref, _Ptr > const &, _Iterator< _Val, _Ref, _Ptr > const &)
Definition: KDTree.h:806
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
std::vector< std::vector< std::pair< Int, double > > > sequences
Definition: SVMWrapper.h:56
Size border_length_
Definition: SVMWrapper.h:512
Size kernel_type_
Definition: SVMWrapper.h:511
svm_parameter * param_
Definition: SVMWrapper.h:505
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:54
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:92
std::vector< double > gauss_table_
Definition: SVMWrapper.h:509
Definition: SVMWrapper.h:100
the gamma parameter of the POLY, RBF and SIGMOID kernel
Definition: SVMWrapper.h:98
Definition: SVMWrapper.h:99
double sigma_
Definition: SVMWrapper.h:507
the degree for the polynomial kernel
Definition: SVMWrapper.h:94
std::vector< double > labels
Definition: SVMWrapper.h:57
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< std::vector< double > > gauss_tables_
Definition: SVMWrapper.h:510
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
int Int
Signed integer type.
Definition: Types.h:102
std::vector< double > sigmas_
Definition: SVMWrapper.h:508
the nu parameter for nu-SVR
Definition: SVMWrapper.h:96
the kernel type
Definition: SVMWrapper.h:93
SVM_parameter_type
Parameters for the svm to be set from outside.
Definition: SVMWrapper.h:90