OpenMS
2.8.0
|
Serves as a wrapper for the libsvm. More...
#include <OpenMS/ANALYSIS/SVM/SVMWrapper.h>
Public Types | |
enum | SVM_parameter_type { SVM_TYPE , KERNEL_TYPE , DEGREE , C , NU , P , GAMMA , PROBABILITY , SIGMA , BORDER_LENGTH } |
Parameters for the svm to be set from outside. More... | |
enum | SVM_kernel_type { OLIGO = 19 , OLIGO_COMBINED } |
Kernel type. More... | |
Public Types inherited from ProgressLogger | |
enum | LogType { CMD , GUI , NONE } |
Possible log types. More... | |
Public Member Functions | |
SVMWrapper () | |
standard constructor More... | |
~SVMWrapper () override | |
destructor More... | |
void | setParameter (SVM_parameter_type type, Int value) |
You can set the parameters of the svm: More... | |
void | setParameter (SVM_parameter_type type, double value) |
sets the double parameters of the svm More... | |
Int | train (struct svm_problem *problem) |
trains the svm More... | |
Int | train (SVMData &problem) |
trains the svm More... | |
void | saveModel (std::string modelFilename) const |
saves the svm model More... | |
void | loadModel (std::string modelFilename) |
loads the model More... | |
void | predict (struct svm_problem *problem, std::vector< double > &predicted_labels) |
predicts the labels using the trained model More... | |
void | predict (const SVMData &problem, std::vector< double > &results) |
predicts the labels using the trained model More... | |
Int | getIntParameter (SVM_parameter_type type) |
You can get the actual int- parameters of the svm. More... | |
double | getDoubleParameter (SVM_parameter_type type) |
You can get the actual double- parameters of the svm. More... | |
void | createRandomPartitions (svm_problem *problem, Size number, std::vector< svm_problem * > &partitions) |
You can create 'number' equally sized random partitions. More... | |
void | createRandomPartitions (const SVMData &problem, Size number, std::vector< SVMData > &problems) |
You can create 'number' equally sized random partitions. More... | |
void | predict (const std::vector< svm_node * > &vectors, std::vector< double > &predicted_rts) |
predicts the labels using the trained model More... | |
double | performCrossValidation (svm_problem *problem_ul, const SVMData &problem_l, const bool is_labeled, const std::map< SVM_parameter_type, double > &start_values_map, const std::map< SVM_parameter_type, double > &step_sizes_map, const std::map< SVM_parameter_type, double > &end_values_map, Size number_of_partitions, Size number_of_runs, std::map< SVM_parameter_type, double > &best_parameters, bool additive_step_sizes=true, bool output=false, String performances_file_name="performances.txt", bool mcc_as_performance_measure=false) |
Performs a CV for the data given by 'problem'. More... | |
double | getSVRProbability () |
Returns the probability parameter sigma of the fitted Laplace model. More... | |
void | getSignificanceBorders (svm_problem *data, std::pair< double, double > &borders, double confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, double step_size=0.01, Size max_iterations=1000000) |
calculates the significance borders of the error model and stores them in 'borders' More... | |
void | getSignificanceBorders (const SVMData &data, std::pair< double, double > &sigmas, double confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, double step_size=0.01, Size max_iterations=1000000) |
calculates the significance borders of the error model and stores them in 'sigmas' More... | |
double | getPValue (double sigma1, double sigma2, std::pair< double, double > point) |
calculates a p-value for a given data point using the model parameters More... | |
void | getDecisionValues (svm_problem *data, std::vector< double > &decision_values) |
stores the prediction values for the encoded data in 'decision_values' More... | |
void | scaleData (svm_problem *data, Int max_scale_value=-1) |
Scales the data such that every column is scaled to [-1, 1]. More... | |
svm_problem * | computeKernelMatrix (svm_problem *problem1, svm_problem *problem2) |
computes the kernel matrix using the actual svm parameters and the given data More... | |
svm_problem * | computeKernelMatrix (const SVMData &problem1, const SVMData &problem2) |
computes the kernel matrix using the actual svm parameters and the given data More... | |
void | setTrainingSample (svm_problem *training_sample) |
This is used for being able to perform predictions with non libsvm standard kernels. More... | |
void | setTrainingSample (SVMData &training_sample) |
This is used for being able to perform predictions with non libsvm standard kernels. More... | |
void | getSVCProbabilities (struct svm_problem *problem, std::vector< double > &probabilities, std::vector< double > &prediction_labels) |
This function fills probabilities with the probability estimates for the first class. More... | |
void | setWeights (const std::vector< Int > &weight_labels, const std::vector< double > &weights) |
Sets weights for the classes in C_SVC (see libsvm documentation for further details) More... | |
Public Member Functions inherited from ProgressLogger | |
ProgressLogger () | |
Constructor. More... | |
virtual | ~ProgressLogger () |
Destructor. More... | |
ProgressLogger (const ProgressLogger &other) | |
Copy constructor. More... | |
ProgressLogger & | operator= (const ProgressLogger &other) |
Assignment Operator. More... | |
void | setLogType (LogType type) const |
Sets the progress log that should be used. The default type is NONE! More... | |
LogType | getLogType () const |
Returns the type of progress log being used. More... | |
void | startProgress (SignedSize begin, SignedSize end, const String &label) const |
Initializes the progress display. More... | |
void | setProgress (SignedSize value) const |
Sets the current progress. More... | |
void | endProgress () const |
Ends the progress display. More... | |
void | nextProgress () const |
increment progress by 1 (according to range begin-end) More... | |
Static Public Member Functions | |
static svm_problem * | mergePartitions (const std::vector< svm_problem * > &problems, Size except) |
You can merge partitions excluding the partition with index 'except'. More... | |
static void | mergePartitions (const std::vector< SVMData > &problems, Size except, SVMData &merged_problem) |
You can merge partitions excluding the partition with index 'except'. More... | |
static void | getLabels (svm_problem *problem, std::vector< double > &labels) |
Stores the labels of the encoded SVM data in 'labels'. More... | |
static double | kernelOligo (const std::vector< std::pair< int, double > > &x, const std::vector< std::pair< int, double > > &y, const std::vector< double > &gauss_table, int max_distance=-1) |
returns the value of the oligo kernel for sequences 'x' and 'y' More... | |
static double | kernelOligo (const svm_node *x, const svm_node *y, const std::vector< double > &gauss_table, double sigma_square=0, Size max_distance=50) |
calculates the oligo kernel value for the encoded sequences 'x' and 'y' More... | |
static void | calculateGaussTable (Size border_length, double sigma, std::vector< double > &gauss_table) |
Private Member Functions | |
bool | nextGrid_ (const std::vector< double > &start_values, const std::vector< double > &step_sizes, const std::vector< double > &end_values, const bool additive_step_sizes, std::vector< double > &actual_values) |
find next grid search parameter combination More... | |
Size | getNumberOfEnclosedPoints_ (double m1, double m2, const std::vector< std::pair< double, double > > &points) |
void | initParameters_ () |
Initializes the svm with standard parameters. More... | |
Static Private Member Functions | |
static void | printToVoid_ (const char *) |
This function is passed to lib svm for output control. More... | |
Private Attributes | |
svm_parameter * | param_ |
the parameters for the svm More... | |
svm_model * | model_ |
the learned svm discriminant More... | |
double | sigma_ |
for the oligo kernel (amount of positional smearing) More... | |
std::vector< double > | sigmas_ |
for the combined oligo kernel (amount of positional smearing) More... | |
std::vector< double > | gauss_table_ |
lookup table for fast computation of the oligo kernel More... | |
std::vector< std::vector< double > > | gauss_tables_ |
lookup table for fast computation of the combined oligo kernel More... | |
Size | kernel_type_ |
the actual kernel type More... | |
Size | border_length_ |
the actual border length More... | |
svm_problem * | training_set_ = nullptr |
the training set More... | |
svm_problem * | training_problem_ = nullptr |
the training set More... | |
SVMData | training_data_ |
the training set (different encoding) More... | |
Math::RandomShuffler | shuffler_ |
random shuffler to create training partitions More... | |
Additional Inherited Members | |
Static Protected Member Functions inherited from ProgressLogger | |
static String | logTypeToFactoryName_ (LogType type) |
Return the name of the factory product used for this log type. More... | |
Protected Attributes inherited from ProgressLogger | |
LogType | type_ |
time_t | last_invoke_ |
ProgressLoggerImpl * | current_logger_ |
Static Protected Attributes inherited from ProgressLogger | |
static int | recursion_depth_ |
Serves as a wrapper for the libsvm.
This class can be used for svm predictions. You can perform either classification or regression and choose certain kernel functions and additional parameters. Furthermore, the models can be saved and loaded, and we also support a new kernel function that was specially designed for learning with small sequences of different lengths.
enum SVM_kernel_type |
enum SVM_parameter_type |
Parameters for the svm to be set from outside.
This type is used to specify the kind of parameter that is to be set or retrieved by the set/getParameter methods.
SVMWrapper | ( | ) |
standard constructor
|
override |
destructor
|
static |
computes the kernel matrix using the actual svm parameters and the given data
This function can be used to compute a kernel matrix. 'problem1' and 'problem2' are used together with the oligo kernel function (could be extended if you want to use your own kernel functions).
svm_problem* computeKernelMatrix | ( | svm_problem * | problem1, |
svm_problem * | problem2 | ||
) |
computes the kernel matrix using the actual svm parameters and the given data
This function can be used to compute a kernel matrix. 'problem1' and 'problem2' are used together with the oligo kernel function (could be extended if you want to use your own kernel functions).
void createRandomPartitions | ( | const SVMData & | problem, |
Size | number, | ||
std::vector< SVMData > & | problems | ||
) |
You can create 'number' equally sized random partitions.
This function creates 'number' equally sized random partitions and stores them in 'problems'.
void createRandomPartitions | ( | svm_problem * | problem, |
Size | number, | ||
std::vector< svm_problem * > & | partitions | ||
) |
You can create 'number' equally sized random partitions.
This function creates 'number' equally sized random partitions and stores them in 'partitions'.
void getDecisionValues | ( | svm_problem * | data, |
std::vector< double > & | decision_values | ||
) |
stores the prediction values for the encoded data in 'decision_values'
This function can be used to get the prediction values of the data if a model is already trained by the train() method. For regression the result is the same as for the method predict. For classification this function returns the distance from the separating hyperplane. For multiclass classification the decision_values vector will be empty.
double getDoubleParameter | ( | SVM_parameter_type | type | ) |
You can get the actual double- parameters of the svm.
Parameter types | |
---|---|
C | the C parameter of the svm |
P | the P parameter of the svm (sets the epsilon in epsilon-svr) |
NU | the nu parameter in nu-SVR |
GAMMA | for POLY, RBF and SIGMOID |
type | The parameter that should be returned. |
Int getIntParameter | ( | SVM_parameter_type | type | ) |
You can get the actual int- parameters of the svm.
Parameter types | ||
---|---|---|
KERNEL_TYPE | LINEAR | for the linear kernel |
RBF | for the rbf kernel | |
POLY | for the polynomial kernel | |
SIGMOID | for the sigmoid kernel | |
DEGREE | the degree for the polynomial- kernel and the locality- improved kernel | |
SVM_TYPE | the SVM type of the svm: can be NU_SVR or EPSILON_SVR |
type | The parameter that should be returned. |
|
static |
Stores the labels of the encoded SVM data in 'labels'.
|
private |
double getPValue | ( | double | sigma1, |
double | sigma2, | ||
std::pair< double, double > | point | ||
) |
calculates a p-value for a given data point using the model parameters
Uses the model parameters to calculate the p-value for 'point' which has the data entries: measured, predicted retention time.
void getSignificanceBorders | ( | const SVMData & | data, |
std::pair< double, double > & | sigmas, | ||
double | confidence = 0.95 , |
||
Size | number_of_runs = 5 , |
||
Size | number_of_partitions = 5 , |
||
double | step_size = 0.01 , |
||
Size | max_iterations = 1000000 |
||
) |
calculates the significance borders of the error model and stores them in 'sigmas'
void getSignificanceBorders | ( | svm_problem * | data, |
std::pair< double, double > & | borders, | ||
double | confidence = 0.95 , |
||
Size | number_of_runs = 5 , |
||
Size | number_of_partitions = 5 , |
||
double | step_size = 0.01 , |
||
Size | max_iterations = 1000000 |
||
) |
calculates the significance borders of the error model and stores them in 'borders'
void getSVCProbabilities | ( | struct svm_problem * | problem, |
std::vector< double > & | probabilities, | ||
std::vector< double > & | prediction_labels | ||
) |
This function fills probabilities with the probability estimates for the first class.
The libSVM function svm_predict_probability is called to get probability estimates for the positive class. Since this is only used for binary classification it is sufficient for every test example to report the probability of the test example belonging to the positive class. Probability estimates have to be turned on during training (svm.setParameter(PROBABILITY, 1)), otherwise this method will fill the 'probabilities' vector with -1s.
double getSVRProbability | ( | ) |
Returns the probability parameter sigma of the fitted Laplace model.
The libsvm is used to fit a Laplace model to the prediction values by performing an internal cv using the training set if setParameter(PROBABILITY, 1) was invoked before using train. See the libsvm documentation for more details. The model parameter sigma is returned by this method. If no model was fitted during training, zero is returned.
|
private |
Initializes the svm with standard parameters.
|
static |
returns the value of the oligo kernel for sequences 'x' and 'y'
This function computes the kernel value of the oligo kernel, which was introduced by Meinicke et al. in 2004. 'x' and 'y' are encoded by encodeOligo and 'gauss_table' has to be constructed by calculateGaussTable.
'max_distance' can be used to speed up the computation even further by restricting the maximum distance between a k_mer at position i in sequence 'x' and a k_mer at position j in sequence 'y'. If i - j > 'max_distance' the value is not added to the kernel value. This approximation is switched off by default (max_distance < 0).
|
static |
calculates the oligo kernel value for the encoded sequences 'x' and 'y'
This kernel function calculates the oligo kernel value [Meinicke 04] for the sequences 'x' and 'y' that had been encoded by the encodeOligoBorder... function of the LibSVMEncoder class.
void loadModel | ( | std::string | modelFilename | ) |
loads the model
The svm- model is loaded. After this, the svm is ready for prediction.
modelFilename | The name of the model file that should be loaded. |
|
static |
You can merge partitions excluding the partition with index 'except'.
|
static |
You can merge partitions excluding the partition with index 'except'.
|
private |
find next grid search parameter combination
The current grid cell is given in actual_values. The result is returned in actual_values.
double performCrossValidation | ( | svm_problem * | problem_ul, |
const SVMData & | problem_l, | ||
const bool | is_labeled, | ||
const std::map< SVM_parameter_type, double > & | start_values_map, | ||
const std::map< SVM_parameter_type, double > & | step_sizes_map, | ||
const std::map< SVM_parameter_type, double > & | end_values_map, | ||
Size | number_of_partitions, | ||
Size | number_of_runs, | ||
std::map< SVM_parameter_type, double > & | best_parameters, | ||
bool | additive_step_sizes = true , |
||
bool | output = false , |
||
String | performances_file_name = "performances.txt" , |
||
bool | mcc_as_performance_measure = false |
||
) |
Performs a CV for the data given by 'problem'.
void predict | ( | const std::vector< svm_node * > & | vectors, |
std::vector< double > & | predicted_rts | ||
) |
predicts the labels using the trained model
The prediction process is started and the results are stored in 'predicted_rts'.
void predict | ( | const SVMData & | problem, |
std::vector< double > & | results | ||
) |
predicts the labels using the trained model
The prediction process is started and the results are stored in 'results'.
void predict | ( | struct svm_problem * | problem, |
std::vector< double > & | predicted_labels | ||
) |
predicts the labels using the trained model
The prediction process is started and the results are stored in 'predicted_labels'.
Referenced by LibSVMEncoder::predictPeptideRT().
|
staticprivate |
This function is passed to lib svm for output control.
The intention is to discard the output, as we don't need it.
void saveModel | ( | std::string | modelFilename | ) | const |
saves the svm model
The model of the trained svm is saved into 'modelFilename'. Throws an exception if the model cannot be saved.
Exception::UnableToCreateFile |
modelFilename | The file name where the model will be saved. |
void scaleData | ( | svm_problem * | data, |
Int | max_scale_value = -1 |
||
) |
Scales the data such that every column is scaled to [-1, 1].
Scales the x[][].value values of the svm_problem* structure. If the second parameter is omitted, the data is scaled to [-1, 1]. Otherwise the data is scaled to [0, max_scale_value]
void setParameter | ( | SVM_parameter_type | type, |
double | value | ||
) |
sets the double parameters of the svm
type | The type of parameter to set. |
value | The new value for parameter type . |
void setParameter | ( | SVM_parameter_type | type, |
Int | value | ||
) |
You can set the parameters of the svm:
Parameter types | ||
---|---|---|
KERNEL_TYPE | LINEAR | for the linear kernel |
RBF | for the rbf kernel | |
POLY | for the polynomial kernel | |
SIGMOID | for the sigmoid kernel | |
DEGREE | the degree for the polynomial- kernel and the locality- improved kernel | |
C | the C parameter of the svm |
type | The type of parameter to set. |
value | The new value for parameter type . |
void setTrainingSample | ( | svm_problem * | training_sample | ) |
This is used for being able to perform predictions with non libsvm standard kernels.
void setTrainingSample | ( | SVMData & | training_sample | ) |
This is used for being able to perform predictions with non libsvm standard kernels.
void setWeights | ( | const std::vector< Int > & | weight_labels, |
const std::vector< double > & | weights | ||
) |
Sets weights for the classes in C_SVC (see libsvm documentation for further details)
Int train | ( | struct svm_problem * | problem | ) |
trains the svm
The svm is trained with the data stored in the 'svm_problem' structure.
Int train | ( | SVMData & | problem | ) |
trains the svm
The svm is trained with the data stored in the 'SVMData' structure.
|
private |
the actual border length
|
private |
lookup table for fast computation of the oligo kernel
|
private |
lookup table for fast computation of the combined oligo kernel
|
private |
the actual kernel type
|
private |
the learned svm discriminant
|
private |
the parameters for the svm
|
private |
random shuffler to create training partitions
|
private |
for the oligo kernel (amount of positional smearing)
|
private |
for the combined oligo kernel (amount of positional smearing)
|
private |
the training set (different encoding)
|
private |
the training set
|
private |
the training set