|
OpenMS
2.6.0
|
Go to the documentation of this file.
95 bool isRTColumnOn()
const;
98 void wrapSVM(std::vector<AASequence>& peptide_sequences, std::vector<double>& predicted_retention_times);
107 void setDefaultParams_();
162 void updateMembers_()
override;
SimTypes::SimCoordinateType total_gradient_time_
Total gradient time.
Definition: RTSimulation.h:135
the nu parameter for nu-SVR
Definition: SVMWrapper.h:96
the kernel type
Definition: SVMWrapper.h:93
Base class for TOPP applications.
Definition: TOPPBase.h:144
SimTypes::SimCoordinateType rt_sampling_rate_
bin size in rt dimension
Definition: RTSimulation.h:145
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
the C parameter of the svm
Definition: SVMWrapper.h:95
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
std::vector< std::vector< std::pair< Int, double > > > sequences
Definition: SVMWrapper.h:56
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:97
Simulates/Predicts retention times for peptides or peptide separation.
Definition: RTSimulation.h:54
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
A more convenient string class.
Definition: String.h:59
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
with N-terminus and C-terminus
Definition: Residue.h:153
void getSignificanceBorders(svm_problem *data, std::pair< double, double > &borders, double confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, double step_size=0.01, Size max_iterations=1000000)
calculates the significance borders of the error model and stores them in 'borders'
The file pendant of the Param class used to load and store the param datastructure as paramXML.
Definition: ParamXMLFile.h:49
void saveModel(std::string modelFilename) const
saves the svm model
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
boost::shared_ptr< SimRandomNumberGenerator > MutableSimRandomNumberGeneratorPtr
Definition: SimTypes.h:174
double egh_tau_scale_
EGH tau scale parameter of the Lorentzian variation.
Definition: RTSimulation.h:150
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
double egh_variance_location_
EGH sigma value.
Definition: RTSimulation.h:153
Int getIntParameter(SVM_parameter_type type)
You can get the actual int- parameters of the svm.
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:55
void setParameter(SVM_parameter_type type, Int value)
You can set the parameters of the svm:
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
bool store(const String &filename) const
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
any text format, which has only loose definition of what it actually contains – thus it is usually ha...
Definition: FileTypes.h:95
double egh_variance_scale_
EGH sigma scale parameter of the Lorentzian variation.
Definition: RTSimulation.h:155
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
bool toBool() const
Conversion to bool.
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:92
svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length)
creates composition vectors with additional length information for 'sequences' and stores them in Lib...
bool exists(const String &key) const
Tests if a parameter is set (expecting its fully qualified name, e.g., TextExporter:1:proteins_only)
SimTypes::MutableSimRandomNumberGeneratorPtr rnd_gen_
Random number generator.
Definition: RTSimulation.h:159
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
Definition: SVMWrapper.h:100
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:379
std::vector< double > labels
Definition: SVMWrapper.h:57
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
static void destroyProblem(svm_problem *problem)
frees all the memory of the svm_problem instance
double egh_tau_location_
EGH tau value.
Definition: RTSimulation.h:148
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:54
double getPValue(double sigma1, double sigma2, std::pair< double, double > point)
calculates a p-value for a given data point using the model parameters
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
static const DataValue EMPTY
Empty data value for comparisons.
Definition: DataValue.h:62
Int train(struct svm_problem *problem)
trains the svm
void loadModel(std::string modelFilename)
loads the model
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:79
String toString(const T &i)
fallback template for general purpose using Boost::Karma; more specializations below
Definition: StringUtils.h:127
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
A container for features.
Definition: FeatureMap.h:97
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
OpenMS::String rt_model_file_
Definition: RTSimulation.h:132
void predict(struct svm_problem *problem, std::vector< double > &predicted_labels)
predicts the labels using the trained model
SimTypes::SimCoordinateType gradient_max_
Maximal observed gradient time.
Definition: RTSimulation.h:142
double getDoubleParameter(SVM_parameter_type type)
You can get the actual double- parameters of the svm.
bool load(const String &filename)
Definition: SVMWrapper.h:101
void encodeProblemWithOligoBorderVectors(const std::vector< AASequence > &sequences, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::vector< std::pair< Int, double > > > &vectors)
creates oligo border vectors for 'sequences' and stores them in 'vectors'
the degree for the polynomial- kernel
Definition: SVMWrapper.h:94
Management and storage of parameters / INI files.
Definition: Param.h:73
void store(const String &filename, const Param &param) const
Write XML file.
double performCrossValidation(svm_problem *problem_ul, const SVMData &problem_l, const bool is_labeled, const std::map< SVM_parameter_type, double > &start_values_map, const std::map< SVM_parameter_type, double > &step_sizes_map, const std::map< SVM_parameter_type, double > &end_values_map, Size number_of_partitions, Size number_of_runs, std::map< SVM_parameter_type, double > &best_parameters, bool additive_step_sizes=true, bool output=false, String performances_file_name="performances.txt", bool mcc_as_performance_measure=false)
Performs a CV for the data given by 'problem'.
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:822
#define OPENMS_LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:465
SimTypes::SimCoordinateType gradient_min_
gradient ranges
Definition: RTSimulation.h:140
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:922
void setTrainingSample(svm_problem *training_sample)
This is used for being able to perform predictions with non libsvm standard kernels.
void load(const String &filename, Param &param)
Read XML file.
void setMZ(double mz)
sets the MZ of the MS2 spectrum
std::vector< String >::const_iterator ConstIterator
Non-mutable iterator.
Definition: TextFile.h:56
Definition: SVMWrapper.h:107
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
Used to load and store idXML files.
Definition: IdXMLFile.h:63
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:505
Representation of a peptide hit.
Definition: PeptideHit.h:55