OpenMS  2.6.0
RTSimulation.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg$
32 // $Authors: Stephan Aiche, Chris Bielow$
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
41 namespace OpenMS
42 {
// Simulates/predicts retention times for peptides or peptide separation
// (summary as given by this page's cross-reference entry for OpenMS::RTSimulation).
// Derives from DefaultParamHandler, the base class for classes handling default parameters.
54  class OPENMS_DLLAPI RTSimulation :
55  public DefaultParamHandler
56  {
57 
58 
59 public:
63 
// Default constructor.
65  RTSimulation();
66 
69 
// Copy constructor.
71  RTSimulation(const RTSimulation& source);
72 
// Destructor (declared as an override of the base-class destructor).
74  ~RTSimulation() override;
76 
// Copy assignment operator.
78  RTSimulation& operator=(const RTSimulation& source);
79 
// NOTE(review): presumably predicts retention times for the given features and updates them in place
// (non-const reference parameter) -- confirm against the implementation.
85  void predictRT(SimTypes::FeatureMapSim& features);
86 
// NOTE(review): presumably the counterpart of predictRT() for contaminant features -- confirm.
90  void predictContaminantsRT(SimTypes::FeatureMapSim&);
91 
// NOTE(review): presumably reports whether an RT column is simulated -- confirm; does not modify the object (const).
95  bool isRTColumnOn() const;
96 
// NOTE(review): judging by the name and parameters, runs an SVM-based prediction for peptide_sequences and
// writes the results to predicted_retention_times -- confirm against the implementation.
98  void wrapSVM(std::vector<AASequence>& peptide_sequences, std::vector<double>& predicted_retention_times);
99 
// NOTE(review): presumably returns total_gradient_time_ ("Total gradient time" per this page's cross-reference) -- confirm.
100  SimTypes::SimCoordinateType getGradientTime() const;
101 
// NOTE(review): presumably fills the given experiment according to the RT settings -- confirm.
103  void createExperiment(SimTypes::MSSimExperiment& experiment);
104 
105 private:
// NOTE(review): presumably registers the default parameters of this class -- confirm.
107  void setDefaultParams_();
108 
// NOTE(review): presumably the code path used when no RT column is simulated (cf. isRTColumnOn()) -- confirm.
110  void noRTColumn_(SimTypes::FeatureMapSim&);
111 
// NOTE(review): presumably applies a smoothed RT distortion to the experiment -- confirm.
113  void smoothRTDistortion_(SimTypes::MSSimExperiment& experiment);
114 
// NOTE(review): "MT" presumably stands for migration time; the result is expected in
// predicted_retention_times -- confirm against the implementation.
122  void calculateMT_(SimTypes::FeatureMapSim& features, std::vector<double>& predicted_retention_times);
123 
// NOTE(review): presumably fills the four maps with charge contributions keyed by residue name
// (C-terminus, N-terminus, basic and acidic amino acids) -- confirm.
124  void getChargeContribution_(Map<String, double>& q_cterm,
125  Map<String, double>& q_nterm,
126  Map<String, double>& q_aa_basic,
127  Map<String, double>& q_aa_acidic);
128 
129  // MEMBERS:
// The member declarations themselves are not shown in this listing. According to this page's
// cross-reference entries they are:
//   rt_model_file_ (OpenMS::String),
//   total_gradient_time_ (total gradient time),
//   gradient_min_ / gradient_max_ (gradient ranges / maximal observed gradient time),
//   rt_sampling_rate_ (bin size in rt dimension),
//   egh_tau_location_ (EGH tau value), egh_tau_scale_ (EGH tau scale parameter of the lorentzian variation),
//   egh_variance_location_ (EGH sigma value), egh_variance_scale_ (EGH sigma scale parameter of the lorentzian variation).
130 
131  // Name of the svm model file
133 
136 
138 
143 
146 
151 
156 
157 protected:
// The cross-reference lists rnd_gen_ (random number generator,
// SimTypes::MutableSimRandomNumberGeneratorPtr) at RTSimulation.h:159; its declaration is not shown here.
160 
// Overrides a base-class virtual (marked override).
// NOTE(review): presumably synchronizes the members above with the current parameters -- confirm.
162  void updateMembers_() override;
163 
164  };
165 
166 }
167 
LogStream.h
DefaultParamHandler.h
OpenMS::RTSimulation::total_gradient_time_
SimTypes::SimCoordinateType total_gradient_time_
Total gradient time.
Definition: RTSimulation.h:135
OpenMS::SVMWrapper::NU
the nu parameter for nu-SVR
Definition: SVMWrapper.h:96
OpenMS::SVMWrapper::KERNEL_TYPE
the kernel type
Definition: SVMWrapper.h:93
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
FileHandler.h
SimTypes.h
TextFile.h
OpenMS::RTSimulation::rt_sampling_rate_
SimTypes::SimCoordinateType rt_sampling_rate_
bin size in rt dimension
Definition: RTSimulation.h:145
double
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::SVMWrapper::C
the C parameter of the svm
Definition: SVMWrapper.h:95
OpenMS::PeptideHit::getSequence
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
OpenMS::SVMData::sequences
std::vector< std::vector< std::pair< Int, double > > > sequences
Definition: SVMWrapper.h:56
EnzymaticDigestion.h
OpenMS::SVMWrapper::P
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:97
OpenMS::RTSimulation
Simulates/Predicts retention times for peptides or peptide separation.
Definition: RTSimulation.h:54
OpenMS::FASTAFile
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
OpenMS::FileHandler::getTypeByFileName
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
OpenMS::Param::setValue
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
OpenMS::String
A more convenient string class.
Definition: String.h:59
ParamXMLFile.h
SVMWrapper.h
OpenMS::PeptideIdentification::setRT
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
OpenMS::MSExperiment
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
LibSVMEncoder.h
OpenMS::TextFile
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
OpenMS::Residue::Full
with N-terminus and C-terminus
Definition: Residue.h:153
OpenMS::SVMWrapper::getSignificanceBorders
void getSignificanceBorders(svm_problem *data, std::pair< double, double > &borders, double confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, double step_size=0.01, Size max_iterations=1000000)
calculates the significance borders of the error model and stores them in 'sigmas'
OpenMS::ParamXMLFile
The file counterpart of the Param class used to load and store the param data structure as paramXML.
Definition: ParamXMLFile.h:49
StatisticFunctions.h
OpenMS::SVMWrapper::saveModel
void saveModel(std::string modelFilename) const
saves the svm model
OpenMS::Param::getValue
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
OpenMS::SimTypes::MutableSimRandomNumberGeneratorPtr
boost::shared_ptr< SimRandomNumberGenerator > MutableSimRandomNumberGeneratorPtr
Definition: SimTypes.h:174
OpenMS::RTSimulation::egh_tau_scale_
double egh_tau_scale_
EGH tau scale parameter of the lorentzian variation.
Definition: RTSimulation.h:150
OpenMS::AASequence::toUnmodifiedString
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
Map.h
OpenMS::RTSimulation::egh_variance_location_
double egh_variance_location_
EGH sigma value.
Definition: RTSimulation.h:153
OpenMS::SVMWrapper::getIntParameter
Int getIntParameter(SVM_parameter_type type)
You can get the actual int- parameters of the svm.
OpenMS::LibSVMEncoder
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:55
OpenMS::SVMWrapper::setParameter
void setParameter(SVM_parameter_type type, Int value)
You can set the parameters of the svm:
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
OpenMS::PeptideIdentification::setHits
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
OpenMS::SVMData::store
bool store(const String &filename) const
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::FileTypes::TXT
any text format, which has only loose definition of what it actually contains – thus it is usually ha...
Definition: FileTypes.h:95
OpenMS::RTSimulation::egh_variance_scale_
double egh_variance_scale_
EGH sigma scale parameter of the lorentzian variation.
Definition: RTSimulation.h:155
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::DataValue::toBool
bool toBool() const
Conversion to bool.
OpenMS::SVMWrapper::SVM_TYPE
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:92
OpenMS::LibSVMEncoder::encodeLibSVMProblemWithCompositionAndLengthVectors
svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length)
creates composition vectors with additional length information for 'sequences' and stores them in Lib...
OpenMS::Param::exists
bool exists(const String &key) const
Tests if a parameter is set (expecting its fully qualified name, e.g., TextExporter:1:proteins_only)
OpenMS::RTSimulation::rnd_gen_
SimTypes::MutableSimRandomNumberGeneratorPtr rnd_gen_
Random number generator.
Definition: RTSimulation.h:159
FASTAFile.h
OpenMS::IDFilter::filterPeptidesByRTPredictPValue
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
int
OpenMS::SVMWrapper::SIGMA
Definition: SVMWrapper.h:100
OpenMS::IDFilter::keepBestPeptideHits
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
OpenMS::Math::meanSquareError
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:379
ProteinIdentification.h
OpenMS::SVMData::labels
std::vector< double > labels
Definition: SVMWrapper.h:57
OpenMS::String::split
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
OpenMS::LibSVMEncoder::destroyProblem
static void destroyProblem(svm_problem *problem)
frees all the memory of the svm_problem instance
OpenMS::RTSimulation::egh_tau_location_
double egh_tau_location_
EGH tau value.
Definition: RTSimulation.h:148
OpenMS::SVMData
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:54
OpenMS::SVMWrapper::getPValue
double getPValue(double sigma1, double sigma2, std::pair< double, double > point)
calculates a p-value for a given data point using the model parameters
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::DataValue::EMPTY
static const DataValue EMPTY
Empty data value for comparisons.
Definition: DataValue.h:62
OpenMS::SVMWrapper::train
Int train(struct svm_problem *problem)
trains the svm
OpenMS::SVMWrapper::loadModel
void loadModel(std::string modelFilename)
loads the model
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
OpenMS::SVMWrapper
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:79
OpenMS::StringConversions::toString
String toString(const T &i)
fallback template for general purpose using Boost::Karma; more specializations below
Definition: StringUtils.h:127
OpenMS::SignedSize
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
OpenMS::FeatureMap
A container for features.
Definition: FeatureMap.h:97
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
OpenMS::FASTAFile::load
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
OpenMS::RTSimulation::rt_model_file_
OpenMS::String rt_model_file_
Definition: RTSimulation.h:132
OpenMS::SVMWrapper::predict
void predict(struct svm_problem *problem, std::vector< double > &predicted_labels)
predicts the labels using the trained model
OpenMS::RTSimulation::gradient_max_
SimTypes::SimCoordinateType gradient_max_
Maximal observed gradient time.
Definition: RTSimulation.h:142
OpenMS::SVMWrapper::getDoubleParameter
double getDoubleParameter(SVM_parameter_type type)
You can get the actual double- parameters of the svm.
OpenMS::SVMData::load
bool load(const String &filename)
OpenMS::SVMWrapper::BORDER_LENGTH
Definition: SVMWrapper.h:101
OpenMS::LibSVMEncoder::encodeProblemWithOligoBorderVectors
void encodeProblemWithOligoBorderVectors(const std::vector< AASequence > &sequences, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::vector< std::pair< Int, double > > > &vectors)
creates oligo border vectors for 'sequences' and stores them in 'vectors'
OpenMS::SVMWrapper::DEGREE
the degree for the polynomial kernel
Definition: SVMWrapper.h:94
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:73
IDFilter.h
OpenMS::ParamXMLFile::store
void store(const String &filename, const Param &param) const
Write XML file.
OpenMS::SVMWrapper::performCrossValidation
double performCrossValidation(svm_problem *problem_ul, const SVMData &problem_l, const bool is_labeled, const std::map< SVM_parameter_type, double > &start_values_map, const std::map< SVM_parameter_type, double > &step_sizes_map, const std::map< SVM_parameter_type, double > &end_values_map, Size number_of_partitions, Size number_of_runs, std::map< SVM_parameter_type, double > &best_parameters, bool additive_step_sizes=true, bool output=false, String performances_file_name="performances.txt", bool mcc_as_performance_measure=false)
Performs a CV for the data given by 'problem'.
OpenMS::Map
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
OpenMS::AASequence::fromString
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
OpenMS::IDFilter::filterHitsByScore
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:822
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:465
OpenMS::RTSimulation::gradient_min_
SimTypes::SimCoordinateType gradient_min_
gradient ranges
Definition: RTSimulation.h:140
OpenMS::IDFilter::removeDecoyHits
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:922
OpenMS::SVMWrapper::setTrainingSample
void setTrainingSample(svm_problem *training_sample)
This is used for being able to perform predictions with non libsvm standard kernels.
OpenMS::ParamXMLFile::load
void load(const String &filename, Param &param)
Read XML file.
OpenMS::PeptideIdentification::setMZ
void setMZ(double mz)
sets the MZ of the MS2 spectrum
OpenMS::TextFile::ConstIterator
std::vector< String >::const_iterator ConstIterator
Non-mutable iterator.
Definition: TextFile.h:56
OpenMS::SVMWrapper::OLIGO
Definition: SVMWrapper.h:107
OpenMS::ProgressLogger::setLogType
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
TOPPBase.h
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:63
OpenMS::Math::pearsonCorrelationCoefficient
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:505
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:55
StringListUtils.h