OpenMS  2.6.0
DecoyGenerator.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
38 
39 #include <boost/random/mersenne_twister.hpp>
40 #include <boost/random/uniform_int.hpp>
41 
42 namespace OpenMS
43 {
44  class AASequence;
45  class DigestionEnzymeProtein;
46 
50  class OPENMS_DLLAPI DecoyGenerator
51  {
52  public:
53  // initializes the random generator (rng_); NOTE(review): the constructor declaration (listing line 54) is not visible in this listing
55 
56  // destructor (compiler-generated)
57  ~DecoyGenerator() = default;
58 
59  // sets the random seed used for shuffling (presumably reseeds rng_; the definition is not part of this header)
60  void setSeed(UInt64 seed);
61 
62  /*
63  @brief Reverses the protein sequence @p protein and returns the reversed sequence.
64  note: modifications are discarded
65  */
66  AASequence reverseProtein(const AASequence& protein) const;
67 
68  /*
69  @brief Reverses the protein's peptide sequences between enzymatic cutting positions (@p protease is presumably the enzyme name -- confirm against the implementation).
70  note: modifications are discarded
71  */
72  AASequence reversePeptides(const AASequence& protein, const String& protease) const;
73 
74  /*
75  @brief Shuffles the protein's peptide sequences between enzymatic cutting positions.
76  Each peptide is shuffled @p max_attempts times (default: 100) to minimize sequence identity.
77  note: modifications are discarded; unlike the reverse* methods this one is not const (presumably because shuffling draws from rng_)
78  */
79  AASequence shufflePeptides(
80  const AASequence& aas,
81  const String& protease,
82  const int max_attempts = 100
83  );
84 
85  private:
86  // sequence identity between decoy and target by matching AAs (amino acids); NOTE(review): exact normalization is defined in the implementation, not here
87  static double SequenceIdentity_(const String& decoy, const String& target);
88 
89  // portable shuffle (Fisher-Yates) of [first, last): walks i from the last index down to 1 and swaps element i with the element at an index drawn uniformly from [0, i] using rng_
90  template <class RandomAccessIterator>
91  void shuffle_ (RandomAccessIterator first, RandomAccessIterator last)
92  {
93  for (auto i = (last-first)-1; i > 0; --i) // OMS_CODING_TEST_EXCLUDE
94  {
95  boost::uniform_int<decltype(i)> d(0, i); // integer distribution over the closed range [0, i], same type as the index i
96  std::swap(first[i], first[d(rng_)]); // may swap an element with itself when the drawn index equals i
97  }
98  }
99 
100  boost::mt19937_64 rng_; // 64-bit Mersenne Twister engine; source of randomness for shuffle_
102 }
103 
ConsensusXMLFile.h
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
OpenMS::Param::copy
Param copy(const String &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
OpenMS::FeatureDeconvolution
An algorithm to decharge features (i.e. as found by FeatureFinder).
Definition: FeatureDeconvolution.h:59
OpenMS::UInt64
OPENMS_UINT64_TYPE UInt64
Unsigned integer type (64bit)
Definition: Types.h:77
OpenMS::ConsensusXMLFile::store
void store(const String &filename, const ConsensusMap &consensus_map)
Stores a consensus map to file.
OpenMS::ProteaseDigestion::setEnzyme
void setEnzyme(const String &name)
Sets the enzyme for the digestion (by name)
Types.h
OpenMS::DecoyGenerator
Methods to generate isobaric decoy sequences for DDA target-decoy searches.
Definition: DecoyGenerator.h:50
StopWatch.h
OpenMS::FASTAFile::FASTAEntry::identifier
String identifier
Definition: FASTAFile.h:78
OpenMS::DigestionEnzymeDB< DigestionEnzymeProtein, ProteaseDB >::getInstance
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:69
OpenMS::TargetedExperimentHelper::Peptide
Represents a peptide (amino acid sequence)
Definition: TargetedExperimentHelper.h:370
OpenMS::FASTAFile
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
OpenMS::FASTAFile::FASTAEntry::sequence
String sequence
Definition: FASTAFile.h:80
OpenMS::Param::setValue
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
OpenMS::FeatureDeconvolution::compute
void compute(const FeatureMapType &fm_in, FeatureMapType &fm_out, ConsensusMap &cons_map, ConsensusMap &cons_map_p)
Compute a zero-charge feature map from a set of charged features.
OpenMS::String
A more convenient string class.
Definition: String.h:59
OpenMS::MRMDecoy
This class generates a TargetedExperiment object with decoys based on a TargetedExperiment object.
Definition: MRMDecoy.h:88
OpenMS::String::trim
String & trim()
Removes whitespace (space, tab, line feed, carriage return) at the beginning and the end of the string.
ConsensusMap.h
OpenMS::DigestionEnzymeDB::getAllNames
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
FeatureXMLFile.h
OpenMS::Param::getValue
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
OpenMS::FASTAFile::FASTAEntry
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
OpenMS::DecoyGenerator::rng_
boost::mt19937_64 rng_
Definition: DecoyGenerator.h:100
OpenMS::ListUtils::concatenate
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:193
OpenMS::DecoyGenerator::shuffle_
void shuffle_(RandomAccessIterator first, RandomAccessIterator last)
Definition: DecoyGenerator.h:91
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::DataValue::toBool
bool toBool() const
Conversion to bool.
OpenMS::TargetedExperimentHelper::Peptide::sequence
String sequence
Definition: TargetedExperimentHelper.h:437
ProteaseDigestion.h
ProteaseDB.h
OpenMS::MRMDecoy::reversePeptide
static OpenMS::TargetedExperiment::Peptide reversePeptide(const OpenMS::TargetedExperiment::Peptide &peptide, const bool keepN, const bool keepC, const String &const_pattern=String())
Reverse a peptide sequence (with its modifications)
FASTAFile.h
int
OpenMS::FeatureXMLFile::load
void load(const String &filename, FeatureMap &feature_map)
loads the file with name filename into map and calls updateRanges().
FeatureMap.h
ProteinIdentification.h
OpenMS::StopWatch::start
void start()
Start the stop watch.
OpenMS::ProteaseDigestion::digest
Size digest(const AASequence &protein, std::vector< AASequence > &output, Size min_length=1, Size max_length=0) const
Performs the enzymatic digestion of a protein.
OpenMS::StopWatch::stop
void stop()
Stop the stop watch (can be resumed later). If the stop watch was not running an exception is thrown.
OpenMS::DefaultParamHandler::setParameters
void setParameters(const Param &param)
Sets the parameters.
OpenMS::FASTAFile::writeNext
void writeNext(const FASTAEntry &protein)
Stores the data given by protein. Call writeStart() once before calling writeNext().
OpenMS::DefaultParamHandler::getDefaults
const Param & getDefaults() const
Non-mutable access to the default parameters.
OpenMS::DefaultParamHandler::getParameters
const Param & getParameters() const
Non-mutable access to the parameters.
MRMDecoy.h
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:80
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::FeatureXMLFile::store
void store(const String &filename, const FeatureMap &feature_map)
stores the map feature_map in file with name filename.
OpenMS::StringConversions::append
void append(const T &i, String &target)
Definition: StringUtils.h:119
OpenMS::DataProcessing::CHARGE_DECONVOLUTION
Charge deconvolution.
Definition: DataProcessing.h:61
OpenMS::FASTAFile::writeStart
void writeStart(const String &filename)
Prepares a FASTA file given by 'filename' for streamed writing using writeNext().
OpenMS::StopWatch
This class is used to determine the current process' CPU (user and/or kernel) and wall time.
Definition: StopWatch.h:65
OpenMS::IdentificationDataInternal::RNA
Definition: MetaData.h:67
FeatureDeconvolution.h
ModificationsDB.h
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
OpenMS::ConsensusMap::getColumnHeaders
const ColumnHeaders & getColumnHeaders() const
Non-mutable access to the file descriptions.
OpenMS::FeatureMap
A container for features.
Definition: FeatureMap.h:97
OpenMS::ProteaseDigestion
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
OpenMS::FASTAFile::readNext
bool readNext(FASTAEntry &protein)
Reads the next FASTA entry from file.
OpenMS::FASTAFile::readStart
void readStart(const String &filename)
Prepares a FASTA file given by 'filename' for streamed reading using readNext().
OpenMS::FeatureXMLFile
This class provides Input/Output functionality for feature maps.
Definition: FeatureXMLFile.h:68
DigestionEnzyme.h
OpenMS::String::reverse
String & reverse()
inverts the direction of the string
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:73
OpenMS::AASequence
Representation of a peptide/protein sequence.
Definition: AASequence.h:111
OpenMS::AASequence::fromString
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
OpenMS::Param::insert
void insert(const String &prefix, const Param &param)
OpenMS::Internal::ClassTest::infile
std::ifstream infile
Questionable file tested by TEST_FILE_EQUAL.
TOPPBase.h
OpenMS::ConsensusXMLFile
This class provides Input functionality for ConsensusMaps and Output functionality for alignments and...
Definition: ConsensusXMLFile.h:62