OpenMS
Loading...
Searching...
No Matches
DecoyGenerator.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
13
14#include <unordered_map>
15
16namespace OpenMS
17{
18 class AASequence;
19 class DigestionEnzymeProtein;
20
24 class OPENMS_DLLAPI DecoyGenerator
25 {
26 public:
27 // initializes random generator
29
30 // destructor
31 ~DecoyGenerator() = default;
32
33 // random seed for shuffling
34 void setSeed(UInt64 seed);
35
36 /**
37 @brief Reverses the protein sequence.
38 @note Modifications are discarded.
39 */
40 AASequence reverseProtein(const AASequence& protein) const;
41
42 /**
43 @brief Reverses the protein's peptide sequences between enzymatic cutting positions.
44 @note Modifications are discarded.
45 */
46 AASequence reversePeptides(const AASequence& protein, const String& protease) const;
47
67 std::vector<AASequence> shuffle(const AASequence& protein, const String& protease, int decoy_factor = 1);
68
69 /**
70 @brief Shuffles the protein's peptide sequences between enzymatic cutting positions.
71 Each peptide is shuffled @p max_attempts times to minimize sequence identity.
72
73 Note:
74 - Generated decoys are retrieved from a cache so that the same peptide (in different proteins)
75 always leads to the same decoy.
76 - Modifications are discarded.
77 */
79 const AASequence& aas,
80 const String& protease,
81 const int max_attempts = 100
82 );
83
84 private:
85 // sequence identity by matching AAs
86 static double SequenceIdentity_(const String& decoy, const String& target);
87
88 // portable shuffle
90
91 // ensures that shuffling same peptide (in different proteins) leads to same decoy
92 std::unordered_map<std::string, std::string> td_cache_;
93 };
94}
95
Representation of a peptide/protein sequence.
Definition AASequence.h:88
Methods to generate isobaric decoy sequences for DDA target-decoy searches.
Definition DecoyGenerator.h:25
std::vector< AASequence > shuffle(const AASequence &protein, const String &protease, int decoy_factor=1)
Generate decoy protein sequences using shuffle algorithm.
void setSeed(UInt64 seed)
static double SequenceIdentity_(const String &decoy, const String &target)
std::unordered_map< std::string, std::string > td_cache_
Definition DecoyGenerator.h:92
AASequence shufflePeptides(const AASequence &aas, const String &protease, const int max_attempts=100)
Math::RandomShuffler shuffler_
Definition DecoyGenerator.h:89
AASequence reverseProtein(const AASequence &protein) const
AASequence reversePeptides(const AASequence &protein, const String &protease) const
Definition MathFunctions.h:477
A more convenient string class.
Definition String.h:34
uint64_t UInt64
Unsigned integer type (64bit)
Definition Types.h:47
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19