OpenMS
CompNovoIdentificationBase.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Andreas Bertsch $
33 // --------------------------------------------------------------------------
34 
35 
36 #pragma once
37 
38 // OpenMS includes
45 
46 // stl includes
47 #include <vector>
48 #include <map>
49 
50 namespace OpenMS
51 {
/**
 * Abstract base class, derived from DefaultParamHandler, that declares the
 * pure virtual getIdentifications() entry point together with the protected
 * helpers and state listed below, and the nested Permut value class.
 *
 * NOTE(review): the source-line numbers embedded in this listing skip values
 * (e.g. 63-67, 86, 143), and the cross-reference index that follows the
 * listing names constructors, a destructor, an assignment operator, the
 * IonScore typedef and further data members for this class that are not
 * shown here. Treat this listing as incomplete and confirm against the
 * original header.
 */
57  class OPENMS_DLLAPI CompNovoIdentificationBase :
58  public DefaultParamHandler
59  {
60 
61 public:
62 
68 
71 
75 
78 
     /// Performs a ProteinIdentification run on the PeakMap @p exp.
     /// @p ids is taken by non-const reference (presumably the output
     /// container -- confirm in the implementing subclasses).
83  virtual void getIdentifications(std::vector<PeptideIdentification> & ids, const PeakMap & exp) = 0;
85 
87 
88 protected:
89 
     /// Overrides the updateMembers_ method of DefaultParamHandler to update the members.
91  void updateMembers_() override;
92 
     /// Filters the permutations held in @p permut.
94  void filterPermuts_(std::set<String> & permut) const;
95 
     /// Selects pivot ions of the given range (@p left, @p right) using the
     /// scores given in @p CID_nodes. @p pivots is taken by non-const reference
     /// (presumably receives the selected indices -- confirm). @p full_range
     /// defaults to false.
97  void selectPivotIons_(std::vector<Size> & pivots, Size left, Size right, std::map<double, IonScore> & CID_nodes, const PeakSpectrum & CID_orig_spec, double precursor_weight, bool full_range = false);
98 
     /// Filters @p decomps by the amino acid frequencies.
100  void filterDecomps_(std::vector<MassDecomposition> & decomps);
101 
     /// Produces mass decompositions using the given @p mass.
     /// @p no_caching defaults to false (presumably bypasses decomp_cache_
     /// when true -- confirm in the implementation).
103  void getDecompositions_(std::vector<MassDecomposition> & decomps, double mass, bool no_caching = false);
104 
     /// Permutes the String @p s, adds @p prefix, and stores the results in @p permutations.
106  static void permute_(const String& prefix, String s, std::set<String> & permutations);
107 
     /// NOTE(review): no description is available for this function; going by
     /// its name and signature it presumably returns the number of missed
     /// tryptic cleavages in @p peptide -- confirm in the implementation.
108  Size countMissedCleavagesTryptic_(const String & peptide) const;
109 
     /// Fills @p spec with b and y ions; no other ion types or doubly charged variants are used.
111  void getCIDSpectrumLight_(PeakSpectrum & spec, const String & sequence, double prefix, double suffix);
112 
     /// Fills @p spec with b,y ions, multiple charged variants. Prefix and
     /// suffix weights may be given; both default to 0.0.
114  void getCIDSpectrum_(PeakSpectrum & spec, const String & sequence, Size charge, double prefix = 0.0, double suffix = 0.0);
115 
     /// Fills @p spec with c and z type ions. The Size (charge) parameter is
     /// left unnamed in the declaration; prefix and suffix default to 0.0.
117  void getETDSpectrum_(PeakSpectrum & spec, const String &sequence, Size /* charge */, double prefix = 0.0, double suffix = 0.0);
118 
121 
     /// Keeps exactly @p no_peaks peaks for each window of size @p windowsize
     /// in the m/z range of @p spec.
123  void windowMower_(PeakSpectrum & spec, double windowsize, Size no_peaks);
124 
     /// Compares the two spectra @p s1 and @p s2. The meaning of the returned
     /// double is not visible in this header.
126  double compareSpectra_(const PeakSpectrum & s1, const PeakSpectrum & s2) const;
127 
130 
133 
     /// Mapping from the internal representation character to the actual residue.
135  std::map<char, const Residue *> name_to_residue_;
136 
     /// Mapping from the actual residue to the internal representing character.
138  std::map<const Residue *, char> residue_to_name_;
139 
     /// Masses of the amino acids, keyed by char.
141  std::map<char, double> aa_to_weight_;
142 
144 
     /// Value-initialized (0.0) by its in-class initializer.
     /// Presumably the smallest amino acid weight -- TODO confirm.
145  double min_aa_weight_{};
146 
148 
     /// Nested map Size -> Size -> set of String.
     /// Presumably the candidate sequences per sub-spectrum -- TODO confirm.
149  std::map<Size, std::map<Size, std::set<String> > > subspec_to_sequences_;
150 
152 
154 
156 
158 
160 
     /// No in-class initializer. Presumably an upper m/z bound that is set
     /// elsewhere -- TODO confirm.
161  double max_mz_;
162 
     /// No in-class initializer. Presumably a lower m/z bound that is set
     /// elsewhere -- TODO confirm.
163  double min_mz_;
164 
166 
168 
170 
     /// Map from a double key to a vector of MassDecomposition.
     /// Presumably caches getDecompositions_ results per mass -- TODO confirm.
171  std::map<double, std::vector<MassDecomposition> > decomp_cache_;
172 
     /// Map from a String key to a set of String.
     /// Presumably caches permute_ results -- TODO confirm.
173  std::map<String, std::set<String> > permute_cache_;
174 
175  private:
     /// Map from Size to a vector of double. Presumably filled by
     /// initIsotopeDistributions_(), which the cross-reference index lists
     /// but this listing does not show -- TODO confirm.
177  std::map<Size, std::vector<double> > isotope_distributions_;
178 
179 public:
180 
     /// Simple class to store permutations and a score: it holds a const
     /// iterator into a std::set<String> plus a double score. The iterator is
     /// non-owning, so the set it was taken from must be managed by the caller.
186  class Permut
187  {
188 private:
189 
     /// Default constructor, private (not callable from outside the class);
     /// sets score_ to 0 and default-constructs permut_.
190  Permut() :
191  score_(0)
192  {
193  }
194 
195 public:
196 
     /// Constructs a Permut from the iterator @p permut and the score @p s.
197  Permut(const std::set<String>::const_iterator & permut, double s) :
198  permut_(permut),
199  score_(s)
200  {
201  }
202 
     /// Copy constructor (compiler-generated).
203  Permut(const Permut & rhs) = default;
     /// Virtual destructor (compiler-generated).
204  virtual ~Permut() = default;
205 
     /// Copy assignment: copies permut_ and score_ from @p rhs unless @p rhs
     /// is this very object; returns *this.
206  Permut & operator=(const Permut & rhs)
207  {
208  if (&rhs != this)
209  {
210  permut_ = rhs.permut_;
211  score_ = rhs.score_;
212  }
213  return *this;
214  }
215 
     /// Returns a const reference to the stored iterator.
216  const std::set<String>::const_iterator & getPermut() const
217  {
218  return permut_;
219  }
220 
     /// Replaces the stored iterator with @p it.
221  void setPermut(const std::set<String>::const_iterator & it)
222  {
223  permut_ = it;
224  }
225 
     /// Returns the stored score.
226  double getScore() const
227  {
228  return score_;
229  }
230 
     /// Replaces the stored score with @p score.
231  void setScore(double score)
232  {
233  score_ = score;
234  }
235 
236 protected:
237 
     /// Iterator to the permutation string inside a std::set<String>.
238  std::set<String>::const_iterator permut_;
     /// Score associated with the permutation.
239  double score_;
240  };
241 
242  };
243 
244  namespace Internal
245  {
     // NOTE(review): this namespace is empty in this listing, but the embedded
     // source-line number 246 is skipped. The cross-reference index lists
     // `bool PermutScoreComparator(const CompNovoIdentificationBase::Permut &p1,
     // const CompNovoIdentificationBase::Permut &p2)` without a definition
     // location; presumably it is declared here -- confirm against the
     // original header.
247  }
248 }
249 
Representation of a peptide/protein sequence.
Definition: AASequence.h:112
Simple class to store permutations and a score.
Definition: CompNovoIdentificationBase.h:187
double getScore() const
Definition: CompNovoIdentificationBase.h:226
Permut & operator=(const Permut &rhs)
Definition: CompNovoIdentificationBase.h:206
const std::set< String >::const_iterator & getPermut() const
Definition: CompNovoIdentificationBase.h:216
Permut(const std::set< String >::const_iterator &permut, double s)
Definition: CompNovoIdentificationBase.h:197
void setPermut(const std::set< String >::const_iterator &it)
Definition: CompNovoIdentificationBase.h:221
std::set< String >::const_iterator permut_
Definition: CompNovoIdentificationBase.h:238
Permut()
Definition: CompNovoIdentificationBase.h:190
double score_
Definition: CompNovoIdentificationBase.h:239
void setScore(double score)
Definition: CompNovoIdentificationBase.h:231
run with CompNovoIdentificationBase
Definition: CompNovoIdentificationBase.h:59
Size max_isotope_
Definition: CompNovoIdentificationBase.h:169
std::map< char, double > aa_to_weight_
masses of the amino acids
Definition: CompNovoIdentificationBase.h:141
Size max_number_aa_per_decomp_
Definition: CompNovoIdentificationBase.h:151
double max_mz_
Definition: CompNovoIdentificationBase.h:161
bool tryptic_only_
Definition: CompNovoIdentificationBase.h:153
void getCIDSpectrumLight_(PeakSpectrum &spec, const String &sequence, double prefix, double suffix)
fills the spec with b and y ions, no other ion types or doubly charged variants are used
void getETDSpectrum_(PeakSpectrum &spec, const String &sequence, Size, double prefix=0.0, double suffix=0.0)
fills the spectrum with c and z type ions
static void permute_(const String &prefix, String s, std::set< String > &permutations)
permutes the String s, adds the prefix, and stores the results in permutations
void filterPermuts_(std::set< String > &permut) const
filters the permutations
MassDecompositionAlgorithm mass_decomp_algorithm_
Definition: CompNovoIdentificationBase.h:143
Size countMissedCleavagesTryptic_(const String &peptide) const
Size max_number_pivot_
Definition: CompNovoIdentificationBase.h:157
std::map< char, const Residue * > name_to_residue_
mapping from the internal representation character to the actual residue
Definition: CompNovoIdentificationBase.h:135
CompNovoIdentificationBase & operator=(const CompNovoIdentificationBase &source)
assignment operator
String getModifiedStringFromAASequence_(const AASequence &sequence)
returns the internal representation of a given AASequence
void initIsotopeDistributions_()
initializes the score distribution pre-calculated for the use in spectrum generation
void windowMower_(PeakSpectrum &spec, double windowsize, Size no_peaks)
keeps exactly no_peaks peaks in each window of size windowsize across the m/z range of the spectrum
void selectPivotIons_(std::vector< Size > &pivots, Size left, Size right, std::map< double, IonScore > &CID_nodes, const PeakSpectrum &CID_orig_spec, double precursor_weight, bool full_range=false)
selects pivot ions in the given range using the scores given in CID_nodes
double max_decomp_weight_
Definition: CompNovoIdentificationBase.h:165
void filterDecomps_(std::vector< MassDecomposition > &decomps)
filters the decomps by the amino acid frequencies
CompNovoIonScoringBase::IonScore IonScore
Definition: CompNovoIdentificationBase.h:86
CompNovoIdentificationBase(const CompNovoIdentificationBase &source)
copy constructor
virtual void getIdentifications(std::vector< PeptideIdentification > &ids, const PeakMap &exp)=0
performs a ProteinIdentification run on a PeakMap
AASequence getModifiedAASequence_(const String &sequence)
returns a modified AASequence from a given internal representation
void getCIDSpectrum_(PeakSpectrum &spec, const String &sequence, Size charge, double prefix=0.0, double suffix=0.0)
fills the spectrum with b,y ions, multiple charged variants; if prefix and suffix weights are given,...
Size max_subscore_number_
Definition: CompNovoIdentificationBase.h:167
std::map< Size, std::map< Size, std::set< String > > > subspec_to_sequences_
Definition: CompNovoIdentificationBase.h:149
std::map< double, std::vector< MassDecomposition > > decomp_cache_
Definition: CompNovoIdentificationBase.h:171
double decomp_weights_precision_
Definition: CompNovoIdentificationBase.h:159
double compareSpectra_(const PeakSpectrum &s1, const PeakSpectrum &s2) const
compares two spectra
~CompNovoIdentificationBase() override
destructor
CompNovoIdentificationBase()
default constructor
std::map< const Residue *, char > residue_to_name_
mapping from the actual residue to its internal representation character
Definition: CompNovoIdentificationBase.h:138
void updateMembers_() override
overrides the updateMembers_ method of DefaultParamHandler to update the members
void getDecompositions_(std::vector< MassDecomposition > &decomps, double mass, bool no_caching=false)
produces mass decompositions using the given mass
double min_mz_
Definition: CompNovoIdentificationBase.h:163
ZhangSimilarityScore zhang_
Definition: CompNovoIdentificationBase.h:147
double fragment_mass_tolerance_
Definition: CompNovoIdentificationBase.h:155
std::map< String, std::set< String > > permute_cache_
Definition: CompNovoIdentificationBase.h:173
std::map< Size, std::vector< double > > isotope_distributions_
Definition: CompNovoIdentificationBase.h:177
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
Mass decomposition algorithm, given a mass it suggests possible compositions.
Definition: MassDecompositionAlgorithm.h:70
A more convenient string class.
Definition: String.h:60
Similarity score of Zhang.
Definition: ZhangSimilarityScore.h:56
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
bool PermutScoreComparator(const CompNovoIdentificationBase::Permut &p1, const CompNovoIdentificationBase::Permut &p2)
static String suffix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:156
static String prefix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:147
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
Definition: CompNovoIonScoringBase.h:68