OpenMS
Loading...
Searching...
No Matches
Residue.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Andreas Bertsch, Jang Jang Jin$
7// --------------------------------------------------------------------------
8//
9
10#pragma once
11
17
18#include <array>
19#include <functional>
20#include <iosfwd>
21#include <set>
22#include <vector>
23
24namespace OpenMS
25{
26 class ResidueModification;
27
40 class OPENMS_DLLAPI Residue
41 {
42 friend class ResidueDB;
43
44public:
45
62
63 inline static const EmpiricalFormula& getInternalToFull()
64 {
65 static const EmpiricalFormula to_full = EmpiricalFormula("H2O");
66 return to_full;
67 }
68
69 inline static const EmpiricalFormula& getInternalToNTerm()
70 {
71 static const EmpiricalFormula to_full = EmpiricalFormula("H");
72 return to_full;
73 }
74
75 inline static const EmpiricalFormula& getInternalToCTerm()
76 {
77 static const EmpiricalFormula to_full = EmpiricalFormula("OH");
78 return to_full;
79 }
80
81 inline static const EmpiricalFormula& getInternalToAIon()
82 {
83 // Mind the "-"
84 static const EmpiricalFormula to_full =
85 getInternalToNTerm() - EmpiricalFormula("CHO");
86 return to_full;
87 }
88
89 inline static const EmpiricalFormula& getInternalToBIon()
90 {
91 // Mind the "-"
92 static const EmpiricalFormula to_full =
93 getInternalToNTerm() - EmpiricalFormula("H");
94 return to_full;
95 }
96
97 inline static const EmpiricalFormula& getInternalToCIon()
98 {
99 static const EmpiricalFormula to_full =
100 getInternalToNTerm() + EmpiricalFormula("NH2");
101 return to_full;
102 }
103
104 inline static const EmpiricalFormula& getInternalToXIon()
105 {
106 // Mind the "-"
107 static const EmpiricalFormula to_full =
108 getInternalToCTerm() + EmpiricalFormula("CO") - EmpiricalFormula("H");
109 return to_full;
110 }
111
112 inline static const EmpiricalFormula& getInternalToYIon()
113 {
114 static const EmpiricalFormula to_full =
115 getInternalToCTerm() + EmpiricalFormula("H");
116 return to_full;
117 }
118
119 inline static const EmpiricalFormula& getInternalToZIon()
120 {
121 // Mind the "-"
122 static const EmpiricalFormula to_full =
123 getInternalToCTerm() - EmpiricalFormula("NH2");
124 return to_full;
125 }
126
128 {
129 // Mind the "-"
130 static const EmpiricalFormula to_full =
131 getInternalToCTerm() - EmpiricalFormula("NH");
132 return to_full;
133 }
134
136 {
137 // Mind the "-"
138 static const EmpiricalFormula to_full =
139 getInternalToCTerm() - EmpiricalFormula("N");
140 return to_full;
141 }
142
144
148
176
178 static inline std::array<std::string_view, Residue::ResidueType::SizeOfResidueType> names_of_residuetype {
179 "full",
180 "internal",
181 "N-terminal",
182 "C-terminal",
183 "a-ion",
184 "b-ion",
185 "c-ion",
186 "x-ion",
187 "y-ion",
188 "z-ion",
189 "z+1-ion",
190 "z+2-ion",
191 "precursor-ion",
192 "b-H2O-ion",
193 "y-H2O-ion",
194 "b-NH3-ion",
195 "y-NH3-ion",
196 "Non-identified ion",
197 "unannotated"
198 };
199
201 static std::string getResidueTypeName(const ResidueType res_type);
202
206
209
211 Residue(const Residue&) = default;
212
214 Residue(Residue&&) = default;
215
216 // Detailed constructor
217 Residue(const std::string& name,
218 const std::string& three_letter_code,
219 const std::string& one_letter_code,
220 const EmpiricalFormula& formula,
221 double pka = 0,
222 double pkb = 0,
223 double pkc = -1,
224 double gb_sc = 0,
225 double gb_bb_l = 0,
226 double gb_bb_r = 0,
227 const std::set<std::string>& synonyms = std::set<std::string>());
228
230 virtual ~Residue();
232
236
238 Residue& operator=(const Residue&) = default;
239
241 Residue& operator=(Residue&&) & = default;
243
248 void setName(const std::string& name);
249
251 const std::string& getName() const;
252
254 void setSynonyms(const std::set<std::string>& synonyms);
255
257 void addSynonym(const std::string& synonym);
258
260 const std::set<std::string>& getSynonyms() const;
261
263 void setThreeLetterCode(const std::string& three_letter_code);
264
266 const std::string& getThreeLetterCode() const;
267
269 void setOneLetterCode(const std::string& one_letter_code);
270
272 const std::string& getOneLetterCode() const;
273
276
278 void setLossFormulas(const std::vector<EmpiricalFormula>&);
279
282
284 void setNTermLossFormulas(const std::vector<EmpiricalFormula>&);
285
287 const std::vector<EmpiricalFormula>& getLossFormulas() const;
288
290 const std::vector<EmpiricalFormula>& getNTermLossFormulas() const;
291
293 void setLossNames(const std::vector<std::string>& name);
294
296 void setNTermLossNames(const std::vector<std::string>& name);
297
299 void addLossName(const std::string& name);
300
302 void addNTermLossName(const std::string& name);
303
305 const std::vector<std::string>& getLossNames() const;
306
308 const std::vector<std::string>& getNTermLossNames() const;
309
311 void setFormula(const EmpiricalFormula& formula);
312
315
317 void setAverageWeight(double weight);
318
320 double getAverageWeight(ResidueType res_type = Full) const;
321
323 void setMonoWeight(double weight);
324
326 double getMonoWeight(ResidueType res_type = Full) const;
327
330
332 void setModification(const std::string& name);
333
336
340
343 void setModificationByDiffMonoMass(double diffMonoMass);
344
346 const std::string& getModificationName() const;
347
349 void setLowMassIons(const std::vector<EmpiricalFormula>& low_mass_ions);
350
352 const std::vector<EmpiricalFormula>& getLowMassIons() const;
353
355 void setResidueSets(const std::set<std::string>& residues_sets);
356
358 void addResidueSet(const std::string& residue_sets);
359
361 const std::set<std::string>& getResidueSets() const;
362
364 double getPka() const;
365
367 double getPkb() const;
368
370 double getPkc() const;
371
373 double getPiValue() const;
374
376 void setPka(double value);
377
379 void setPkb(double value);
380
382 void setPkc(double value);
383
385 double getSideChainBasicity() const;
386
388 void setSideChainBasicity(double gb_sc);
389
392
394 void setBackboneBasicityLeft(double gb_bb_l);
395
398
400 void setBackboneBasicityRight(double gb_bb_r);
402
407 bool hasNeutralLoss() const;
408
411
413 bool operator==(const Residue& residue) const;
414
416 bool operator!=(const Residue& residue) const;
417
419 bool operator==(char one_letter_code) const;
420
422 bool operator!=(char one_letter_code) const;
423
425 bool isModified() const;
426
428 bool isInResidueSet(const std::string& residue_set);
430
432 static std::string residueTypeToIonLetter(const ResidueType& res_type);
433
436 std::string toString() const;
437
439 friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const Residue& residue);
440
451
452protected:
453
455 std::string name_ = "unknown";
456
457 std::set<std::string> synonyms_;
458
460
461 std::string one_letter_code_;
462
464
466
467 double average_weight_ = 0;
468
469 double mono_weight_ = 0;
470
472 const ResidueModification* modification_ = nullptr;
473
474 // loss
475 std::vector<std::string> loss_names_;
476
477 std::vector<EmpiricalFormula> loss_formulas_;
478
479 std::vector<std::string> NTerm_loss_names_;
480
481 std::vector<EmpiricalFormula> NTerm_loss_formulas_;
482
484 std::vector<EmpiricalFormula> low_mass_ions_;
485
486 // pka values
487 double pka_ = 0;
488
489 // pkb values
490 double pkb_ = 0;
491
492 // pkc values
493 double pkc_ = -1.0;
494
496 double gb_sc_ = 0;
497
499 double gb_bb_l_ = 0;
500
502 double gb_bb_r_ = 0;
503
505 std::set<std::string> residue_sets_;
506
507 // pre-calculated residue type delta weights for more efficient weight calculation
508 static const double internal_to_full_monoweight_;
509 static const double internal_to_nterm_monoweight_;
510 static const double internal_to_cterm_monoweight_;
511 static const double internal_to_a_monoweight_;
512 static const double internal_to_b_monoweight_;
513 static const double internal_to_c_monoweight_;
514 static const double internal_to_x_monoweight_;
515 static const double internal_to_y_monoweight_;
516 static const double internal_to_z_monoweight_;
517 static const double internal_to_zp1_monoweight_;
518 static const double internal_to_zp2_monoweight_;
519 };
520
521 // write 'name threelettercode onelettercode formula'
522 OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const Residue& residue);
523
524} // namespace OpenMS
525
526namespace std
527{
530 template<>
531 struct hash<OpenMS::Residue>
532 {
533 std::size_t operator()(const OpenMS::Residue& r) const noexcept
534 {
535 std::size_t seed = 0;
536
537 // Hash name_
539
540 // Hash synonyms_ (std::set<std::string>)
541 for (const auto& syn : r.getSynonyms())
542 {
544 }
545
546 // Hash three_letter_code_
547 OpenMS::hash_combine(seed, OpenMS::fnv1a_hash_string(r.getThreeLetterCode()));
548
549 // Hash one_letter_code_
550 OpenMS::hash_combine(seed, OpenMS::fnv1a_hash_string(r.getOneLetterCode()));
551
552 // Hash formula_
553 OpenMS::hash_combine(seed, std::hash<OpenMS::EmpiricalFormula>{}(r.getFormula()));
554
555 // Hash average_weight_
556 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getAverageWeight()));
557
558 // Hash mono_weight_
559 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getMonoWeight()));
560
561 // Hash modification_ (pointer comparison in operator==)
562 OpenMS::hash_combine(seed, OpenMS::hash_int(reinterpret_cast<std::uintptr_t>(r.getModification())));
563
564 // Hash loss_names_ (std::vector<std::string>)
565 for (const auto& name : r.getLossNames())
566 {
568 }
569
570 // Hash loss_formulas_ (std::vector<EmpiricalFormula>)
571 for (const auto& formula : r.getLossFormulas())
572 {
573 OpenMS::hash_combine(seed, std::hash<OpenMS::EmpiricalFormula>{}(formula));
574 }
575
576 // Hash NTerm_loss_names_ (std::vector<std::string>)
577 for (const auto& name : r.getNTermLossNames())
578 {
580 }
581
582 // Hash NTerm_loss_formulas_ (std::vector<EmpiricalFormula>)
583 for (const auto& formula : r.getNTermLossFormulas())
584 {
585 OpenMS::hash_combine(seed, std::hash<OpenMS::EmpiricalFormula>{}(formula));
586 }
587
588 // Hash low_mass_ions_ (std::vector<EmpiricalFormula>)
589 for (const auto& formula : r.getLowMassIons())
590 {
591 OpenMS::hash_combine(seed, std::hash<OpenMS::EmpiricalFormula>{}(formula));
592 }
593
594 // Hash pka_
595 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getPka()));
596
597 // Hash pkb_
598 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getPkb()));
599
600 // Hash pkc_
601 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getPkc()));
602
603 // Hash gb_sc_
604 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getSideChainBasicity()));
605
606 // Hash gb_bb_l_
607 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getBackboneBasicityLeft()));
608
609 // Hash gb_bb_r_
610 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getBackboneBasicityRight()));
611
612 // Hash residue_sets_ (std::set<std::string>)
613 for (const auto& rs : r.getResidueSets())
614 {
616 }
617
618 return seed;
619 }
620 };
621} // namespace std
Representation of an empirical formula.
Definition EmpiricalFormula.h:62
OpenMS stores a central database of all residues in the ResidueDB. All (unmodified) residues are adde...
Definition ResidueDB.h:40
Representation of a modification on an amino acid residue.
Definition ResidueModification.h:55
Representation of an amino acid residue.
Definition Residue.h:41
static const double internal_to_b_monoweight_
Definition Residue.h:512
static const double internal_to_x_monoweight_
Definition Residue.h:514
void setPkb(double value)
sets the pkb of the residue
static const EmpiricalFormula & getInternalToZp2Ion()
Definition Residue.h:135
void setResidueSets(const std::set< std::string > &residues_sets)
sets the residue sets the amino acid is contained in (e.g. Natural20)
bool hasNTermNeutralLosses() const
true if N-terminal neutral losses are set
static const double internal_to_z_monoweight_
Definition Residue.h:516
std::string toString() const
double getBackboneBasicityRight() const
returns the C-terminal direction backbone basicity
static const double internal_to_a_monoweight_
Definition Residue.h:511
static const double internal_to_zp1_monoweight_
Definition Residue.h:517
const std::vector< EmpiricalFormula > & getLowMassIons() const
returns a vector of formulas with the low mass markers of the residue
Residue()
Default constructor (needed by pyOpenMS)
std::vector< EmpiricalFormula > NTerm_loss_formulas_
Definition Residue.h:481
std::set< std::string > synonyms_
Definition Residue.h:457
bool operator==(char one_letter_code) const
equality operator for one letter code
static const double internal_to_nterm_monoweight_
Definition Residue.h:509
static const double internal_to_zp2_monoweight_
Definition Residue.h:518
void setSynonyms(const std::set< std::string > &synonyms)
sets the synonyms
bool hasNeutralLoss() const
true if the residue has neutral loss
const std::string & getName() const
returns the name of the residue
std::vector< std::string > loss_names_
Definition Residue.h:475
EmpiricalFormula getFormula(ResidueType res_type=Full) const
returns the empirical formula of the residue
const std::string & getThreeLetterCode() const
returns the name of the residue as three letter code (std::string of size 3)
double getPiValue() const
calculates the isoelectric point using the pk* values
const ResidueModification * getModification() const
returns a pointer to the modification, or a null pointer if none is set
void setPkc(double value)
sets the pkc of the residue
void addSynonym(const std::string &synonym)
adds a synonym
static const double internal_to_full_monoweight_
Definition Residue.h:508
static const double internal_to_y_monoweight_
Definition Residue.h:515
Residue & operator=(const Residue &)=default
Assignment operator.
static const double internal_to_cterm_monoweight_
Definition Residue.h:510
const std::vector< EmpiricalFormula > & getLossFormulas() const
returns the neutral loss formulas
bool operator==(const Residue &residue) const
equality operator
bool isModified() const
true if the residue is a modified one
const std::vector< std::string > & getLossNames() const
gets neutral loss name (if there is one, else returns an empty string)
bool isInResidueSet(const std::string &residue_set)
true if the residue is contained in the set
void setPka(double value)
sets the pka of the residue
std::vector< EmpiricalFormula > loss_formulas_
Definition Residue.h:477
double getPkb() const
returns the pkb of the residue
ResidueType
Definition Residue.h:153
@ CTerminal
only C-terminus
Definition Residue.h:157
@ YIon
MS:1001220 peptide bond up to the C-terminus.
Definition Residue.h:162
@ XIon
MS:1001228 amide/C-alpha bond up to the C-terminus.
Definition Residue.h:161
@ Zp2Ion
MS:1001230 C-alpha/carbonyl carbon bond (free radical, z+2 "ion" with additional abstracted hydrogen)...
Definition Residue.h:165
@ ZIon
MS:1001230 C-alpha/carbonyl carbon bond [CID fragment].
Definition Residue.h:163
@ BIonMinusH20
MS:1001222 b ion without water.
Definition Residue.h:167
@ NTerminal
only N-terminus
Definition Residue.h:156
@ BIonMinusNH3
MS:1001232 b ion without ammonia.
Definition Residue.h:169
@ AIon
MS:1001229 N-terminus up to the C-alpha/carbonyl carbon bond.
Definition Residue.h:158
@ Precursor
MS:1001523 Precursor ion.
Definition Residue.h:166
@ YIonMinusH20
MS:1001223 y ion without water.
Definition Residue.h:168
@ NonIdentified
MS:1001240 Non-identified ion.
Definition Residue.h:171
@ BIon
MS:1001224 N-terminus up to the peptide bond.
Definition Residue.h:159
@ Zp1Ion
MS:1001230 C-alpha/carbonyl carbon bond (free radical, z+1 "ion") [main EAD fragment].
Definition Residue.h:164
@ CIon
MS:1001231 N-terminus up to the amide/C-alpha bond.
Definition Residue.h:160
@ YIonMinusNH3
MS:1001233 y ion without ammonia.
Definition Residue.h:170
@ Internal
internal residue, without any termini
Definition Residue.h:155
@ Unannotated
no stored annotation
Definition Residue.h:172
void setBackboneBasicityRight(double gb_bb_r)
sets the C-terminal direction backbone basicity
friend std::ostream & operator<<(std::ostream &os, const Residue &residue)
ostream iterator to write the residue to a stream
void setSideChainBasicity(double gb_sc)
sets the side chain basicity
Residue & operator=(Residue &&) &=default
Move assignment operator.
std::vector< EmpiricalFormula > low_mass_ions_
low mass markers like immonium ions
Definition Residue.h:484
void setOneLetterCode(const std::string &one_letter_code)
sets the name as one letter code (std::string of size 1)
void setModification(const ResidueModification &mod)
void setLowMassIons(const std::vector< EmpiricalFormula > &low_mass_ions)
sets the low mass marker ions as a vector of formulas
void setLossNames(const std::vector< std::string > &name)
set the neutral loss molecule name
static const EmpiricalFormula & getInternalToZp1Ion()
Definition Residue.h:127
double getSideChainBasicity() const
returns the side chain basicity
static std::string residueTypeToIonLetter(const ResidueType &res_type)
helper for mapping residue types to letters for Text annotations and labels
Residue(Residue &&)=default
Move constructor.
static const double internal_to_c_monoweight_
Definition Residue.h:513
virtual ~Residue()
Destructor.
const std::vector< EmpiricalFormula > & getNTermLossFormulas() const
returns N-terminal loss formulas
std::vector< std::string > NTerm_loss_names_
Definition Residue.h:479
double getHydrophobicity(const HydrophobicityScaleMethod scale) const
returns the hydrophobicity value of the residue
std::string one_letter_code_
Definition Residue.h:461
static const EmpiricalFormula & getInternalToYIon()
Definition Residue.h:112
void setName(const std::string &name)
sets the name of the residue
void addResidueSet(const std::string &residue_sets)
adds a residue set to the residue sets (e.g. Natural20)
void setModification(const std::string &name)
sets the modification by name; the mod should be present in ModificationsDB
const std::set< std::string > & getResidueSets() const
returns the residue sets this residue is contained in (e.g. Natural20)
static const EmpiricalFormula & getInternalToAIon()
Definition Residue.h:81
void setThreeLetterCode(const std::string &three_letter_code)
sets the name of the residue as three letter code (std::string of size 3)
Residue(const std::string &name, const std::string &three_letter_code, const std::string &one_letter_code, const EmpiricalFormula &formula, double pka=0, double pkb=0, double pkc=-1, double gb_sc=0, double gb_bb_l=0, double gb_bb_r=0, const std::set< std::string > &synonyms=std::set< std::string >())
double getBackboneBasicityLeft() const
returns the backbone basicity if located in N-terminal direction
void addLossName(const std::string &name)
add neutral loss molecule name
void setModification(const ResidueModification *mod)
sets the modification by existing ResMod (make sure it exists in ModificationsDB)
static const EmpiricalFormula & getInternalToZIon()
Definition Residue.h:119
void setModificationByDiffMonoMass(double diffMonoMass)
void setAverageWeight(double weight)
sets average weight of the residue (must be full, with N and C-terminus)
static const EmpiricalFormula & getInternalToNTerm()
Definition Residue.h:69
void addNTermLossName(const std::string &name)
adds a N-terminal loss name
double getPka() const
returns the pka of the residue
const std::vector< std::string > & getNTermLossNames() const
returns the N-terminal loss names
static const EmpiricalFormula & getInternalToFull()
Definition Residue.h:63
static const EmpiricalFormula & getInternalToCIon()
Definition Residue.h:97
bool operator!=(char one_letter_code) const
inequality operator for one letter code
void addNTermLossFormula(const EmpiricalFormula &)
adds N-terminal losses
void addLossFormula(const EmpiricalFormula &)
adds a neutral loss formula
double getAverageWeight(ResidueType res_type=Full) const
returns average weight of the residue
Residue(const Residue &)=default
Copy constructor.
EmpiricalFormula internal_formula_
Definition Residue.h:465
void setNTermLossFormulas(const std::vector< EmpiricalFormula > &)
sets the N-terminal losses
EmpiricalFormula formula_
Definition Residue.h:463
const std::set< std::string > & getSynonyms() const
returns the synonyms
std::set< std::string > residue_sets_
residue sets this amino acid is contained in
Definition Residue.h:505
void setBackboneBasicityLeft(double gb_bb_l)
sets the N-terminal direction backbone basicity
static const EmpiricalFormula & getInternalToCTerm()
Definition Residue.h:75
void setNTermLossNames(const std::vector< std::string > &name)
sets the N-terminal loss names
static const EmpiricalFormula & getInternalToBIon()
Definition Residue.h:89
const std::string & getModificationName() const
returns the name (ID) of the modification, or an empty string if none is set
double getPkc() const
returns the pkc of the residue if it exists otherwise -1
void setFormula(const EmpiricalFormula &formula)
set empirical formula of the residue (must be full, with N and C-terminus)
double getMonoWeight(ResidueType res_type=Full) const
returns monoisotopic weight of the residue
static std::string getResidueTypeName(const ResidueType res_type)
returns the ion name given as a residue type
void setLossFormulas(const std::vector< EmpiricalFormula > &)
sets the neutral loss formulas
void setMonoWeight(double weight)
sets monoisotopic weight of the residue (must be full, with N and C-terminus)
bool operator!=(const Residue &residue) const
inequality operator
const std::string & getOneLetterCode() const
returns the name as one letter code (std::string of size 1)
static const EmpiricalFormula & getInternalToXIon()
Definition Residue.h:104
std::string three_letter_code_
Definition Residue.h:459
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
std::size_t hash_int(T value) noexcept
Hash for an integer type.
Definition HashUtils.h:107
void hash_combine(std::size_t &seed, std::size_t value) noexcept
Combine a hash value with additional data using golden ratio mixing.
Definition HashUtils.h:87
std::size_t hash_float(T value) noexcept
Hash for a floating point type (float or double).
Definition HashUtils.h:142
std::size_t fnv1a_hash_string(const std::string &s) noexcept
FNV-1a hash for a string.
Definition HashUtils.h:70
HydrophobicityScaleMethod
Enum for different hydrophobicity scales.
Definition CommonEnums.h:50
STL namespace.
std::size_t operator()(const OpenMS::Residue &r) const noexcept
Definition Residue.h:533