OpenMS
Loading...
Searching...
No Matches
FragmentIndex.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: $
6// $Authors: $
7// --------------------------------------------------------------------------
8
9#pragma once
10
18
19
20#include <array>
21#include <mutex>
22#include <vector>
23#include <functional>
24#include <algorithm> // std::max (used by inline static isOpenSearchMode)
25
26namespace OpenMS
27{
34 class OPENMS_DLLAPI FragmentIndex : public DefaultParamHandler
35 {
36 public:
37
38
52 struct Peptide {
53
54 // We need a constructor in order to emplace back
55 Peptide(UInt32 protein_idx, uint32_t mod_bitmask, std::pair<uint16_t , uint16_t> sequence, float precursor_mz):
56 protein_idx(protein_idx),
57 mod_bitmask_(mod_bitmask),
58 sequence_(sequence),
59 precursor_mz_(precursor_mz)
60 {}
61
63 uint32_t mod_bitmask_;
64 std::pair<uint16_t , uint16_t> sequence_;
66 };
67
72 {
73 uint32_t num_matched_{};
74 uint32_t subset_bitmask_{};
75 float sigma_delta_{};
76 uint16_t precursor_charge_{};
77 int16_t isotope_error_{};
78 size_t peptide_idx_{};
79 };
80
81
86 {
87 std::vector<SpectrumMatch> hits_;
88
89
91
99 {
100
101 this->hits_.insert(this->hits_.end(), other.hits_.begin(), other.hits_.end());
102 return *this;
103 }
104
105 void clear()
106 {
107 hits_.clear();
108
109 }
110 };
121
128 ~FragmentIndex() override = default;
129
139 bool isBuild() const;
140
153 const std::vector<Peptide>& getPeptides() const;
154
156 Size getNumFragments() const noexcept { return fi_fragments_.size(); }
157
158#ifdef DEBUG_FRAGMENT_INDEX
182 void addSpecialPeptide(AASequence& peptide, Size source_idx);
183#endif
184
191 void build(const std::vector<FASTAFile::FASTAEntry> & fasta_entries);
192
194 void clear();
195
196
208 std::pair<size_t, size_t> getPeptidesInMassWindow(float precursor_mass,
209 const std::pair<float, float>& window) const;
210
/**
  @brief Decides whether a precursor tolerance window counts as "open search".

  The window is open-search when at least one of the two bound magnitudes
  exceeds the threshold: 1000.0 if @p unit_ppm is true, 1.0 otherwise
  (presumably Da -- confirm against the parameter documentation).

  @param lower_magnitude Magnitude of the lower tolerance bound
  @param upper_magnitude Magnitude of the upper tolerance bound
  @param unit_ppm        True if the bounds are given in ppm
  @return True if the larger bound exceeds the threshold
*/
static constexpr bool isOpenSearchMode(double lower_magnitude,
                                       double upper_magnitude,
                                       bool unit_ppm) noexcept
{
  const double threshold = unit_ppm ? 1000.0 : 1.0;
  // Two explicit comparisons instead of max(lower, upper) > threshold:
  // identical for ordered values, but the answer no longer depends on
  // argument order when one of the bounds is NaN.
  return lower_magnitude > threshold || upper_magnitude > threshold;
}
222
/// Bit 31 of a 32-bit mod bitmask; isSingleCMother()/isSingleNMother() test it to tell the two mother kinds apart.
static constexpr uint32_t SNES_KIND_BIT_MASK = 1u << 31;
/// Complement of SNES_KIND_BIT_MASK, i.e. the low 31 bits of the bitmask.
static constexpr uint32_t SNES_SLOT_MASK = ~SNES_KIND_BIT_MASK;

/// Anchor selector for SNES handling.
/// NOTE(review): presumably NONE / protein N-terminus / protein C-terminus -- confirm against the implementation.
enum class SnesAnchor
{
  NONE,
  PROT_NTERM,
  PROT_CTERM
};

/// @return True if the kind bit (SNES_KIND_BIT_MASK) is set in @p mod_bitmask.
static constexpr bool isSingleCMother(uint32_t mod_bitmask) noexcept
{
  return (mod_bitmask & SNES_KIND_BIT_MASK) != 0;
}

/// @return True if the kind bit (SNES_KIND_BIT_MASK) is clear in @p mod_bitmask;
///         always the exact negation of isSingleCMother().
static constexpr bool isSingleNMother(uint32_t mod_bitmask) noexcept
{
  return !isSingleCMother(mod_bitmask);
}
266
272 bool isSnesMode() const noexcept { return is_snes_mode_; }
273
274
278 struct Hit
279 {
280 Hit(UInt32 peptide_idx, float fragment_mz) :
281 peptide_idx(peptide_idx),
282 fragment_mz(fragment_mz)
283 {}
284 UInt32 peptide_idx; // index in database
286 };
287
294 std::vector<Hit> query(const Peak1D& peak,
295 const std::pair<size_t,size_t>& peptide_idx_range,
296 uint16_t peak_charge);
297
305 void querySpectrum(const MSSpectrum& spectrum,
307
319 void querySpectrum(const MSSpectrum& spectrum,
320 const std::vector<FASTAFile::FASTAEntry>& fasta_entries,
322
334 const std::vector<FASTAFile::FASTAEntry>& fasta_entries) const;
335
357 int realizeSNESLength(const Peptide& mother,
358 const std::vector<FASTAFile::FASTAEntry>& fasta_entries,
359 double target_mh_plus,
360 double tolerance_lower_magnitude,
361 double tolerance_upper_magnitude,
362 bool tolerance_ppm) const;
363
372 const std::vector<FASTAFile::FASTAEntry>& fasta_entries,
373 size_t realized_length,
374 uint32_t subset_bitmask = 0) const;
375
376protected:
377
378
381 struct Fragment
382 {
383 Fragment() = default;
384 Fragment(UInt32 peptide_idx, float fragment_mz):
385 peptide_idx_(peptide_idx),
386 fragment_mz_(fragment_mz)
387 {}
388 UInt32 peptide_idx_{}; // 32 bit in sage
389 float fragment_mz_{};
390 };
391
392 bool is_build_{false};
393
394 void updateMembers_() override;
395
402 void generatePeptides(const std::vector<FASTAFile::FASTAEntry>& fasta_entries);
403
425 void generateSNESMothers_(const std::vector<FASTAFile::FASTAEntry>& fasta_entries);
426
434
440 struct ModSlot
441 {
442 uint16_t position;
443 double delta_mass;
445
446 static constexpr uint16_t NTERM_SLOT = UINT16_MAX - 1;
447 static constexpr uint16_t CTERM_SLOT = UINT16_MAX;
448 };
449
450 static constexpr size_t MAX_MOD_SLOTS = 32;
451
455
465 size_t buildModSlots_(const char* sequence, size_t seq_len, ModSlot* out_slots,
466 bool is_protein_nterm = false, bool is_protein_cterm = false) const;
467
476 std::vector<double> computeSnesSigmaDeltaSet_(bool include_prot_nterm_mods,
477 bool include_prot_cterm_mods) const;
478
480 std::array<double, 128> fixed_mod_deltas_{};
482 std::array<const ResidueModification*, 128> fixed_mod_ptrs_{};
483 double fixed_nterm_delta_{0.0};
484 double fixed_cterm_delta_{0.0};
485 const ResidueModification* fixed_nterm_mod_ptr_{nullptr};
486 const ResidueModification* fixed_cterm_mod_ptr_{nullptr};
487
489 std::array<std::vector<VarModEntry>, 128> variable_mod_table_{};
491 std::vector<VarModEntry> variable_nterm_mods_;
493 std::vector<VarModEntry> variable_cterm_mods_;
494
495 bool mod_tables_initialized_{false};
496
502 bool is_snes_mode_{false};
503
509 bool snes_enabled_{false};
510
513 std::vector<double> snes_sigma_delta_set_;
520
523 static std::array<double, 128> residue_mass_table_;
524 static std::once_flag mass_table_once_flag_;
526
529 {
530 double b_offset{0.0};
531 double y_offset{0.0};
532 double a_offset{0.0};
533 double c_offset{0.0};
534 double x_offset{0.0};
535 double z_offset{0.0};
536 };
538
552 std::vector<Fragment>& fragments,
553 const char* sequence,
554 size_t seq_len,
555 UInt32 peptide_idx,
556 double n_term_mod_mass,
557 double c_term_mod_mass,
558 const double* residue_mod_masses) const;
559
581 std::vector<Fragment>& fragments,
582 const char* sequence,
583 size_t seq_len,
584 UInt32 peptide_idx,
585 double n_term_mod_mass,
586 double c_term_mod_mass,
587 const double* residue_mod_masses,
588 bool add_b,
589 bool add_a,
590 bool add_c,
591 bool add_y,
592 bool add_x,
593 bool add_z) const;
594
595 std::vector<Peptide> fi_peptides_;
596 std::vector<Fragment> fi_fragments_;
597
600 std::vector<uint32_t> protein_lengths_;
601
604 size_t min_ion_index_{0};
605 size_t bucketsize_;
606 std::vector<float> bucket_min_mz_;
607 double precursor_mass_tolerance_lower_{20.0};
608 double precursor_mass_tolerance_upper_{20.0};
609 bool precursor_mass_tolerance_unit_ppm_{true};
611 bool fragment_mz_tolerance_unit_ppm_{true};
612private:
613
614
656 void querySpectrumSNES_(const MSSpectrum& spectrum,
657 const std::vector<FASTAFile::FASTAEntry>& fasta_entries,
659
670 const MSSpectrum& spectrum,
671 const std::pair<size_t, size_t>& candidates_range,
672 const int16_t isotope_error,
673 const uint16_t precursor_charge);
683 float precursor_mass,
685 uint16_t charge);
686
690 void trimHits(SpectrumMatchesTopN& init_hits) const;
691
692 //since we work with TheoreticalSpectrumGenerator, we must transfer some of those member variables
699
700 // SpectrumGenerator-independent member variables
701 std::string digestion_enzyme_;
702 EnzymaticDigestion::Specificity enzyme_specificity_{EnzymaticDigestion::SPEC_FULL};
703
709
713
714 // Search Related member variables
715
723
725 bool isOpenSearchMode_() const noexcept
726 {
727 return isOpenSearchMode(precursor_mass_tolerance_lower_,
728 precursor_mass_tolerance_upper_,
729 precursor_mass_tolerance_unit_ppm_);
730 }
731
736 std::pair<float, float> computeMassWindow_(float precursor_mass) const;
737
738
739 };
740
741}
Representation of a peptide/protein sequence.
Definition AASequence.h:88
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Specificity
when querying for valid digestion products, this determines if the specificity of the two peptide end...
Definition EnzymaticDigestion.h:42
Generates from a set of Fasta files a 2D-datastructure which stores all theoretical masses of all b a...
Definition FragmentIndex.h:35
void generateFragmentsForSeries_(std::vector< Fragment > &fragments, const char *sequence, size_t seq_len, UInt32 peptide_idx, double n_term_mod_mass, double c_term_mod_mass, const double *residue_mod_masses, bool add_b, bool add_a, bool add_c, bool add_y, bool add_x, bool add_z) const
size_t bucketsize_
number of fragments per outer node
Definition FragmentIndex.h:605
uint16_t min_matched_peaks_
PSMs with fewer hits are discarded.
Definition FragmentIndex.h:716
bool add_x_ions_
Definition FragmentIndex.h:697
void generateSNESMothers_(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
SNES-mode peptide enumeration: emit Single-N + Single-C mother peptides.
AASequence reconstructRealizedSubSequence(const Peptide &mother, const std::vector< FASTAFile::FASTAEntry > &fasta_entries, size_t realized_length, uint32_t subset_bitmask=0) const
const ResidueModification * mod_ptr
pointer to the modification (for AASequence reconstruction)
Definition FragmentIndex.h:431
bool add_a_ions_
Definition FragmentIndex.h:695
void querySpectrum(const MSSpectrum &spectrum, SpectrumMatchesTopN &sms)
Queries one complete experimental spectrum against the database. Loops over all precursor charges St...
bool add_b_ions_
Definition FragmentIndex.h:693
static bool isOpenSearchMode(double lower_magnitude, double upper_magnitude, bool unit_ppm) noexcept
Definition FragmentIndex.h:215
void queryPeaks(SpectrumMatchesTopN &candidates, const MSSpectrum &spectrum, const std::pair< size_t, size_t > &candidates_range, const int16_t isotope_error, const uint16_t precursor_charge)
queries peaks for a given experimental spectrum with a set range of potential peptides,...
static IonOffsets ion_offsets_
Definition FragmentIndex.h:537
size_t buildModSlots_(const char *sequence, size_t seq_len, ModSlot *out_slots, bool is_protein_nterm=false, bool is_protein_cterm=false) const
ResidueModification::TermSpecificity term_spec
where this mod can be applied
Definition FragmentIndex.h:432
static std::once_flag mass_table_once_flag_
Definition FragmentIndex.h:524
std::pair< float, float > computeMassWindow_(float precursor_mass) const
StringList modifications_fixed_
Modifications that are on all peptides.
Definition FragmentIndex.h:710
static bool isSingleCMother(uint32_t mod_bitmask) noexcept
Definition FragmentIndex.h:257
float fragment_mz_tolerance_
Definition FragmentIndex.h:610
bool add_y_ions_
Definition FragmentIndex.h:694
std::vector< Peptide > fi_peptides_
vector of all (digested) peptides
Definition FragmentIndex.h:595
std::vector< VarModEntry > variable_cterm_mods_
Pure C-terminal variable mods (not residue-specific)
Definition FragmentIndex.h:493
size_t missed_cleavages_
number of missed cleavages
Definition FragmentIndex.h:704
float fragment_min_mz_
smallest fragment mz
Definition FragmentIndex.h:602
uint16_t min_precursor_charge_
minimal possible precursor charge (usually always 1)
Definition FragmentIndex.h:719
uint32_t max_processed_hits_
The number of PSMs that will be used; the rest are filtered out.
Definition FragmentIndex.h:722
static bool isSingleNMother(uint32_t mod_bitmask) noexcept
Definition FragmentIndex.h:262
float peptide_max_mass_
Definition FragmentIndex.h:706
void querySpectrumSNES_(const MSSpectrum &spectrum, const std::vector< FASTAFile::FASTAEntry > &fasta_entries, SpectrumMatchesTopN &sms)
SNES-mode spectrum query (MetaMorpheus-style: byte-count + b-ion filter).
uint16_t max_fragment_charge_
The maximal possible charge of the fragments.
Definition FragmentIndex.h:721
std::pair< size_t, size_t > getPeptidesInMassWindow(float precursor_mass, const std::pair< float, float > &window) const
std::vector< double > snes_sigma_delta_set_with_prot_cterm_
Definition FragmentIndex.h:519
std::vector< Hit > query(const Peak1D &peak, const std::pair< size_t, size_t > &peptide_idx_range, uint16_t peak_charge)
Queries one peak.
void generateFragmentsLightweight_(std::vector< Fragment > &fragments, const char *sequence, size_t seq_len, UInt32 peptide_idx, double n_term_mod_mass, double c_term_mod_mass, const double *residue_mod_masses) const
bool isOpenSearchMode_() const noexcept
Instance delegate — same rule, reads the member bounds.
Definition FragmentIndex.h:725
std::vector< Fragment > fi_fragments_
vector of all theoretical fragments (b- and y- ions)
Definition FragmentIndex.h:596
void querySpectrum(const MSSpectrum &spectrum, const std::vector< FASTAFile::FASTAEntry > &fasta_entries, SpectrumMatchesTopN &sms)
Query a spectrum against the fragment index with FASTA context.
std::vector< double > snes_sigma_delta_set_
Definition FragmentIndex.h:513
int16_t max_isotope_error_
Maximal possible isotope error (both only used for closed search)
Definition FragmentIndex.h:718
float fragment_max_mz_
largest fragment mz
Definition FragmentIndex.h:603
std::string digestion_enzyme_
Definition FragmentIndex.h:701
bool isBuild() const
Indicates whether the fragment index has been built.
size_t peptide_max_length_
Definition FragmentIndex.h:708
~FragmentIndex() override=default
Default destructor.
std::vector< double > snes_sigma_delta_set_with_prot_nterm_
Definition FragmentIndex.h:516
void generatePeptides(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
Generates all peptides from given fasta entries. If Bottom-up is set to false skips digestion....
std::vector< float > bucket_min_mz_
vector of the smallest fragment mz of each bucket
Definition FragmentIndex.h:606
double delta_mass
mass delta from this modification
Definition FragmentIndex.h:430
float peptide_min_mass_
Definition FragmentIndex.h:705
void searchDifferentPrecursorRanges(const MSSpectrum &spectrum, float precursor_mass, SpectrumMatchesTopN &sms, uint16_t charge)
If closed search loops over all isotope errors. For each iteration loop over all peaks with queryPeak...
int16_t min_isotope_error_
Minimal possible isotope error.
Definition FragmentIndex.h:717
uint16_t max_precursor_charge_
maximal possible precursor charge
Definition FragmentIndex.h:720
std::vector< uint32_t > protein_lengths_
Definition FragmentIndex.h:600
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
void clear()
Delete fragment index. Sets is_build=false.
StringList modifications_variable_
Variable modifications -> all possible combinations are created.
Definition FragmentIndex.h:711
size_t max_variable_mods_per_peptide_
Definition FragmentIndex.h:712
void build(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
Given a set of Fasta files, builds the Fragment Index datastructure (FID). First all fragments are so...
int realizeSNESLength(const Peptide &mother, const std::vector< FASTAFile::FASTAEntry > &fasta_entries, double target_mh_plus, double tolerance_lower_magnitude, double tolerance_upper_magnitude, bool tolerance_ppm) const
Find the realized sub-peptide length of a SNES mother that best matches the observed precursor mass.
bool isSnesMode() const noexcept
Definition FragmentIndex.h:272
void trimHits(SpectrumMatchesTopN &init_hits) const
places the k-largest elements in the front of the input array. Inside of the k-largest elements and o...
Size getNumFragments() const noexcept
Number of theoretical fragments stored in the index (0 before build()).
Definition FragmentIndex.h:156
const std::vector< Peptide > & getPeptides() const
Returns a reference to the internal peptide container.
AASequence reconstructModifiedSequence(const Peptide &peptide, const std::vector< FASTAFile::FASTAEntry > &fasta_entries) const
Reconstruct a fully modified AASequence from a Peptide's bitmask.
std::vector< VarModEntry > variable_nterm_mods_
Pure N-terminal variable mods (not residue-specific)
Definition FragmentIndex.h:491
size_t peptide_min_length_
Definition FragmentIndex.h:707
static void initResidueMassTable_()
bool add_c_ions_
Definition FragmentIndex.h:696
FragmentIndex()
Default constructor.
SnesAnchor
Definition FragmentIndex.h:249
bool add_z_ions_
Definition FragmentIndex.h:698
static std::array< double, 128 > residue_mass_table_
Definition FragmentIndex.h:523
std::vector< double > computeSnesSigmaDeltaSet_(bool include_prot_nterm_mods, bool include_prot_cterm_mods) const
Precomputed ion-type mass offsets (from Residue::getInternalTo*Ion formulas)
Definition FragmentIndex.h:529
Match between a query peak and an entry in the DB.
Definition FragmentIndex.h:72
Entry in the per-AA variable modification lookup table.
Definition FragmentIndex.h:429
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
A 1-dimensional raw data point or peak.
Definition Peak1D.h:30
Representation of a modification on an amino acid residue.
Definition ResidueModification.h:55
TermSpecificity
Position where the modification is allowed to occur.
Definition ResidueModification.h:74
uint32_t UInt32
Unsigned integer type (32bit)
Definition Types.h:33
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
One entry in the fragment index.
Definition FragmentIndex.h:382
Fragment(UInt32 peptide_idx, float fragment_mz)
Definition FragmentIndex.h:384
Definition FragmentIndex.h:279
UInt32 peptide_idx
Definition FragmentIndex.h:284
Hit(UInt32 peptide_idx, float fragment_mz)
Definition FragmentIndex.h:280
float fragment_mz
Definition FragmentIndex.h:285
A candidate modification slot for a specific peptide.
Definition FragmentIndex.h:441
const ResidueModification * mod_ptr
for AASequence reconstruction
Definition FragmentIndex.h:444
uint16_t position
residue index, or NTERM_SLOT/CTERM_SLOT
Definition FragmentIndex.h:442
double delta_mass
mass delta
Definition FragmentIndex.h:443
Compact descriptor of a peptide instance held by the FragmentIndex.
Definition FragmentIndex.h:52
std::pair< uint16_t, uint16_t > sequence_
{start, length} within the source protein sequence (start is 0-based; length in residues)
Definition FragmentIndex.h:64
uint32_t mod_bitmask_
Bitmask of active variable mod slots (0 = unmodified/fixed-only; up to 32 slots)
Definition FragmentIndex.h:63
UInt32 protein_idx
0-based index into FASTA entries provided to build(); identifies the source protein
Definition FragmentIndex.h:62
float precursor_mz_
Mono-isotopic m/z at charge 1 (M+H)+ of this peptide; used for sorting/filtering.
Definition FragmentIndex.h:65
Peptide(UInt32 protein_idx, uint32_t mod_bitmask, std::pair< uint16_t, uint16_t > sequence, float precursor_mz)
Definition FragmentIndex.h:55
container for SpectrumMatch. Also keeps count of total number of candidates and total number of match...
Definition FragmentIndex.h:86
SpectrumMatchesTopN & operator+=(const SpectrumMatchesTopN &other)
Appends a SpectrumMatchesTopN to another one. Adds up the number of all matched peaks....
Definition FragmentIndex.h:98
void clear()
Definition FragmentIndex.h:105
std::vector< SpectrumMatch > hits_
The preliminary candidates.
Definition FragmentIndex.h:87