OpenMS
Loading...
Searching...
No Matches
FragmentIndex.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: $
6// $Authors: $
7// --------------------------------------------------------------------------
8
9#pragma once
10
18
19
20#include <array>
21#include <mutex>
22#include <vector>
23#include <functional>
24#include <algorithm> // std::max (used by inline static isOpenSearchMode)
25
26namespace OpenMS
27{
34 class OPENMS_DLLAPI FragmentIndex : public DefaultParamHandler
35 {
36 public:
37
38
52 struct Peptide {
53
54 // We need a constructor in order to emplace back
55 Peptide(UInt32 protein_idx, uint32_t mod_bitmask, std::pair<uint16_t , uint16_t> sequence, float precursor_mz):
56 protein_idx(protein_idx),
57 mod_bitmask_(mod_bitmask),
58 sequence_(sequence),
59 precursor_mz_(precursor_mz)
60 {}
61
63 uint32_t mod_bitmask_;
64 std::pair<uint16_t , uint16_t> sequence_;
66 };
67
72 {
73 uint32_t num_matched_{};
74 uint16_t precursor_charge_{};
75 int16_t isotope_error_{};
76 size_t peptide_idx_{};
77 };
78
79
84 {
85 std::vector<SpectrumMatch> hits_;
86
87
89
97 {
98
99 this->hits_.insert(this->hits_.end(), other.hits_.begin(), other.hits_.end());
100 return *this;
101 }
102
// Empties this container by clearing the stored hits_ vector.
103 void clear()
104 {
105 hits_.clear();
106
107 }
108 };
119
126 ~FragmentIndex() override = default;
127
137 bool isBuild() const;
138
151 const std::vector<Peptide>& getPeptides() const;
152
153#ifdef DEBUG_FRAGMENT_INDEX
177 void addSpecialPeptide(AASequence& peptide, Size source_idx);
178#endif
179
186 void build(const std::vector<FASTAFile::FASTAEntry> & fasta_entries);
187
189 void clear();
190
191
203 std::pair<size_t, size_t> getPeptidesInMassWindow(float precursor_mass,
204 const std::pair<float, float>& window) const;
205
// Decides whether a precursor tolerance window counts as an open search.
// Returns true when the larger of lower_magnitude and upper_magnitude is
// strictly greater than the cutoff: 1000.0 if unit_ppm is true, 1.0 otherwise.
// NOTE(review): the non-ppm cutoff of 1.0 is presumably in Da -- confirm.
// Defined inline in the header; std::max comes from <algorithm>.
210 static bool isOpenSearchMode(double lower_magnitude,
211 double upper_magnitude,
212 bool unit_ppm) noexcept
213 {
214 const double threshold = unit_ppm ? 1000.0 : 1.0; // cutoff depends on the tolerance unit
215 return std::max(lower_magnitude, upper_magnitude) > threshold; // a value equal to the cutoff is NOT open search
216 }
217
221 struct Hit
222 {
223 Hit(UInt32 peptide_idx, float fragment_mz) :
224 peptide_idx(peptide_idx),
225 fragment_mz(fragment_mz)
226 {}
227 UInt32 peptide_idx; // index in database
229 };
230
237 std::vector<Hit> query(const Peak1D& peak,
238 const std::pair<size_t,size_t>& peptide_idx_range,
239 uint16_t peak_charge);
240
248 void querySpectrum(const MSSpectrum& spectrum,
250
262 const std::vector<FASTAFile::FASTAEntry>& fasta_entries) const;
263
264protected:
265
266
// One entry in the fragment index: a fragment m/z paired with the index of
// the peptide it belongs to.
// NOTE(review): peptide_idx_ presumably indexes fi_peptides_ -- confirm.
269 struct Fragment
270 {
271 Fragment() = default; // both members fall back to their {} initializers
272 Fragment(UInt32 peptide_idx, float fragment_mz):
273 peptide_idx_(peptide_idx),
274 fragment_mz_(fragment_mz)
275 {}
276 UInt32 peptide_idx_{}; // peptide index; 32 bit in sage
277 float fragment_mz_{}; // m/z of this fragment
278 };
279
280 bool is_build_{false};
281
282 void updateMembers_() override;
283
290 void generatePeptides(const std::vector<FASTAFile::FASTAEntry>& fasta_entries);
291
299
305 struct ModSlot
306 {
307 uint16_t position;
308 double delta_mass;
310
311 static constexpr uint16_t NTERM_SLOT = UINT16_MAX - 1;
312 static constexpr uint16_t CTERM_SLOT = UINT16_MAX;
313 };
314
315 static constexpr size_t MAX_MOD_SLOTS = 32;
316
320
330 size_t buildModSlots_(const char* sequence, size_t seq_len, ModSlot* out_slots,
331 bool is_protein_nterm = false, bool is_protein_cterm = false) const;
332
334 std::array<double, 128> fixed_mod_deltas_{};
336 std::array<const ResidueModification*, 128> fixed_mod_ptrs_{};
337 double fixed_nterm_delta_{0.0};
338 double fixed_cterm_delta_{0.0};
339 const ResidueModification* fixed_nterm_mod_ptr_{nullptr};
340 const ResidueModification* fixed_cterm_mod_ptr_{nullptr};
341
343 std::array<std::vector<VarModEntry>, 128> variable_mod_table_{};
345 std::vector<VarModEntry> variable_nterm_mods_;
347 std::vector<VarModEntry> variable_cterm_mods_;
348
349 bool mod_tables_initialized_{false};
350
353 static std::array<double, 128> residue_mass_table_;
354 static std::once_flag mass_table_once_flag_;
356
359 {
360 double b_offset{0.0};
361 double y_offset{0.0};
362 double a_offset{0.0};
363 double c_offset{0.0};
364 double x_offset{0.0};
365 double z_offset{0.0};
366 };
368
379 std::vector<Fragment>& fragments,
380 const char* sequence,
381 size_t seq_len,
382 UInt32 peptide_idx,
383 double n_term_mod_mass,
384 double c_term_mod_mass,
385 const double* residue_mod_masses) const;
386
387 std::vector<Peptide> fi_peptides_;
388 std::vector<Fragment> fi_fragments_;
389
392 size_t min_ion_index_{0};
393 size_t bucketsize_;
394 std::vector<float> bucket_min_mz_;
395 double precursor_mass_tolerance_lower_{20.0};
396 double precursor_mass_tolerance_upper_{20.0};
397 bool precursor_mass_tolerance_unit_ppm_{true};
399 bool fragment_mz_tolerance_unit_ppm_{true};
400private:
401
402
413 const MSSpectrum& spectrum,
414 const std::pair<size_t, size_t>& candidates_range,
415 const int16_t isotope_error,
416 const uint16_t precursor_charge);
426 float precursor_mass,
428 uint16_t charge);
429
433 void trimHits(SpectrumMatchesTopN& init_hits) const;
434
435 //since we work with TheoreticalSpectrumGenerator, we must transfer some of those member variables
442
443 // SpectrumGenerator-independent member variables
444 std::string digestion_enzyme_;
445 EnzymaticDigestion::Specificity enzyme_specificity_{EnzymaticDigestion::SPEC_FULL};
446
452
456
457 // Search Related member variables
458
466
// Instance delegate for the static isOpenSearchMode(): applies the same rule
// to this object's precursor tolerance bounds and unit flag.
468 bool isOpenSearchMode_() const noexcept
469 {
470 return isOpenSearchMode(precursor_mass_tolerance_lower_,
471 precursor_mass_tolerance_upper_,
472 precursor_mass_tolerance_unit_ppm_);
473 }
474
479 std::pair<float, float> computeMassWindow_(float precursor_mass) const;
480
481
482 };
483
484}
Representation of a peptide/protein sequence.
Definition AASequence.h:88
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Specificity
when querying for valid digestion products, this determines if the specificity of the two peptide end...
Definition EnzymaticDigestion.h:42
Generates from a set of Fasta files a 2D-datastructure which stores all theoretical masses of all b a...
Definition FragmentIndex.h:35
size_t bucketsize_
number of fragments per outer node
Definition FragmentIndex.h:393
uint16_t min_matched_peaks_
PSMs with fewer hits are discarded.
Definition FragmentIndex.h:459
bool add_x_ions_
Definition FragmentIndex.h:440
const ResidueModification * mod_ptr
pointer to the modification (for AASequence reconstruction)
Definition FragmentIndex.h:296
bool add_a_ions_
Definition FragmentIndex.h:438
void querySpectrum(const MSSpectrum &spectrum, SpectrumMatchesTopN &sms)
Queries one complete experimental spectrum against the database. Loops over all precursor charges St...
bool add_b_ions_
Definition FragmentIndex.h:436
static bool isOpenSearchMode(double lower_magnitude, double upper_magnitude, bool unit_ppm) noexcept
Definition FragmentIndex.h:210
void queryPeaks(SpectrumMatchesTopN &candidates, const MSSpectrum &spectrum, const std::pair< size_t, size_t > &candidates_range, const int16_t isotope_error, const uint16_t precursor_charge)
queries peaks for a given experimental spectrum with a set range of potential peptides,...
static IonOffsets ion_offsets_
Definition FragmentIndex.h:367
size_t buildModSlots_(const char *sequence, size_t seq_len, ModSlot *out_slots, bool is_protein_nterm=false, bool is_protein_cterm=false) const
ResidueModification::TermSpecificity term_spec
where this mod can be applied
Definition FragmentIndex.h:297
static std::once_flag mass_table_once_flag_
Definition FragmentIndex.h:354
std::pair< float, float > computeMassWindow_(float precursor_mass) const
StringList modifications_fixed_
Modifications that are applied to all peptides.
Definition FragmentIndex.h:453
float fragment_mz_tolerance_
Definition FragmentIndex.h:398
bool add_y_ions_
Definition FragmentIndex.h:437
std::vector< Peptide > fi_peptides_
vector of all (digested) peptides
Definition FragmentIndex.h:387
std::vector< VarModEntry > variable_cterm_mods_
Pure C-terminal variable mods (not residue-specific)
Definition FragmentIndex.h:347
size_t missed_cleavages_
number of missed cleavages
Definition FragmentIndex.h:447
float fragment_min_mz_
smallest fragment mz
Definition FragmentIndex.h:390
uint16_t min_precursor_charge_
minimal possible precursor charge (usually always 1)
Definition FragmentIndex.h:462
uint32_t max_processed_hits_
The number of PSMs that will be used; the rest are filtered out.
Definition FragmentIndex.h:465
float peptide_max_mass_
Definition FragmentIndex.h:449
uint16_t max_fragment_charge_
The maximal possible charge of the fragments.
Definition FragmentIndex.h:464
std::pair< size_t, size_t > getPeptidesInMassWindow(float precursor_mass, const std::pair< float, float > &window) const
std::vector< Hit > query(const Peak1D &peak, const std::pair< size_t, size_t > &peptide_idx_range, uint16_t peak_charge)
Queries one peak.
void generateFragmentsLightweight_(std::vector< Fragment > &fragments, const char *sequence, size_t seq_len, UInt32 peptide_idx, double n_term_mod_mass, double c_term_mod_mass, const double *residue_mod_masses) const
bool isOpenSearchMode_() const noexcept
Instance delegate — same rule, reads the member bounds.
Definition FragmentIndex.h:468
std::vector< Fragment > fi_fragments_
vector of all theoretical fragments (b- and y- ions)
Definition FragmentIndex.h:388
int16_t max_isotope_error_
Maximal possible isotope error (both only used for closed search)
Definition FragmentIndex.h:461
float fragment_max_mz_
largest fragment mz
Definition FragmentIndex.h:391
std::string digestion_enzyme_
Definition FragmentIndex.h:444
bool isBuild() const
Indicates whether the fragment index has been built.
size_t peptide_max_length_
Definition FragmentIndex.h:451
~FragmentIndex() override=default
Default destructor.
void generatePeptides(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
Generates all peptides from given fasta entries. If Bottom-up is set to false skips digestion....
std::vector< float > bucket_min_mz_
vector of the smallest fragment mz of each bucket
Definition FragmentIndex.h:394
double delta_mass
mass delta from this modification
Definition FragmentIndex.h:295
float peptide_min_mass_
Definition FragmentIndex.h:448
void searchDifferentPrecursorRanges(const MSSpectrum &spectrum, float precursor_mass, SpectrumMatchesTopN &sms, uint16_t charge)
In closed search, loops over all isotope errors. For each iteration, loops over all peaks with queryPeak...
int16_t min_isotope_error_
Minimal possible isotope error.
Definition FragmentIndex.h:460
uint16_t max_precursor_charge_
maximal possible precursor charge
Definition FragmentIndex.h:463
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
void clear()
Delete fragment index. Sets is_build=false.
StringList modifications_variable_
Variable modifications -> all possible combinations are created.
Definition FragmentIndex.h:454
size_t max_variable_mods_per_peptide_
Definition FragmentIndex.h:455
void build(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
Given a set of Fasta files, builds the Fragment Index datastructure (FID). First all fragments are so...
void trimHits(SpectrumMatchesTopN &init_hits) const
places the k-largest elements in the front of the input array. Inside of the k-largest elements and o...
const std::vector< Peptide > & getPeptides() const
Returns a reference to the internal peptide container.
AASequence reconstructModifiedSequence(const Peptide &peptide, const std::vector< FASTAFile::FASTAEntry > &fasta_entries) const
Reconstruct a fully modified AASequence from a Peptide's bitmask.
std::vector< VarModEntry > variable_nterm_mods_
Pure N-terminal variable mods (not residue-specific)
Definition FragmentIndex.h:345
size_t peptide_min_length_
Definition FragmentIndex.h:450
static void initResidueMassTable_()
bool add_c_ions_
Definition FragmentIndex.h:439
FragmentIndex()
Default constructor.
bool add_z_ions_
Definition FragmentIndex.h:441
static std::array< double, 128 > residue_mass_table_
Definition FragmentIndex.h:353
Precomputed ion-type mass offsets (from Residue::getInternalTo*Ion formulas)
Definition FragmentIndex.h:359
Match between a query peak and an entry in the DB.
Definition FragmentIndex.h:72
Entry in the per-AA variable modification lookup table.
Definition FragmentIndex.h:294
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
A 1-dimensional raw data point or peak.
Definition Peak1D.h:30
Representation of a modification on an amino acid residue.
Definition ResidueModification.h:55
TermSpecificity
Position where the modification is allowed to occur.
Definition ResidueModification.h:74
uint32_t UInt32
Unsigned integer type (32bit)
Definition Types.h:33
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< String > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
One entry in the fragment index.
Definition FragmentIndex.h:270
Fragment(UInt32 peptide_idx, float fragment_mz)
Definition FragmentIndex.h:272
Definition FragmentIndex.h:222
UInt32 peptide_idx
Definition FragmentIndex.h:227
Hit(UInt32 peptide_idx, float fragment_mz)
Definition FragmentIndex.h:223
float fragment_mz
Definition FragmentIndex.h:228
A candidate modification slot for a specific peptide.
Definition FragmentIndex.h:306
const ResidueModification * mod_ptr
for AASequence reconstruction
Definition FragmentIndex.h:309
uint16_t position
residue index, or NTERM_SLOT/CTERM_SLOT
Definition FragmentIndex.h:307
double delta_mass
mass delta
Definition FragmentIndex.h:308
Compact descriptor of a peptide instance held by the FragmentIndex.
Definition FragmentIndex.h:52
std::pair< uint16_t, uint16_t > sequence_
{start, length} within the source protein sequence (start is 0-based; length in residues)
Definition FragmentIndex.h:64
uint32_t mod_bitmask_
Bitmask of active variable mod slots (0 = unmodified/fixed-only; up to 32 slots)
Definition FragmentIndex.h:63
UInt32 protein_idx
0-based index into FASTA entries provided to build(); identifies the source protein
Definition FragmentIndex.h:62
float precursor_mz_
Mono-isotopic m/z at charge 1 (M+H)+ of this peptide; used for sorting/filtering.
Definition FragmentIndex.h:65
Peptide(UInt32 protein_idx, uint32_t mod_bitmask, std::pair< uint16_t, uint16_t > sequence, float precursor_mz)
Definition FragmentIndex.h:55
container for SpectrumMatch. Also keeps count of total number of candidates and total number of match...
Definition FragmentIndex.h:84
SpectrumMatchesTopN & operator+=(const SpectrumMatchesTopN &other)
Appends one SpectrumMatchesTopN to another. Adds up the number of all matched peaks....
Definition FragmentIndex.h:96
void clear()
Definition FragmentIndex.h:103
std::vector< SpectrumMatch > hits_
The preliminary candidates.
Definition FragmentIndex.h:85