OpenMS
Loading...
Searching...
No Matches
Biosaur2Algorithm.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
14
15#include <map>
16#include <vector>
17
18namespace OpenMS
19{
20
50class OPENMS_DLLAPI Biosaur2Algorithm :
52{
53public:
62 struct Hill
63 {
64 std::vector<Size> scan_indices;
65 std::vector<Size> peak_indices;
66 std::vector<double> mz_values;
67 std::vector<double> intensities;
68 std::vector<double> rt_values;
69 std::vector<double> drift_times;
70 std::vector<double> ion_mobilities;
71 double mz_weighted_mean = 0.0;
72 double rt_start = 0.0;
73 double rt_end = 0.0;
74 double rt_apex = 0.0;
75 double intensity_apex = 0.0;
76 double intensity_sum = 0.0;
77 double drift_time_median = -1.0;
78 double ion_mobility_median = -1.0;
79 Size length = 0;
80 Size hill_idx = 0;
81 };
82
91 {
92 Size hill_idx = 0;
93 Size isotope_number = 0;
94 double mass_diff_ppm = 0.0;
95 double cos_corr = 0.0;
96 };
97
107 {
108 double mz = 0.0;
109 double rt_start = 0.0;
110 double rt_end = 0.0;
111 double rt_apex = 0.0;
112 double intensity_apex = 0.0;
113 double intensity_sum = 0.0;
114 int charge = 0;
115 Size n_isotopes = 0;
116 Size n_scans = 0;
117 double mass_calib = 0.0;
118 double drift_time = -1.0;
119 double ion_mobility = -1.0;
120 std::vector<IsotopeCandidate> isotopes;
121 Size mono_hill_idx = 0;
122 };
123
130
136 void setMSData(const MSExperiment& ms_data);
137
143 void setMSData(MSExperiment&& ms_data);
144
151
157 const MSExperiment& getMSData() const;
158
169 void run(FeatureMap& feature_map);
170
192 void run(FeatureMap& feature_map,
193 std::vector<Hill>& hills,
194 std::vector<PeptideFeature>& peptide_features);
195
205 void writeTSV(const std::vector<PeptideFeature>& features, const String& filename) const;
206
216 void writeHills(const std::vector<Hill>& hills, const String& filename) const;
217
218protected:
220 void updateMembers_() override;
221
222private:
224
225
233 {
234 Size hill_index = 0;
235 Size first_scan = 0;
236 Size last_scan = 0;
237 };
238
246 {
247 Size mono_index = 0;
248 double mono_mz = 0.0;
249 int charge = 0;
250 double cos_cor_isotopes = 0.0;
251 std::vector<IsotopeCandidate> isotopes;
252 Size n_scans = 0;
253 };
254
256
258
259
267 double calculatePPM_(double mz1, double mz2) const;
268
275 double calculateMedian_(const std::vector<double>& values) const;
276
282 double cosineCorrelation1D_(const std::vector<double>& v1,
283 const std::vector<double>& v2) const;
284
292 std::pair<double, Size> checkingCosCorrelationForCarbon_(const std::vector<double>& theor_full,
293 const std::vector<double>& exp_full,
294 double thresh) const;
295
304 std::pair<std::vector<double>, Size> computeAveragine_(double neutral_mass,
305 double apex_intensity) const;
306
316 std::vector<double> meanFilter_(const std::vector<double>& data, Size window) const;
317
327 std::pair<double, double> calibrateMass_(const std::vector<double>& mass_errors, double bin_width = 0.05) const;
328
337 double htol_ppm,
338 double min_intensity,
339 double min_mz,
340 double max_mz) const;
341
350 void processTOF_(MSExperiment& exp) const;
351
358
369 void centroidPASEFData_(MSExperiment& exp, double mz_step, double pasef_tolerance) const;
370
386 std::vector<Hill> detectHills_(const MSExperiment& exp, double htol_ppm, double min_intensity, double min_mz, double max_mz, bool use_im, std::vector<double>* hill_mass_diffs = nullptr) const;
387
396 void linkScanToHills_(const MSSpectrum& spectrum,
397 Size scan_idx,
398 double htol_ppm,
399 double min_intensity,
400 double min_mz,
401 double max_mz,
402 double mz_step,
403 bool use_im_global,
404 std::vector<Hill>& hills,
405 Size& hill_idx_counter,
406 std::vector<Size>& prev_peak_to_hill,
407 const MSSpectrum*& prev_spectrum_ptr,
408 std::map<int, std::vector<int>>& prev_fast_dict,
409 std::vector<int>& prev_im_bins,
410 std::vector<double>* hill_mass_diffs) const;
411
419 std::vector<Hill> processHills_(const std::vector<Hill>& hills, Size min_length) const;
420
432 std::vector<Hill> splitHills_(const std::vector<Hill>& hills, double hvf, Size min_length) const;
433
445 Size checkIsotopeValleySplit_(const std::vector<IsotopeCandidate>& isotopes, const std::vector<Hill>& hills, double ivf) const;
446
461 std::map<int, std::pair<double, double>> performInitialIsotopeCalibration_(const std::vector<Hill>& hills,
462 double itol_ppm,
463 int min_charge,
464 int max_charge,
465 bool enable_isotope_calib) const;
466
479 double buildFastMzLookup_(const std::vector<Hill>& hills,
480 bool use_im,
481 std::map<int, std::vector<FastHillEntry>>& hills_mz_fast,
482 std::vector<int>& hill_im_bins) const;
483
503 std::vector<PatternCandidate> generateIsotopeCandidates_(const std::vector<Hill>& hills,
504 double itol_ppm,
505 int min_charge,
506 int max_charge,
507 double ivf,
508 double mz_step,
509 const std::map<int, std::vector<FastHillEntry>>& hills_mz_fast,
510 const std::map<Size, Size>& hill_idx_to_index,
511 const std::vector<int>& hill_im_bins,
512 bool use_im) const;
513
525 std::vector<PatternCandidate> applyRtFiltering_(const std::vector<PatternCandidate>& candidates,
526 const std::vector<Hill>& hills,
527 const std::map<Size, Size>& hill_idx_to_index) const;
528
540 std::map<int, std::pair<double, double>> refineIsotopeCalibration_(const std::vector<PatternCandidate>& candidates,
541 double itol_ppm,
542 bool enable_isotope_calib) const;
543
557 std::vector<PatternCandidate> filterByCalibration_(const std::vector<PatternCandidate>& candidates,
558 const std::vector<Hill>& hills,
559 const std::map<Size, Size>& hill_idx_to_index,
560 const std::map<int, std::pair<double, double>>& isotope_calib_map_ready,
561 bool enable_isotope_calib) const;
562
577 std::vector<PeptideFeature> selectNonOverlappingPatterns_(const std::vector<PatternCandidate>& filtered_ready,
578 const std::vector<Hill>& hills,
579 bool negative_mode,
580 int iuse,
581 double itol_ppm) const;
582
601 std::vector<PeptideFeature> detectIsotopePatterns_(std::vector<Hill>& hills, double itol_ppm, int min_charge, int max_charge, bool negative_mode, double ivf, int iuse, bool enable_isotope_calib, bool use_im) const;
602
615 FeatureMap convertToFeatureMap_(const std::vector<PeptideFeature>& features,
616 const std::vector<Hill>& hills) const;
617
634 void debugCheckIsotopeConsistency_(const char* stage_label,
635 double mono_mz_center,
636 double mono_rt_apex,
637 Size mono_hill_idx,
638 int charge,
639 double itol_ppm,
640 const Hill& iso_hill,
641 Size isotope_number) const;
642
655 double cosineCorrelation_(const std::vector<double>& intensities1, const std::vector<Size>& scans1,
656 const std::vector<double>& intensities2, const std::vector<Size>& scans2) const;
657
667 bool shouldThrowForMissingIM_(const MSSpectrum& spectrum) const;
668
681 void processFAIMSGroup_(double faims_cv,
682 MSExperiment& group_exp,
683 double original_paseftol,
684 std::vector<Hill>& hills_out,
685 std::vector<PeptideFeature>& features_out);
687
689
691
692 // Algorithm parameters (cached from Param for performance)
693 double mini_;
694 double minmz_;
695 double maxmz_;
696 double htol_;
697 double itol_;
698 double hvf_;
699 double ivf_;
701 int cmin_;
702 int cmax_;
703 double pasefmini_;
705 int iuse_;
711 double paseftol_;
712 double hrttol_;
716};
717
718} // namespace OpenMS
Implementation of the Biosaur2 feature detection workflow for LC-MS1 data.
Definition Biosaur2Algorithm.h:52
int cmin_
Minimum charge state to consider.
Definition Biosaur2Algorithm.h:701
double pasefmini_
Minimum intensity for PASEF/TIMS clusters after centroiding.
Definition Biosaur2Algorithm.h:703
std::vector< double > drift_times
Drift time values (TIMS data), empty if not available.
Definition Biosaur2Algorithm.h:69
void centroidProfileSpectra_(MSExperiment &exp) const
Centroid profile spectra using PeakPickerHiRes.
String convex_hull_mode_
Representation of feature convex hulls ("mass_traces" vs. "bounding_box")
Definition Biosaur2Algorithm.h:713
double buildFastMzLookup_(const std::vector< Hill > &hills, bool use_im, std::map< int, std::vector< FastHillEntry > > &hills_mz_fast, std::vector< int > &hill_im_bins) const
Build fast m/z and optional ion-mobility lookup structures for hills.
double minmz_
Minimum m/z value.
Definition Biosaur2Algorithm.h:694
double mini_
Minimum intensity threshold.
Definition Biosaur2Algorithm.h:693
void processFAIMSGroup_(double faims_cv, MSExperiment &group_exp, double original_paseftol, std::vector< Hill > &hills_out, std::vector< PeptideFeature > &features_out)
Process a single FAIMS compensation voltage group.
std::vector< double > mz_values
m/z values of peaks in this hill
Definition Biosaur2Algorithm.h:66
void linkScanToHills_(const MSSpectrum &spectrum, Size scan_idx, double htol_ppm, double min_intensity, double min_mz, double max_mz, double mz_step, bool use_im_global, std::vector< Hill > &hills, Size &hill_idx_counter, std::vector< Size > &prev_peak_to_hill, const MSSpectrum *&prev_spectrum_ptr, std::map< int, std::vector< int > > &prev_fast_dict, std::vector< int > &prev_im_bins, std::vector< double > *hill_mass_diffs) const
Link peaks in a single scan to existing hills or start new hills.
std::vector< double > rt_values
Retention time values corresponding to each peak.
Definition Biosaur2Algorithm.h:68
Size checkIsotopeValleySplit_(const std::vector< IsotopeCandidate > &isotopes, const std::vector< Hill > &hills, double ivf) const
Evaluate whether isotope pattern should be truncated at valley positions.
void run(FeatureMap &feature_map, std::vector< Hill > &hills, std::vector< PeptideFeature > &peptide_features)
Execute the Biosaur2 workflow on the stored MS1 experiment.
bool faims_merge_features_
Whether to merge features at different FAIMS CV values representing the same analyte.
Definition Biosaur2Algorithm.h:714
bool profile_mode_
Whether to centroid profile data using PeakPickerHiRes.
Definition Biosaur2Algorithm.h:708
std::vector< double > meanFilter_(const std::vector< double > &data, Size window) const
Apply a mean filter (moving average) to smooth data.
double htol_
Mass tolerance in ppm for hill detection.
Definition Biosaur2Algorithm.h:696
bool shouldThrowForMissingIM_(const MSSpectrum &spectrum) const
Check if missing ion mobility data should be treated as an error.
double hrttol_
Maximum RT difference between monoisotopic and isotope apex (0=disable)
Definition Biosaur2Algorithm.h:712
const MSExperiment & getMSData() const
Get const reference to MS data.
std::vector< PatternCandidate > applyRtFiltering_(const std::vector< PatternCandidate > &candidates, const std::vector< Hill > &hills, const std::map< Size, Size > &hill_idx_to_index) const
Apply RT-apex based filtering to isotope pattern candidates.
std::vector< Hill > detectHills_(const MSExperiment &exp, double htol_ppm, double min_intensity, double min_mz, double max_mz, bool use_im, std::vector< double > *hill_mass_diffs=nullptr) const
Detect hills (continuous m/z traces) in the MS experiment.
Size minlh_
Minimum number of scans required for a hill.
Definition Biosaur2Algorithm.h:700
std::vector< PeptideFeature > detectIsotopePatterns_(std::vector< Hill > &hills, double itol_ppm, int min_charge, int max_charge, bool negative_mode, double ivf, int iuse, bool enable_isotope_calib, bool use_im) const
Detect isotope patterns and assemble peptide features.
std::vector< double > ion_mobilities
Ion mobility values, empty if not available.
Definition Biosaur2Algorithm.h:70
std::map< int, std::pair< double, double > > refineIsotopeCalibration_(const std::vector< PatternCandidate > &candidates, double itol_ppm, bool enable_isotope_calib) const
Refine isotope mass calibration based on initial pattern candidates.
std::vector< Hill > splitHills_(const std::vector< Hill > &hills, double hvf, Size min_length) const
Split hills at valley positions to separate co-eluting species.
std::pair< double, Size > checkingCosCorrelationForCarbon_(const std::vector< double > &theor_full, const std::vector< double > &exp_full, double thresh) const
Check cosine correlation for averagine-based isotope intensities.
Biosaur2Algorithm()
Default constructor.
bool ignore_iso_calib_
Whether to disable automatic isotope mass calibration.
Definition Biosaur2Algorithm.h:710
std::pair< std::vector< double >, Size > computeAveragine_(double neutral_mass, double apex_intensity) const
Compute averagine-based theoretical isotope intensities.
double itol_
Mass tolerance in ppm for isotope pattern detection.
Definition Biosaur2Algorithm.h:697
std::vector< Size > scan_indices
Indices of spectra containing peaks of this hill.
Definition Biosaur2Algorithm.h:64
std::vector< PatternCandidate > generateIsotopeCandidates_(const std::vector< Hill > &hills, double itol_ppm, int min_charge, int max_charge, double ivf, double mz_step, const std::map< int, std::vector< FastHillEntry > > &hills_mz_fast, const std::map< Size, Size > &hill_idx_to_index, const std::vector< int > &hill_im_bins, bool use_im) const
Generate initial isotope pattern candidates for all monoisotopic hills.
bool tof_mode_
Whether to enable TOF-specific intensity filtering.
Definition Biosaur2Algorithm.h:707
void setMSData(const MSExperiment &ms_data)
Set the MS data used for feature detection (copy version)
void run(FeatureMap &feature_map)
Execute the Biosaur2 workflow on the stored MS1 experiment (simplified interface)
int cmax_
Maximum charge state to consider.
Definition Biosaur2Algorithm.h:702
int iuse_
Number of isotopes for intensity calculation (0=mono, -1=all, N=mono+N)
Definition Biosaur2Algorithm.h:705
std::vector< double > intensities
Intensity values of peaks in this hill.
Definition Biosaur2Algorithm.h:67
void writeHills(const std::vector< Hill > &hills, const String &filename) const
Export the detected hills as TSV for diagnostic purposes.
FeatureMap convertToFeatureMap_(const std::vector< PeptideFeature > &features, const std::vector< Hill > &hills) const
Convert peptide features to OpenMS FeatureMap format.
double maxmz_
Maximum m/z value.
Definition Biosaur2Algorithm.h:695
std::vector< PatternCandidate > filterByCalibration_(const std::vector< PatternCandidate > &candidates, const std::vector< Hill > &hills, const std::map< Size, Size > &hill_idx_to_index, const std::map< int, std::pair< double, double > > &isotope_calib_map_ready, bool enable_isotope_calib) const
Filter isotope pattern candidates using refined calibration and cosine checks.
std::pair< double, double > calibrateMass_(const std::vector< double > &mass_errors, double bin_width=0.05) const
Estimate mass calibration parameters from a distribution of mass errors.
double paseftol_
Ion mobility tolerance for PASEF/TIMS data (0=disable)
Definition Biosaur2Algorithm.h:711
bool use_hill_calib_
Whether to use automatic hill mass tolerance calibration.
Definition Biosaur2Algorithm.h:709
void debugCheckIsotopeConsistency_(const char *stage_label, double mono_mz_center, double mono_rt_apex, Size mono_hill_idx, int charge, double itol_ppm, const Hill &iso_hill, Size isotope_number) const
Debug helper to log obviously inconsistent isotope assignments.
std::vector< Size > peak_indices
Indices of peaks within each spectrum.
Definition Biosaur2Algorithm.h:65
Size pasefminlh_
Minimum number of points per PASEF/TIMS cluster.
Definition Biosaur2Algorithm.h:704
double ivf_
Isotope valley factor for splitting isotope patterns.
Definition Biosaur2Algorithm.h:699
double cosineCorrelation1D_(const std::vector< double > &v1, const std::vector< double > &v2) const
Compute a cosine correlation between two 1D intensity vectors.
void updateMembers_() override
Update internal member variables from parameters (called automatically when parameters change)
bool negative_mode_
Whether to use negative ion mode.
Definition Biosaur2Algorithm.h:706
MSExperiment & getMSData()
Get non-const reference to MS data.
double calculatePPM_(double mz1, double mz2) const
Calculate the mass accuracy (ppm) between two m/z values.
MSExperiment ms_data_
Input LC-MS data.
Definition Biosaur2Algorithm.h:690
void centroidPASEFData_(MSExperiment &exp, double mz_step, double pasef_tolerance) const
Centroid PASEF/TIMS spectra in joint m/z-ion mobility space.
double calculateMedian_(const std::vector< double > &values) const
Calculate the median of a vector of values.
void writeTSV(const std::vector< PeptideFeature > &features, const String &filename) const
Export detected peptide features to a Biosaur2-compatible TSV file.
std::vector< Hill > processHills_(const std::vector< Hill > &hills, Size min_length) const
Filter and process hills by applying length constraints and computing summary statistics.
double cosineCorrelation_(const std::vector< double > &intensities1, const std::vector< Size > &scans1, const std::vector< double > &intensities2, const std::vector< Size > &scans2) const
Compute cosine correlation between two intensity traces.
std::map< int, std::pair< double, double > > performInitialIsotopeCalibration_(const std::vector< Hill > &hills, double itol_ppm, int min_charge, int max_charge, bool enable_isotope_calib) const
Perform an initial mass calibration for isotope spacings based on raw hills.
void setMSData(MSExperiment &&ms_data)
Set the MS data used for feature detection (move version)
double computeHillMzStep_(const MSExperiment &exp, double htol_ppm, double min_intensity, double min_mz, double max_mz) const
Determine m/z binning step for hill detection.
void processTOF_(MSExperiment &exp) const
Apply TOF-specific intensity filtering.
std::vector< IsotopeCandidate > isotopes
List of associated isotope peaks.
Definition Biosaur2Algorithm.h:120
std::vector< PeptideFeature > selectNonOverlappingPatterns_(const std::vector< PatternCandidate > &filtered_ready, const std::vector< Hill > &hills, bool negative_mode, int iuse, double itol_ppm) const
Greedily select non-overlapping isotope patterns and assemble peptide features.
double hvf_
Hill valley factor for splitting hills at valleys.
Definition Biosaur2Algorithm.h:698
Lightweight index entry for fast m/z-based hill lookup.
Definition Biosaur2Algorithm.h:233
Representation of a single hill (continuous m/z trace across adjacent scans).
Definition Biosaur2Algorithm.h:63
Candidate isotope peak that can be associated with a monoisotopic hill.
Definition Biosaur2Algorithm.h:91
Internal representation of a candidate isotope pattern.
Definition Biosaur2Algorithm.h:246
Aggregated properties of a detected peptide feature.
Definition Biosaur2Algorithm.h:107
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
A container for features.
Definition FeatureMap.h:82
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
A more convenient string class.
Definition String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19