OpenMS
Loading...
Searching...
No Matches
Biosaur2Algorithm.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
14
15#include <map>
16#include <vector>
17
18namespace OpenMS
19{
20
50class OPENMS_DLLAPI Biosaur2Algorithm :
52{
53public:
62 struct Hill // Representation of a single hill (continuous m/z trace across adjacent scans).
63 {
64 std::vector<Size> scan_indices; ///< Indices of spectra containing peaks of this hill.
65 std::vector<Size> peak_indices; ///< Indices of peaks within each spectrum.
66 std::vector<double> mz_values; ///< m/z values of peaks in this hill.
67 std::vector<double> intensities; ///< Intensity values of peaks in this hill.
68 std::vector<double> rt_values; ///< Retention time values corresponding to each peak.
69 std::vector<double> drift_times; ///< Drift time values (TIMS data), empty if not available.
70 std::vector<double> ion_mobilities; ///< Ion mobility values, empty if not available.
71 double mz_weighted_mean = 0.0;
72 double rt_start = 0.0;
73 double rt_end = 0.0;
74 double rt_apex = 0.0;
75 double intensity_apex = 0.0;
76 double intensity_sum = 0.0;
77 double drift_time_median = -1.0; // NOTE(review): -1.0 on the two medians presumably marks "not available" -- confirm
78 double ion_mobility_median = -1.0;
79 Size length = 0;
80 Size hill_idx = 0;
81 };
82
91 { // Candidate isotope peak that can be associated with a monoisotopic hill. NOTE(review): the struct header line is absent from this listing; presumably IsotopeCandidate -- confirm.
92 Size hill_idx = 0;
93 Size isotope_number = 0;
94 double mass_diff_ppm = 0.0;
95 double cos_corr = 0.0;
96 };
97
107 { // Aggregated properties of a detected peptide feature. NOTE(review): the struct header line is absent from this listing; presumably PeptideFeature -- confirm.
108 double mz = 0.0;
109 double rt_start = 0.0;
110 double rt_end = 0.0;
111 double rt_apex = 0.0;
112 double intensity_apex = 0.0;
113 double intensity_sum = 0.0;
114 int charge = 0;
115 Size n_isotopes = 0;
116 Size n_scans = 0;
117 double mass_calib = 0.0;
118 double drift_time = -1.0;
119 double ion_mobility = -1.0;
120 std::vector<IsotopeCandidate> isotopes; ///< List of associated isotope peaks.
121 Size mono_hill_idx = 0;
122 };
123
130
136 void setMSData(const MSExperiment& ms_data); // Set the MS data used for feature detection (copy version).
137
143 void setMSData(MSExperiment&& ms_data); // Set the MS data used for feature detection (move version).
144
151
157 const MSExperiment& getMSData() const; // Get const reference to MS data.
158
169 void run(FeatureMap& feature_map); // Execute the Biosaur2 workflow on the stored MS1 experiment (simplified interface).
170
196 void run(FeatureMap& feature_map, // Execute the Biosaur2 workflow on the stored MS1 experiment.
197 std::vector<Hill>& hills,
198 std::vector<PeptideFeature>& peptide_features);
199
209 void writeTSV(const std::vector<PeptideFeature>& features, const std::string& filename) const; // Export detected peptide features to a Biosaur2-compatible TSV file.
210
220 void writeHills(const std::vector<Hill>& hills, const std::string& filename) const; // Export the detected hills as TSV for diagnostic purposes.
221
222protected:
224 void updateMembers_() override; // Update internal member variables from parameters (called automatically when parameters change).
225
226private:
228
229
237 { // Lightweight index entry for fast m/z-based hill lookup. NOTE(review): the struct header line is absent from this listing; presumably FastHillEntry -- confirm.
238 Size hill_index = 0;
239 Size first_scan = 0;
240 Size last_scan = 0;
241 };
242
250 { // Internal representation of a candidate isotope pattern. NOTE(review): the struct header line is absent from this listing; presumably PatternCandidate -- confirm.
251 Size mono_index = 0;
252 double mono_mz = 0.0;
253 int charge = 0;
254 double cos_cor_isotopes = 0.0;
255 std::vector<IsotopeCandidate> isotopes;
256 Size n_scans = 0;
257 };
258
260
262
263
271 double calculatePPM_(double mz1, double mz2) const; // Calculate the mass accuracy (ppm) between two m/z values.
272
279 double calculateMedian_(const std::vector<double>& values) const; // Calculate the median of a vector of values.
280
286 double cosineCorrelation1D_(const std::vector<double>& v1, // Compute a cosine correlation between two 1D intensity vectors.
287 const std::vector<double>& v2) const;
288
296 std::pair<double, Size> checkingCosCorrelationForCarbon_(const std::vector<double>& theor_full, // Check cosine correlation for averagine-based isotope intensities.
297 const std::vector<double>& exp_full,
298 double thresh) const;
299
308 std::pair<std::vector<double>, Size> computeAveragine_(double neutral_mass, // Compute averagine-based theoretical isotope intensities.
309 double apex_intensity) const;
310
320 std::vector<double> meanFilter_(const std::vector<double>& data, Size window) const; // Apply a mean filter (moving average) to smooth data.
321
331 std::pair<double, double> calibrateMass_(const std::vector<double>& mass_errors, double bin_width = 0.05) const; // Estimate mass calibration parameters from a distribution of mass errors.
332
341 double htol_ppm,
342 double min_intensity,
343 double min_mz,
344 double max_mz) const;
345
354 void processTOF_(MSExperiment& exp) const; // Apply TOF-specific intensity filtering.
355
362
373 void centroidPASEFData_(MSExperiment& exp, double mz_step, double pasef_tolerance) const; // Centroid PASEF/TIMS spectra in joint m/z-ion mobility space.
374
390 std::vector<Hill> detectHills_(const MSExperiment& exp, double htol_ppm, double min_intensity, double min_mz, double max_mz, bool use_im, std::vector<double>* hill_mass_diffs = nullptr) const; // Detect hills (continuous m/z traces) in the MS experiment.
391
400 void linkScanToHills_(const MSSpectrum& spectrum, // Link peaks in a single scan to existing hills or start new hills.
401 Size scan_idx,
402 double htol_ppm,
403 double min_intensity,
404 double min_mz,
405 double max_mz,
406 double mz_step,
407 bool use_im_global,
408 std::vector<Hill>& hills,
409 Size& hill_idx_counter,
410 std::vector<Size>& prev_peak_to_hill,
411 const MSSpectrum*& prev_spectrum_ptr,
412 std::map<int, std::vector<int>>& prev_fast_dict,
413 std::vector<int>& prev_im_bins,
414 std::vector<double>* hill_mass_diffs) const;
415
423 std::vector<Hill> processHills_(const std::vector<Hill>& hills, Size min_length) const; // Filter and process hills by applying length constraints and computing summary statistics.
424
436 std::vector<Hill> splitHills_(const std::vector<Hill>& hills, double hvf, Size min_length) const; // Split hills at valley positions to separate co-eluting species.
437
449 Size checkIsotopeValleySplit_(const std::vector<IsotopeCandidate>& isotopes, const std::vector<Hill>& hills, double ivf) const; // Evaluate whether isotope pattern should be truncated at valley positions.
450
465 std::map<int, std::pair<double, double>> performInitialIsotopeCalibration_(const std::vector<Hill>& hills, // Perform an initial mass calibration for isotope spacings based on raw hills.
466 double itol_ppm,
467 int min_charge,
468 int max_charge,
469 bool enable_isotope_calib) const;
470
483 double buildFastMzLookup_(const std::vector<Hill>& hills, // Build fast m/z and optional ion-mobility lookup structures for hills.
484 bool use_im,
485 std::map<int, std::vector<FastHillEntry>>& hills_mz_fast,
486 std::vector<int>& hill_im_bins) const;
487
507 std::vector<PatternCandidate> generateIsotopeCandidates_(const std::vector<Hill>& hills, // Generate initial isotope pattern candidates for all monoisotopic hills.
508 double itol_ppm,
509 int min_charge,
510 int max_charge,
511 double ivf,
512 double mz_step,
513 const std::map<int, std::vector<FastHillEntry>>& hills_mz_fast,
514 const std::map<Size, Size>& hill_idx_to_index,
515 const std::vector<int>& hill_im_bins,
516 bool use_im) const;
517
529 std::vector<PatternCandidate> applyRtFiltering_(const std::vector<PatternCandidate>& candidates, // Apply RT-apex based filtering to isotope pattern candidates.
530 const std::vector<Hill>& hills,
531 const std::map<Size, Size>& hill_idx_to_index) const;
532
544 std::map<int, std::pair<double, double>> refineIsotopeCalibration_(const std::vector<PatternCandidate>& candidates, // Refine isotope mass calibration based on initial pattern candidates.
545 double itol_ppm,
546 bool enable_isotope_calib) const;
547
561 std::vector<PatternCandidate> filterByCalibration_(const std::vector<PatternCandidate>& candidates, // Filter isotope pattern candidates using refined calibration and cosine checks.
562 const std::vector<Hill>& hills,
563 const std::map<Size, Size>& hill_idx_to_index,
564 const std::map<int, std::pair<double, double>>& isotope_calib_map_ready,
565 bool enable_isotope_calib) const;
566
581 std::vector<PeptideFeature> selectNonOverlappingPatterns_(const std::vector<PatternCandidate>& filtered_ready, // Greedily select non-overlapping isotope patterns and assemble peptide features.
582 const std::vector<Hill>& hills,
583 bool negative_mode,
584 int iuse,
585 double itol_ppm) const;
586
605 std::vector<PeptideFeature> detectIsotopePatterns_(std::vector<Hill>& hills, double itol_ppm, int min_charge, int max_charge, bool negative_mode, double ivf, int iuse, bool enable_isotope_calib, bool use_im) const; // Detect isotope patterns and assemble peptide features.
606
619 FeatureMap convertToFeatureMap_(const std::vector<PeptideFeature>& features, // Convert peptide features to OpenMS FeatureMap format.
620 const std::vector<Hill>& hills) const;
621
638 void debugCheckIsotopeConsistency_(const char* stage_label, // Debug helper to log obviously inconsistent isotope assignments.
639 double mono_mz_center,
640 double mono_rt_apex,
641 Size mono_hill_idx,
642 int charge,
643 double itol_ppm,
644 const Hill& iso_hill,
645 Size isotope_number) const;
646
659 double cosineCorrelation_(const std::vector<double>& intensities1, const std::vector<Size>& scans1, // Compute cosine correlation between two intensity traces.
660 const std::vector<double>& intensities2, const std::vector<Size>& scans2) const;
661
671 bool shouldThrowForMissingIM_(const MSSpectrum& spectrum) const; // Check if missing ion mobility data should be treated as an error.
672
685 void processFAIMSGroup_(double faims_cv, // Process a single FAIMS compensation voltage group.
686 MSExperiment& group_exp,
687 double original_paseftol,
688 std::vector<Hill>& hills_out,
689 std::vector<PeptideFeature>& features_out);
691
693
695
696 // Algorithm parameters (cached from Param for performance)
697 double mini_; ///< Minimum intensity threshold.
698 double minmz_; ///< Minimum m/z value.
699 double maxmz_; ///< Maximum m/z value.
700 double htol_; ///< Mass tolerance in ppm for hill detection.
701 double itol_; ///< Mass tolerance in ppm for isotope pattern detection.
702 double hvf_; ///< Hill valley factor for splitting hills at valleys.
703 double ivf_; ///< Isotope valley factor for splitting isotope patterns.
705 int cmin_; ///< Minimum charge state to consider.
706 int cmax_; ///< Maximum charge state to consider.
707 double pasefmini_; ///< Minimum intensity for PASEF/TIMS clusters after centroiding.
709 int iuse_; ///< Number of isotopes for intensity calculation (0=mono, -1=all, N=mono+N).
715 double paseftol_; ///< Ion mobility tolerance for PASEF/TIMS data (0=disable).
716 double hrttol_; ///< Maximum RT difference between monoisotopic and isotope apex (0=disable).
717 std::string convex_hull_mode_; ///< Representation of feature convex hulls ("mass_traces" vs. "bounding_box").
720};
721
722} // namespace OpenMS
Implementation of the Biosaur2 feature detection workflow for LC-MS1 data.
Definition Biosaur2Algorithm.h:52
int cmin_
Minimum charge state to consider.
Definition Biosaur2Algorithm.h:705
double pasefmini_
Minimum intensity for PASEF/TIMS clusters after centroiding.
Definition Biosaur2Algorithm.h:707
std::vector< double > drift_times
Drift time values (TIMS data), empty if not available.
Definition Biosaur2Algorithm.h:69
void centroidProfileSpectra_(MSExperiment &exp) const
Centroid profile spectra using PeakPickerHiRes.
void writeHills(const std::vector< Hill > &hills, const std::string &filename) const
Export the detected hills as TSV for diagnostic purposes.
double buildFastMzLookup_(const std::vector< Hill > &hills, bool use_im, std::map< int, std::vector< FastHillEntry > > &hills_mz_fast, std::vector< int > &hill_im_bins) const
Build fast m/z and optional ion-mobility lookup structures for hills.
void writeTSV(const std::vector< PeptideFeature > &features, const std::string &filename) const
Export detected peptide features to a Biosaur2-compatible TSV file.
double minmz_
Minimum m/z value.
Definition Biosaur2Algorithm.h:698
double mini_
Minimum intensity threshold.
Definition Biosaur2Algorithm.h:697
std::string convex_hull_mode_
Representation of feature convex hulls ("mass_traces" vs. "bounding_box")
Definition Biosaur2Algorithm.h:717
void processFAIMSGroup_(double faims_cv, MSExperiment &group_exp, double original_paseftol, std::vector< Hill > &hills_out, std::vector< PeptideFeature > &features_out)
Process a single FAIMS compensation voltage group.
std::vector< double > mz_values
m/z values of peaks in this hill
Definition Biosaur2Algorithm.h:66
void linkScanToHills_(const MSSpectrum &spectrum, Size scan_idx, double htol_ppm, double min_intensity, double min_mz, double max_mz, double mz_step, bool use_im_global, std::vector< Hill > &hills, Size &hill_idx_counter, std::vector< Size > &prev_peak_to_hill, const MSSpectrum *&prev_spectrum_ptr, std::map< int, std::vector< int > > &prev_fast_dict, std::vector< int > &prev_im_bins, std::vector< double > *hill_mass_diffs) const
Link peaks in a single scan to existing hills or start new hills.
std::vector< double > rt_values
Retention time values corresponding to each peak.
Definition Biosaur2Algorithm.h:68
Size checkIsotopeValleySplit_(const std::vector< IsotopeCandidate > &isotopes, const std::vector< Hill > &hills, double ivf) const
Evaluate whether isotope pattern should be truncated at valley positions.
void run(FeatureMap &feature_map, std::vector< Hill > &hills, std::vector< PeptideFeature > &peptide_features)
Execute the Biosaur2 workflow on the stored MS1 experiment.
bool faims_merge_features_
Whether to merge features at different FAIMS CV values representing the same analyte.
Definition Biosaur2Algorithm.h:718
bool profile_mode_
Whether to centroid profile data using PeakPickerHiRes.
Definition Biosaur2Algorithm.h:712
std::vector< double > meanFilter_(const std::vector< double > &data, Size window) const
Apply a mean filter (moving average) to smooth data.
double htol_
Mass tolerance in ppm for hill detection.
Definition Biosaur2Algorithm.h:700
bool shouldThrowForMissingIM_(const MSSpectrum &spectrum) const
Check if missing ion mobility data should be treated as an error.
double hrttol_
Maximum RT difference between monoisotopic and isotope apex (0=disable)
Definition Biosaur2Algorithm.h:716
const MSExperiment & getMSData() const
Get const reference to MS data.
std::vector< PatternCandidate > applyRtFiltering_(const std::vector< PatternCandidate > &candidates, const std::vector< Hill > &hills, const std::map< Size, Size > &hill_idx_to_index) const
Apply RT-apex based filtering to isotope pattern candidates.
std::vector< Hill > detectHills_(const MSExperiment &exp, double htol_ppm, double min_intensity, double min_mz, double max_mz, bool use_im, std::vector< double > *hill_mass_diffs=nullptr) const
Detect hills (continuous m/z traces) in the MS experiment.
Size minlh_
Minimum number of scans required for a hill.
Definition Biosaur2Algorithm.h:704
std::vector< PeptideFeature > detectIsotopePatterns_(std::vector< Hill > &hills, double itol_ppm, int min_charge, int max_charge, bool negative_mode, double ivf, int iuse, bool enable_isotope_calib, bool use_im) const
Detect isotope patterns and assemble peptide features.
std::vector< double > ion_mobilities
Ion mobility values, empty if not available.
Definition Biosaur2Algorithm.h:70
std::map< int, std::pair< double, double > > refineIsotopeCalibration_(const std::vector< PatternCandidate > &candidates, double itol_ppm, bool enable_isotope_calib) const
Refine isotope mass calibration based on initial pattern candidates.
std::vector< Hill > splitHills_(const std::vector< Hill > &hills, double hvf, Size min_length) const
Split hills at valley positions to separate co-eluting species.
std::pair< double, Size > checkingCosCorrelationForCarbon_(const std::vector< double > &theor_full, const std::vector< double > &exp_full, double thresh) const
Check cosine correlation for averagine-based isotope intensities.
Biosaur2Algorithm()
Default constructor.
bool ignore_iso_calib_
Whether to disable automatic isotope mass calibration.
Definition Biosaur2Algorithm.h:714
std::pair< std::vector< double >, Size > computeAveragine_(double neutral_mass, double apex_intensity) const
Compute averagine-based theoretical isotope intensities.
double itol_
Mass tolerance in ppm for isotope pattern detection.
Definition Biosaur2Algorithm.h:701
std::vector< Size > scan_indices
Indices of spectra containing peaks of this hill.
Definition Biosaur2Algorithm.h:64
std::vector< PatternCandidate > generateIsotopeCandidates_(const std::vector< Hill > &hills, double itol_ppm, int min_charge, int max_charge, double ivf, double mz_step, const std::map< int, std::vector< FastHillEntry > > &hills_mz_fast, const std::map< Size, Size > &hill_idx_to_index, const std::vector< int > &hill_im_bins, bool use_im) const
Generate initial isotope pattern candidates for all monoisotopic hills.
bool tof_mode_
Whether to enable TOF-specific intensity filtering.
Definition Biosaur2Algorithm.h:711
void setMSData(const MSExperiment &ms_data)
Set the MS data used for feature detection (copy version)
void run(FeatureMap &feature_map)
Execute the Biosaur2 workflow on the stored MS1 experiment (simplified interface)
int cmax_
Maximum charge state to consider.
Definition Biosaur2Algorithm.h:706
int iuse_
Number of isotopes for intensity calculation (0=mono, -1=all, N=mono+N)
Definition Biosaur2Algorithm.h:709
std::vector< double > intensities
Intensity values of peaks in this hill.
Definition Biosaur2Algorithm.h:67
FeatureMap convertToFeatureMap_(const std::vector< PeptideFeature > &features, const std::vector< Hill > &hills) const
Convert peptide features to OpenMS FeatureMap format.
double maxmz_
Maximum m/z value.
Definition Biosaur2Algorithm.h:699
std::vector< PatternCandidate > filterByCalibration_(const std::vector< PatternCandidate > &candidates, const std::vector< Hill > &hills, const std::map< Size, Size > &hill_idx_to_index, const std::map< int, std::pair< double, double > > &isotope_calib_map_ready, bool enable_isotope_calib) const
Filter isotope pattern candidates using refined calibration and cosine checks.
std::pair< double, double > calibrateMass_(const std::vector< double > &mass_errors, double bin_width=0.05) const
Estimate mass calibration parameters from a distribution of mass errors.
double paseftol_
Ion mobility tolerance for PASEF/TIMS data (0=disable)
Definition Biosaur2Algorithm.h:715
bool use_hill_calib_
Whether to use automatic hill mass tolerance calibration.
Definition Biosaur2Algorithm.h:713
void debugCheckIsotopeConsistency_(const char *stage_label, double mono_mz_center, double mono_rt_apex, Size mono_hill_idx, int charge, double itol_ppm, const Hill &iso_hill, Size isotope_number) const
Debug helper to log obviously inconsistent isotope assignments.
std::vector< Size > peak_indices
Indices of peaks within each spectrum.
Definition Biosaur2Algorithm.h:65
Size pasefminlh_
Minimum number of points per PASEF/TIMS cluster.
Definition Biosaur2Algorithm.h:708
double ivf_
Isotope valley factor for splitting isotope patterns.
Definition Biosaur2Algorithm.h:703
double cosineCorrelation1D_(const std::vector< double > &v1, const std::vector< double > &v2) const
Compute a cosine correlation between two 1D intensity vectors.
void updateMembers_() override
Update internal member variables from parameters (called automatically when parameters change)
bool negative_mode_
Whether to use negative ion mode.
Definition Biosaur2Algorithm.h:710
MSExperiment & getMSData()
Get non-const reference to MS data.
double calculatePPM_(double mz1, double mz2) const
Calculate the mass accuracy (ppm) between two m/z values.
MSExperiment ms_data_
Input LC-MS data.
Definition Biosaur2Algorithm.h:694
void centroidPASEFData_(MSExperiment &exp, double mz_step, double pasef_tolerance) const
Centroid PASEF/TIMS spectra in joint m/z-ion mobility space.
double calculateMedian_(const std::vector< double > &values) const
Calculate the median of a vector of values.
std::vector< Hill > processHills_(const std::vector< Hill > &hills, Size min_length) const
Filter and process hills by applying length constraints and computing summary statistics.
double cosineCorrelation_(const std::vector< double > &intensities1, const std::vector< Size > &scans1, const std::vector< double > &intensities2, const std::vector< Size > &scans2) const
Compute cosine correlation between two intensity traces.
std::map< int, std::pair< double, double > > performInitialIsotopeCalibration_(const std::vector< Hill > &hills, double itol_ppm, int min_charge, int max_charge, bool enable_isotope_calib) const
Perform an initial mass calibration for isotope spacings based on raw hills.
void setMSData(MSExperiment &&ms_data)
Set the MS data used for feature detection (move version)
double computeHillMzStep_(const MSExperiment &exp, double htol_ppm, double min_intensity, double min_mz, double max_mz) const
Determine m/z binning step for hill detection.
void processTOF_(MSExperiment &exp) const
Apply TOF-specific intensity filtering.
std::vector< IsotopeCandidate > isotopes
List of associated isotope peaks.
Definition Biosaur2Algorithm.h:120
std::vector< PeptideFeature > selectNonOverlappingPatterns_(const std::vector< PatternCandidate > &filtered_ready, const std::vector< Hill > &hills, bool negative_mode, int iuse, double itol_ppm) const
Greedily select non-overlapping isotope patterns and assemble peptide features.
double hvf_
Hill valley factor for splitting hills at valleys.
Definition Biosaur2Algorithm.h:702
Lightweight index entry for fast m/z-based hill lookup.
Definition Biosaur2Algorithm.h:237
Representation of a single hill (continuous m/z trace across adjacent scans).
Definition Biosaur2Algorithm.h:63
Candidate isotope peak that can be associated with a monoisotopic hill.
Definition Biosaur2Algorithm.h:91
Internal representation of a candidate isotope pattern.
Definition Biosaur2Algorithm.h:250
Aggregated properties of a detected peptide feature.
Definition Biosaur2Algorithm.h:107
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
A container for features.
Definition FeatureMap.h:78
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19