OpenMS
Loading...
Searching...
No Matches
OpenSearchModificationAnalysis.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
13
14#include <map>
15#include <vector>
16#include <unordered_map>
17#include <unordered_set>
18
19namespace OpenMS
20{
35 {
36 public:
37
40 {
41 double count = 0.0;
42 std::vector<double> masses;
43 int num_charge_states = 0;
44 };
45
48 {
49 int count;
52 std::vector<double> masses;
53 };
54
57 {
58 double delta_mass = 0.0;
59 int count = 0;
60 int unique_peptides = 0;
61 int num_charge_states = 0;
62 double percentage = 0.0;
63 String mapped_modification = "";
64 bool is_known_modification = false;
65 };
66
/// Statistics for a mapped PTM.
// NOTE(review): listing lines 70 and 79 are absent from this rendering; the
// member index of this page places `String target_residues` ("Target residues
// for this modification") at line 79. Confirm the full member list against the
// real header before relying on this view.
68 struct PTMEntry
69 {
// Mass fields, all defaulting to 0.0.
// NOTE(review): units and how mass_deviation is derived are not visible here — confirm.
71 double theoretical_mass = 0.0;
72 double observed_mass = 0.0;
73 double mass_deviation = 0.0;
// Integer tallies, all defaulting to 0.
74 int count = 0;
75 int unique_peptides = 0;
76 int num_charge_states = 0;
// Defaults to 0.0. NOTE(review): the denominator of this percentage is not shown here — confirm.
77 double percentage = 0.0;
/// Count per amino acid residue.
78 std::map<char, int> residue_counts;
80 };
81
84 {
85 std::vector<DeltaMassEntry> entries;
86 int total_psms = 0;
87 int modified_psms = 0;
88 int unmodified_psms = 0;
89 double mean_delta_mass = 0.0;
90 double median_delta_mass = 0.0;
91 };
92
95 {
96 std::vector<PTMEntry> entries;
97 int total_modified_psms = 0;
98 int unknown_modification_psms = 0;
99 int num_unique_modifications = 0;
100 };
101
109
112 {
113 double epsilon;
// Constructs the comparator and stores the tolerance in `epsilon`;
// the tolerance defaults to 1e-9 when no argument is given.
// NOTE(review): the constructor is not `explicit`, so a bare double converts
// implicitly to a comparator — confirm that this is intended.
114 FuzzyDoubleComparator(double eps = 1e-9) : epsilon(eps) {}
// Ordering predicate: returns true only when `a` is smaller than `b` AND the
// two values differ by at least `epsilon`. Values closer together than
// `epsilon` are therefore ordered in neither direction.
// NOTE(review): "closer than epsilon" is not a transitive relation (a~b and
// b~c does not imply a~c), so this may not satisfy the strict-weak-ordering
// requirement of the std::map aliases that use this comparator — confirm.
// NOTE(review): std::fabs needs <cmath>, which is not among the includes
// visible in this listing (listing lines 11-12 are not shown) — confirm.
115 bool operator()(const double& a, const double& b) const
116 {
117 return std::fabs(a - b) >= epsilon && a < b;
118 }
119 };
120
/// Type definitions for delta mass analysis.
// Both aliases are std::map keyed by double and ordered with
// FuzzyDoubleComparator, so keys the comparator orders in neither direction
// share one map entry. The mapped value is double for DeltaMassHistogram and
// int for DeltaMassToChargeCount.
// NOTE(review): presumably key = delta mass and value = count / number of
// charge states, judging by the names only — confirm against the implementation.
122 using DeltaMassHistogram = std::map<double, double, FuzzyDoubleComparator>;
123 using DeltaMassToChargeCount = std::map<double, int, FuzzyDoubleComparator>;
124
127
130
139 std::pair<DeltaMassHistogram, DeltaMassToChargeCount>
141 bool use_smoothing = false,
142 bool debug = false) const;
143
155 std::vector<ModificationSummary>
157 const DeltaMassToChargeCount& charge_histogram,
158 PeptideIdentificationList& peptide_ids,
159 double precursor_mass_tolerance = 5.0,
160 bool precursor_mass_tolerance_unit_ppm = true,
161 const String& output_file = "") const;
162
173 std::vector<ModificationSummary>
175 double precursor_mass_tolerance = 5.0,
176 bool precursor_mass_tolerance_unit_ppm = true,
177 bool use_smoothing = false,
178 const String& output_file = "") const;
179
197 double precursor_mass_tolerance = 5.0,
198 bool precursor_mass_tolerance_unit_ppm = true,
199 bool use_smoothing = false,
200 const String& output_file = "") const;
201
217 const DeltaMassToChargeCount& charge_histogram,
218 const PeptideIdentificationList& peptide_ids,
219 double precursor_mass_tolerance = 5.0,
220 bool precursor_mass_tolerance_unit_ppm = true) const;
221
235 double precursor_mass_tolerance = 5.0,
236 bool precursor_mass_tolerance_unit_ppm = true) const;
237
249 std::map<char, int>
251 double delta_mass,
252 double tolerance = 0.01) const;
253
261 const String& output_file) const;
262
270 const String& output_file) const;
271
272 private:
273
276 static constexpr double MAX_MOD_MAPPING_TOL_ = 0.02; // Da
277
279 static constexpr double DELTA_MASS_ZERO_THRESHOLD_ = 0.05; // Da
280
/// Gaussian function for smoothing.
// Declaration only; the definition is not part of this listing.
// NOTE(review): whether the result is normalised is not visible here — confirm.
282 static double gaussian_(double x, double sigma);
283
286 double sigma = 0.001);
287
290 double count_threshold = 0.0,
291 double snr = 2.0);
292
/// Write modification summary table to file.
// Declaration only; the output format and error handling are defined in the
// implementation, which is not part of this listing.
294 void writeModificationSummary_(const std::vector<ModificationSummary>& modifications,
295 const String& output_file) const;
296
298 std::map<double, String, FuzzyDoubleComparator>
300
/// Get target residues for a modification name.
// Declaration only. NOTE(review): the format of the returned String and the
// result for an unknown name are not visible here — confirm.
302 String getTargetResidues_(const String& mod_name) const;
303
306 double delta_mass,
307 double tolerance) const;
308 };
309
310} // namespace OpenMS
Utility class for analyzing modification patterns in open search results.
Definition OpenSearchModificationAnalysis.h:35
std::map< char, int > residue_counts
Count per amino acid residue.
Definition OpenSearchModificationAnalysis.h:78
std::vector< ModificationSummary > mapDeltaMassesToModifications(const DeltaMassHistogram &delta_mass_histogram, const DeltaMassToChargeCount &charge_histogram, PeptideIdentificationList &peptide_ids, double precursor_mass_tolerance=5.0, bool precursor_mass_tolerance_unit_ppm=true, const String &output_file="") const
Map delta masses to known modifications and annotate peptides.
static DeltaMassHistogram smoothDeltaMassHistogram_(const DeltaMassHistogram &histogram, double sigma=0.001)
Smooth delta mass histogram using Gaussian kernel density estimation.
~OpenSearchModificationAnalysis()=default
Destructor.
int num_charge_states
Number of charge states.
Definition OpenSearchModificationAnalysis.h:51
DeltaMassStatistics delta_mass_stats
Delta mass histogram statistics.
Definition OpenSearchModificationAnalysis.h:105
void writeModificationSummary_(const std::vector< ModificationSummary > &modifications, const String &output_file) const
Write modification summary table to file.
std::vector< ModificationSummary > analyzeModifications(PeptideIdentificationList &peptide_ids, double precursor_mass_tolerance=5.0, bool precursor_mass_tolerance_unit_ppm=true, bool use_smoothing=false, const String &output_file="") const
Complete analysis workflow: analyze patterns and map to modifications.
std::vector< ModificationSummary > summaries
Legacy modification summaries.
Definition OpenSearchModificationAnalysis.h:107
std::map< double, int, FuzzyDoubleComparator > DeltaMassToChargeCount
Definition OpenSearchModificationAnalysis.h:123
std::vector< double > masses
Masses associated with the modification.
Definition OpenSearchModificationAnalysis.h:42
OpenSearchModificationAnalysis()=default
Default constructor.
String getTargetResidues_(const String &mod_name) const
Get target residues for a modification name.
PTMStatistics generatePTMStatistics(const PeptideIdentificationList &peptide_ids, double precursor_mass_tolerance=5.0, bool precursor_mass_tolerance_unit_ppm=true) const
Generate PTM statistics table with residue localization.
String name
Modification name.
Definition OpenSearchModificationAnalysis.h:50
PTMStatistics ptm_stats
Mapped PTM statistics.
Definition OpenSearchModificationAnalysis.h:106
DeltaMassStatistics generateDeltaMassStatistics(const DeltaMassHistogram &histogram, const DeltaMassToChargeCount &charge_histogram, const PeptideIdentificationList &peptide_ids, double precursor_mass_tolerance=5.0, bool precursor_mass_tolerance_unit_ppm=true) const
Generate delta mass statistics table from histogram data.
void writePTMStatistics(const PTMStatistics &stats, const String &output_file) const
Write PTM statistics to a TSV file.
std::map< double, double, FuzzyDoubleComparator > DeltaMassHistogram
Type definitions for delta mass analysis.
Definition OpenSearchModificationAnalysis.h:122
std::map< char, int > analyzeResidueFrequency(const PeptideIdentificationList &peptide_ids, double delta_mass, double tolerance=0.01) const
Analyze which amino acid residues are associated with a delta mass.
static double gaussian_(double x, double sigma)
Gaussian function for smoothing.
std::pair< DeltaMassHistogram, DeltaMassToChargeCount > analyzeDeltaMassPatterns(const PeptideIdentificationList &peptide_ids, bool use_smoothing=false, bool debug=false) const
Analyze delta mass patterns from peptide identifications.
String target_residues
Target residues for this modification.
Definition OpenSearchModificationAnalysis.h:79
std::vector< DeltaMassEntry > entries
All delta mass entries.
Definition OpenSearchModificationAnalysis.h:85
int count
Modification count (number of occurrences)
Definition OpenSearchModificationAnalysis.h:49
OpenSearchAnalysisResult analyzeModificationsWithStatistics(PeptideIdentificationList &peptide_ids, double precursor_mass_tolerance=5.0, bool precursor_mass_tolerance_unit_ppm=true, bool use_smoothing=false, const String &output_file="") const
Complete analysis returning structured statistics tables.
int countUniquePeptides_(const PeptideIdentificationList &peptide_ids, double delta_mass, double tolerance) const
Count unique peptide sequences matching a delta mass.
static DeltaMassHistogram findPeaksInHistogram_(const DeltaMassHistogram &histogram, double count_threshold=0.0, double snr=2.0)
Find peaks in delta mass histogram based on count threshold and signal-to-noise ratio.
std::map< double, String, FuzzyDoubleComparator > buildModificationMassLookup_() const
Build lookup table mapping mass differences to known modifications.
std::vector< PTMEntry > entries
All PTM entries.
Definition OpenSearchModificationAnalysis.h:96
void writeDeltaMassStatistics(const DeltaMassStatistics &stats, const String &output_file) const
Write delta mass statistics to a TSV file.
Statistics for a single delta mass bin in the histogram.
Definition OpenSearchModificationAnalysis.h:57
Container for delta mass statistics table.
Definition OpenSearchModificationAnalysis.h:84
Stores details of a modification pattern found in the data.
Definition OpenSearchModificationAnalysis.h:40
Data structure for modification summary output.
Definition OpenSearchModificationAnalysis.h:48
Combined result of open search modification analysis.
Definition OpenSearchModificationAnalysis.h:104
Statistics for a mapped PTM.
Definition OpenSearchModificationAnalysis.h:69
Container for PTM statistics table.
Definition OpenSearchModificationAnalysis.h:95
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
A more convenient string class.
Definition String.h:34
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Comparator for approximate comparison of double values.
Definition OpenSearchModificationAnalysis.h:112
double epsilon
Definition OpenSearchModificationAnalysis.h:113
FuzzyDoubleComparator(double eps=1e-9)
Definition OpenSearchModificationAnalysis.h:114
bool operator()(const double &a, const double &b) const
Definition OpenSearchModificationAnalysis.h:115