OpenMS
Loading...
Searching...
No Matches
NeighborSeq.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow, Philipp Wang $
6// $Authors: Chris Bielow, Philipp Wang $
7// --------------------------------------------------------------------------
8
9#pragma once
10
13
14#include <vector>
15#include <map>
16
17
18
19namespace OpenMS
20{
41 class OPENMS_DLLAPI NeighborSeq
42 {
43
44 public:
/// @brief Construct from a vector of "relevant" digested peptides.
/// @param digested_relevant_peptides The relevant peptides (taken as an rvalue reference).
/// NOTE(review): the member digested_relevant_peptides_ is declared as a const reference,
/// not a value -- confirm that callers keep the passed vector alive for the lifetime of this object.
63 NeighborSeq(std::vector<AASequence>&& digested_relevant_peptides);
64
/// @brief Generates a theoretical spectrum for a given peptide sequence with b/y ions at charge 1.
/// @param peptide_sequence The peptide to generate the spectrum for.
/// @return The theoretical spectrum of @p peptide_sequence.
73 MSSpectrum generateSpectrum(const AASequence& peptide_sequence);
74
/// @brief Whether two spectra share enough peaks (in @p mz_bin_size m/z bins) to be considered neighbors.
/// @param spec1 First spectrum.
/// @param spec2 Second spectrum.
/// @param min_shared_ion_fraction Threshold on the fraction of shared ions
///        (presumably a value in 0..1 -- TODO confirm against the implementation).
/// @param mz_bin_size Width of the m/z bins used when comparing peaks.
/// @return true if the spectra are considered neighbors.
97 static bool isNeighborSpectrum(const MSSpectrum& spec1, const MSSpectrum& spec2, const double min_shared_ion_fraction, const double mz_bin_size);
/// @brief Compute the number of shared ions between two spectra.
/// @param spec1 First spectrum.
/// @param spec2 Second spectrum.
/// @param mz_bin_size Width of the m/z bins used when comparing peaks.
/// @return The number of shared ions.
/// NOTE(review): @p mz_bin_size is taken as `const double&` here but by value in
/// isNeighborSpectrum(); consider unifying (requires changing the definition as well).
108 static int computeSharedIonCount(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size);
109
/// @brief Whether @p neighbor_candidate is a spectral neighbor of any of the relevant peptides.
/// @param neighbor_candidate The peptide to test against the relevant peptides.
/// @param mass_tolerance_pc Precursor mass tolerance (unit selected by @p mass_tolerance_pc_ppm).
/// @param mass_tolerance_pc_ppm Unit switch for @p mass_tolerance_pc
///        (presumably true = ppm, false = Da -- TODO confirm against the implementation).
/// @param min_shared_ion_fraction Threshold on the fraction of shared ions.
/// @param mz_bin_size Width of the m/z bins used when comparing peaks.
/// @return true if @p neighbor_candidate is a neighbor of at least one relevant peptide.
/// NOTE(review): non-const member; presumably updates neighbor_stats_ on each call -- verify.
146 bool isNeighborPeptide(const AASequence& neighbor_candidate,
147 const double mass_tolerance_pc,
148 const bool mass_tolerance_pc_ppm,
149 const double min_shared_ion_fraction,
150 const double mz_bin_size);
151
154 {
// Category counters; all start at 0 and are summed by total().
// Meanings below are inferred from the names -- TODO confirm against the code that fills them.
159 int unfindable_peptides = 0; ///< presumably: reference peptides that cannot be found at all
160 int findable_no_neighbors = 0; ///< presumably: reference peptides with no neighbor
161 int findable_one_neighbor = 0; ///< presumably: reference peptides with exactly one neighbor
162 int findable_multiple_neighbors = 0; ///< presumably: reference peptides with more than one neighbor
164
166 int total() const
167 {
168 return unfindable_peptides + findable_no_neighbors + findable_one_neighbor + findable_multiple_neighbors;
169 }
170
172 int percentOfTotal_(int count) const
173 {
174 const int t = total();
175 return (t == 0) ? 0 : count * 100 / t;
176 }
177
179 std::string unfindable() const
180 {
181 return StringUtils::toStr(unfindable_peptides) + " (" + StringUtils::toStr(percentOfTotal_(unfindable_peptides)) + "%)";
182 }
183
185 std::string noNB() const
186 {
187 return StringUtils::toStr(findable_no_neighbors) + " (" + StringUtils::toStr(percentOfTotal_(findable_no_neighbors)) + "%)";
188 }
189
191 std::string oneNB() const
192 {
193 return StringUtils::toStr(findable_one_neighbor) + " (" + StringUtils::toStr(percentOfTotal_(findable_one_neighbor)) + "%)";
194 }
195
197 std::string multiNB() const
198 {
199 return StringUtils::toStr(findable_multiple_neighbors) + " (" + StringUtils::toStr(percentOfTotal_(findable_multiple_neighbors)) + "%)";
200 }
201 };
202
205
206 protected:
/// @brief Creates a map of masses to positions from the internal relevant peptides.
/// @return Map from a mass to the positions (indices into digested_relevant_peptides_) having that mass.
211 std::map<double, std::vector<int>> createMassLookup_();
212
/// @brief Finds candidate positions based on a given mono-isotopic weight and mass tolerance.
/// @param mono_weight Mono-isotopic weight to look up.
/// @param mass_tolerance Mass tolerance (unit selected by @p mass_tolerance_pc_ppm).
/// @param mass_tolerance_pc_ppm Unit switch for @p mass_tolerance
///        (presumably true = ppm, false = Da -- TODO confirm against the implementation).
/// NOTE(review): the return type is deduced (`auto`) and no definition is visible in this header,
/// so the function can only be called where its definition has already been seen.
220 auto findCandidatePositions_(const double mono_weight, double mass_tolerance, const bool mass_tolerance_pc_ppm);
221
222
223 private:
// NOTE(review): held by reference, not by value -- the referenced vector must outlive this object
// (the constructor takes it as an rvalue reference; confirm this is not left dangling).
224 const std::vector<AASequence>& digested_relevant_peptides_; ///< digested relevant peptides
225 std::map<double, std::vector<int>> mass_position_map_; ///< map of masses to positions in digested_relevant_peptides_
226
229
230 std::vector<int> neighbor_stats_; ///< how many neighbors per reference peptide searched using isNeighborPeptide()?
231
232 }; // class NeighborSeq
233
234} // namespace OpenMS
Representation of a peptide/protein sequence.
Definition AASequence.h:88
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
Subset-neighbor peptide search: find peptides from a wider pool (typically a FASTA digest) that are s...
Definition NeighborSeq.h:42
TheoreticalSpectrumGenerator spec_gen_
for b/y ions with charge 1
Definition NeighborSeq.h:227
MSSpectrum generateSpectrum(const AASequence &peptide_sequence)
Generates a theoretical spectrum for a given peptide sequence with b/y ions at charge 1.
std::vector< int > neighbor_stats_
how many neighbors per reference peptide searched using isNeighborPeptide()?
Definition NeighborSeq.h:230
auto findCandidatePositions_(const double mono_weight, double mass_tolerance, const bool mass_tolerance_pc_ppm)
Finds candidate positions based on a given mono-isotopic weight and mass tolerance.
std::map< double, std::vector< int > > createMassLookup_()
Creates a map of masses to positions from the internal relevant peptides.
static bool isNeighborSpectrum(const MSSpectrum &spec1, const MSSpectrum &spec2, const double min_shared_ion_fraction, const double mz_bin_size)
Whether two spectra share enough peaks (in mz_bin_size m/z bins) to be considered neighbors.
NeighborStats getNeighborStats() const
after calling isNeighborPeptide() multiple times, this function returns the statistics of how many ne...
static int computeSharedIonCount(const MSSpectrum &spec1, const MSSpectrum &spec2, const double &mz_bin_size)
Compute the number of shared ions between two spectra.
const std::vector< AASequence > & digested_relevant_peptides_
digested relevant peptides
Definition NeighborSeq.h:224
std::map< double, std::vector< int > > mass_position_map_
map of masses to positions in digested_relevant_peptides_
Definition NeighborSeq.h:225
bool isNeighborPeptide(const AASequence &neighbor_candidate, const double mass_tolerance_pc, const bool mass_tolerance_pc_ppm, const double min_shared_ion_fraction, const double mz_bin_size)
Whether neighbor_candidate is a spectral neighbor of any of the relevant peptides.
const Residue * x_residue_
residue for unknown amino acid
Definition NeighborSeq.h:228
NeighborSeq(std::vector< AASequence > &&digested_relevant_peptides)
Construct from a vector of "relevant" digested peptides.
Representation of an amino acid residue.
Definition Residue.h:41
Generates theoretical spectra for peptides with various options.
Definition TheoreticalSpectrumGenerator.h:45
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Statistics of how many neighbors were found per reference peptide.
Definition NeighborSeq.h:154
std::string oneNB() const
findable_one_neighbor formatted as "X (Y%)"; returns "X (0%)" when total is 0.
Definition NeighborSeq.h:191
std::string multiNB() const
findable_multiple_neighbors formatted as "X (Y%)"; returns "X (0%)" when total is 0.
Definition NeighborSeq.h:197
int percentOfTotal_(int count) const
percentage (0..100) of count relative to total; returns 0 when total is 0 (avoids integer division by zero).
Definition NeighborSeq.h:172
std::string unfindable() const
unfindable_peptides formatted as "X (Y%)"; returns "X (0%)" when total is 0.
Definition NeighborSeq.h:179
std::string noNB() const
findable_no_neighbors formatted as "X (Y%)"; returns "X (0%)" when total is 0.
Definition NeighborSeq.h:185
int total() const
Sum of all four categories (i.e. the number of relevant peptides registered at construction time).
Definition NeighborSeq.h:166