OpenMS
NeighborSeq.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow, Philipp Wang $
6 // $Authors: Chris Bielow, Philipp Wang $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
13 
14 #include <vector>
15 #include <map>
16 
17 
18 
19 namespace OpenMS
20 {
29  class OPENMS_DLLAPI NeighborSeq
30  {
31 
32  public:
35  NeighborSeq(std::vector<AASequence>&& digested_relevant_peptides);
36 
45  MSSpectrum generateSpectrum(const AASequence& peptide_sequence);
46 
58  static bool isNeighborSpectrum(const MSSpectrum& spec1, const MSSpectrum& spec2, const double min_shared_ion_fraction, const double mz_bin_size);
69  static int computeSharedIonCount(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size);
70 
83  bool isNeighborPeptide(const AASequence& neighbor_candidate,
84  const double mass_tolerance_pc,
85  const bool mass_tolerance_pc_ppm,
86  const double min_shared_ion_fraction,
87  const double mz_bin_size);
88 
91  {
96  int unfindable_peptides = 0;
97  int findable_no_neighbors = 0;
98  int findable_one_neighbor = 0;
99  int findable_multiple_neighbors = 0;
101 
103  int total() const
104  {
105  return unfindable_peptides + findable_no_neighbors + findable_one_neighbor + findable_multiple_neighbors;
106  }
109  {
110  return String(unfindable_peptides) + " (" + unfindable_peptides * 100 / total() + "%)";
111  }
112 
114  String noNB() const
115  {
116  return String(findable_no_neighbors) + " (" + findable_no_neighbors * 100 / total() + "%)";
117  }
119  String oneNB() const
120  {
121  return String(findable_one_neighbor) + " (" + findable_one_neighbor * 100 / total() + "%)";
122  }
124  String multiNB() const
125  {
126  return String(findable_multiple_neighbors) + " (" + findable_multiple_neighbors * 100 / total() + "%)";
127  }
128  };
129 
132 
133  protected:
138  std::map<double, std::vector<int>> createMassLookup_();
139 
147  auto findCandidatePositions_(const double mono_weight, double mass_tolerance, const bool mass_tolerance_pc_ppm);
148 
149 
150  private:
151  const std::vector<AASequence>& digested_relevant_peptides_;
152  std::map<double, std::vector<int>> mass_position_map_;
153 
156 
157  std::vector<int> neighbor_stats_;
158 
159  }; // class NeighborSeq
160 
161 } // namespace OpenMS
Representation of a peptide/protein sequence.
Definition: AASequence.h:86
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
The Neighbor Peptide functionality is designed to find peptides (neighbors) in a given set of sequenc...
Definition: NeighborSeq.h:30
TheoreticalSpectrumGenerator spec_gen_
for b/y ions with charge 1
Definition: NeighborSeq.h:154
MSSpectrum generateSpectrum(const AASequence &peptide_sequence)
Generates a theoretical spectrum for a given peptide sequence with b/y ions at charge 1.
std::vector< int > neighbor_stats_
Number of neighbors found for each reference peptide by calls to isNeighborPeptide().
Definition: NeighborSeq.h:157
auto findCandidatePositions_(const double mono_weight, double mass_tolerance, const bool mass_tolerance_pc_ppm)
Finds candidate positions based on a given mono-isotopic weight and mass tolerance.
static bool isNeighborSpectrum(const MSSpectrum &spec1, const MSSpectrum &spec2, const double min_shared_ion_fraction, const double mz_bin_size)
Compares two spectra to determine if they share a sufficient number of ions.
NeighborStats getNeighborStats() const
After calling isNeighborPeptide() multiple times, this function returns the statistics of how many neighbors were found per reference peptide.
static int computeSharedIonCount(const MSSpectrum &spec1, const MSSpectrum &spec2, const double &mz_bin_size)
Compute the number of shared ions between two spectra.
const std::vector< AASequence > & digested_relevant_peptides_
digested relevant peptides
Definition: NeighborSeq.h:151
std::map< double, std::vector< int > > mass_position_map_
map of masses to positions in digested_relevant_peptides_
Definition: NeighborSeq.h:152
std::map< double, std::vector< int > > createMassLookup_()
Creates a map of masses to positions from the internal relevant peptides.
bool isNeighborPeptide(const AASequence &neighbor_candidate, const double mass_tolerance_pc, const bool mass_tolerance_pc_ppm, const double min_shared_ion_fraction, const double mz_bin_size)
Is this peptide a neighbor to one of the relevant peptides?
const Residue * x_residue_
residue for unknown amino acid
Definition: NeighborSeq.h:155
NeighborSeq(std::vector< AASequence > &&digested_relevant_peptides)
Representation of an amino acid residue.
Definition: Residue.h:40
A more convenient string class.
Definition: String.h:34
Generates theoretical spectra for peptides with various options.
Definition: TheoreticalSpectrumGenerator.h:45
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Statistics of how many neighbors were found per reference peptide.
Definition: NeighborSeq.h:91
String unfindable() const
Number of reference peptides that contain an 'X' (unknown amino acid), formatted as 'X (Y%)'.
Definition: NeighborSeq.h:108
String multiNB() const
Number of reference peptides that had multiple neighbors, formatted as 'X (Y%)'.
Definition: NeighborSeq.h:124
String oneNB() const
Number of reference peptides that had exactly one neighbor, formatted as 'X (Y%)'.
Definition: NeighborSeq.h:119
String noNB() const
Number of reference peptides that had no neighbors, formatted as 'X (Y%)'.
Definition: NeighborSeq.h:114
int total() const
Sum of all 4 categories.
Definition: NeighborSeq.h:103