OpenMS
Loading...
Searching...
No Matches
PeptideAndProteinQuant.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Hendrik Weisser $
6// $Authors: Hendrik Weisser $
7// --------------------------------------------------------------------------
8
9#pragma once
10
18
19#include <map>
20#include <string>
21#include <utility>
22
23namespace OpenMS
24{
32 class OPENMS_DLLAPI PeptideAndProteinQuant :
34 {
35public:
36
/// Mapping: sample ID -> abundance.
38 typedef std::map<UInt64, double> SampleAbundances;
39
/// Quantitative and associated data for a peptide.
42 {
/// mapping: fraction -> filename -> charge -> channel/label -> abundance
44 std::map<Int, std::map<std::string, std::map<Int, std::map<UInt, double>>>> abundances;
45
/// mapping: fraction -> filename -> charge -> PSM count (stored as UInt64)
47 std::map<Int, std::map<std::string, std::map<Int, UInt64>>> psm_counts;
48
51
54
/// protein accessions for this peptide
56 std::set<std::string> accessions;
57
/// PSM counter, starts at zero
/// NOTE(review): presumably the total number of PSMs for this peptide - confirm
59 Size psm_count = 0;
60
/// default constructor (compiler-generated)
62 PeptideData() = default;
63 };
64
/// Mapping: peptide sequence (modified) -> peptide data.
66 typedef std::map<AASequence, PeptideData> PeptideQuant;
67
/// Quantitative and associated data for a protein.
70 {
/// mapping: peptide (unmodified) -> sample -> abundance
72 std::map<std::string, SampleAbundances> peptide_abundances;
73
/// NOTE(review): presumably mapping: peptide (unmodified) -> sample -> PSM count,
/// by analogy with peptide_abundances - confirm
74 std::map<std::string, SampleAbundances> peptide_psm_counts;
75
/// mapping: filename -> channel/label -> abundance
77 std::map<std::string, std::map<UInt, double>> channel_level_abundances;
78
/// mapping: filename -> PSM counts
80 std::map<std::string, UInt64> file_level_psm_counts;
81
84
87
90
/// PSM counter, starts at zero
/// NOTE(review): presumably the total number of PSMs for this protein - confirm
92 Size psm_count = 0;
93
/// default constructor (compiler-generated)
95 ProteinData() = default;
96 };
97
/// Mapping: protein accession -> protein data.
99 typedef std::map<std::string, ProteinData> ProteinQuant;
100
/// Statistics for processing summary.
103 {
106
109
112
/// protein statistics
114 Size quant_proteins, too_few_peptides;
115
/// peptide statistics
117 Size quant_peptides, total_peptides;
118
/// feature statistics
/// NOTE(review): exact meaning of "blank" and "ambig" features is not visible here - confirm
120 Size quant_features, total_features, blank_features, ambig_features;
121
/// constructor: the initializer list below sets every listed counter to zero
124 n_samples(0), quant_proteins(0), too_few_peptides(0),
125 quant_peptides(0), total_peptides(0), quant_features(0),
126 total_features(0), blank_features(0), ambig_features(0) {}
127 };
128
131
134
/// Read quantitative data from a feature map.
/// @param features feature map to read from (taken by non-const reference)
/// @param ed experimental design
140 void readQuantData(FeatureMap& features, const ExperimentalDesign& ed);
141
/// Read quantitative data from a consensus map.
/// @param consensus consensus map to read from (taken by non-const reference)
/// @param ed experimental design
147 void readQuantData(ConsensusMap& consensus, const ExperimentalDesign& ed);
148
/// Read quantitative data from identification results
/// (for quantification via spectral counting).
/// @param proteins protein identification runs
/// @param ed experimental design
154 void readQuantData(std::vector<ProteinIdentification>& proteins,
156 const ExperimentalDesign& ed);
157
169
170
179
180
/// Build a string -> string map from a protein identification run.
/// NOTE(review): judging by the name, this presumably maps each protein accession
/// to the accession of its group leader - confirm against the implementation.
/// @param proteins protein identification run to derive the mapping from
/// @return map keyed and valued by std::string
181 std::map<std::string, std::string> mapAccessionToLeader(const OpenMS::ProteinIdentification& proteins) const;
182
185
188
191
194 const ProteinQuant& protein_quants,
195 ProteinIdentification& proteins,
196 bool remove_unquantified = true);
197
198private:
199
202
205
208
211
/// Precomputed lookup: (filename, channel/label) -> sample; consulted by
/// getSampleIDFromFilenameAndChannel_().
215 std::map<std::pair<std::string, UInt>, size_t> sample_id_lookup_;
216
217
225
/// Gather quantitative information from a feature.
/// @param feature feature handle to take the quantitative information from
/// @param fraction fraction the feature belongs to
/// @param filename name of the file the feature belongs to
/// @param hit peptide hit annotated to the feature
/// @param channel_or_label channel or label of the feature
236 void quantifyFeature_(const FeatureHandle& feature,
237 size_t fraction,
238 const std::string& filename,
239 const PeptideHit& hit,
240 UInt channel_or_label);
241
250 const std::map<Int, std::map<std::string, std::map<Int, std::map<UInt, double>>>> & peptide_abundances,
251 std::tuple<size_t, std::string, size_t, UInt> & best);
252
258 template <typename T>
259 void orderBest_(const std::map<T, SampleAbundances> & abundances,
260 std::vector<T>& result)
261 {
262 typedef std::pair<Size, double> PairType;
263 std::multimap<PairType, T, std::greater<PairType> > order;
264 for (typename std::map<T, SampleAbundances>::const_iterator ab_it =
265 abundances.begin(); ab_it != abundances.end(); ++ab_it)
266 {
267 double total = 0.0;
268 for (SampleAbundances::const_iterator samp_it = ab_it->second.begin();
269 samp_it != ab_it->second.end(); ++samp_it)
270 {
271 total += samp_it->second;
272 }
273 if (total <= 0.0) continue; // not quantified
274 PairType key = std::make_pair(ab_it->second.size(), total);
275 order.insert(std::make_pair(key, ab_it->first));
276 }
277 result.clear();
278 for (typename std::multimap<PairType, T, std::greater<PairType> >::
279 iterator ord_it = order.begin(); ord_it != order.end(); ++ord_it)
280 {
281 result.push_back(ord_it->second);
282 }
283 }
284
285
286
291
300
/// Select peptides for protein quantification based on filtering criteria.
/// @param protein_accession accession of the protein to select peptides for
/// @param top_n number of peptides to use (exact semantics not visible here - confirm)
/// @param fix_peptides NOTE(review): presumably "use the same peptides in all samples" - confirm
/// @return the selected peptides as strings
309 std::vector<std::string> selectPeptidesForQuantification_(const std::string& protein_accession,
310 Size top_n,
311 bool fix_peptides);
312
/// Aggregate abundances using the specified mathematical method.
/// @param abundances values to aggregate
/// @param method name of the aggregation method (accepted values are not visible here)
/// @return the aggregated value
320 double aggregateAbundances_(const std::vector<double>& abundances,
321 const std::string& method) const;
322
/// Calculate protein abundances for a single protein using selected peptides.
/// @param protein_accession accession of the protein to quantify
/// @param selected_peptides peptides to base the protein abundance on
/// @param aggregate_method name of the aggregation method
/// @param top_n number of peptides to use (exact semantics not visible here - confirm)
/// @param include_all NOTE(review): presumably "also quantify when fewer than top_n peptides exist" - confirm
332 void calculateProteinAbundances_(const std::string& protein_accession,
333 const std::vector<std::string>& selected_peptides,
334 const std::string& aggregate_method,
335 Size top_n,
336 bool include_all);
337
/// Calculate detailed protein abundances at channel level using selected peptides.
/// @param protein_accession accession of the protein to quantify
/// @param selected_peptides peptides to base the protein abundance on
/// @param aggregate_method name of the aggregation method
/// @param top_n number of peptides to use (exact semantics not visible here - confirm)
/// @param include_all NOTE(review): presumably "also quantify when fewer than top_n peptides exist" - confirm
/// @param accession_to_leader string -> string map, same type as returned by mapAccessionToLeader()
348 void calculateFileAndChannelLevelProteinAbundances_(const std::string& protein_accession,
349 const std::vector<std::string>& selected_peptides,
350 const std::string& aggregate_method,
351 Size top_n,
352 bool include_all,
353 const std::map<std::string, std::string>& accession_to_leader);
354
361
/// Get the "canonical" protein accession from the list of protein accessions of a peptide.
/// @param pep_accessions protein accessions of the peptide
/// @param accession_to_leader string -> string map, same type as returned by mapAccessionToLeader()
/// @return the canonical accession (behaviour when none can be determined is not visible here)
374 std::string getAccession_(const std::set<std::string>& pep_accessions,
375 const std::map<std::string, std::string>& accession_to_leader) const;
376
383
392
/// Map (filename, channel) to sample using the precomputed sample_id_lookup_.
/// @param filename file name part of the lookup key
/// @param channel_or_label channel/label part of the lookup key
/// @return the sample for this (filename, channel) pair
/// NOTE(review): behaviour for a pair missing from the lookup is not visible here - confirm
401 size_t getSampleIDFromFilenameAndChannel_(const std::string& filename,
402 UInt channel_or_label) const;
403
/// Clear all data when parameters are set (overrides the base-class hook).
405 void updateMembers_() override;
406
407 }; // class
408
409} // namespace
A container for consensus elements.
Definition ConsensusMap.h:67
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition ExperimentalDesign.h:109
Representation of a Peak2D, RichPeak2D or Feature .
Definition FeatureHandle.h:36
A container for features.
Definition FeatureMap.h:78
Helper class for peptide and protein quantification based on feature data annotated with IDs.
Definition PeptideAndProteinQuant.h:34
void readQuantData(ConsensusMap &consensus, const ExperimentalDesign &ed)
Read quantitative data from a consensus map.
void annotateQuantificationsToProteins(const ProteinQuant &protein_quants, ProteinIdentification &proteins, bool remove_unquantified=true)
Annotate protein quant results as meta data to protein ids.
std::map< AASequence, PeptideData > PeptideQuant
Mapping: peptide sequence (modified) -> peptide data.
Definition PeptideAndProteinQuant.h:66
void readQuantData(FeatureMap &features, const ExperimentalDesign &ed)
Read quantitative data from a feature map.
void buildSampleIDLookup_()
(Re)build sample_id_lookup_ from experimental_design_.
ExperimentalDesign experimental_design_
Experimental design for filename/channel to sample mapping.
Definition PeptideAndProteinQuant.h:210
std::map< std::string, ProteinData > ProteinQuant
Mapping: protein accession -> protein data.
Definition PeptideAndProteinQuant.h:99
double aggregateAbundances_(const std::vector< double > &abundances, const std::string &method) const
Aggregate abundances using the specified mathematical method.
std::vector< std::string > selectPeptidesForQuantification_(const std::string &protein_accession, Size top_n, bool fix_peptides)
Select peptides for protein quantification based on filtering criteria.
void quantifyFeature_(const FeatureHandle &feature, size_t fraction, const std::string &filename, const PeptideHit &hit, UInt channel_or_label)
Gather quantitative information from a feature.
const PeptideQuant & getPeptideResults()
Get peptide abundance data.
void performIbaqNormalization_(const ProteinIdentification &proteins)
Perform iBAQ normalization on protein abundances.
std::map< std::string, std::string > mapAccessionToLeader(const OpenMS::ProteinIdentification &proteins) const
bool getBest_(const std::map< Int, std::map< std::string, std::map< Int, std::map< UInt, double > > > > &peptide_abundances, std::tuple< size_t, std::string, size_t, UInt > &best)
Determine fraction, filename, charge state, and channel of a peptide with the highest number of abund...
void countPeptides_(PeptideIdentificationList &peptides)
Count the number of identifications (best hits only) of each peptide sequence.
void quantifyPeptides(const PeptideIdentificationList &peptides=PeptideIdentificationList())
Compute peptide abundances.
PeptideQuant pep_quant_
Peptide quantification data.
Definition PeptideAndProteinQuant.h:204
void transferPeptideDataToProteins_(const ProteinIdentification &proteins)
Transfer peptide-level quantitative data to protein-level data structures.
size_t getSampleIDFromFilenameAndChannel_(const std::string &filename, UInt channel_or_label) const
Map (filename, channel) to sample using the precomputed sample_id_lookup_.
~PeptideAndProteinQuant() override
Destructor.
Definition PeptideAndProteinQuant.h:133
std::string getAccession_(const std::set< std::string > &pep_accessions, const std::map< std::string, std::string > &accession_to_leader) const
Get the "canonical" protein accession from the list of protein accessions of a peptide.
void readQuantData(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, const ExperimentalDesign &ed)
Read quantitative data from identification results (for quantification via spectral counting).
void updateMembers_() override
Clear all data when parameters are set.
const ProteinQuant & getProteinResults()
Get protein abundance data.
void normalizePeptides_()
Normalize peptide abundances across samples by (multiplicative) scaling to equal medians.
void calculateFileAndChannelLevelProteinAbundances_(const std::string &protein_accession, const std::vector< std::string > &selected_peptides, const std::string &aggregate_method, Size top_n, bool include_all, const std::map< std::string, std::string > &accession_to_leader)
Calculate detailed protein abundances at channel level using selected peptides.
void quantifyProteins(const ProteinIdentification &proteins=ProteinIdentification())
Compute protein abundances.
Statistics stats_
Processing statistics for output in the end.
Definition PeptideAndProteinQuant.h:201
const Statistics & getStatistics()
Get summary statistics.
void calculateProteinAbundances_(const std::string &protein_accession, const std::vector< std::string > &selected_peptides, const std::string &aggregate_method, Size top_n, bool include_all)
Calculate protein abundances for a single protein using selected peptides.
std::map< std::pair< std::string, UInt >, size_t > sample_id_lookup_
Definition PeptideAndProteinQuant.h:215
PeptideHit getAnnotation_(PeptideIdentificationList &peptides)
Get the "canonical" annotation (a single peptide hit) of a feature/consensus feature from the associa...
void orderBest_(const std::map< T, SampleAbundances > &abundances, std::vector< T > &result)
Order keys (charges/peptides for peptide/protein quantification) according to how many samples they a...
Definition PeptideAndProteinQuant.h:259
std::map< UInt64, double > SampleAbundances
Mapping: sample ID -> abundance.
Definition PeptideAndProteinQuant.h:38
ProteinQuant prot_quant_
Protein quantification data.
Definition PeptideAndProteinQuant.h:207
PeptideAndProteinQuant()
Constructor.
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Representation of a protein identification run.
Definition ProteinIdentification.h:55
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Quantitative and associated data for a peptide.
Definition PeptideAndProteinQuant.h:42
std::map< Int, std::map< std::string, std::map< Int, UInt64 > > > psm_counts
mapping: fraction -> filename -> charge -> abundance
Definition PeptideAndProteinQuant.h:47
SampleAbundances total_psm_counts
spectral counting-based abundances
Definition PeptideAndProteinQuant.h:53
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition PeptideAndProteinQuant.h:50
std::map< Int, std::map< std::string, std::map< Int, std::map< UInt, double > > > > abundances
mapping: fraction -> filename -> charge -> channel/label -> abundance
Definition PeptideAndProteinQuant.h:44
std::set< std::string > accessions
protein accessions for this peptide
Definition PeptideAndProteinQuant.h:56
Quantitative and associated data for a protein.
Definition PeptideAndProteinQuant.h:70
SampleAbundances total_psm_counts
spectral counting-based abundances
Definition PeptideAndProteinQuant.h:86
std::map< std::string, SampleAbundances > peptide_psm_counts
Definition PeptideAndProteinQuant.h:74
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition PeptideAndProteinQuant.h:83
std::map< std::string, std::map< UInt, double > > channel_level_abundances
mapping: filename -> channel/label -> abundance
Definition PeptideAndProteinQuant.h:77
std::map< std::string, SampleAbundances > peptide_abundances
mapping: peptide (unmodified) -> sample -> abundance
Definition PeptideAndProteinQuant.h:72
std::map< std::string, UInt64 > file_level_psm_counts
mapping: filename -> PSM counts
Definition PeptideAndProteinQuant.h:80
SampleAbundances total_distinct_peptides
number of distinct peptide sequences
Definition PeptideAndProteinQuant.h:89
Statistics for processing summary.
Definition PeptideAndProteinQuant.h:103
Size quant_proteins
protein statistics
Definition PeptideAndProteinQuant.h:114
Size quant_peptides
peptide statistics
Definition PeptideAndProteinQuant.h:117
Size n_samples
number of samples (or assays in mzTab terms)
Definition PeptideAndProteinQuant.h:105
Size n_fractions
number of fractions
Definition PeptideAndProteinQuant.h:108
Statistics()
constructor
Definition PeptideAndProteinQuant.h:123
Size n_ms_files
number of MS files
Definition PeptideAndProteinQuant.h:111
Size ambig_features
Definition PeptideAndProteinQuant.h:120