OpenMS
Loading...
Searching...
No Matches
PeptideAndProteinQuant.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Hendrik Weisser $
6// $Authors: Hendrik Weisser $
7// --------------------------------------------------------------------------
8
9#pragma once
10
18
19#include <map>
20#include <string>
21#include <unordered_map>
22#include <utility>
23#include <vector>
24
25namespace OpenMS
26{
34 class OPENMS_DLLAPI PeptideAndProteinQuant :
36 {
37public:
38
40 typedef std::map<UInt64, double> SampleAbundances;
41
44 {
46 std::map<Int, std::map<std::string, std::map<Int, std::map<UInt, double>>>> abundances;
47
49 std::map<Int, std::map<std::string, std::map<Int, UInt64>>> psm_counts;
50
53
56
58 std::set<std::string> accessions;
59
61 Size psm_count = 0;
62
64 PeptideData() = default;
65 };
66
68 typedef std::map<AASequence, PeptideData> PeptideQuant;
69
72 {
74 std::map<std::string, SampleAbundances> peptide_abundances;
75
76 std::map<std::string, SampleAbundances> peptide_psm_counts;
77
79 std::map<std::string, std::map<UInt, double>> channel_level_abundances;
80
82 std::map<std::string, UInt64> file_level_psm_counts;
83
86
89
92
94 Size psm_count = 0;
95
97 ProteinData() = default;
98 };
99
101 typedef std::map<std::string, ProteinData> ProteinQuant;
102
105 {
108
111
114
116 Size quant_proteins, too_few_peptides;
117
119 Size quant_peptides, total_peptides;
120
122 Size quant_features, total_features, blank_features, ambig_features;
123
126 n_samples(0), quant_proteins(0), too_few_peptides(0),
127 quant_peptides(0), total_peptides(0), quant_features(0),
128 total_features(0), blank_features(0), ambig_features(0) {}
129 };
130
133
136
/// Read quantitative data from a feature map.
/// NOTE(review): @p features is taken by non-const reference - presumably it may be modified; confirm in the implementation.
142 void readQuantData(FeatureMap& features, const ExperimentalDesign& ed);
143
/// Read quantitative data from a consensus map.
/// NOTE(review): @p consensus is taken by non-const reference - presumably it may be modified; confirm in the implementation.
149 void readQuantData(ConsensusMap& consensus, const ExperimentalDesign& ed);
150
156 void readQuantData(std::vector<ProteinIdentification>& proteins,
158 const ExperimentalDesign& ed);
159
171
172
181
182
183 std::map<std::string, std::string> mapAccessionToLeader(const OpenMS::ProteinIdentification& proteins) const;
184
187
190
193
196 const ProteinQuant& protein_quants,
197 ProteinIdentification& proteins,
198 bool remove_unquantified = true);
199
200private:
201
206 {
207 std::size_t operator()(const std::pair<std::string, UInt>& p) const noexcept
208 {
209 const std::size_t h1 = std::hash<std::string>{}(p.first);
210 const std::size_t h2 = std::hash<UInt>{}(p.second);
211 // boost-style hash_combine
212 return h1 ^ (h2 + 0x9e3779b97f4a7c15ULL + (h1 << 6) + (h1 >> 2));
213 }
214 };
215
222 typedef std::unordered_map<std::string, std::vector<const PeptideQuant::value_type*>> UnmodifiedToEntriesIndex;
223
226
229
232
235
240 std::unordered_map<std::pair<std::string, UInt>, size_t, FileLabelHash> sample_id_lookup_;
241
242
250
/// Gather quantitative information from a feature.
261 void quantifyFeature_(const FeatureHandle& feature,
262 size_t fraction,
263 const std::string& filename,
264 const PeptideHit& hit,
265 UInt channel_or_label);
266
275 const std::map<Int, std::map<std::string, std::map<Int, std::map<UInt, double>>>> & peptide_abundances,
276 std::tuple<size_t, std::string, size_t, UInt> & best);
277
283 template <typename T>
284 void orderBest_(const std::map<T, SampleAbundances> & abundances,
285 std::vector<T>& result)
286 {
287 typedef std::pair<Size, double> PairType;
288 std::multimap<PairType, T, std::greater<PairType> > order;
289 for (typename std::map<T, SampleAbundances>::const_iterator ab_it =
290 abundances.begin(); ab_it != abundances.end(); ++ab_it)
291 {
292 double total = 0.0;
293 for (SampleAbundances::const_iterator samp_it = ab_it->second.begin();
294 samp_it != ab_it->second.end(); ++samp_it)
295 {
296 total += samp_it->second;
297 }
298 if (total <= 0.0) continue; // not quantified
299 PairType key = std::make_pair(ab_it->second.size(), total);
300 order.insert(std::make_pair(key, ab_it->first));
301 }
302 result.clear();
303 for (typename std::multimap<PairType, T, std::greater<PairType> >::
304 iterator ord_it = order.begin(); ord_it != order.end(); ++ord_it)
305 {
306 result.push_back(ord_it->second);
307 }
308 }
309
310
311
316
325
/// Select peptides for protein quantification based on filtering criteria.
334 std::vector<std::string> selectPeptidesForQuantification_(const std::string& protein_accession,
335 Size top_n,
336 bool fix_peptides);
337
/// Aggregate abundances using the specified mathematical method.
/// NOTE(review): the set of accepted @p method names is not visible in this header - check the implementation.
345 double aggregateAbundances_(const std::vector<double>& abundances,
346 const std::string& method) const;
347
/// Calculate protein abundances for a single protein using selected peptides.
357 void calculateProteinAbundances_(const std::string& protein_accession,
358 const std::vector<std::string>& selected_peptides,
359 const std::string& aggregate_method,
360 Size top_n,
361 bool include_all);
362
/// Calculate detailed protein abundances at channel level using selected peptides.
374 void calculateFileAndChannelLevelProteinAbundances_(const std::string& protein_accession,
375 const std::vector<std::string>& selected_peptides,
376 const std::string& aggregate_method,
377 Size top_n,
378 bool include_all,
379 const std::map<std::string, std::string>& accession_to_leader,
380 const UnmodifiedToEntriesIndex& unmod_to_entries);
381
388
/// Get the "canonical" protein accession from the list of protein accessions of a peptide.
401 std::string getAccession_(const std::set<std::string>& pep_accessions,
402 const std::map<std::string, std::string>& accession_to_leader) const;
403
410
419
/// Map (filename, channel) to sample using the precomputed sample_id_lookup_.
428 size_t getSampleIDFromFilenameAndChannel_(const std::string& filename,
429 UInt channel_or_label) const;
430
/// Clear all data when parameters are set (overrides the base-class hook).
432 void updateMembers_() override;
433
434 }; // class
435
436} // namespace
A container for consensus elements.
Definition ConsensusMap.h:67
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition ExperimentalDesign.h:109
Representation of a Peak2D, RichPeak2D or Feature.
Definition FeatureHandle.h:36
A container for features.
Definition FeatureMap.h:78
Helper class for peptide and protein quantification based on feature data annotated with IDs.
Definition PeptideAndProteinQuant.h:36
void readQuantData(ConsensusMap &consensus, const ExperimentalDesign &ed)
Read quantitative data from a consensus map.
void annotateQuantificationsToProteins(const ProteinQuant &protein_quants, ProteinIdentification &proteins, bool remove_unquantified=true)
Annotate protein quant results as meta data to protein ids.
std::map< AASequence, PeptideData > PeptideQuant
Mapping: peptide sequence (modified) -> peptide data.
Definition PeptideAndProteinQuant.h:68
void readQuantData(FeatureMap &features, const ExperimentalDesign &ed)
Read quantitative data from a feature map.
void buildSampleIDLookup_()
(Re)build sample_id_lookup_ from experimental_design_.
std::unordered_map< std::string, std::vector< const PeptideQuant::value_type * > > UnmodifiedToEntriesIndex
Definition PeptideAndProteinQuant.h:222
ExperimentalDesign experimental_design_
Experimental design for filename/channel to sample mapping.
Definition PeptideAndProteinQuant.h:234
std::map< std::string, ProteinData > ProteinQuant
Mapping: protein accession -> protein data.
Definition PeptideAndProteinQuant.h:101
void calculateFileAndChannelLevelProteinAbundances_(const std::string &protein_accession, const std::vector< std::string > &selected_peptides, const std::string &aggregate_method, Size top_n, bool include_all, const std::map< std::string, std::string > &accession_to_leader, const UnmodifiedToEntriesIndex &unmod_to_entries)
Calculate detailed protein abundances at channel level using selected peptides.
double aggregateAbundances_(const std::vector< double > &abundances, const std::string &method) const
Aggregate abundances using the specified mathematical method.
std::vector< std::string > selectPeptidesForQuantification_(const std::string &protein_accession, Size top_n, bool fix_peptides)
Select peptides for protein quantification based on filtering criteria.
void quantifyFeature_(const FeatureHandle &feature, size_t fraction, const std::string &filename, const PeptideHit &hit, UInt channel_or_label)
Gather quantitative information from a feature.
const PeptideQuant & getPeptideResults()
Get peptide abundance data.
void performIbaqNormalization_(const ProteinIdentification &proteins)
Perform iBAQ normalization on protein abundances.
std::map< std::string, std::string > mapAccessionToLeader(const OpenMS::ProteinIdentification &proteins) const
bool getBest_(const std::map< Int, std::map< std::string, std::map< Int, std::map< UInt, double > > > > &peptide_abundances, std::tuple< size_t, std::string, size_t, UInt > &best)
Determine fraction, filename, charge state, and channel of a peptide with the highest number of abund...
void countPeptides_(PeptideIdentificationList &peptides)
Count the number of identifications (best hits only) of each peptide sequence.
void quantifyPeptides(const PeptideIdentificationList &peptides=PeptideIdentificationList())
Compute peptide abundances.
PeptideQuant pep_quant_
Peptide quantification data.
Definition PeptideAndProteinQuant.h:228
void transferPeptideDataToProteins_(const ProteinIdentification &proteins)
Transfer peptide-level quantitative data to protein-level data structures.
size_t getSampleIDFromFilenameAndChannel_(const std::string &filename, UInt channel_or_label) const
Map (filename, channel) to sample using the precomputed sample_id_lookup_.
~PeptideAndProteinQuant() override
Destructor.
Definition PeptideAndProteinQuant.h:135
std::string getAccession_(const std::set< std::string > &pep_accessions, const std::map< std::string, std::string > &accession_to_leader) const
Get the "canonical" protein accession from the list of protein accessions of a peptide.
void readQuantData(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, const ExperimentalDesign &ed)
Read quantitative data from identification results (for quantification via spectral counting).
void updateMembers_() override
Clear all data when parameters are set.
const ProteinQuant & getProteinResults()
Get protein abundance data.
void normalizePeptides_()
Normalize peptide abundances across samples by (multiplicative) scaling to equal medians.
void quantifyProteins(const ProteinIdentification &proteins=ProteinIdentification())
Compute protein abundances.
Statistics stats_
Processing statistics for output in the end.
Definition PeptideAndProteinQuant.h:225
const Statistics & getStatistics()
Get summary statistics.
void calculateProteinAbundances_(const std::string &protein_accession, const std::vector< std::string > &selected_peptides, const std::string &aggregate_method, Size top_n, bool include_all)
Calculate protein abundances for a single protein using selected peptides.
PeptideHit getAnnotation_(PeptideIdentificationList &peptides)
Get the "canonical" annotation (a single peptide hit) of a feature/consensus feature from the associa...
void orderBest_(const std::map< T, SampleAbundances > &abundances, std::vector< T > &result)
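Order keys (charges/peptides for peptide/protein quantification) according to how many samples they have abundance entries for (descending), breaking ties by total abundance; keys with a total abundance <= 0 are omitted.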
Definition PeptideAndProteinQuant.h:284
std::map< UInt64, double > SampleAbundances
Mapping: sample ID -> abundance.
Definition PeptideAndProteinQuant.h:40
ProteinQuant prot_quant_
Protein quantification data.
Definition PeptideAndProteinQuant.h:231
PeptideAndProteinQuant()
Constructor.
std::unordered_map< std::pair< std::string, UInt >, size_t, FileLabelHash > sample_id_lookup_
Definition PeptideAndProteinQuant.h:240
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Representation of a protein identification run.
Definition ProteinIdentification.h:55
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Definition PeptideAndProteinQuant.h:206
std::size_t operator()(const std::pair< std::string, UInt > &p) const noexcept
Definition PeptideAndProteinQuant.h:207
Quantitative and associated data for a peptide.
Definition PeptideAndProteinQuant.h:44
std::map< Int, std::map< std::string, std::map< Int, UInt64 > > > psm_counts
mapping: fraction -> filename -> charge -> PSM count
Definition PeptideAndProteinQuant.h:49
SampleAbundances total_psm_counts
spectral counting-based abundances
Definition PeptideAndProteinQuant.h:55
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition PeptideAndProteinQuant.h:52
std::map< Int, std::map< std::string, std::map< Int, std::map< UInt, double > > > > abundances
mapping: fraction -> filename -> charge -> channel/label -> abundance
Definition PeptideAndProteinQuant.h:46
std::set< std::string > accessions
protein accessions for this peptide
Definition PeptideAndProteinQuant.h:58
Quantitative and associated data for a protein.
Definition PeptideAndProteinQuant.h:72
SampleAbundances total_psm_counts
spectral counting-based abundances
Definition PeptideAndProteinQuant.h:88
std::map< std::string, SampleAbundances > peptide_psm_counts
Definition PeptideAndProteinQuant.h:76
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition PeptideAndProteinQuant.h:85
std::map< std::string, std::map< UInt, double > > channel_level_abundances
mapping: filename -> channel/label -> abundance
Definition PeptideAndProteinQuant.h:79
std::map< std::string, SampleAbundances > peptide_abundances
mapping: peptide (unmodified) -> sample -> abundance
Definition PeptideAndProteinQuant.h:74
std::map< std::string, UInt64 > file_level_psm_counts
mapping: filename -> PSM counts
Definition PeptideAndProteinQuant.h:82
SampleAbundances total_distinct_peptides
number of distinct peptide sequences
Definition PeptideAndProteinQuant.h:91
Statistics for processing summary.
Definition PeptideAndProteinQuant.h:105
Size quant_proteins
protein statistics
Definition PeptideAndProteinQuant.h:116
Size quant_peptides
peptide statistics
Definition PeptideAndProteinQuant.h:119
Size n_samples
number of samples (or assays in mzTab terms)
Definition PeptideAndProteinQuant.h:107
Size n_fractions
number of fractions
Definition PeptideAndProteinQuant.h:110
Statistics()
constructor
Definition PeptideAndProteinQuant.h:125
Size n_ms_files
number of MS files
Definition PeptideAndProteinQuant.h:113
Size ambig_features
Definition PeptideAndProteinQuant.h:122