OpenMS
Loading...
Searching...
No Matches
MapAlignmentAlgorithmIdentification.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Hendrik Weisser $
6// $Authors: Eva Lange, Clemens Groepl, Hendrik Weisser $
7// --------------------------------------------------------------------------
8
9#pragma once
10
22
23#include <cmath> // for "abs"
24#include <limits> // for "max"
25#include <map>
26
27namespace OpenMS
28{
29 /* Concept satisfied only when MapType is exactly OpenMS::FeatureMap or OpenMS::ConsensusMap (checked with std::same_as, so cv-qualified or derived types do not match). */
30 template <typename MapType>
31 concept IsFCMap = std::same_as<MapType, OpenMS::FeatureMap> || std::same_as<MapType, OpenMS::ConsensusMap>;
32
33 class AnnotatedMSRun;
34
55 public ProgressLogger
56 {
57public:
60
63
64 // Set a reference for the alignment
65 template <typename DataType> void setReference(const DataType& data)
66 {
67 reference_.clear();
68 if (data.empty()) return; // empty input resets the reference
69 SeqToList rt_data;
70 // set these here because "checkParameters_" may not have been called yet:
71 use_feature_rt_ = param_.getValue("use_feature_rt").toBool();
72 score_cutoff_ = param_.getValue("score_cutoff").toBool();
73 score_type_ = StringUtils::toStr(param_.getValue("score_type"));
74 bool sorted = getRetentionTimes_(data, rt_data);
75 computeMedians_(rt_data, reference_, sorted);
76
77 if (reference_.empty())
78 {
79 throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Could not extract retention time information from the reference file");
80 }
81 }
82
92 template <typename DataType>
93 void align(const std::vector<DataType>& data,
94 std::vector<TransformationDescription>& transformations,
95 Int reference_index = -1)
96 {
97 // is reference one of the input files?
98 bool use_internal_reference = (reference_index >= 0);
99 // Drop any reference_ left over from a previous align() call before
100 // checkParameters_ counts it as an extra run; setReference() further
101 // down repopulates reference_ for this invocation. External references
102 // set explicitly via setReference() are preserved (reference_index < 0).
103 if (use_internal_reference) reference_.clear();
104
105 checkParameters_(data.size());
106 startProgress(0, 3, "aligning maps");
107
108 reference_index_ = reference_index;
109 if (use_internal_reference)
110 {
111 if (reference_index >= Int(data.size()))
112 {
113 throw Exception::IndexOverflow(__FILE__, __LINE__,
114 OPENMS_PRETTY_FUNCTION,
115 reference_index, data.size());
116 }
117 setReference(data[reference_index]);
118 }
119
120 // one set of RT data for each input map, except reference (if any):
121 std::vector<SeqToList> rt_data(data.size() - use_internal_reference);
122 bool all_sorted = true;
123 for (Size i = 0, j = 0; i < data.size(); ++i)
124 {
125 if ((reference_index >= 0) && (i == Size(reference_index)))
126 {
127 continue; // skip reference map, if any
128 }
129 all_sorted &= getRetentionTimes_(data[i], rt_data[j++]);
130 }
131 setProgress(1);
132
133 computeTransformations_(rt_data, transformations, all_sorted);
134 setProgress(2);
135
136 setProgress(3);
137 endProgress();
138 }
139
140protected:
141
143 typedef std::map<std::string, DoubleList> SeqToList;
144
146 typedef std::map<std::string, double> SeqToValue;
147
150
153
156
/// Value of the "use_feature_rt" parameter; when set, RTs collected from
/// feature/consensus maps are the features' own RTs instead of the RTs of
/// their peptide IDs
bool use_feature_rt_ = false;

/// NOTE(review): not referenced in the visible part of this header -
/// presumably controls whether adduct information is taken into account;
/// confirm against the implementation file
bool use_adducts_ = false;

/// Value of the "score_cutoff" parameter; when unset, "better_" accepts every score
bool score_cutoff_ = false;

/// Score type to use for filtering (read from the "score_type" parameter)
std::string score_type_;

/// Comparison applied as better_(hit score, minimum score) to decide whether
/// a hit is accepted; the default accepts everything
bool (*better_)(double, double) = [](double, double) { return true; };
174
/// Compute the median retention time for each peptide sequence.
/// @param rt_data Retention times grouped by peptide sequence (passed by non-const reference)
/// @param medians Receives one representative retention time per peptide sequence
/// @param sorted Callers in this header pass the return value of "getRetentionTimes_" here,
///        which reports whether the RT lists are already sorted
184 void computeMedians_(SeqToList& rt_data, SeqToValue& medians,
185 bool sorted = false);
186
196 SeqToList& rt_data);
197
/// Collect retention time data from spectrum matches.
206 // NOTE(review): an earlier comment here claimed that "id_data" can't be "const"; the declaration below takes it by const reference, so that claim no longer matches the signature.
207 bool getRetentionTimes_(const IdentificationData& id_data, SeqToList& rt_data);
208
224 bool getRetentionTimes_(const IsFCMap auto& features, SeqToList& rt_data)
225 {
226 if (!score_cutoff_)
227 {
228 better_ = [](double, double)
229 {return true;};
230 }
231 else if (features[0].getPeptideIdentifications()[0].isHigherScoreBetter())
232 {
233 better_ = [](double a, double b)
234 { return a >= b; };
235 }
236 else
237 {
238 better_ = [](double a, double b)
239 { return a <= b; };
240 }
241
242 for (auto feat_it = features.cbegin(); feat_it != features.cend(); ++feat_it)
243 {
244 if (use_feature_rt_)
245 {
246 // find the peptide ID closest in RT to the feature centroid:
247 std::string sequence;
248 double rt_distance = std::numeric_limits<double>::max();
249 bool any_hit = false;
251 feat_it->getPeptideIdentifications().begin(); pep_it !=
252 feat_it->getPeptideIdentifications().end(); ++pep_it)
253 {
254 if (!pep_it->getHits().empty())
255 {
256 any_hit = true;
257 double current_distance = fabs(pep_it->getRT() -
258 feat_it->getRT());
259 if (current_distance < rt_distance)
260 {
261 const PeptideHit* best_hit = getBestScoringHit(pep_it->getHits(), pep_it->isHigherScoreBetter());
262 if (best_hit && better_(best_hit->getScore(), min_score_))
263 {
264 sequence = best_hit->getSequence().toString();
265 rt_distance = current_distance;
266 }
267 }
268 }
269 }
270
271 if (any_hit) rt_data[sequence].push_back(feat_it->getRT());
272 }
273 else
274 {
275 getRetentionTimes_(feat_it->getPeptideIdentifications(), rt_data);
276 }
277 }
278
279 if (!use_feature_rt_ &&
280 param_.getValue("use_unassigned_peptides").toBool())
281 {
282 getRetentionTimes_(features.getUnassignedPeptideIdentifications(),
283 rt_data);
284 }
285
286 // remove duplicates (can occur if a peptide ID was assigned to several
287 // features due to overlap or annotation tolerance):
288 for (SeqToList::iterator rt_it = rt_data.begin(); rt_it != rt_data.end();
289 ++rt_it)
290 {
291 DoubleList& rt_values = rt_it->second;
292 sort(rt_values.begin(), rt_values.end());
293 DoubleList::iterator it = unique(rt_values.begin(), rt_values.end());
294 rt_values.resize(it - rt_values.begin());
295 }
296 return true; // RTs were already sorted for duplicate detection
297 }
298
/// Compute retention time transformations from RT data grouped by peptide sequence.
/// @param rt_data One set of RT data per input map (align() leaves out the internal reference, if any)
/// @param transforms Receives the computed transformations
/// @param sorted align() passes the combined result of its "getRetentionTimes_" calls here,
///        i.e. whether all RT lists are already sorted
306 void computeTransformations_(std::vector<SeqToList>& rt_data,
307 std::vector<TransformationDescription>&
308 transforms, bool sorted = false);
309
/// Check that parameter values are valid.
/// @param runs Number of input runs; align() passes data.size()
317 void checkParameters_(const Size runs);
318
325
332
/// Get the best-scoring PeptideHit from a list of hits.
/// @param hits Hits to choose from
/// @param is_higher_score_better Score orientation; the caller in this header passes the
///        owning peptide ID's isHigherScoreBetter()
/// @return Pointer to the selected hit; the caller in this header tests it for null before use
341 const PeptideHit* getBestScoringHit(const std::vector<PeptideHit>& hits, const bool is_higher_score_better);
342
343private:
344
347
350
351 };
352
353} // namespace OpenMS
std::string toString() const
returns the peptide as string with modifications embedded in brackets
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Int overflow exception.
Definition Exception.h:211
Not all required information provided.
Definition Exception.h:155
typename VecMember::const_iterator const_iterator
Definition ExposedVector.h:69
Definition IdentificationData.h:87
A map alignment algorithm based on peptide identifications from MS2 spectra.
Definition MapAlignmentAlgorithmIdentification.h:56
void computeTransformations_(std::vector< SeqToList > &rt_data, std::vector< TransformationDescription > &transforms, bool sorted=false)
Compute retention time transformations from RT data grouped by peptide sequence.
bool getRetentionTimes_(const PeptideIdentificationList &peptides, SeqToList &rt_data)
Collect retention time data from peptide IDs.
const PeptideHit * getBestScoringHit(const std::vector< PeptideHit > &hits, const bool is_higher_score_better)
Get the best-scoring PeptideHit from a list of hits.
void setReference(const DataType &data)
Definition MapAlignmentAlgorithmIdentification.h:65
bool getRetentionTimes_(const IdentificationData &id_data, SeqToList &rt_data)
Collect retention time data from spectrum matches.
~MapAlignmentAlgorithmIdentification() override
Destructor.
void checkParameters_(const Size runs)
Check that parameter values are valid.
void getReference_()
Get reference retention times.
bool getRetentionTimes_(const IsFCMap auto &features, SeqToList &rt_data)
Collect retention time data from peptide IDs contained in feature maps or consensus maps.
Definition MapAlignmentAlgorithmIdentification.h:224
Int reference_index_
Index of input file to use as reference (if any)
Definition MapAlignmentAlgorithmIdentification.h:149
MapAlignmentAlgorithmIdentification & operator=(const MapAlignmentAlgorithmIdentification &)
Assignment operator intentionally not implemented -> private.
std::map< std::string, double > SeqToValue
Type to store one representative retention time per peptide sequence.
Definition MapAlignmentAlgorithmIdentification.h:146
SeqToValue reference_
Reference retention times (per peptide sequence)
Definition MapAlignmentAlgorithmIdentification.h:152
double min_score_
Minimum score to reach for a peptide to be considered.
Definition MapAlignmentAlgorithmIdentification.h:164
std::map< std::string, DoubleList > SeqToList
Type to store retention times given for individual peptide sequences.
Definition MapAlignmentAlgorithmIdentification.h:143
Size min_run_occur_
Minimum number of runs a peptide must occur in.
Definition MapAlignmentAlgorithmIdentification.h:155
std::string score_type_
Score type to use for filtering.
Definition MapAlignmentAlgorithmIdentification.h:170
MapAlignmentAlgorithmIdentification()
Default constructor.
IdentificationData::ScoreTypeRef handleIdDataScoreType_(const IdentificationData &id_data)
Helper function to find/define the score type for processing IdentificationData.
void align(const std::vector< DataType > &data, std::vector< TransformationDescription > &transformations, Int reference_index=-1)
Align feature maps, consensus maps, or peptide identifications.
Definition MapAlignmentAlgorithmIdentification.h:93
void computeMedians_(SeqToList &rt_data, SeqToValue &medians, bool sorted=false)
Compute the median retention time for each peptide sequence.
MapAlignmentAlgorithmIdentification(const MapAlignmentAlgorithmIdentification &)
Copy constructor intentionally not implemented -> private.
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Definition MapAlignmentAlgorithmIdentification.h:31
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< double > DoubleList
Vector of double precision real types.
Definition ListUtils.h:36
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition MetaData.h:20