OpenMS
Loading...
Searching...
No Matches
IDScoreSwitcherAlgorithm.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Julianus Pfeuffer $
6// $Authors: Julianus Pfeuffer $
7// --------------------------------------------------------------------------
8
9#pragma once
10
17
18#include <algorithm>
19#include <vector>
20#include <set>
21#include <map>
22
23namespace OpenMS
24{
25
41 class OPENMS_DLLAPI IDScoreSwitcherAlgorithm :
43 {
44 public:
47
53
65 bool isScoreType(const String& score_name, const ScoreType& type) const
66 {
67 return Scores::isScoreType(score_name, type);
68 }
69
82 static ScoreType toScoreTypeEnum(const String& score_type)
83 {
84 return Scores::parseIDType(score_type);
85 }
86
94 {
95 return Scores::isHigherBetter(score_type);
96 }
97
103 std::vector<String> getScoreNames();
104
109 {
110 bool is_main_score_type = false;
112 };
113
129 template <typename IdentificationType>
130 ScoreSearchResult findScoreType(const IdentificationType& id, ScoreType score_type) const
131 {
132 ScoreSearchResult result;
133
134 // First check if main score is already of the requested score type using existing infrastructure
135 const String& main_score_type = id.getScoreType();
136 result.is_main_score_type = isScoreType(main_score_type, score_type);
137
138 if (result.is_main_score_type)
139 {
140 // Main score is of the requested type, so return the main score name
141 result.score_name = main_score_type;
142 }
143 else if (!id.getHits().empty())
144 {
145 // Main score is not of the requested type, look for it in meta values
146 const auto& first_hit = id.getHits()[0];
147 const std::set<String>& score_types = Scores::getIDNamesForType(score_type);
148
149 // Search for scores of the requested type in meta values using the existing score type collection
150 for (const String& score_name : score_types)
151 {
152 if (first_hit.metaValueExists(score_name))
153 {
154 result.score_name = score_name;
155 break;
156 }
157 // Also check for "_score" suffix variant
158 String score_name_with_suffix = score_name + "_score";
159 if (first_hit.metaValueExists(score_name_with_suffix))
160 {
161 result.score_name = score_name_with_suffix;
162 break;
163 }
164 }
165 }
166 // If neither main score nor meta values contain the requested type, score_name remains empty
167
168 return result;
169 }
170
191 template <typename IdentificationType>
192 void switchScores(IdentificationType& id, Size& counter)
193 {
194 for (auto hit_it = id.getHits().begin();
195 hit_it != id.getHits().end(); ++hit_it, ++counter)
196 {
197 if (!hit_it->metaValueExists(new_score_))
198 {
199 std::stringstream msg;
200 msg << "Meta value '" << new_score_ << "' not found for " << *hit_it;
201 throw Exception::MissingInformation(__FILE__, __LINE__,
202 OPENMS_PRETTY_FUNCTION, msg.str());
203 }
204
205 const String& old_score_meta = (old_score_.empty() ? id.getScoreType() :
206 old_score_);
207 const DataValue& dv = hit_it->getMetaValue(old_score_meta);
208 if (!dv.isEmpty()) // meta value for old score already exists
209 {
210 // TODO: find a better way to check if old score type is something different (even if it has same name)
211 // This currently, is a workaround for e.g., having Percolator_qvalue as meta value and same q-value as main score (getScore()).
212 // Note by jpfeuffer: The problem with this is, that this may add the old score to some of the hits if different, but not
213 // all, in case one is by chance the same. I would be fine with this, if it was done in the beginning and checked
214 // for every score.
215 if (fabs((double(dv) - hit_it->getScore()) * 2.0 /
216 (double(dv) + hit_it->getScore())) > tolerance_)
217 {
218 hit_it->setMetaValue(old_score_meta + "~", hit_it->getScore());
219 }
220 }
221 else
222 {
223 hit_it->setMetaValue(old_score_meta, hit_it->getScore());
224 }
225 hit_it->setScore(hit_it->getMetaValue(new_score_));
226 }
227 id.setScoreType(new_score_type_);
228 id.setHigherScoreBetter(higher_better_);
229 }
230
257 template<class IdentificationType>
258 void switchToGeneralScoreType(std::vector<IdentificationType>& id, ScoreType type, Size& counter)
259 {
260 if (id.empty()) return;
261
262 auto sr = findScoreType(id[0], type);
263
264 // If the main score is already of the requested type, assume all are set correctly
265 if (sr.is_main_score_type)
266 {
267 // we assume that all the other peptide ids
268 // also already have the correct score set
269 return;
270 }
271
272 // Otherwise we need a score name to switch to
273 if (sr.score_name.empty())
274 {
275 String msg = "First encountered ID does not have the requested score type.";
276 throw Exception::MissingInformation(__FILE__, __LINE__,
277 OPENMS_PRETTY_FUNCTION, msg);
278 }
279
280 String t = sr.score_name;
281
282 if (t.hasSuffix("_score"))
283 {
284 new_score_type_ = t.chop(6);
285 }
286 else
287 {
288 new_score_type_ = t;
289 }
290 new_score_ = t;
291
292 if (higher_better_ != Scores::isHigherBetter(type))
293 {
294 OPENMS_LOG_WARN << "Requested score type does not match the expected score direction. Correcting!\n";
295 higher_better_ = Scores::isHigherBetter(type);
296 }
297 for (auto& i : id)
298 {
299 switchScores(i, counter);
300 }
301 }
302
313 {
314 std::vector<PeptideIdentification>& vec = pep_ids.getData();
315 switchToGeneralScoreType(vec, type, counter);
316 }
317
331 void switchToGeneralScoreType(ConsensusMap& cmap, ScoreType type, Size& counter, bool unassigned_peptides_too = true)
332 {
333 String new_type = "";
334 for (const auto& f : cmap)
335 {
336 const auto& ids = f.getPeptideIdentifications();
337 if (!ids.empty())
338 {
339 auto sr = findScoreType(ids[0], type);
340 if (sr.is_main_score_type)
341 {
342 return;
343 }
344 if (!sr.score_name.empty())
345 {
346 new_type = sr.score_name;
347 break;
348 }
349 }
350 }
351
352 if (new_type.empty())
353 {
354 String msg = "First encountered ID does not have the requested score type.";
355 throw Exception::MissingInformation(__FILE__, __LINE__,
356 OPENMS_PRETTY_FUNCTION, msg);
357 }
358
359 if (new_type.hasSuffix("_score"))
360 {
361 new_score_type_ = new_type.chop(6);
362 }
363 else
364 {
365 new_score_type_ = new_type;
366 }
367 new_score_ = new_type;
368
369 if (higher_better_ != Scores::isHigherBetter(type))
370 {
371 OPENMS_LOG_WARN << "Requested score type does not match the expected score direction. Correcting!\n";
372 higher_better_ = Scores::isHigherBetter(type);
373 }
374
375 const auto switchScoresSingle = [&counter,this](PeptideIdentification& id){switchScores(id,counter);};
376 cmap.applyFunctionOnPeptideIDs(switchScoresSingle, unassigned_peptides_too);
377 }
378
393 const PeptideIdentificationList& pep_ids,
394 String& name,
395 bool& higher_better,
396 ScoreType& score_type)
397 {
398 //TODO check all pep IDs? this assumes equality
399 if (!pep_ids.empty())
400 {
401 name = pep_ids[0].getScoreType(); // The name of the score. Typically a name like "XTandem" or "Percolator_qvalue"
402 higher_better = pep_ids[0].isHigherScoreBetter();
403
404 // look up the score category ("RAW", "PEP", "q-value", etc.) for the given score name
405 if (Scores::findIDTypeByName(name, score_type))
406 {
407 OPENMS_LOG_INFO << "Found score type " << name << " to be of type "
408 << static_cast<std::underlying_type<ScoreType>::type>(score_type) << std::endl;
409 }
410 }
411 }
412
428 String& name,
429 bool& higher_better,
430 ScoreType& score_type,
431 bool include_unassigned = true)
432 {
433 name = "";
434 higher_better = true;
435
436 // TODO: check all pep IDs? this assumes equality to first encountered
437 for (const auto& cf : cmap)
438 {
439 const auto& pep_ids = cf.getPeptideIdentifications();
440 if (!pep_ids.empty())
441 {
442 name = pep_ids[0].getScoreType();
443 higher_better = pep_ids[0].isHigherScoreBetter();
444
445 // look up the score category ("RAW", "PEP", "q-value", etc.) for the given score name
446 if (Scores::findIDTypeByName(name, score_type))
447 {
448 return;
449 }
450 }
451 }
452
453 if (name.empty() && include_unassigned)
454 {
455 for (const auto& id : cmap.getUnassignedPeptideIdentifications())
456 {
457 name = id.getScoreType();
458 higher_better = id.isHigherScoreBetter();
459
460 // look up the score category ("RAW", "PEP", "q-value", etc.) for the given score name
461 if (Scores::findIDTypeByName(name, score_type))
462 {
463 return;
464 }
465 return;
466 }
467 }
468 }
469
483 void switchScores(ConsensusMap& cmap, Size& counter, bool unassigned_peptides_too = true)
484 {
485 for (const auto& f : cmap)
486 {
487 const auto& ids = f.getPeptideIdentifications();
488 if (!ids.empty())
489 {
490 if (new_score_ == ids[0].getScoreType()) // correct score or category already set
491 {
492 return;
493 }
494 else
495 {
496 break;
497 }
498 }
499 }
500 const auto switchScoresSingle = [&counter,this](PeptideIdentification& id){switchScores(id,counter);};
501 cmap.applyFunctionOnPeptideIDs(switchScoresSingle, unassigned_peptides_too);
502 }
503
517 {
518 if (pep_ids.empty()) return;
519
520 if (new_score_ == pep_ids[0].getScoreType()) // correct score already set
521 {
522 return;
523 }
524
525 for (auto& id : pep_ids)
526 {
527 switchScores(id, counter);
528 }
529 }
530
531
541 {
542 // the score name, orientation and type used before the switch
544 bool original_score_higher_better = true;
545 IDScoreSwitcherAlgorithm::ScoreType original_score_type = IDScoreSwitcherAlgorithm::ScoreType::RAW;
546 // the score name, orientation and type used after the switch
547 bool requested_score_higher_better = original_score_higher_better;
548 IDScoreSwitcherAlgorithm::ScoreType requested_score_type = original_score_type;
549 String requested_score_name; // the search engine score name (e.g. "X!Tandem_score" or score category (e.g. "PEP")
550 // wheter the main score was switched
551 bool score_switched = false;
552 };
553
/**
  @brief Switches the score type of a ConsensusMap to the requested score type.

  If @p requested_score_type_as_string is empty, the map is left untouched
  and the result describes the current main score. Otherwise the string is
  converted with toScoreTypeEnum(); if the resulting type differs from the
  type of the current main score, a temporary algorithm instance switches
  the map via switchToGeneralScoreType() and score_switched is set.

  @param cmap Consensus map to modify
  @param requested_score_type_as_string Textual score type; empty means "keep the main score"
  @param include_unassigned Forwarded to the score detection and to the switch
  @return Description of the score before and after the call

  NOTE(review): this listing is incomplete - several statements are missing
  from the body (see the notes below). Verify against the original header
  before relying on it.
*/
567 static IDSwitchResult switchToScoreType(ConsensusMap& cmap, String requested_score_type_as_string, bool include_unassigned = true)
568 {
569 IDSwitchResult result;
570 // fill in the original score name, orientation and type
// NOTE(review): the head of this call and at least one argument are missing here;
// presumably determineScoreNameOrientationAndType(cmap, ...) - confirm.
572 result.original_score_name,
574 result.original_score_type,
575 include_unassigned);
576
577 // initialize with the assumption that the main score is the requested score
578 result.requested_score_name = result.original_score_name; // the search engine score name (e.g. "X!Tandem_score") or score category (e.g. "PEP")
// NOTE(review): two lines are missing after this one; presumably they copy the
// original orientation and type into the requested_* fields - confirm.
581
582 // no score type specified -> use main score
583 if (requested_score_type_as_string.empty())
584 {
585 OPENMS_LOG_DEBUG << "No score type specified. Using main score." << std::endl;
586 return result;
587 }
588
589 // ENUM for requested score type (e.g. "RAW", "PEP", "q-value")
590 result.requested_score_type = IDScoreSwitcherAlgorithm::toScoreTypeEnum(requested_score_type_as_string);
591 if (result.requested_score_type != result.original_score_type) // switch needed because we change type?
592 { // user requests a different score type than the main score
// NOTE(review): the declaration of `idsa` is missing here (two lines lost).
595 auto param = idsa.getDefaults();
// "new_score" is only a placeholder at this point: switchToGeneralScoreType()
// below assigns new_score_ itself once it has detected the score name.
596 param.setValue("new_score", result.requested_score_name);
597 param.setValue("new_score_orientation", result.requested_score_higher_better ? "higher_better" : "lower_better");
598 param.setValue("proteins", "false");
599 param.setValue("old_score", ""); // use default name generated for old score
600 idsa.setParameters(param);
601
602 Size counter = 0;
603 idsa.switchToGeneralScoreType(cmap, result.requested_score_type, counter, include_unassigned);
604 OPENMS_LOG_DEBUG << "Switched scores for " << counter << " IDs." << std::endl;
// set whenever the requested type differs from the original one
605 result.score_switched = true;
606 }
607
608 // update after potential switch and read out actual score name
// NOTE(review): the head of this call and two arguments are missing here;
// presumably a second determineScoreNameOrientationAndType(cmap, ...) - confirm.
610 result.requested_score_name,
613 include_unassigned);
614
615 return result;
616 }
617
/**
  @brief Switches the score type of peptide identifications to the requested type.

  If @p requested_score_type_as_string is empty, the identifications are left
  untouched and the result describes the current main score. Otherwise the
  string is converted with toScoreTypeEnum(); if the resulting type differs
  from the type of the current main score, a temporary algorithm instance
  switches the identifications via switchToGeneralScoreType() and
  score_switched is set.

  @param pep_ids Peptide identifications to modify
  @param requested_score_type_as_string Textual score type; empty means "keep the main score"
  @return Description of the score before and after the call

  NOTE(review): this listing is incomplete - several statements are missing
  from the body (see the notes below). Verify against the original header
  before relying on it.
*/
632 static IDSwitchResult switchToScoreType(PeptideIdentificationList& pep_ids, String requested_score_type_as_string)
633 {
634 IDSwitchResult result;
635 // fill in the original score name, orientation and type
// NOTE(review): the head of this call and two arguments are missing here;
// presumably determineScoreNameOrientationAndType(pep_ids, ...) - confirm.
637 result.original_score_name,
640 );
641
642 // initialize with the assumption that the main score is the requested score
643 result.requested_score_name = result.original_score_name; // the search engine score name (e.g. "X!Tandem_score") or score category (e.g. "PEP")
// NOTE(review): two lines are missing after this one; presumably they copy the
// original orientation and type into the requested_* fields - confirm.
646
647 // no score type specified -> use main score
648 if (requested_score_type_as_string.empty())
649 {
650 OPENMS_LOG_DEBUG << "No score type specified. Using main score." << std::endl;
651 return result;
652 }
653
654 // ENUM for requested score type (e.g. "RAW", "PEP", "q-value")
655 result.requested_score_type = IDScoreSwitcherAlgorithm::toScoreTypeEnum(requested_score_type_as_string);
656 if (result.requested_score_type != result.original_score_type) // switch needed because we change type?
657 { // user requests a different score type than the main score
// NOTE(review): the declaration of `idsa` is missing here (two lines lost).
660 auto param = idsa.getDefaults();
// "new_score" is only a placeholder at this point: switchToGeneralScoreType()
// below assigns new_score_ itself once it has detected the score name.
661 param.setValue("new_score", result.requested_score_name);
662 param.setValue("new_score_orientation", result.requested_score_higher_better ? "higher_better" : "lower_better");
663 param.setValue("proteins", "false");
664 param.setValue("old_score", ""); // use default name generated for old score
665 idsa.setParameters(param);
666 Size counter = 0;
667 idsa.switchToGeneralScoreType(pep_ids, result.requested_score_type, counter);
668 OPENMS_LOG_DEBUG << "Switched scores for " << counter << " IDs." << std::endl;
669
// set whenever the requested type differs from the original one
670 result.score_switched = true;
671 }
672
673 // update after potential switch and read out actual score name
// NOTE(review): the head of this call and two arguments are missing here;
// presumably a second determineScoreNameOrientationAndType(pep_ids, ...) - confirm.
675 result.requested_score_name,
678 );
679
680 return result;
681 }
682
693 static void switchBackScoreType(ConsensusMap& cmap, IDSwitchResult isr, bool include_unassigned = true)
694 {
695 if (isr.score_switched)
696 {
697 // switch back to original score
699 auto param = idsa.getDefaults();
700 param.setValue("new_score", isr.original_score_name);
701 param.setValue("new_score_orientation", isr.original_score_higher_better ? "higher_better" : "lower_better");
702 param.setValue("proteins", "false");
703 param.setValue("old_score", ""); // use default name generated for old score
704 idsa.setParameters(param);
705 Size counter = 0;
706 idsa.switchScores(cmap, counter, include_unassigned);
707 OPENMS_LOG_DEBUG << "Switched scores back for " << counter << " PSMs." << std::endl;
708 }
709 }
710
722 {
723 if (isr.score_switched)
724 {
725 // switch back to original score
727 auto param = idsa.getDefaults();
728 param.setValue("new_score", isr.original_score_name);
729 param.setValue("new_score_orientation", isr.original_score_higher_better ? "higher_better" : "lower_better");
730 param.setValue("proteins", "false");
731 param.setValue("old_score", ""); // use default name generated for old score
732 idsa.setParameters(param);
733 Size counter = 0;
734 idsa.switchScores(pep_ids, counter);
735 OPENMS_LOG_DEBUG << "Switched scores back for " << counter << " PSMs." << std::endl;
736 }
737 }
738
739 private:
740
// Overrides a base-class hook; the definition is not part of this header.
// NOTE(review): presumably this is where the members below are synchronised
// with the algorithm parameters - confirm in the implementation file.
741 void updateMembers_() override;
742
// Maximum relative difference up to which an already stored old score and the
// current main score are treated as the same value in switchScores().
744 const double tolerance_ = 1e-6;
745
// new_score_:      name of the meta value that becomes the new main score
// new_score_type_: score type assigned to an identification after switching
// old_score_:      meta value name under which the previous main score is kept;
//                  if empty, the identification's current score type is used
747 String new_score_, new_score_type_, old_score_;
748
// Orientation assigned to an identification after switching.
750 bool higher_better_; // for the new scores, are higher ones better?
751 };
752} // namespace OpenMS
#define OPENMS_LOG_DEBUG
Macro for debug information - includes file and line info.
Definition LogStream.h:558
#define OPENMS_LOG_WARN
Macro for warnings.
Definition LogStream.h:550
#define OPENMS_LOG_INFO
Macro for information/status messages.
Definition LogStream.h:554
A container for consensus elements.
Definition ConsensusMap.h:68
const PeptideIdentificationList & getUnassignedPeptideIdentifications() const
non-mutable access to the unassigned peptide identifications
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition DataValue.h:34
bool isEmpty() const
Test if the value is empty.
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
const Param & getDefaults() const
Non-mutable access to the default parameters.
void setParameters(const Param &param)
Sets the parameters.
Not all required information provided.
Definition Exception.h:155
const VecMember & getData() const
read-only access to the underlying data
Definition ExposedVector.h:328
bool empty() const noexcept
Definition ExposedVector.h:140
This class is used to switch identification scores within identification or consensus feature maps.
Definition IDScoreSwitcherAlgorithm.h:43
void switchScores(PeptideIdentificationList &pep_ids, Size &counter)
Switches the scores of peptide identifications.
Definition IDScoreSwitcherAlgorithm.h:516
bool score_switched
Definition IDScoreSwitcherAlgorithm.h:551
bool requested_score_higher_better
the type of the original score
Definition IDScoreSwitcherAlgorithm.h:547
std::vector< String > getScoreNames()
Gets a vector of all score names that are used in OpenMS.
IDScoreSwitcherAlgorithm::ScoreType original_score_type
whether a higher original score is better
Definition IDScoreSwitcherAlgorithm.h:545
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition IDScoreSwitcherAlgorithm.h:93
String score_name
Name of score to use (main score name if is_main_score_type=true, meta value name if found in meta va...
Definition IDScoreSwitcherAlgorithm.h:111
void switchToGeneralScoreType(PeptideIdentificationList &pep_ids, ScoreType type, Size &counter)
Switches the score type of a PeptideIdentificationList to a general score type.
Definition IDScoreSwitcherAlgorithm.h:312
void determineScoreNameOrientationAndType(const ConsensusMap &cmap, String &name, bool &higher_better, ScoreType &score_type, bool include_unassigned=true)
Determines the score type and orientation of the main score in a ConsensusMap.
Definition IDScoreSwitcherAlgorithm.h:427
void switchToGeneralScoreType(ConsensusMap &cmap, ScoreType type, Size &counter, bool unassigned_peptides_too=true)
Switches the score type of a ConsensusMap to a general score type.
Definition IDScoreSwitcherAlgorithm.h:331
IDScoreSwitcherAlgorithm::ScoreType requested_score_type
whether a higher requested score is better
Definition IDScoreSwitcherAlgorithm.h:548
bool is_main_score_type
True if the main score is already of the requested score type.
Definition IDScoreSwitcherAlgorithm.h:110
bool isScoreType(const String &score_name, const ScoreType &type) const
Checks if the given score name corresponds to a specific score type.
Definition IDScoreSwitcherAlgorithm.h:65
void switchScores(ConsensusMap &cmap, Size &counter, bool unassigned_peptides_too=true)
Switches the scores of peptide identifications in a ConsensusMap.
Definition IDScoreSwitcherAlgorithm.h:483
String requested_score_name
the type of the requested score
Definition IDScoreSwitcherAlgorithm.h:549
ScoreSearchResult findScoreType(const IdentificationType &id, ScoreType score_type) const
Searches for a general score type (e.g. PEP, QVAL) in an identification data structure.
Definition IDScoreSwitcherAlgorithm.h:130
void determineScoreNameOrientationAndType(const PeptideIdentificationList &pep_ids, String &name, bool &higher_better, ScoreType &score_type)
Determines the score type and orientation of the main score for a set of peptide identifications.
Definition IDScoreSwitcherAlgorithm.h:392
bool original_score_higher_better
The name of the original score used before the switch.
Definition IDScoreSwitcherAlgorithm.h:544
void updateMembers_() override
documented in base class
String original_score_name
Definition IDScoreSwitcherAlgorithm.h:543
IDScoreSwitcherAlgorithm()
Default constructor. Initializes the parameter handler with default values.
static void switchBackScoreType(ConsensusMap &cmap, IDSwitchResult isr, bool include_unassigned=true)
Reverts the score type of a ConsensusMap to its original type based on the provided IDSwitchResult.
Definition IDScoreSwitcherAlgorithm.h:693
String new_score_
will be set according to the algorithm parameters
Definition IDScoreSwitcherAlgorithm.h:747
static void switchBackScoreType(PeptideIdentificationList &pep_ids, IDSwitchResult isr)
Reverts the scoring type of peptide identifications to their original scores.
Definition IDScoreSwitcherAlgorithm.h:721
static IDSwitchResult switchToScoreType(PeptideIdentificationList &pep_ids, String requested_score_type_as_string)
Switches the score type of peptide identifications to the requested type.
Definition IDScoreSwitcherAlgorithm.h:632
void switchScores(IdentificationType &id, Size &counter)
Switches the main scores of all hits in an identification object based on the new scoring settings.
Definition IDScoreSwitcherAlgorithm.h:192
static IDSwitchResult switchToScoreType(ConsensusMap &cmap, String requested_score_type_as_string, bool include_unassigned=true)
Switches the score type of a ConsensusMap to the requested score type.
Definition IDScoreSwitcherAlgorithm.h:567
static ScoreType toScoreTypeEnum(const String &score_type)
Converts a string representation of a score type to a ScoreType enum.
Definition IDScoreSwitcherAlgorithm.h:82
bool higher_better_
will be set according to the algorithm parameters
Definition IDScoreSwitcherAlgorithm.h:750
void switchToGeneralScoreType(std::vector< IdentificationType > &id, ScoreType type, Size &counter)
Switches the scoring type of identification objects to a general score type.
Definition IDScoreSwitcherAlgorithm.h:258
Structure holding score switching information for IDScoreSwitcherAlgorithm.
Definition IDScoreSwitcherAlgorithm.h:541
Structure to hold score detection results for any ScoreType.
Definition IDScoreSwitcherAlgorithm.h:109
void applyFunctionOnPeptideIDs(T &&f, bool include_unassigned=true)
applies a function on all PeptideIDs or only assigned ones
Definition MapUtilities.h:43
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition PeptideIdentification.h:66
IDType
Hierarchy of possible score types in MS identification.
Definition Scores.h:51
A more convenient string class.
Definition String.h:34
String chop(Size n) const
Returns a substring where n characters were removed from the end of the string.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19