OpenMS
IDScoreSwitcherAlgorithm.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Julianus Pfeuffer $
6 // $Authors: Julianus Pfeuffer $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
15 
16 #include <algorithm>
17 #include <vector>
18 #include <set>
19 
20 namespace OpenMS
21 {
22 
23  class OPENMS_DLLAPI IDScoreSwitcherAlgorithm:
24  public DefaultParamHandler
25  {
26  public:
28 
/**
  @brief General categories of identification scores handled by this class.

  Every category has a set of known score names (member type_to_str_) and an
  expected orientation (member type_to_better_).
*/
enum class ScoreType
{
  RAW,      ///< search-engine specific score (registered names include "hyperscore", "Mascot", "XTandem"); registered as higher-is-better
  RAW_EVAL, ///< e-value like score (registered names include "expect", "E-Value"); registered as lower-is-better
  PP,       ///< posterior probability; registered as higher-is-better
  PEP,      ///< posterior error probability; registered as lower-is-better
  FDR,      ///< false discovery rate; registered as lower-is-better
  QVAL,     ///< q-value; registered as lower-is-better
};
42 
44  bool isScoreType(const String& score_name, const ScoreType& type)
45  {
46  String chopped = score_name;
47  if (chopped.hasSuffix("_score"))
48  {
49  chopped = chopped.chop(6);
50  }
51  const std::set<String>& possible_types = type_to_str_[type];
52  return possible_types.find(chopped) != possible_types.end();
53  }
54 
56  static ScoreType getScoreType(String score_type)
57  {
58  if (score_type.hasSuffix("_score"))
59  {
60  score_type = score_type.chop(6);
61  }
62  score_type.toLower();
63  score_type.erase(std::remove_if(score_type.begin(), score_type.end(),
64  [](unsigned char c) { return c == '-' || c == '_' || c == ' '; }),
65  score_type.end());
66 
67  const std::map<String, ScoreType> s_to_type =
68  {
69  {"raw", ScoreType::RAW},
70  {"rawevalue", ScoreType::RAW_EVAL},
71  {"qvalue", ScoreType::QVAL},
72  {"fdr", ScoreType::FDR},
73  {"falsediscoveryrate", ScoreType::FDR},
74  {"pep", ScoreType::PEP},
75  {"posteriorerrorprobability", ScoreType::PEP},
76  {"posteriorprobabilty", ScoreType::PP},
77  {"pp", ScoreType::PP}
78  };
79 
80  if (auto it = s_to_type.find(score_type); it != s_to_type.end())
81  {
82  return it->second;
83  }
84  else
85  {
86  throw Exception::MissingInformation(__FILE__, __LINE__,
87  OPENMS_PRETTY_FUNCTION, String("Unknown score type ") + score_type);
88  }
89  }
90 
98  {
99  return type_to_better_[score_type];
100  }
101 
/**
 * @brief Gets a vector of all score names that are used in OpenMS.
 *
 * @return A vector of all score names that are used in OpenMS (e.g., "q-value", "ln(hyperscore)").
 */
107  std::vector<String> getScoreTypeNames();
108 
129  template <typename IDType>
130  void switchScores(IDType& id, Size& counter)
131  {
132  for (auto hit_it = id.getHits().begin();
133  hit_it != id.getHits().end(); ++hit_it, ++counter)
134  {
135  if (!hit_it->metaValueExists(new_score_))
136  {
137  std::stringstream msg;
138  msg << "Meta value '" << new_score_ << "' not found for " << *hit_it;
139  throw Exception::MissingInformation(__FILE__, __LINE__,
140  OPENMS_PRETTY_FUNCTION, msg.str());
141  }
142 
143  const String& old_score_meta = (old_score_.empty() ? id.getScoreType() :
144  old_score_);
145  const DataValue& dv = hit_it->getMetaValue(old_score_meta);
146  if (!dv.isEmpty()) // meta value for old score already exists
147  {
148  // TODO: find a better way to check if old score type is something different (even if it has same name)
149  // This currently, is a workaround for e.g., having Percolator_qvalue as meta value and same q-value as main score (getScore()).
150  // Note by jpfeuffer: The problem with this is, that this may add the old score to some of the hits if different, but not
151  // all, in case one is by chance the same. I would be fine with this, if it was done in the beginning and checked
152  // for every score.
153  if (fabs((double(dv) - hit_it->getScore()) * 2.0 /
154  (double(dv) + hit_it->getScore())) > tolerance_)
155  {
156  hit_it->setMetaValue(old_score_meta + "~", hit_it->getScore());
157  }
158  }
159  else
160  {
161  hit_it->setMetaValue(old_score_meta, hit_it->getScore());
162  }
163  hit_it->setScore(hit_it->getMetaValue(new_score_));
164  }
165  id.setScoreType(new_score_type_);
166  id.setHigherScoreBetter(higher_better_);
167  }
168 
195  template<class IDType>
196  void switchToGeneralScoreType(std::vector<IDType>& id, ScoreType type, Size& counter)
197  {
198  if (id.empty()) return;
199  String t = findScoreType(id[0], type);
200  if (t.empty())
201  {
202  String msg = "First encountered ID does not have the requested score type.";
203  throw Exception::MissingInformation(__FILE__, __LINE__,
204  OPENMS_PRETTY_FUNCTION, msg);
205  }
206  else if (t == id[0].getScoreType())
207  {
208  // we assume that all the other peptide ids
209  // also already have the correct score set
210  return;
211  }
212 
213  if (t.hasSuffix("_score"))
214  {
215  new_score_type_ = t.chop(6);
216  }
217  else
218  {
219  new_score_type_ = t;
220  }
221  new_score_ = t;
222 
223  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
224  {
225  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
226  higher_better_ = type_to_better_[type];
227  }
228  for (auto& i : id)
229  {
230  switchScores(i, counter);
231  }
232  }
233 
237  void switchToGeneralScoreType(ConsensusMap& cmap, ScoreType type, Size& counter, bool unassigned_peptides_too = true)
238  {
239  String new_type = "";
240  for (const auto& f : cmap)
241  {
242  const auto& ids = f.getPeptideIdentifications();
243  if (!ids.empty())
244  {
245  new_type = findScoreType(ids[0], type);
246  if (new_type == ids[0].getScoreType())
247  {
248  return;
249  }
250  else
251  {
252  break;
253  }
254  }
255  }
256 
257  if (new_type.empty())
258  {
259  String msg = "First encountered ID does not have the requested score type.";
260  throw Exception::MissingInformation(__FILE__, __LINE__,
261  OPENMS_PRETTY_FUNCTION, msg);
262  }
263 
264  if (new_type.hasSuffix("_score"))
265  {
266  new_score_type_ = new_type.chop(6);
267  }
268  else
269  {
270  new_score_type_ = new_type;
271  }
272  new_score_ = new_type;
273 
274  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
275  {
276  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
277  higher_better_ = type_to_better_[type];
278  }
279 
280  const auto switchScoresSingle = [&counter,this](PeptideIdentification& id){switchScores(id,counter);};
281  cmap.applyFunctionOnPeptideIDs(switchScoresSingle, unassigned_peptides_too);
282  }
283 
284 
309  template <typename IDType>
311  {
312  const String& curr_score_type = id.getScoreType();
313  const std::set<String>& possible_types = type_to_str_[type];
314  if (possible_types.find(curr_score_type) != possible_types.end())
315  {
316  OPENMS_LOG_INFO << "Requested score type already set as main score: " + curr_score_type + "\n";
317  return curr_score_type;
318  }
319  else
320  {
321  if (id.getHits().empty())
322  {
323  OPENMS_LOG_WARN << "Identification entry used to check for alternative score was empty.\n";
324  return "";
325  }
326  const auto& hit = id.getHits()[0];
327  for (const auto& poss_str : possible_types)
328  {
329  if (hit.metaValueExists(poss_str))
330  {
331  return poss_str;
332  }
333  else if (hit.metaValueExists(poss_str + "_score"))
334  {
335  return poss_str + "_score";
336  }
337  }
338  OPENMS_LOG_WARN << "Score of requested type not found in the UserParams of the checked ID object.\n";
339  return "";
340  }
341  }
342 
private:
  /// This method is used to update extra member variables at the end of the setParameters() method.
  void updateMembers_() override;

  /// relative difference above which a stored old score is considered different from the main score (see switchScores())
  const double tolerance_ = 1e-6;

  /// will be set according to the algorithm parameters
  /// (new_score_: meta value to promote; new_score_type_: name of the resulting main score;
  ///  old_score_: meta value name for backing up the previous main score)
  String new_score_, new_score_type_, old_score_;

  /// will be set according to the algorithm parameters
  /// NOTE(review): no in-class initializer; presumably assigned in updateMembers_() before first use -- confirm
  bool higher_better_; // for the new scores, are higher ones better?

  /// score names (main score types or meta value names) known for each score category
  std::map<ScoreType, std::set<String>> type_to_str_ =
  {
    //TODO introduce real meaningful score names for XTandem, Mascot etc. (e.g., hyperscore)
    {ScoreType::RAW, {"svm", "MS:1001492", "XTandem", "OMSSA", "SEQUEST:xcorr", "Mascot", "mvh", "hyperscore", "ln(hyperscore)"}},
    //TODO find out reasonable raw scores for SES that provide E-Values as main score or see below
    //TODO there is no test for spectraST idXML, so I don't know its score
    //TODO check if we should combine RAW and RAW_EVAL:
    // What if a SE does not have an e-value score (spectrast, OMSSA, crux/sequest, myrimatch),
    // then you need additional if's/try's
    {ScoreType::RAW_EVAL, {"expect", "SpecEValue", "E-Value", "evalue", "MS:1002053", "MS:1002257"}},
    {ScoreType::PP, {"Posterior Probability"}},
    {ScoreType::PEP, {"Posterior Error Probability", "pep", "MS:1001493"}}, // TODO add CV terms
    {ScoreType::FDR, {"FDR", "fdr", "false discovery rate"}},
    {ScoreType::QVAL, {"q-value", "qvalue", "MS:1001491", "q-Value", "qval"}}
  };

  /// expected orientation for each score category (true = higher score is better)
  std::map<ScoreType, bool> type_to_better_ =
  {
    {ScoreType::RAW, true}, //TODO this might actually not always be true
    {ScoreType::RAW_EVAL, false},
    {ScoreType::PP, true},
    {ScoreType::PEP, false},
    {ScoreType::FDR, false},
    {ScoreType::QVAL, false}
  };
382  };
383 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro to use when a warning, i.e., a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:444
#define OPENMS_LOG_INFO
Macro to use when information, e.g., a status, should be reported.
Definition: LogStream.h:449
A container for consensus elements.
Definition: ConsensusMap.h:66
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:362
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Not all required information provided.
Definition: Exception.h:155
Definition: IDScoreSwitcherAlgorithm.h:25
static ScoreType getScoreType(String score_type)
Gets a ScoreType enum from a given score name score_name.
Definition: IDScoreSwitcherAlgorithm.h:56
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
Searches for a specified score type within an identification object and its meta values.
Definition: IDScoreSwitcherAlgorithm.h:310
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition: IDScoreSwitcherAlgorithm.h:97
void switchToGeneralScoreType(ConsensusMap &cmap, ScoreType type, Size &counter, bool unassigned_peptides_too=true)
Definition: IDScoreSwitcherAlgorithm.h:237
void switchScores(IDType &id, Size &counter)
Switches the main scores of all hits in an identification object based on the new scoring settings.
Definition: IDScoreSwitcherAlgorithm.h:130
std::vector< String > getScoreTypeNames()
ScoreType
Definition: IDScoreSwitcherAlgorithm.h:34
void switchToGeneralScoreType(std::vector< IDType > &id, ScoreType type, Size &counter)
Switches the scoring type of identification objects to a general score type.
Definition: IDScoreSwitcherAlgorithm.h:196
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
String new_score_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:350
bool higher_better_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:353
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score_name is of ScoreType type.
Definition: IDScoreSwitcherAlgorithm.h:44
void applyFunctionOnPeptideIDs(T &&f, bool include_unassigned=true)
applies a function on all PeptideIDs or only assigned ones
Definition: MapUtilities.h:42
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:39
A more convenient string class.
Definition: String.h:34
String chop(Size n) const
Returns a substring where n characters were removed from the end of the string.
String & toLower()
Converts the string to lowercase.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
const double c
Definition: Constants.h:188
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19