OpenMS  2.6.0
IDScoreSwitcherAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Julianus Pfeuffer $
32 // $Authors: Julianus Pfeuffer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
41 #include <vector>
42 #include <set>
43 
44 namespace OpenMS
45 {
46 
47  class OPENMS_DLLAPI IDScoreSwitcherAlgorithm:
48  public DefaultParamHandler
49  {
50  public:
52 
57  enum class ScoreType
58  {
59  RAW,
60  RAW_EVAL,
61  PP,
62  PEP,
63  FDR,
64  QVAL,
65  };
66 
69  template <typename IDType>
70  void switchScores(IDType& id, Size& counter)
71  {
72  for (typename std::vector<typename IDType::HitType>::iterator hit_it = id.getHits().begin();
73  hit_it != id.getHits().end(); ++hit_it, ++counter)
74  {
75  if (!hit_it->metaValueExists(new_score_))
76  {
77  std::stringstream msg;
78  msg << "Meta value '" << new_score_ << "' not found for " << *hit_it;
79  throw Exception::MissingInformation(__FILE__, __LINE__,
80  OPENMS_PRETTY_FUNCTION, msg.str());
81  }
82 
83  const String& old_score_meta = (old_score_.empty() ? id.getScoreType() :
84  old_score_);
85  const DataValue& dv = hit_it->getMetaValue(old_score_meta);
86  if (!dv.isEmpty()) // meta value for old score already exists
87  {
88  if (fabs((double(dv) - hit_it->getScore()) * 2.0 /
89  (double(dv) + hit_it->getScore())) > tolerance_)
90  {
91  std::stringstream msg;
92  msg << "Meta value '" << old_score_meta << "' already exists "
93  << "with a conflicting value for " << *hit_it;
94  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
95  msg.str(), dv.toString());
96  } // else: values match, nothing to do
97  }
98  else
99  {
100  hit_it->setMetaValue(old_score_meta, hit_it->getScore());
101  }
102  hit_it->setScore(hit_it->getMetaValue(new_score_));
103  }
104  id.setScoreType(new_score_type_);
105  id.setHigherScoreBetter(higher_better_);
106  }
107 
111  void switchToGeneralScoreType(std::vector<PeptideIdentification>& id, ScoreType type, Size& counter)
112  {
113  if (id.empty()) return;
114  String t = findScoreType(id[0], type);
115  if (t.empty())
116  {
117  String msg = "First encountered ID does not have the requested score type.";
118  throw Exception::MissingInformation(__FILE__, __LINE__,
119  OPENMS_PRETTY_FUNCTION, msg);
120  }
121  else if (t == id[0].getScoreType())
122  {
123  // we assume that all the other peptide ids
124  // also already have the correct score set
125  return;
126  }
127 
128  if (t.hasSuffix("_score"))
129  {
130  new_score_type_ = t.chop(6);
131  }
132  else
133  {
134  new_score_type_ = t;
135  }
136  new_score_ = t;
137 
138  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
139  {
140  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
141  higher_better_ = type_to_better_[type];
142  }
143  for (auto& i : id)
144  {
145  switchScores(i, counter);
146  }
147  }
148 
152  void switchToGeneralScoreType(ConsensusMap& cmap, ScoreType type, Size& counter, bool unassigned_peptides_too = true)
153  {
154  String new_type = "";
155  for (const auto& f : cmap)
156  {
157  const auto& ids = f.getPeptideIdentifications();
158  if (!ids.empty())
159  {
160  new_type = findScoreType(ids[0], type);
161  if (new_type == ids[0].getScoreType())
162  {
163  return;
164  }
165  else
166  {
167  break;
168  }
169  }
170  }
171 
172  if (new_type.empty())
173  {
174  String msg = "First encountered ID does not have the requested score type.";
175  throw Exception::MissingInformation(__FILE__, __LINE__,
176  OPENMS_PRETTY_FUNCTION, msg);
177  }
178 
179  if (new_type.hasSuffix("_score"))
180  {
181  new_score_type_ = new_type.chop(6);
182  }
183  else
184  {
185  new_score_type_ = new_type;
186  }
187  new_score_ = new_type;
188 
189  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
190  {
191  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
192  higher_better_ = type_to_better_[type];
193  }
194 
195  const auto switchScoresSingle = [&counter,this](PeptideIdentification& id){switchScores(id,counter);};
196  cmap.applyFunctionOnPeptideIDs(switchScoresSingle, unassigned_peptides_too);
197  }
198 
199 
201  template <typename IDType>
203  {
204  const String& curr_score_type = id.getScoreType();
205  const std::set<String>& possible_types = type_to_str_[type];
206  if (possible_types.find(curr_score_type) != possible_types.end())
207  {
208  OPENMS_LOG_INFO << "Requested score type already set as main score: " + curr_score_type + "\n";
209  return curr_score_type;
210  }
211  else
212  {
213  if (id.getHits().empty())
214  {
215  OPENMS_LOG_WARN << "Identification entry used to check for alternative score was empty.\n";
216  return "";
217  }
218  const auto& hit = id.getHits()[0];
219  for (const auto& poss_str : possible_types)
220  {
221  if (hit.metaValueExists(poss_str)) return poss_str;
222  else if (hit.metaValueExists(poss_str + "_score")) return poss_str + "_score";
223  }
224  OPENMS_LOG_WARN << "Score of requested type not found in the UserParams of the checked ID object.\n";
225  return "";
226  }
227  }
228 
229  private:
230  void updateMembers_() override;
231 
233  const double tolerance_ = 1e-6;
234 
236  String new_score_, new_score_type_, old_score_;
238  bool higher_better_; // for the new scores, are higher ones better?
239 
241  std::map<ScoreType, std::set<String>> type_to_str_ =
242  {
243  {ScoreType::RAW, {"XTandem", "OMSSA", "SEQUEST:xcorr", "Mascot", "mvh"}},
244  //TODO find out reasonable raw scores for SES that provide evalues as main score or see below
245  //TODO there is no test for spectraST idXML, so I dont know its score
246  //TODO check if we should combine RAW and RAW_EVAL:
247  // What if a SE does not have an e-value score (spectrast, OMSSA, crux/sequest, myrimatch),
248  // then you need additional ifs/trys
249  {ScoreType::RAW_EVAL, {"expect", "SpecEValue", "E-Value", "evalue", "MS:1002053", "MS:1002257"}},
250  {ScoreType::PP, {"Posterior Probability"}},
251  {ScoreType::PEP, {"Posterior Error Probability", "pep", "MS:1001493"}}, // TODO add CV terms
252  {ScoreType::FDR, {"FDR", "fdr", "false discovery rate"}},
253  {ScoreType::QVAL, {"q-value", "qvalue", "MS:1001491", "q-Value", "qval"}}
254  };
255 
257  std::map<ScoreType, bool> type_to_better_ =
258  {
259  {ScoreType::RAW, true}, //TODO this might actually not always be true
260  {ScoreType::RAW_EVAL, false},
261  {ScoreType::PP, true},
262  {ScoreType::PEP, false},
263  {ScoreType::FDR, false},
264  {ScoreType::QVAL, false}
265  };
266  };
267 } // namespace OpenMS
IDScoreSwitcherAlgorithm.h
LogStream.h
DefaultParamHandler.h
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
OpenMS::IDScoreSwitcherAlgorithm::ScoreType
ScoreType
Definition: IDScoreSwitcherAlgorithm.h:57
OpenMS::IDScoreSwitcherAlgorithm::higher_better_
bool higher_better_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:238
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::IDScoreSwitcherAlgorithm::switchToGeneralScoreType
void switchToGeneralScoreType(ConsensusMap &cmap, ScoreType type, Size &counter, bool unassigned_peptides_too=true)
Definition: IDScoreSwitcherAlgorithm.h:152
OpenMS::Exception::InvalidValue
Invalid value exception.
Definition: Exception.h:335
OpenMS::String
A more convenient string class.
Definition: String.h:59
OpenMS::DataValue::isEmpty
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:375
ConsensusMap.h
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
OpenMS::DataValue::toString
String toString(bool full_precision=true) const
Conversion to String. full_precision controls the number of fractional digits for all double types or list...
OpenMS::IDScoreSwitcherAlgorithm::switchToGeneralScoreType
void switchToGeneralScoreType(std::vector< PeptideIdentification > &id, ScoreType type, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:111
OpenMS::IDScoreSwitcherAlgorithm::findScoreType
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
finds a certain score type in an ID and its metavalues if present, otherwise returns empty string
Definition: IDScoreSwitcherAlgorithm.h:202
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::IDScoreSwitcherAlgorithm::old_score_
String old_score_
Definition: IDScoreSwitcherAlgorithm.h:236
OpenMS::String::chop
String chop(Size n) const
Returns a substring where n characters were removed from the end of the string.
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Exception.h
OpenMS::DataValue
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
OpenMS::IDScoreSwitcherAlgorithm::switchScores
void switchScores(IDType &id, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:70
ProteinIdentification.h
OpenMS::IDScoreSwitcherAlgorithm
Definition: IDScoreSwitcherAlgorithm.h:47
OpenMS::String::hasSuffix
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:80
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
OpenMS::Exception::MissingInformation
Not all required information provided.
Definition: Exception.h:195
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:465
PeptideIdentification.h
TOPPBase.h
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:63