OpenMS  2.5.0
XFDRAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Eugen Netz $
32 // $Authors: Lukas Zimmermann, Eugen Netz $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
44 
45 namespace OpenMS
46 {
47 
48  //-------------------------------------------------------------
49  // Doxygen docu
50  //-------------------------------------------------------------
51 
87  class OPENMS_DLLAPI XFDRAlgorithm :
88  public DefaultParamHandler, public ProgressLogger
89  {
    // Algorithm class for FDR estimation in cross-linked peptide experiments
    // (see run()). It derives from DefaultParamHandler and ProgressLogger.
    // NOTE(review): this is a numbered listing with gaps -- listing lines
    // 96-97, 164, 197-198, 208-215, 219, 223 and 230-241 are not present here.
    // The comments below point out each gap where it occurs.
90 
91  public:
92 
    // Exit codes returned by run() and validateClassArguments().
    // NOTE(review): listing lines 96-97 are missing; the symbol index on this
    // page places EXECUTION_OK (96) and ILLEGAL_PARAMETERS (97) there, i.e.
    // before UNEXPECTED_RESULT.
94  enum ExitCodes
95  {
98  UNEXPECTED_RESULT
99  };
100 
    // Default constructor.
102  XFDRAlgorithm();
103 
    // Destructor (overrides a virtual base-class destructor).
105  ~XFDRAlgorithm() override;
106 
    // Performs the main function of this class: the FDR estimation for
    // cross-linked peptide experiments.
    // Both arguments are taken by non-const reference.
    // NOTE(review): presumably they are annotated in place -- confirm in the implementation.
113  ExitCodes run(std::vector<PeptideIdentification>& peptide_ids, ProteinIdentification& protein_id);
114 
    // Checks whether the parameters of the object are valid.
119  ExitCodes validateClassArguments() const;
120 
121 private:
    // Overrides a base-class hook.
    // NOTE(review): presumably DefaultParamHandler's parameter-refresh callback -- confirm.
122  void updateMembers_() override;
123 
    // NOTE(review): presumably prepares the internal data structures
    // (unique_ids_, cross_link_classes_, ...) from the given identifications -- confirm.
133  void initDataStructures_(std::vector<PeptideIdentification>& peptide_ids, ProteinIdentification& protein_id);
134 
    // NOTE(review): presumably fills `types` with the cross-link classes that apply to `ph` -- confirm.
140  static void assignTypes_(PeptideHit& ph, StringList& types);
141 
    // Takes the cumulative histograms keyed by class name, the names of the
    // target / decoy / full-decoy classes, an `fdr` vector passed by non-const
    // reference, and a `mono` flag.
    // NOTE(review): exact formula and the meaning of `mono` are not visible here -- see implementation.
147  void fdr_xprophet_(std::map< String, Math::Histogram<> >& cum_histograms,
148  const String& targetclass, const String& decoyclass, const String& fulldecoyclass,
149  std::vector< double >& fdr, bool mono);
150 
    // Reads `fdr` (const) and writes to `qfdr`.
    // NOTE(review): presumably derives q-values from the FDR values -- confirm.
156  static void calc_qfdr_(const std::vector< double >& fdr, std::vector< double >& qfdr);
157 
    // NOTE(review): presumably populates unique_ids_ / unique_id_scores_ -- confirm.
158  void findTopUniqueHits_(std::vector<PeptideIdentification>& peptide_ids);
159 
    // NOTE(review): presumably logs the current argument values -- confirm.
160  void writeArgumentsLog_() const;
161 
    // Returns an identifier string for the given peptide hit.
    // NOTE(review): how the id is composed is not visible in this listing.
162  String getId_(const PeptideHit& ph) const;
163 
    // NOTE(review): listing line 164 is missing. Per the symbol index on this
    // page, the signature of this body is
    // `static Size getMinIonsMatched_(const PeptideHit &ph)`.
    // The body sums the matched linear and cross-link ion counts separately
    // for the alpha and the beta peptide (read from the hit's meta values)
    // and returns the smaller of the two sums.
165  {
166  Size alpha_ions = Size(ph.getMetaValue("matched_linear_alpha")) + Size(ph.getMetaValue("matched_xlink_alpha"));
167  Size beta_ions = Size(ph.getMetaValue("matched_linear_beta")) + Size(ph.getMetaValue("matched_xlink_beta"));
168  return std::min(alpha_ions, beta_ions);
169  }
170 
    // Stores `value` as the string "true" or "false" in the hit's
    // "XFDR:is_intraprotein" meta value.
171  inline static void setIntraProtein_(PeptideHit& ph, const bool value)
172  {
173  ph.setMetaValue("XFDR:is_intraprotein", DataValue(value ? "true" : "false"));
174  }
175 
    // Stores `value` as the string "true" or "false" in the hit's
    // "XFDR:is_interprotein" meta value.
176  inline static void setInterProtein_(PeptideHit& ph, const bool value)
177  {
178  ph.setMetaValue("XFDR:is_interprotein", DataValue(value ? "true" : "false"));
179  }
180 
    // Determines whether two protein accessions refer to the same protein,
    // modulo the decoy marker: `decoy_string` is substituted by "" in local
    // copies of both names (they are taken by value, so the caller's strings
    // are untouched) and the results are compared for equality.
    // The asserts check that no occurrence of `decoy_string` is left afterwards.
184  static bool isSameProtein_(
185  String prot1,
186  String prot2,
187  const String &decoy_string)
188  {
189  prot1.substitute(decoy_string, "");
190  prot2.substitute(decoy_string, "");
191  assert( ! prot1.hasSubstring(decoy_string));
192  assert( ! prot2.hasSubstring(decoy_string));
193  return prot1 == prot2;
194  }
195 
196  // Score range used by this tool (listing lines 197-198 are missing here; the symbol index places Int min_score_ and Int max_score_ there)
199 
200  // Unique top hits: ids and their scores
201  std::vector<String> unique_ids_;
202  std::vector<double> unique_id_scores_;
203 
204  // Maps a peptide id (originally described as an index into all_pep_ids_) to its vector of cross-link classes. NOTE(review): the key type is String, not an integer index -- confirm what the key holds.
205  std::map<String, std::vector<String>> cross_link_classes_;
206 
207  // Program arguments (listing lines 208-215 are missing here; the symbol index places decoy_string_, arg_mindeltas_, arg_minborder_, arg_maxborder_, arg_minionsmatched_, arg_minscore_, arg_uniquex_ and arg_no_qvalues_ there)
216  double arg_binsize_;
217 
218  // Names of the class parameters (listing lines 219 and 223 are missing here; the symbol index places param_decoy_string_ and param_minionsmatched_ there)
220  static const String param_minborder_;
221  static const String param_maxborder_;
222  static const String param_mindeltas_;
224  static const String param_uniquexl_;
225  static const String param_no_qvalues_;
226  static const String param_minscore_;
227  static const String param_binsize_;
228 
229  // Constants related to particular crosslink classes (listing lines 230-241 are missing here; the symbol index places the static const String crosslink_class_* members there)
242  };
243 }
DefaultParamHandler.h
OpenMS::XFDRAlgorithm::param_maxborder_
static const String param_maxborder_
Definition: XFDRAlgorithm.h:221
OpenMS::FileTypes::IDXML
OpenMS identification format (.idXML)
Definition: FileTypes.h:66
OpenMS::XFDRAlgorithm::param_mindeltas_
static const String param_mindeltas_
Definition: XFDRAlgorithm.h:222
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
OpenMS::MzIdentMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in a MzIdentML file.
TOPPXFDR
Definition: XFDR.cpp:91
OpenMS::Param::copy
Param copy(const String &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
FileHandler.h
XMLFile.h
XQuestResultXMLFile.h
TOPPXFDR::TOPPXFDR
TOPPXFDR()
Definition: XFDR.cpp:96
XFDRAlgorithm.h
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::XFDRAlgorithm::arg_minscore_
double arg_minscore_
Definition: XFDRAlgorithm.h:213
OpenMS::XFDRAlgorithm::arg_uniquex_
bool arg_uniquex_
Definition: XFDRAlgorithm.h:214
OpenMS::String::substitute
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
OpenMS::XFDRAlgorithm::crosslink_class_intradecoys_
static const String crosslink_class_intradecoys_
Definition: XFDRAlgorithm.h:230
OpenMS::MetaInfoInterface::getMetaValue
const DataValue & getMetaValue(const String &name, const DataValue &default_value=DataValue::EMPTY) const
Returns the value corresponding to a string, or a default value (default: DataValue::EMPTY) if not found.
OpenMS::XQuestResultXMLFile::load
void load(const String &filename, std::vector< PeptideIdentification > &pep_ids, std::vector< ProteinIdentification > &prot_ids)
Load the content of the xquest.xml file into the provided data structures.
OpenMS::XFDRAlgorithm::crosslink_class_monolinks_
static const String crosslink_class_monolinks_
Definition: XFDRAlgorithm.h:237
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::XFDRAlgorithm::unique_ids_
std::vector< String > unique_ids_
Definition: XFDRAlgorithm.h:201
TOPPXFDR::arg_in_type_
String arg_in_type_
Definition: XFDR.cpp:203
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
OpenMS::XFDRAlgorithm::param_minborder_
static const String param_minborder_
Definition: XFDRAlgorithm.h:220
OpenMS::XFDRAlgorithm::arg_minborder_
double arg_minborder_
Definition: XFDRAlgorithm.h:210
OpenMS::XFDRAlgorithm::arg_maxborder_
double arg_maxborder_
Definition: XFDRAlgorithm.h:211
OpenMS::XFDRAlgorithm::arg_binsize_
double arg_binsize_
Definition: XFDRAlgorithm.h:216
OpenMS::XFDRAlgorithm::arg_mindeltas_
double arg_mindeltas_
Definition: XFDRAlgorithm.h:209
OpenMS::XFDRAlgorithm::ILLEGAL_PARAMETERS
Definition: XFDRAlgorithm.h:97
OpenMS::FileTypes::MZIDENTML
mzIdentML (HUPO PSI AnalysisXML followup format) (.mzid)
Definition: FileTypes.h:77
main
int main(int argc, const char **argv)
Definition: XFDR.cpp:316
OpenMS::XFDRAlgorithm::getMinIonsMatched_
static Size getMinIonsMatched_(const PeptideHit &ph)
Definition: XFDRAlgorithm.h:164
XMLHandler.h
OpenMS::XFDRAlgorithm
Definition: XFDRAlgorithm.h:87
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:71
OpenMS::XFDRAlgorithm::arg_no_qvalues_
bool arg_no_qvalues_
Definition: XFDRAlgorithm.h:215
OpenMS::XFDRAlgorithm::run
ExitCodes run(std::vector< PeptideIdentification > &peptide_ids, ProteinIdentification &protein_id)
Performs the main function of this class, the FDR estimation for cross-linked peptide experiments.
OpenMS::String::hasSubstring
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
OpenMS::XFDRAlgorithm::max_score_
Int max_score_
Definition: XFDRAlgorithm.h:198
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::XFDRAlgorithm::ExitCodes
ExitCodes
Exit codes.
Definition: XFDRAlgorithm.h:94
OpenMS::XFDRAlgorithm::min_score_
Int min_score_
Definition: XFDRAlgorithm.h:197
XQuestResultXMLHandler.h
OpenMS::XFDRAlgorithm::param_decoy_string_
static const String param_decoy_string_
Definition: XFDRAlgorithm.h:219
TOPPXFDR::registerOptionsAndFlags_
void registerOptionsAndFlags_() final
Sets the valid command line options (with argument) and flags (without argument).
Definition: XFDR.cpp:105
OpenMS::MzIdentMLFile
File adapter for MzIdentML files.
Definition: MzIdentMLFile.h:67
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::Math::Histogram
Representation of a histogram.
Definition: Histogram.h:63
OpenMS::XFDRAlgorithm::validateClassArguments
ExitCodes validateClassArguments() const
Checks whether the parameters of the object are valid.
PeptideHit.h
OpenMS::XQuestResultXMLFile
Used to load and store xQuest result files.
Definition: XQuestResultXMLFile.h:55
OpenMS::XFDRAlgorithm::crosslink_class_decoys_
static const String crosslink_class_decoys_
Definition: XFDRAlgorithm.h:238
OpenMS::ProgressLogger
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
OpenMS::MetaInfoInterface::setMetaValue
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
TOPPXFDR::param_in_type_
static const String param_in_type_
Definition: XFDR.cpp:206
OpenMS::DataValue
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
ProgressLogger.h
OpenMS::XFDRAlgorithm::arg_minionsmatched_
Int arg_minionsmatched_
Definition: XFDRAlgorithm.h:212
OpenMS::XFDRAlgorithm::crosslink_class_hybriddecoysintralinks_
static const String crosslink_class_hybriddecoysintralinks_
Definition: XFDRAlgorithm.h:240
int
OpenMS::XFDRAlgorithm::crosslink_class_fulldecoysintralinks_
static const String crosslink_class_fulldecoysintralinks_
Definition: XFDRAlgorithm.h:231
OpenMS::XFDRAlgorithm::crosslink_class_interlinks_
static const String crosslink_class_interlinks_
Definition: XFDRAlgorithm.h:236
ProteinIdentification.h
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:58
OpenMS::FileTypes::XQUESTXML
xQuest XML file format for protein-protein cross-link identifications (.xquest.xml)
Definition: FileTypes.h:112
OpenMS::XFDRAlgorithm::param_minionsmatched_
static const String param_minionsmatched_
Definition: XFDRAlgorithm.h:223
OpenMS::XFDRAlgorithm::unique_id_scores_
std::vector< double > unique_id_scores_
Definition: XFDRAlgorithm.h:202
OpenMS::XFDRAlgorithm::crosslink_class_interdecoys_
static const String crosslink_class_interdecoys_
Definition: XFDRAlgorithm.h:232
TOPPXFDR::loadArguments_
void loadArguments_()
Definition: XFDR.cpp:211
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
OpenMS::XFDRAlgorithm::isSameProtein_
static bool isSameProtein_(String prot1, String prot2, const String &decoy_string)
Determines whether the Peptide Evidences belong to the same protein, modulo decoy.
Definition: XFDRAlgorithm.h:184
OpenMS::DefaultParamHandler::setParameters
void setParameters(const Param &param)
Sets the parameters.
OpenMS::XFDRAlgorithm::crosslink_class_monodecoys_
static const String crosslink_class_monodecoys_
Definition: XFDRAlgorithm.h:234
OpenMS::DefaultParamHandler::getParameters
const Param & getParameters() const
Non-mutable access to the parameters.
OpenMS::XFDRAlgorithm::EXECUTION_OK
Definition: XFDRAlgorithm.h:96
TOPPXFDR::arg_out_xquest_
String arg_out_xquest_
Definition: XFDR.cpp:201
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::MzIdentMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &poid, std::vector< PeptideIdentification > &peid)
Loads the identifications from a MzIdentML file.
OpenMS::XFDRAlgorithm::param_binsize_
static const String param_binsize_
Definition: XFDRAlgorithm.h:227
OpenMS::XFDRAlgorithm::param_minscore_
static const String param_minscore_
Definition: XFDRAlgorithm.h:226
TOPPXFDR::param_out_idXML_
static const String param_out_idXML_
Definition: XFDR.cpp:207
OpenMS::XFDRAlgorithm::setIntraProtein_
static void setIntraProtein_(PeptideHit &ph, const bool value)
Definition: XFDRAlgorithm.h:171
TOPPXFDR::arg_in_
String arg_in_
Definition: XFDR.cpp:202
OpenMS::XFDRAlgorithm::cross_link_classes_
std::map< String, std::vector< String > > cross_link_classes_
Definition: XFDRAlgorithm.h:205
Histogram.h
OpenMS::Param::update
bool update(const Param &p_outdated, const bool add_unknown=false)
Rescue parameter values from p_outdated to current param.
TOPPXFDR::param_in_
static const String param_in_
Definition: XFDR.cpp:205
OpenMS::XFDRAlgorithm::crosslink_class_hybriddecoysinterlinks_
static const String crosslink_class_hybriddecoysinterlinks_
Definition: XFDRAlgorithm.h:241
OPENMS_LOG_ERROR
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
OpenMS::OpenMS_Log_debug
Logger::LogStream OpenMS_Log_debug
Global static instance of a LogStream to capture messages classified as debug output....
OpenMS::XFDRAlgorithm::crosslink_class_intralinks_
static const String crosslink_class_intralinks_
Definition: XFDRAlgorithm.h:235
TOPPXFDR::main_
ExitCodes main_(int, const char **) final
The actual "main" method. main_() is invoked by main().
Definition: XFDR.cpp:133
OpenMS::TOPPBase::ExitCodes
ExitCodes
Exit codes.
Definition: TOPPBase.h:149
OpenMS::XFDRAlgorithm::param_no_qvalues_
static const String param_no_qvalues_
Definition: XFDRAlgorithm.h:225
OpenMS::FileTypes::nameToType
static Type nameToType(const String &name)
Converts a file type name into a Type.
TOPPXFDR::param_out_mzid_
static const String param_out_mzid_
Definition: XFDR.cpp:208
OpenMS::XFDRAlgorithm::crosslink_class_fulldecoysinterlinks_
static const String crosslink_class_fulldecoysinterlinks_
Definition: XFDRAlgorithm.h:233
TOPPXFDR::logFatal
void logFatal(const String &message) const
Definition: XFDR.cpp:279
OpenMS::XFDRAlgorithm::setInterProtein_
static void setInterProtein_(PeptideHit &ph, const bool value)
Definition: XFDRAlgorithm.h:176
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:73
OpenMS::XQuestResultXMLFile::getNumberOfHits
int getNumberOfHits() const
Returns the total number of hits in the file.
TOPPXFDR::arg_out_idXML_
String arg_out_idXML_
Definition: XFDR.cpp:199
OpenMS::XQuestResultXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in a xQuest XML file.
TOPPXFDR::arg_out_mzid_
String arg_out_mzid_
Definition: XFDR.cpp:200
PeptideIdentification.h
TOPPXFDR::loadInputFile_
ExitCodes loadInputFile_(std::vector< PeptideIdentification > &peptide_ids, ProteinIdentification &protein_id)
Definition: XFDR.cpp:225
TOPPXFDR::param_out_xquest_
static const String param_out_xquest_
Definition: XFDR.cpp:209
TOPPXFDR::validateToolArguments_
ExitCodes validateToolArguments_() const
Definition: XFDR.cpp:284
OpenMS::TOPPBase::main
ExitCodes main(int argc, const char **argv)
Main routine of all TOPP applications.
StandardTypes.h
OpenMS::XFDRAlgorithm::decoy_string_
String decoy_string_
Definition: XFDRAlgorithm.h:208
MzIdentMLFile.h
OpenMS::ProgressLogger::setLogType
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
TOPPBase.h
OpenMS::XFDRAlgorithm::param_uniquexl_
static const String param_uniquexl_
Definition: XFDRAlgorithm.h:224
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:63
OpenMS::XFDRAlgorithm::crosslink_class_targets_
static const String crosslink_class_targets_
Definition: XFDRAlgorithm.h:239
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:54