OpenMS
Loading...
Searching...
No Matches
MzTabFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/FORMAT/MzTab.h>
12
18
19#include <vector>
20
21namespace OpenMS
22{
23 class SVOutStream;
29 class OPENMS_DLLAPI MzTabFile
30 {
31 public:
36
37 typedef std::map<std::pair<std::string, std::string>, std::vector<PeptideHit> > MapAccPepType;
38
39 // store MzTab file
40 void store(const std::string& filename, const MzTab& mz_tab) const;
41
42 // stream IDs to file
43 void store(
44 const std::string& filename,
45 const std::vector<ProteinIdentification>& protein_identifications,
46 const PeptideIdentificationList& peptide_identifications,
47 bool first_run_inference_only,
48 bool export_empty_pep_ids = false,
49 bool export_all_psms = false,
50 const std::string& title = "ID export from OpenMS");
51
52 // stream ConsensusMap to file
53 void store(
54 const std::string& filename,
55 const ConsensusMap& cmap,
56 const bool first_run_inference_only,
57 const bool export_unidentified_features,
58 const bool export_unassigned_ids,
59 const bool export_subfeatures,
60 const bool export_empty_pep_ids = false,
61 const bool export_all_psms = false) const;
62
63 // Set store behaviour of optional "reliability" and "uri" columns (default=no)
66 void storePSMReliabilityColumn(bool store);
68 void storeProteinUriColumn(bool store);
69 void storePeptideUriColumn(bool store);
70 void storePSMUriColumn(bool store);
72 void storeProteinGoTerms(bool store);
73
74 // load MzTab file
75 void load(const std::string& filename, MzTab& mz_tab);
76
77 protected:
94
96
99 std::string generateMzTabProteinHeader_(const MzTabProteinSectionRow& reference_row,
100 const Size n_best_search_engine_scores,
101 const std::vector<std::string>& optional_columns,
102 const MzTabMetaData& meta,
103 size_t& n_columns) const;
104
105 std::string generateMzTabSectionRow_(const MzTabProteinSectionRow& row, const std::vector<std::string>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
106
107 std::string generateMzTabPeptideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector<std::string>& optional_columns, size_t& n_columns) const;
108
109 std::string generateMzTabSectionRow_(const MzTabPeptideSectionRow& row, const std::vector<std::string>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
110
111 std::string generateMzTabPSMHeader_(Size n_search_engine_scores, const std::vector<std::string>& optional_columns, size_t& n_columns) const;
112
113 std::string generateMzTabSectionRow_(const MzTabPSMSectionRow& row, const std::vector<std::string>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
114
115 std::string generateMzTabSmallMoleculeHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector<std::string>& optional_columns, size_t& n_columns) const;
116
117 std::string generateMzTabSectionRow_(const MzTabSmallMoleculeSectionRow& row, const std::vector<std::string>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
118
119 std::string generateMzTabNucleicAcidHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_scores, const std::vector<std::string>& optional_columns, size_t& n_columns) const;
120
121 std::string generateMzTabSectionRow_(const MzTabNucleicAcidSectionRow& row, const std::vector<std::string>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
122
123 std::string generateMzTabOligonucleotideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, const std::vector<std::string>& optional_columns, size_t& n_columns) const;
124
125 std::string generateMzTabSectionRow_(const MzTabOligonucleotideSectionRow& row, const std::vector<std::string>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
126
127 std::string generateMzTabOSMHeader_(Size n_search_engine_scores, const std::vector<std::string>& optional_columns, size_t& n_columns) const;
128
129 std::string generateMzTabSectionRow_(const MzTabOSMSectionRow& row, const std::vector<std::string>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
130
// Generate an mzTab section comprising multiple rows of the same type and perform a sanity check.
//
// For every element of `rows`, one formatted line is produced via the matching
// generateMzTabSectionRow_ overload and appended to `output`.
//
// rows              section rows to serialize (all of the same SectionRow type)
// optional_columns  names of optional columns, forwarded unchanged to the row generator
// meta              mzTab meta data, forwarded unchanged to the row generator
// output            list of lines the generated rows are appended to
// n_header_columns  number of columns of the section header; every row must match it
//
// Throws Exception::Postcondition if a generated row reports a column count that
// differs from n_header_columns. Note that the offending row has already been
// appended to `output` when the exception is thrown.
132 template <typename SectionRow> void generateMzTabSection_(const std::vector<SectionRow>& rows, const std::vector<std::string>& optional_columns, const MzTabMetaData& meta, StringList& output, size_t n_header_columns) const
133 {
// Grow the capacity once up front: one slot per row plus one extra
// (NOTE(review): the purpose of the additional slot is not visible here - confirm).
134 output.reserve(output.size() + rows.size() + 1);
135 for (const auto& row : rows)
136 {
// Column count of this row; passed as the size_t& out-parameter of generateMzTabSectionRow_.
137 size_t n_section_columns = 0;
138 output.push_back(generateMzTabSectionRow_(row, optional_columns, meta, n_section_columns));
// Sanity check: header and row must agree on the number of columns.
139 if (n_header_columns != n_section_columns) throw Exception::Postcondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Header and content differs in columns. Please report this bug to the OpenMS developers.");
140 }
141 }
142
143 // auxiliary functions
144
146 static void addOptionalColumnsToSectionRow_(const std::vector<std::string>& column_names, const std::vector<MzTabOptionalColumnEntry>& column_entries, StringList& output);
147
148 // extract two integers from string (e.g. search_engine_score[1]_ms_run[2] -> 1,2)
149 static std::pair<int, int> extractIndexPairsFromBrackets_(const std::string& s);
150
152
154
157 const std::vector<ProteinIdentification>& pro_ids,
158 std::map<std::string, PeptideIdentificationList >& map_run_to_pepids,
159 std::map<std::string, std::vector<ProteinIdentification> >& map_run_to_proids
160 );
161
162
164 static void createProteinToPeptideLinks_(const std::map<std::string, PeptideIdentificationList >& map_run_to_pepids, MapAccPepType& map_run_accession_to_pephits);
165
167 static std::string extractProteinAccession_(const PeptideHit& peptide_hit);
168
170 static std::string extractPeptideModifications_(const PeptideHit& peptide_hit);
171
173 static std::string mapSearchEngineToCvParam_(const std::string& openms_search_engine_name);
174
175 static std::string mapSearchEngineScoreToCvParam_(const std::string& openms_search_engine_name, double score, std::string score_type);
176
177 static std::string extractNumPeptides_(const std::string& common_identifier, const std::string& protein_accession,
178 const MapAccPepType& map_run_accession_to_peptides);
179
180 // mzTab definition of distinct
181 static std::string extractNumPeptidesDistinct_(std::string common_identifier, std::string protein_accession,
182 const MapAccPepType& map_run_accession_to_peptides);
183
184 // same as distinct but additional constraint of uniqueness (=maps to exactly one Protein)
185 static std::string extractNumPeptidesUnambiguous_(std::string common_identifier, std::string protein_accession,
186 const MapAccPepType& map_run_accession_to_peptides);
187
188 static std::map<std::string, Size> extractNumberOfSubSamples_(const std::map<std::string, std::vector<ProteinIdentification> >& map_run_to_proids);
189
190 static void writePeptideHeader_(SVOutStream& output, std::map<std::string, Size> n_sub_samples);
191
192 static void writeProteinHeader_(SVOutStream& output, std::map<std::string, Size> n_sub_samples);
193
194 static void writeProteinData_(SVOutStream& output,
195 const ProteinIdentification& prot_id,
196 Size run_count,
197 std::string input_filename,
198 bool has_coverage,
199 const MapAccPepType& map_run_accession_to_peptides,
200 const std::map<std::string, Size>& map_run_to_num_sub
201 );
202
203 private:
204 friend class MzTabMFile;
205 };
206
207} // namespace OpenMS
208
A container for consensus elements.
Definition ConsensusMap.h:67
Postcondition failed exception.
Definition Exception.h:141
typename VecMember::iterator iterator
Definition ExposedVector.h:68
File adapter for MzTab files.
Definition MzTabFile.h:30
static std::string extractProteinAccession_(const PeptideHit &peptide_hit)
Extracts, if possible, a unique protein accession for a peptide hit in mzTab format....
std::string generateMzTabOSMHeader_(Size n_search_engine_scores, const std::vector< std::string > &optional_columns, size_t &n_columns) const
void storePSMUriColumn(bool store)
std::string generateMzTabSectionRow_(const MzTabPeptideSectionRow &row, const std::vector< std::string > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
static std::string mapSearchEngineToCvParam_(const std::string &openms_search_engine_name)
Map search engine identifier to CV, param etc.
void load(const std::string &filename, MzTab &mz_tab)
bool store_osm_uri_
Definition MzTabFile.h:92
bool store_psm_uri_
Definition MzTabFile.h:84
bool store_smallmolecule_reliability_
Definition MzTabFile.h:81
static void addOptionalColumnsToSectionRow_(const std::vector< std::string > &column_names, const std::vector< MzTabOptionalColumnEntry > &column_entries, StringList &output)
Helper function for "generateMzTabSectionRow_" functions.
static void writeProteinHeader_(SVOutStream &output, std::map< std::string, Size > n_sub_samples)
bool store_protein_goterms_
Definition MzTabFile.h:86
std::map< std::pair< std::string, std::string >, std::vector< PeptideHit > > MapAccPepType
Definition MzTabFile.h:37
void storeSmallMoleculeReliabilityColumn(bool store)
std::string generateMzTabSectionRow_(const MzTabOSMSectionRow &row, const std::vector< std::string > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
bool store_protein_uri_
Definition MzTabFile.h:82
static void keepFirstPSM_(PeptideIdentificationList::iterator begin, PeptideIdentificationList::iterator end)
static std::string extractPeptideModifications_(const PeptideHit &peptide_hit)
Extracts modifications and positions of a peptide hit in mzTab format.
bool store_nucleic_acid_goterms_
Definition MzTabFile.h:93
std::string generateMzTabPeptideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector< std::string > &optional_columns, size_t &n_columns) const
static void writePeptideHeader_(SVOutStream &output, std::map< std::string, Size > n_sub_samples)
void storePSMReliabilityColumn(bool store)
bool store_protein_reliability_
Definition MzTabFile.h:78
std::string generateMzTabOligonucleotideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, const std::vector< std::string > &optional_columns, size_t &n_columns) const
bool store_psm_reliability_
Definition MzTabFile.h:80
std::string generateMzTabNucleicAcidHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_scores, const std::vector< std::string > &optional_columns, size_t &n_columns) const
bool store_oligonucleotide_uri_
Definition MzTabFile.h:91
static void createProteinToPeptideLinks_(const std::map< std::string, PeptideIdentificationList > &map_run_to_pepids, MapAccPepType &map_run_accession_to_pephits)
Create links from protein to peptides.
static std::string extractNumPeptides_(const std::string &common_identifier, const std::string &protein_accession, const MapAccPepType &map_run_accession_to_peptides)
static std::string mapSearchEngineScoreToCvParam_(const std::string &openms_search_engine_name, double score, std::string score_type)
std::string generateMzTabSmallMoleculeHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector< std::string > &optional_columns, size_t &n_columns) const
std::string generateMzTabProteinHeader_(const MzTabProteinSectionRow &reference_row, const Size n_best_search_engine_scores, const std::vector< std::string > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
~MzTabFile()
Destructor.
std::string generateMzTabPSMHeader_(Size n_search_engine_scores, const std::vector< std::string > &optional_columns, size_t &n_columns) const
static void partitionIntoRuns_(const PeptideIdentificationList &pep_ids, const std::vector< ProteinIdentification > &pro_ids, std::map< std::string, PeptideIdentificationList > &map_run_to_pepids, std::map< std::string, std::vector< ProteinIdentification > > &map_run_to_proids)
Extract protein and peptide identifications for each run. The output maps are assumed to be empty.
bool store_smallmolecule_uri_
Definition MzTabFile.h:85
bool store_oligonucleotide_reliability_
Definition MzTabFile.h:88
bool store_osm_reliability_
Definition MzTabFile.h:89
void store(const std::string &filename, const std::vector< ProteinIdentification > &protein_identifications, const PeptideIdentificationList &peptide_identifications, bool first_run_inference_only, bool export_empty_pep_ids=false, bool export_all_psms=false, const std::string &title="ID export from OpenMS")
void generateMzTabSection_(const std::vector< SectionRow > &rows, const std::vector< std::string > &optional_columns, const MzTabMetaData &meta, StringList &output, size_t n_header_columns) const
Generate an mzTab section comprising multiple rows of the same type and perform sanity check.
Definition MzTabFile.h:132
std::string generateMzTabSectionRow_(const MzTabPSMSectionRow &row, const std::vector< std::string > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
static void writeProteinData_(SVOutStream &output, const ProteinIdentification &prot_id, Size run_count, std::string input_filename, bool has_coverage, const MapAccPepType &map_run_accession_to_peptides, const std::map< std::string, Size > &map_run_to_num_sub)
static std::string extractNumPeptidesUnambiguous_(std::string common_identifier, std::string protein_accession, const MapAccPepType &map_run_accession_to_peptides)
void storeProteinReliabilityColumn(bool store)
static std::string extractNumPeptidesDistinct_(std::string common_identifier, std::string protein_accession, const MapAccPepType &map_run_accession_to_peptides)
void storeProteinGoTerms(bool store)
std::string generateMzTabSectionRow_(const MzTabOligonucleotideSectionRow &row, const std::vector< std::string > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
void generateMzTabMetaDataSection_(const MzTabMetaData &map, StringList &sl) const
static std::map< std::string, Size > extractNumberOfSubSamples_(const std::map< std::string, std::vector< ProteinIdentification > > &map_run_to_proids)
bool store_nucleic_acid_reliability_
Definition MzTabFile.h:87
void storeSmallMoleculeUriColumn(bool store)
std::string generateMzTabSectionRow_(const MzTabProteinSectionRow &row, const std::vector< std::string > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
MzTabFile()
Default constructor.
bool store_peptide_uri_
Definition MzTabFile.h:83
void store(const std::string &filename, const ConsensusMap &cmap, const bool first_run_inference_only, const bool export_unidentified_features, const bool export_unassigned_ids, const bool export_subfeatures, const bool export_empty_pep_ids=false, const bool export_all_psms=false) const
static void sortPSM_(PeptideIdentificationList::iterator begin, PeptideIdentificationList::iterator end)
void store(const std::string &filename, const MzTab &mz_tab) const
static std::pair< int, int > extractIndexPairsFromBrackets_(const std::string &s)
void storePeptideUriColumn(bool store)
bool store_peptide_reliability_
Definition MzTabFile.h:79
std::string generateMzTabSectionRow_(const MzTabSmallMoleculeSectionRow &row, const std::vector< std::string > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
void storeProteinUriColumn(bool store)
void storePeptideReliabilityColumn(bool store)
std::string generateMzTabSectionRow_(const MzTabNucleicAcidSectionRow &row, const std::vector< std::string > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
bool store_nucleic_acid_uri_
Definition MzTabFile.h:90
File adapter for MzTab-M files.
Definition MzTabMFile.h:24
All metadata of an mzTab file. Please refer to the specification for documentation.
Definition MzTab.h:118
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition MzTab.h:455
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Representation of a protein identification run.
Definition ProteinIdentification.h:55
Stream class for writing to comma/tab/...-separated values files.
Definition SVOutStream.h:32
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
NUC - Nucleic acid section (table-based)
Definition MzTab.h:339
OSM - OSM (oligonucleotide-spectrum match) section (table-based)
Definition MzTab.h:409
OLI - Oligonucleotide section (table-based)
Definition MzTab.h:374
PEP - Peptide section (Table based)
Definition MzTab.h:220
SML - Small molecule section (table-based)
Definition MzTab.h:309
PSM - PSM section (Table based)
Definition MzTab.h:258
PRT - Protein section (Table based)
Definition MzTab.h:181