OpenMS  2.6.0
MzTabFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/FORMAT/MzTab.h>
38 
43 
44 #include <boost/math/special_functions/fpclassify.hpp>
45 
46 #include <vector>
47 #include <algorithm>
48 
49 namespace OpenMS
50 {
51  class String;
52  class SVOutStream;
58  class OPENMS_DLLAPI MzTabFile
59  {
60  public:
62  MzTabFile();
64  ~MzTabFile();
65 
66  typedef std::map<std::pair<String, String>, std::vector<PeptideHit> > MapAccPepType;
67 
68  // store MzTab file
69  void store(const String& filename, const MzTab& mz_tab) const;
70 
71  // stream IDs to file
72  void store(
73  const String& filename,
74  const std::vector<ProteinIdentification>& protein_identifications,
75  const std::vector<PeptideIdentification>& peptide_identifications,
76  bool first_run_inference_only,
77  bool export_empty_pep_ids = false,
78  const String& title = "ID export from OpenMS");
79 
80  // stream ConsensusMap to file
81  void store(
82  const String& filename,
83  const ConsensusMap& cmap,
84  const bool first_run_inference_only,
85  const bool export_unidentified_features,
86  const bool export_unassigned_ids,
87  const bool export_subfeatures,
88  const bool export_empty_pep_ids = false) const;
89 
90  // Set store behaviour of optional "reliability" and "uri" columns (default=no)
91  void storeProteinReliabilityColumn(bool store);
92  void storePeptideReliabilityColumn(bool store);
93  void storePSMReliabilityColumn(bool store);
94  void storeSmallMoleculeReliabilityColumn(bool store);
95  void storeProteinUriColumn(bool store);
96  void storePeptideUriColumn(bool store);
97  void storePSMUriColumn(bool store);
98  void storeSmallMoleculeUriColumn(bool store);
99  void storeProteinGoTerms(bool store);
100 
101  // load MzTab file
102  void load(const String& filename, MzTab& mz_tab);
103 
104  protected:
121 
122  void generateMzTabMetaDataSection_(const MzTabMetaData& map, StringList& sl) const;
123 
126  String generateMzTabProteinHeader_(const MzTabProteinSectionRow& reference_row,
127  const Size n_best_search_engine_scores,
128  const std::vector<String>& optional_columns,
129  const MzTabMetaData& meta,
130  size_t& n_columns) const;
131 
132  String generateMzTabSectionRow_(const MzTabProteinSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
133 
134  String generateMzTabPeptideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector<String>& optional_columns, size_t& n_columns) const;
135 
136  String generateMzTabSectionRow_(const MzTabPeptideSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
137 
138  String generateMzTabPSMHeader_(Size n_search_engine_scores, const std::vector<String>& optional_columns, size_t& n_columns) const;
139 
140  String generateMzTabSectionRow_(const MzTabPSMSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
141 
142  String generateMzTabSmallMoleculeHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector<String>& optional_columns, size_t& n_columns) const;
143 
144  String generateMzTabSectionRow_(const MzTabSmallMoleculeSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
145 
146  String generateMzTabNucleicAcidHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_scores, const std::vector<String>& optional_columns, size_t& n_columns) const;
147 
148  String generateMzTabSectionRow_(const MzTabNucleicAcidSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
149 
150  String generateMzTabOligonucleotideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, const std::vector<String>& optional_columns, size_t& n_columns) const;
151 
152  String generateMzTabSectionRow_(const MzTabOligonucleotideSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
153 
154  String generateMzTabOSMHeader_(Size n_search_engine_scores, const std::vector<String>& optional_columns, size_t& n_columns) const;
155 
156  String generateMzTabSectionRow_(const MzTabOSMSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
157 
159  template <typename SectionRow> void generateMzTabSection_(const std::vector<SectionRow>& rows, const std::vector<String>& optional_columns, const MzTabMetaData& meta, StringList& output, size_t n_header_columns) const
160  {
161  output.reserve(output.size() + rows.size() + 1);
162  for (const auto& row : rows)
163  {
164  size_t n_section_columns = 0;
165  output.push_back(generateMzTabSectionRow_(row, optional_columns, meta, n_section_columns));
166  if (n_header_columns != n_section_columns) throw Exception::Postcondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Header and content differs in columns. Please report this bug to the OpenMS developers.");
167  }
168  }
169 
170  // auxiliary functions
171 
173  static void addOptionalColumnsToSectionRow_(const std::vector<String>& column_names, const std::vector<MzTabOptionalColumnEntry>& column_entries, StringList& output);
174 
175  // extract two integers from string (e.g. search_engine_score[1]_ms_run[2] -> 1,2)
176  static std::pair<int, int> extractIndexPairsFromBrackets_(const String& s);
177 
178  static void sortPSM_(std::vector<PeptideIdentification>::iterator begin, std::vector<PeptideIdentification>::iterator end);
179 
180  static void keepFirstPSM_(std::vector<PeptideIdentification>::iterator begin, std::vector<PeptideIdentification>::iterator end);
181 
183  static void partitionIntoRuns_(const std::vector<PeptideIdentification>& pep_ids,
184  const std::vector<ProteinIdentification>& pro_ids,
185  std::map<String, std::vector<PeptideIdentification> >& map_run_to_pepids,
186  std::map<String, std::vector<ProteinIdentification> >& map_run_to_proids
187  );
188 
189 
191  static void createProteinToPeptideLinks_(const std::map<String, std::vector<PeptideIdentification> >& map_run_to_pepids, MapAccPepType& map_run_accession_to_pephits);
192 
194  static String extractProteinAccession_(const PeptideHit& peptide_hit);
195 
197  static String extractPeptideModifications_(const PeptideHit& peptide_hit);
198 
200  static String mapSearchEngineToCvParam_(const String& openms_search_engine_name);
201 
202  static String mapSearchEngineScoreToCvParam_(const String& openms_search_engine_name, double score, String score_type);
203 
204  static String extractNumPeptides_(const String& common_identifier, const String& protein_accession,
205  const MapAccPepType& map_run_accession_to_peptides);
206 
207  // mzTab definition of distinct
208  static String extractNumPeptidesDistinct_(String common_identifier, String protein_accession,
209  const MapAccPepType& map_run_accession_to_peptides);
210 
211  // same as distinct but additional constraint of uniqueness (=maps to exactly one Protein)
212  static String extractNumPeptidesUnambiguous_(String common_identifier, String protein_accession,
213  const MapAccPepType& map_run_accession_to_peptides);
214 
215  static std::map<String, Size> extractNumberOfSubSamples_(const std::map<String, std::vector<ProteinIdentification> >& map_run_to_proids);
216 
217  static void writePeptideHeader_(SVOutStream& output, std::map<String, Size> n_sub_samples);
218 
219  static void writeProteinHeader_(SVOutStream& output, std::map<String, Size> n_sub_samples);
220 
221  static void writeProteinData_(SVOutStream& output,
222  const ProteinIdentification& prot_id,
223  Size run_count,
224  String input_filename,
225  bool has_coverage,
226  const MapAccPepType& map_run_accession_to_peptides,
227  const std::map<String, Size>& map_run_to_num_sub
228  );
229 
230  };
231 
232 } // namespace OpenMS
233 
OpenMS::Exception::Postcondition
Postcondition failed exception.
Definition: Exception.h:180
OpenMS::MzTabProteinSectionRow
PRT - Protein section (Table based)
Definition: MzTab.h:534
OpenMS::FileTypes::IDXML
OpenMS identification format (.idXML)
Definition: FileTypes.h:66
ConsensusXMLFile.h
MzTabFile.h
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
OpenMS::MzTabFile::store_psm_reliability_
bool store_psm_reliability_
Definition: MzTabFile.h:107
FileHandler.h
FileTypes.h
OpenMS::MzTabFile
File adapter for MzTab files.
Definition: MzTabFile.h:58
OpenMS::MzTabFile::store_nucleic_acid_reliability_
bool store_nucleic_acid_reliability_
Definition: MzTabFile.h:114
OpenMS::MzTabFile::store_osm_reliability_
bool store_osm_reliability_
Definition: MzTabFile.h:116
OpenMS::String
A more convenient string class.
Definition: String.h:59
OpenMS::MzTabPeptideSectionRow
PEP - Peptide section (Table based)
Definition: MzTab.h:573
OpenMS::MzTabFile::store_oligonucleotide_reliability_
bool store_oligonucleotide_reliability_
Definition: MzTabFile.h:115
OpenMS::MzTab::exportFeatureMapToMzTab
static MzTab exportFeatureMapToMzTab(const FeatureMap &feature_map, const String &filename)
OpenMS::MzTabFile::store_protein_uri_
bool store_protein_uri_
Definition: MzTabFile.h:109
PeptideEvidence.h
ConsensusMap.h
OpenMS::FileTypes::CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition: FileTypes.h:67
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
FeatureXMLFile.h
OpenMS::Constants::c
const double c
OpenMS::ListUtils::contains
static bool contains(const std::vector< T > &container, const E &elem)
Checks whether the element elem is contained in the given container.
Definition: ListUtils.h:146
OpenMS::MzTab
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition: MzTab.h:800
OpenMS::FileTypes::MZIDENTML
mzIdentML (HUPO PSI AnalysisXML followup format) (.mzid)
Definition: FileTypes.h:77
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
OpenMS::MzTabFile::store_peptide_uri_
bool store_peptide_uri_
Definition: MzTabFile.h:110
OpenMS::MzTabPSMSectionRow
PSM - PSM section (Table based)
Definition: MzTab.h:610
ListUtils.h
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::MzTabFile::store_nucleic_acid_goterms_
bool store_nucleic_acid_goterms_
Definition: MzTabFile.h:120
OpenMS::MzTabFile::store_oligonucleotide_uri_
bool store_oligonucleotide_uri_
Definition: MzTabFile.h:118
OpenMS::MzTabMetaData
all meta data of a mzTab file. Please refer to specification for documentation.
Definition: MzTab.h:469
OpenMS::MzTabFile::store_psm_uri_
bool store_psm_uri_
Definition: MzTabFile.h:111
OpenMS::MzIdentMLFile
File adapter for MzIdentML files.
Definition: MzIdentMLFile.h:67
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
PeptideHit.h
OpenMS::FileTypes::FEATUREXML
OpenMS feature file (.featureXML)
Definition: FileTypes.h:65
OpenMS::SVOutStream
Stream class for writing to comma/tab/...-separated values files.
Definition: SVOutStream.h:54
ProteinHit.h
OpenMS::FeatureXMLFile::load
void load(const String &filename, FeatureMap &feature_map)
loads the file with name filename into map and calls updateRanges().
OpenMS::MzTabNucleicAcidSectionRow
NUC - Nucleic acid section (table-based)
Definition: MzTab.h:684
ProteinIdentification.h
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:58
MathFunctions.h
OpenMS::MzTabFile::store
void store(const String &filename, const MzTab &mz_tab) const
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
OpenMS::MzTabFile::store_peptide_reliability_
bool store_peptide_reliability_
Definition: MzTabFile.h:106
MzTab.h
OpenMS::MzTabFile::store_osm_uri_
bool store_osm_uri_
Definition: MzTabFile.h:119
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:80
OpenMS::FeatureMap::getUnassignedPeptideIdentifications
const std::vector< PeptideIdentification > & getUnassignedPeptideIdentifications() const
non-mutable access to the unassigned peptide identifications
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
MetaInfoInterfaceUtils.h
OpenMS::MzIdentMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &poid, std::vector< PeptideIdentification > &peid)
Loads the identifications from a MzIdentML file.
ModificationsDB.h
OpenMS::FeatureMap::setProteinIdentifications
void setProteinIdentifications(const std::vector< ProteinIdentification > &protein_identifications)
sets the protein identifications
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
OpenMS::FeatureMap
A container for features.
Definition: FeatureMap.h:97
OpenMS::FeatureXMLFile
This class provides Input/Output functionality for feature maps.
Definition: FeatureXMLFile.h:68
OpenMS::MzTabOligonucleotideSectionRow
OLI - Oligonucleotide section (table-based)
Definition: MzTab.h:719
OpenMS::MzTabOSMSectionRow
OSM - OSM (oligonucleotide-spectrum match) section (table-based)
Definition: MzTab.h:754
OpenMS::MzTabFile::generateMzTabSection_
void generateMzTabSection_(const std::vector< SectionRow > &rows, const std::vector< String > &optional_columns, const MzTabMetaData &meta, StringList &output, size_t n_header_columns) const
Generate an mzTab section comprising multiple rows of the same type and perform sanity check.
Definition: MzTabFile.h:159
OpenMS::MzTabSmallMoleculeSectionRow
SML - Small molecule section (table based)
Definition: MzTab.h:654
OpenMS::MzTabFile::store_protein_goterms_
bool store_protein_goterms_
Definition: MzTabFile.h:113
OpenMS::MzTabFile::store_smallmolecule_reliability_
bool store_smallmolecule_reliability_
Definition: MzTabFile.h:108
OpenMS::Exception::BaseException::what
const char * what() const noexcept override
Returns the error message of the exception.
OpenMS::Exception::MissingInformation
Not all required information provided.
Definition: Exception.h:195
PeptideIdentification.h
OpenMS::MzTabFile::MapAccPepType
std::map< std::pair< String, String >, std::vector< PeptideHit > > MapAccPepType
Definition: MzTabFile.h:66
OpenMS::MzTabFile::store_nucleic_acid_uri_
bool store_nucleic_acid_uri_
Definition: MzTabFile.h:117
MzIdentMLFile.h
TOPPBase.h
OpenMS::MzTabFile::store_smallmolecule_uri_
bool store_smallmolecule_uri_
Definition: MzTabFile.h:112
OpenMS::FeatureMap::getProteinIdentifications
const std::vector< ProteinIdentification > & getProteinIdentifications() const
non-mutable access to the protein identifications
OpenMS::ConsensusXMLFile
This class provides Input functionality for ConsensusMaps and Output functionality for alignments and...
Definition: ConsensusXMLFile.h:62
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:63
OpenMS::MzTabFile::store_protein_reliability_
bool store_protein_reliability_
Definition: MzTabFile.h:105
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:55
StringListUtils.h