OpenMS
InspectOutfile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Martin Langwisch $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
18 
19 
20 namespace OpenMS
21 {
// Interface for working with InsPecT output files and the accompanying trie database files.
// NOTE(review): the original doc comments are not part of this listing; descriptions below
// are taken from the brief texts in this page's index or from the visible code, and anything
// inferred from names only is marked as such.
31  class OPENMS_DLLAPI InspectOutfile
32  {
33 public:
    // NOTE(review): the index of this page lists a default constructor `InspectOutfile()`,
    // but its declaration does not appear in this listing — confirm against the real header.
36 
    /// copy constructor
38  InspectOutfile(const InspectOutfile & inspect_outfile);
39 
    /// destructor
41  virtual ~InspectOutfile();
42 
    /// assignment operator
44  InspectOutfile & operator=(const InspectOutfile & inspect_outfile);
45 
    /// equality operator
47  bool operator==(const InspectOutfile & inspect_outfile) const;
48 
    // NOTE(review): no description visible here; by its signature this presumably loads an
    // InsPecT result file into the given identification objects, filtered by
    // p_value_threshold. database_filename defaults to "". Meaning of the returned
    // vector is not shown — TODO confirm.
60  std::vector<Size> load(const String & result_filename, std::vector<PeptideIdentification> & peptide_identifications, ProteinIdentification & protein_identification, const double p_value_threshold, const String & database_filename = "");
61 
    // NOTE(review): no description visible here; presumably returns the record numbers of
    // results in result_filename that pass p_value_threshold — TODO confirm.
69  std::vector<Size> getWantedRecords(const String & result_filename, double p_value_threshold);
70 
    // NOTE(review): no description visible here; by name, writes a reduced trie database
    // (snd_database_filename / snd_index_filename) containing wanted_records. `append`
    // defaults to false — TODO confirm semantics.
78  void compressTrieDB(const String & database_filename, const String & index_filename, std::vector<Size> & wanted_records, const String & snd_database_filename, const String & snd_index_filename, bool append = false);
79 
    // NOTE(review): no description visible here; by name, generates a trie database and
    // index file from source_database_filename. `append` defaults to false and `species`
    // to "" — TODO confirm semantics.
84  void generateTrieDB(const String & source_database_filename, const String & database_filename, const String & index_filename, bool append = false, const String& species = "");
85 
86 
    // NOTE(review): no description visible here; `line` is taken by value, `accession` and
    // `accession_type` are output references. Presumably extracts the accession and its type
    // from a database line — TODO confirm.
89  void getACAndACType(String line, String & accession, String & accession_type);
90 
    // NOTE(review): no description visible here; presumably fills precursor retention time
    // and m/z into `ids` using the (file, scan number) associations passed in — TODO confirm.
95  void getPrecursorRTandMZ(const std::vector<std::pair<String, std::vector<std::pair<Size, Size> > > > & files_and_peptide_identification_with_scan_number, std::vector<PeptideIdentification> & ids);
96 
    // NOTE(review): no description visible here; all label parameters are output references.
    // Presumably determines the labels used in source_database_filename — TODO confirm.
102  void getLabels(const String & source_database_filename, String & ac_label, String & sequence_start_label, String & sequence_end_label, String & comment_label, String & species_label);
103 
    // NOTE(review): no description visible here; presumably reads the sequences of
    // wanted_records from database_filename into `sequences`. Meaning of the returned
    // vector is not shown — TODO confirm.
108  std::vector<Size> getSequences(const String & database_filename, const std::map<Size, Size> & wanted_records, std::vector<String> & sequences);
109 
    /// Loads the file in_filename into exp after detecting its type from the file content.
    ///
    /// @param exp          cleared via reset() first, then filled by FileHandler::loadExperiment
    /// @param type         cleared first; set to the name of the detected file type on success
    /// @param in_filename  file to inspect and load
    /// @throws Exception::ParseError if the file type is detected as FileTypes::UNKNOWN
    ///         (in that case exp and type have already been cleared)
115  void getExperiment(PeakMap & exp, String & type, const String & in_filename)
116  {
     // start from a clean state so no stale data survives a failed call
117  type.clear();
118  exp.reset();
119  // determine the input file type from the file's content
120  FileHandler fh;
121  FileTypes::Type in_type = fh.getTypeByContent(in_filename);
122  if (in_type == FileTypes::UNKNOWN)
123  {
124  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Could not determine type of the file. Aborting!", in_filename);
125  }
     // report the detected type by name, then load while allowing only that type
126  type = FileTypes::typeToName(in_type);
127  fh.loadExperiment(in_filename, exp, {in_type});
128  }
129 
    /// get the search engine and its version from the output of the InsPecT executable
    /// (per the index text: the output produced when it is called without parameters)
135  bool getSearchEngineAndVersion(const String & cmd_output, ProteinIdentification & protein_identification);
136 
    /// read the header of an inspect output file and retrieve various information
    /// (the column parameters and number_of_columns are output references)
140  void readOutHeader(const String & filename, const String & header_line, Int & spectrum_file_column, Int & scan_column, Int & peptide_column, Int & protein_column, Int & charge_column, Int & MQ_score_column, Int & p_value_column, Int & record_number_column, Int & DB_file_pos_column, Int & spec_file_pos_column, Size & number_of_columns);
141 
142 protected:
    // The index of this page describes the first three constants as "length of 1)", "2)" and
    // "3)", i.e. parts of a record whose enumeration is not visible in this listing.
    // NOTE(review): going by the member names these parts are the database position, the
    // trie database position and the protein name — confirm against the real header.
147  static const Size db_pos_length_; // length of part 1) of a record
148  static const Size trie_db_pos_length_; // length of part 2) of a record
149  static const Size protein_name_length_; // length of part 3) of a record
150  static const Size record_length_; // length of the whole record
151  static const char trie_delimiter_; // the sequences in the trie database are delimited by this character
152  static const String score_type_; // type of score
153  };
154 
155 } //namespace OpenMS
156 
Parse Error exception.
Definition: Exception.h:579
Facilitates file handling by file type recognition.
Definition: FileHandler.h:45
static FileTypes::Type getTypeByContent(const String &filename)
Determines the file type of a file by parsing the first few lines.
void loadExperiment(const String &filename, PeakMap &exp, const std::vector< FileTypes::Type > allowed_types=std::vector< FileTypes::Type >(), ProgressLogger::LogType log=ProgressLogger::NONE, const bool rewrite_source_file=false, const bool compute_hash=false)
Loads a file into an MSExperiment.
Definition: InspectOutfile.h:32
InspectOutfile & operator=(const InspectOutfile &inspect_outfile)
assignment operator
static const Size record_length_
length of the whole record
Definition: InspectOutfile.h:150
static const Size trie_db_pos_length_
length of part 2) of a record (the trie database position, going by the member name)
Definition: InspectOutfile.h:148
static const Size db_pos_length_
length of part 1) of a record (the database position, going by the member name)
Definition: InspectOutfile.h:147
void getExperiment(PeakMap &exp, String &type, const String &in_filename)
Definition: InspectOutfile.h:115
void getLabels(const String &source_database_filename, String &ac_label, String &sequence_start_label, String &sequence_end_label, String &comment_label, String &species_label)
InspectOutfile(const InspectOutfile &inspect_outfile)
copy constructor
bool operator==(const InspectOutfile &inspect_outfile) const
equality operator
virtual ~InspectOutfile()
destructor
static const String score_type_
type of score
Definition: InspectOutfile.h:152
void compressTrieDB(const String &database_filename, const String &index_filename, std::vector< Size > &wanted_records, const String &snd_database_filename, const String &snd_index_filename, bool append=false)
void getPrecursorRTandMZ(const std::vector< std::pair< String, std::vector< std::pair< Size, Size > > > > &files_and_peptide_identification_with_scan_number, std::vector< PeptideIdentification > &ids)
std::vector< Size > getSequences(const String &database_filename, const std::map< Size, Size > &wanted_records, std::vector< String > &sequences)
std::vector< Size > getWantedRecords(const String &result_filename, double p_value_threshold)
void generateTrieDB(const String &source_database_filename, const String &database_filename, const String &index_filename, bool append=false, const String &species="")
void readOutHeader(const String &filename, const String &header_line, Int &spectrum_file_column, Int &scan_column, Int &peptide_column, Int &protein_column, Int &charge_column, Int &MQ_score_column, Int &p_value_column, Int &record_number_column, Int &DB_file_pos_column, Int &spec_file_pos_column, Size &number_of_columns)
read the header of an inspect output file and retrieve various information
bool getSearchEngineAndVersion(const String &cmd_output, ProteinIdentification &protein_identification)
get the search engine and its version from the output that the InsPecT executable prints when called without parameters
void getACAndACType(String line, String &accession, String &accession_type)
std::vector< Size > load(const String &result_filename, std::vector< PeptideIdentification > &peptide_identifications, ProteinIdentification &protein_identification, const double p_value_threshold, const String &database_filename="")
static const char trie_delimiter_
the sequences in the trie database are delimited by this character
Definition: InspectOutfile.h:151
static const Size protein_name_length_
length of part 3) of a record (the protein name, going by the member name)
Definition: InspectOutfile.h:149
InspectOutfile()
default constructor
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
void reset()
Clear all internal data (spectra, ranges, metadata)
Representation of a protein identification run.
Definition: ProteinIdentification.h:50
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
void append(const T &i, String &target)
Definition: StringConversions.h:133
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Type
Actual file types enum.
Definition: FileTypes.h:31
@ UNKNOWN
Unknown file extension.
Definition: FileTypes.h:32
static String typeToName(Type type)
Returns the name/extension of the type.