OpenMS  2.6.0
MsInspectFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 #include <OpenMS/KERNEL/Feature.h>
41 #include <OpenMS/FORMAT/TextFile.h>
42 
43 #include <fstream>
44 #include <vector>
45 
46 namespace OpenMS
47 {
60  class OPENMS_DLLAPI MsInspectFile
61  {
62 public:
64  MsInspectFile();
66  virtual ~MsInspectFile();
67 
76  template <typename FeatureMapType>
77  void load(const String& filename, FeatureMapType& feature_map)
78  {
79  // load input
80  TextFile input(filename);
81 
82  // reset map
83  FeatureMapType fmap;
84  feature_map = fmap;
85 
86  bool first_line = true;
87  for (TextFile::ConstIterator it = input.begin(); it != input.end(); ++it)
88  {
89  String line = *it;
90 
91  //ignore comment lines
92  if (line.empty() || line[0] == '#') continue;
93 
94  //skip leader line
95  if (first_line)
96  {
97  first_line = false;
98  continue;
99  }
100 
101  //split lines: scan\ttime\tmz\taccurateMZ\tmass\tintensity\tcharge\tchargeStates\tkl\tbackground\tmedian\tpeaks\tscanFirst\tscanLast\tscanCount\ttotalIntensity\tsumSquaresDist\tdescription
102  std::vector<String> parts;
103  line.split('\t', parts);
104 
105  if (parts.size() < 18)
106  {
107  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "", String("Failed to convert line ") + String((it - input.begin()) + 1) + ". Not enough columns (expected 18 or more, got " + String(parts.size()) + ")");
108  }
109 
110  //create feature
111  Feature f;
112  Size column_to_convert = 0;
113  try
114  {
115  column_to_convert = 1;
116  f.setRT(parts[1].toDouble());
117  column_to_convert = 2;
118  f.setMZ(parts[2].toDouble());
119  column_to_convert = 5;
120  f.setIntensity(parts[5].toDouble());
121  column_to_convert = 6;
122  f.setCharge(parts[6].toInt());
123  column_to_convert = 8;
124  f.setOverallQuality(parts[8].toDouble());
125 
126  column_to_convert = 3;
127  f.setMetaValue("accurateMZ", parts[3]);
128  column_to_convert = 4;
129  f.setMetaValue("mass", parts[4].toDouble());
130  column_to_convert = 7;
131  f.setMetaValue("chargeStates", parts[7].toInt());
132  column_to_convert = 9;
133  f.setMetaValue("background", parts[9].toDouble());
134  column_to_convert = 10;
135  f.setMetaValue("median", parts[10].toDouble());
136  column_to_convert = 11;
137  f.setMetaValue("peaks", parts[11].toInt());
138  column_to_convert = 12;
139  f.setMetaValue("scanFirst", parts[12].toInt());
140  column_to_convert = 13;
141  f.setMetaValue("scanLast", parts[13].toInt());
142  column_to_convert = 14;
143  f.setMetaValue("scanCount", parts[14].toInt());
144  column_to_convert = 15;
145  f.setMetaValue("totalIntensity", parts[15].toDouble());
146  column_to_convert = 16;
147  f.setMetaValue("sumSquaresDist", parts[16].toDouble());
148  }
149  catch ( Exception::BaseException& )
150  {
151  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "", String("Failed to convert value in column ") + String(column_to_convert + 1) + " into a number (line '" + String((it - input.begin()) + 1) + ")");
152  }
153  f.setMetaValue("description", parts[17]);
154  feature_map.push_back(f);
155  }
156 
157  }
158 
166  template <typename SpectrumType>
167  void store(const String& filename, const SpectrumType& spectrum) const
168  {
169  std::cerr << "Store() for MsInspectFile not implemented. Filename was: " << filename << ", spec of size " << spectrum.size() << "\n";
170  throw Exception::NotImplemented(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
171  }
172 
173  };
174 } // namespace OpenMS
175 
OpenMS::MsInspectFile
File adapter for MsInspect files.
Definition: MsInspectFile.h:60
LogStream.h
DefaultParamHandler.h
OpenMS::File::rename
static bool rename(const String &from, const String &to, bool overwrite_existing=true, bool verbose=true)
Rename a file.
OpenMS::ProteinIdentification::SearchParameters::digestion_enzyme
Protease digestion_enzyme
The cleavage site information in details (from ProteaseDB)
Definition: ProteinIdentification.h:273
OpenMS::PeptideHit::getCharge
Int getCharge() const
returns the charge of the peptide
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
OpenMS::TextFile::end
ConstIterator end() const
Gives access to the underlying text buffer.
FileHandler.h
TextFile.h
OpenMS::Exception::IllegalArgument
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::ResidueModification
Representation of a modification.
Definition: ResidueModification.h:76
OpenMS::PeptideHit::getSequence
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
OpenMS::PeptideIdentification::insertHit
void insertHit(const PeptideHit &hit)
Appends a peptide hit.
OpenMS::DigestionEnzymeDB< DigestionEnzymeProtein, ProteaseDB >::getInstance
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:69
OpenMS::PeptideIdentification::setIdentifier
void setIdentifier(const String &id)
sets the identifier
OpenMS::FileTypes::PEPXML
TPP pepXML file (.pepXML)
Definition: FileTypes.h:75
OpenMS::ProteinIdentification::setSearchEngineVersion
void setSearchEngineVersion(const String &search_engine_version)
Sets the search engine version.
OpenMS::MzMLFile
File adapter for MzML files.
Definition: MzMLFile.h:55
OpenMS::String
A more convenient string class.
Definition: String.h:59
JavaInfo.h
OpenMS::MSExperiment::begin
Iterator begin()
Definition: MSExperiment.h:157
OpenMS::PeptideEvidence::setProteinAccession
void setProteinAccession(const String &s)
set the protein accession the peptide matches to. If not available set to empty string.
MzMLFile.h
OpenMS::ResidueModification::getId
const String & getId() const
returns the identifier of the modification
Feature.h
OpenMS::PeptideIdentification::setRT
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
OpenMS::MSExperiment
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
OpenMS::DigestionEnzymeDB::getAllNames
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
OpenMS::TextFile
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
OpenMS::PeakFileOptions::addMSLevel
void addMSLevel(int level)
adds a desired MS level for peaks to load
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
OpenMS::ProteinHit
Representation of a protein hit.
Definition: ProteinHit.h:58
OpenMS::ProteinIdentification::MONOISOTOPIC
Definition: ProteinIdentification.h:249
OpenMS::PeptideEvidence::setAABefore
void setAABefore(const char acid)
sets the amino acid single letter code before the sequence (preceding amino acid in the protein)....
OpenMS::AASequence::toString
String toString() const
returns the peptide as string with modifications embedded in brackets
OpenMS::ProteaseDB::getAllMSGFNames
void getAllMSGFNames(std::vector< String > &all_names) const
returns all the enzyme names available for MSGFPlus
OpenMS::File::remove
static bool remove(const String &file)
Removes a file (if it exists).
SpectrumMetaDataLookup.h
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
OpenMS::SpectrumSettings::PROFILE
profile data
Definition: SpectrumSettings.h:74
OpenMS::Peak2D::setIntensity
void setIntensity(IntensityType intensity)
Sets the data point intensity (height)
Definition: Peak2D.h:172
OpenMS::PeptideIdentification::getHits
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
OpenMS::DateTime::now
static DateTime now()
Returns the current date and time.
OPENMS_LOG_FATAL_ERROR
#define OPENMS_LOG_FATAL_ERROR
Macro to be used if a fatal error is reported (processing stops)
Definition: LogStream.h:450
OpenMS::ModificationsDB::getInstance
static ModificationsDB * getInstance()
Returns a pointer to the modifications DB (singleton)
OpenMS::ProteinIdentification::SearchParameters::charges
String charges
The allowed charges for the search.
Definition: ProteinIdentification.h:264
OpenMS::ProteinHit::setAccession
void setAccession(const String &accession)
sets the accession of the protein
OpenMS::MsInspectFile::load
void load(const String &filename, FeatureMapType &feature_map)
Loads a MsInspect file into a featureXML.
Definition: MsInspectFile.h:77
OpenMS::MzIdentMLFile
File adapter for MzIdentML files.
Definition: MzIdentMLFile.h:67
OpenMS::DigestionEnzymeDB::getEnzyme
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition: DigestionEnzymeDB.h:98
OpenMS::File::exists
static bool exists(const String &file)
Method used to test if a file exists.
OpenMS::Peak2D::setRT
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
OpenMS::PeptideEvidence::N_TERMINAL_AA
static const char N_TERMINAL_AA
Definition: PeptideEvidence.h:60
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::PeptideHit::addPeptideEvidence
void addPeptideEvidence(const PeptideEvidence &peptide_evidence)
adds information on a peptide that is (potentially) identified by this PSM
Exception.h
OpenMS::ProteinIdentification::SearchParameters::variable_modifications
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:267
OpenMS::ProteinIdentification::SearchParameters::mass_type
PeakMassType mass_type
Mass type of the peaks.
Definition: ProteinIdentification.h:265
OpenMS::ResidueModification::getOrigin
char getOrigin() const
Returns the origin (i.e. modified amino acid)
ProteaseDB.h
OpenMS::PeptideIdentification::sort
void sort()
Sorts the hits by score.
OpenMS::ProteinIdentification::setPrimaryMSRunPath
void setPrimaryMSRunPath(const StringList &s, bool raw=false)
OpenMS::MetaInfoInterface::setMetaValue
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
OpenMS::MzMLFile::load
void load(const String &filename, PeakMap &map)
Loads a map from a MzML file. Spectra and chromatograms are sorted by default (this can be disabled u...
OpenMS::SpectrumSettings::SpectrumType
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:70
OpenMS::MSExperiment::iterator
Base::iterator iterator
Definition: MSExperiment.h:124
OpenMS::ProteinIdentification::setScoreType
void setScoreType(const String &type)
Sets the protein score type.
int
OpenMS::ProteinIdentification::SearchParameters::db
String db
The used database.
Definition: ProteinIdentification.h:261
OpenMS::PeptideEvidence::UNKNOWN_AA
static const char UNKNOWN_AA
Definition: PeptideEvidence.h:57
QProcess
ProteinIdentification.h
OpenMS::ResidueModification::getDiffMonoMass
double getDiffMonoMass() const
returns the diff monoisotopic mass, or 0.0 if not set
OpenMS::PeptideEvidence
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
OpenMS::PeptideIdentification::setHigherScoreBetter
void setHigherScoreBetter(bool value)
sets the peptide score orientation
OpenMS::Exception::BaseException
Exception base class.
Definition: Exception.h:89
OpenMS::String::split
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
OpenMS::Peak2D::setMZ
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
PepXMLFile.h
OpenMS::Feature::setOverallQuality
void setOverallQuality(QualityType q)
Set the overall quality.
OpenMS::String::toQString
QString toQString() const
Conversion to Qt QString.
CsvFile.h
OpenMS::Exception::NotImplemented
Not implemented exception.
Definition: Exception.h:436
OpenMS::ProteinIdentification::setSearchParameters
void setSearchParameters(const SearchParameters &search_parameters)
Sets the search parameters.
OpenMS::PeakFileOptions::setFillData
void setFillData(bool only)
sets whether to fill the actual data into the container (spectrum/chromatogram)
seqan::find
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::ProteinIdentification::SearchParameters::fixed_modifications
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:266
OpenMS::MzIdentMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &poid, std::vector< PeptideIdentification > &peid)
Loads the identifications from a MzIdentML file.
ModificationsDB.h
OpenMS::CsvFile
This class handles csv files. Currently only loading is implemented.
Definition: CsvFile.h:49
OpenMS::JavaInfo::canRun
static bool canRun(const String &java_executable, bool verbose_on_error=true)
Determine if Java is installed and reachable.
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
OpenMS::ProteinIdentification::setSearchEngine
void setSearchEngine(const String &search_engine)
Sets the search engine type.
OpenMS::ProteinIdentification::SearchParameters::precursor_mass_tolerance
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition: ProteinIdentification.h:271
OpenMS::Constants::UserParam::ISOTOPE_ERROR
const std::string ISOTOPE_ERROR
OpenMS::TextFile::begin
ConstIterator begin() const
Gives access to the underlying text buffer.
OPENMS_LOG_ERROR
#define OPENMS_LOG_ERROR
Macro to be used if a non-fatal error is reported (processing continues)
Definition: LogStream.h:455
OpenMS::File::TempDir
Class representing a temporary directory.
Definition: File.h:63
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
OpenMS::Exception::ParseError
Parse Error exception.
Definition: Exception.h:622
OpenMS::ListUtils
Collection of utility functions for management of vectors.
Definition: ListUtils.h:77
OpenMS::String::substr
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
OpenMS::Feature
An LC-MS feature.
Definition: Feature.h:70
OpenMS::MSExperiment::end
Iterator end()
Definition: MSExperiment.h:167
OpenMS::File::findDatabase
static String findDatabase(const String &db_name)
OpenMS::MSExperiment::getSpectra
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
OpenMS::PeptideHit::getScore
double getScore() const
returns the PSM score
OpenMS::PepXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const String &experiment_name, const SpectrumMetaDataLookup &lookup)
Loads peptide sequences with modifications out of a PepXML file.
OpenMS::Exception::FileNotWritable
File not writable exception.
Definition: Exception.h:551
OpenMS::File::readable
static bool readable(const String &file)
Return true if the file exists and is readable.
OpenMS::MSExperiment::getPrimaryMSRunPath
void getPrimaryMSRunPath(StringList &toFill) const
get the file path to the first MS run
OpenMS::PeptideEvidence::C_TERMINAL_AA
static const char C_TERMINAL_AA
Definition: PeptideEvidence.h:61
OpenMS::TOPPBase::ExitCodes
ExitCodes
Exit codes.
Definition: TOPPBase.h:149
String.h
OpenMS::ModificationsDB::getAllSearchModifications
void getAllSearchModifications(std::vector< String > &modifications) const
Collects all modifications that can be used for identification searches.
OpenMS::ProteinIdentification::setDateTime
void setDateTime(const DateTime &date)
Sets the date of the protein identification run.
OpenMS::DigestionEnzymeProtein::getMSGFID
Int getMSGFID() const
returns the MSGFPlus enzyme id
OpenMS::ProteinIdentification::SearchParameters::precursor_mass_tolerance_ppm
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:272
OpenMS::AASequence
Representation of a peptide/protein sequence.
Definition: AASequence.h:111
OpenMS::PepXMLFile
Used to load and store PepXML files.
Definition: PepXMLFile.h:63
OpenMS::AASequence::fromString
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
OpenMS::Exception::MissingInformation
Not all required information provided.
Definition: Exception.h:195
OpenMS::ResidueModification::getTermSpecificityName
String getTermSpecificityName(TermSpecificity term_spec=NUMBER_OF_TERM_SPECIFICITY) const
Returns the name of the terminal specificity.
OpenMS::ProteinIdentification::setIdentifier
void setIdentifier(const String &id)
Sets the identifier.
OpenMS::Exception::FileEmpty
File is empty.
Definition: Exception.h:594
OpenMS::ProteinIdentification::setHits
void setHits(const std::vector< ProteinHit > &hits)
Sets the protein hits.
OpenMS::MSSpectrum
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
OpenMS::ProteinIdentification::SearchParameters
Search parameters of the DB search.
Definition: ProteinIdentification.h:258
OpenMS::PeptideIdentification::setMZ
void setMZ(double mz)
sets the MZ of the MS2 spectrum
OpenMS::TextFile::ConstIterator
std::vector< String >::const_iterator ConstIterator
Non-mutable iterator.
Definition: TextFile.h:56
OpenMS::ProteinIdentification::SearchParameters::enzyme_term_specificity
EnzymaticDigestion::Specificity enzyme_term_specificity
The number of required cutting-rule matching termini during search (none=0, semi=1,...
Definition: ProteinIdentification.h:274
seqan::position
Size< TNeedle >::Type position(const PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:561
OpenMS::ModificationsDB::getModification
const ResidueModification * getModification(Size index) const
Returns the modification with the given index. note: out-of-bounds check is only performed in debug m...
OpenMS::DateTime
DateTime Class.
Definition: DateTime.h:54
File.h
OpenMS::PeptideEvidence::setAAAfter
void setAAAfter(const char acid)
sets the amino acid single letter code after the sequence (subsequent amino acid in the protein)....
MzIdentMLFile.h
OpenMS::BaseFeature::setCharge
void setCharge(const ChargeType &ch)
Set charge state.
OpenMS::MzMLFile::getOptions
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
OpenMS::DefaultParamHandler::writeParametersToMetaValues
static void writeParametersToMetaValues(const Param &write_this, MetaInfoInterface &write_here, const String &prefix="")
Writes all parameters to meta values.
OpenMS::MsInspectFile::store
void store(const String &filename, const SpectrumType &spectrum) const
Stores a featureXML as a MsInspect file.
Definition: MsInspectFile.h:167
TOPPBase.h
OpenMS::PeptideIdentification::setScoreType
void setScoreType(const String &type)
sets the peptide score type
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:63
OpenMS::SpectrumMetaDataLookup::addMissingRTsToPeptideIDs
static bool addMissingRTsToPeptideIDs(std::vector< PeptideIdentification > &peptides, const String &filename, bool stop_on_error=false)
Add missing retention time values to peptide identifications based on raw data.
OpenMS::FileHandler::swapExtension
static String swapExtension(const String &filename, const FileTypes::Type new_type)
Tries to find and remove a known file extension, and append the new one.
OpenMS::DateTime::getDate
void getDate(UInt &month, UInt &day, UInt &year) const
Fills the arguments with the date.
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:55