OpenMS
FASTAFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow $
6 // $Authors: Chris Bielow, Nora Wild $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
14 
15 #include <fstream>
16 #include <utility>
17 #include <vector>
18 
19 namespace OpenMS
20 {
/// Reads and writes FASTA files, either entry-by-entry (streamed) or all at once.
/// Derives from ProgressLogger, so progress can be reported while reading.
///
/// NOTE(review): this is a rendered source listing. The number at the start of each
/// line is the line number inside FASTAFile.h; gaps in the numbering are lines the
/// listing omits (documentation comments and, apparently, some declarations).
34  class OPENMS_DLLAPI FASTAFile : public ProgressLogger
35  {
36  public:
    /// One FASTA record: identifier, description and sequence.
    ///
    /// NOTE(review): the three String members (identifier, description, sequence)
    /// are used below but their declarations (FASTAFile.h:47-49) are not part of
    /// this listing.
45  struct FASTAEntry
46  {
50 
    /// Default constructor.
51  FASTAEntry() = default;
52 
    /// Builds an entry by copying the given identifier, description and sequence.
53  FASTAEntry(const String& id, const String& desc, const String& seq) :
54  identifier(id),
55  description(desc),
56  sequence(seq)
57  {
58  }
59 
    /// Copy constructor (member-wise).
60  FASTAEntry(const FASTAEntry& rhs) = default;
61 
    /// Move constructor: moves each of the three members out of @p rhs.
    ///
    /// NOTE(review): no move assignment operator is visible in this listing.
    /// With a user-declared copy assignment operator and move constructor, one is
    /// not generated implicitly, so assigning from an rvalue would copy -- confirm
    /// against the real header whether that is intended.
62  FASTAEntry(FASTAEntry&& rhs) noexcept
63  :
64  identifier(::std::move(rhs.identifier)),
65  description(::std::move(rhs.description)),
66  sequence(::std::move(rhs.sequence))
67  {
68  }
69 
70 
    /// Copy assignment (member-wise).
71  FASTAEntry& operator=(const FASTAEntry& rhs) = default;
72 
    /// True if identifier, description and sequence are all equal to those of @p rhs.
73  bool operator==(const FASTAEntry& rhs) const
74  {
75  return identifier == rhs.identifier
76  && description == rhs.description
77  && sequence == rhs.sequence;
78  }
79 
    /// True if identifier and description equal those of @p rhs; the sequence is not compared.
80  bool headerMatches(const FASTAEntry& rhs) const
81  {
82  return identifier == rhs.identifier &&
83  description == rhs.description;
84  }
85 
    /// True if the sequence equals that of @p rhs; identifier and description are not compared.
86  bool sequenceMatches(const FASTAEntry& rhs) const
87  {
88  return sequence == rhs.sequence;
89  }
90  };
91 
    /// Default constructor.
93  FASTAFile() = default;
94 
    /// Destructor.
96  ~FASTAFile() override = default;
97 
    /// Prepares the FASTA file given by @p filename for streamed reading using readNext().
103  void readStart(const String& filename);
104 
    /// Same as readStart(), but does internal progress logging whenever
    /// readNextWithProgress() is called.
106  void readStartWithProgress(const String& filename, const String& progress_label);
107 
    /// Reads the next FASTA entry from file into @p protein.
    /// To read all entries in one go, use load().
    /// NOTE(review): presumably returns false when no further entry could be read -- TODO confirm.
115  bool readNext(FASTAEntry& protein);
116 
    // NOTE(review): the page's reference section lists
    // `bool readNextWithProgress(FASTAEntry& protein)`, but its declaration does not
    // appear in this listing (the line numbering skips 117-119 here).
120 
    /// Current stream position when reading a file.
122  std::streampos position();
123 
    /// Is the stream at EOF?
125  bool atEnd();
126 
    /// Seeks the stream to @p pos.
    /// NOTE(review): meaning of the bool result is not shown here -- presumably success; confirm.
128  bool setPosition(const std::streampos& pos);
129 
    /// Prepares the FASTA file given by @p filename for streamed writing using writeNext().
134  void writeStart(const String& filename);
135 
    /// Stores the data given by @p protein. Call writeStart() once before calling writeNext().
141  void writeNext(const FASTAEntry& protein);
142 
    /// Closes the file (flush). Per the page's reference text, this is also done
    /// implicitly when the FASTAFile object goes out of scope.
146  void writeEnd();
147 
148 
    /// Loads the FASTA file given by @p filename and stores the information in @p data.
155  void load(const String& filename, std::vector<FASTAEntry>& data) const;
156 
    /// Stores the entries given by @p data in the file @p filename.
163  void store(const String& filename, const std::vector<FASTAEntry>& data) const;
164 
165  protected:
    /// Reads a protein entry from the current file position; results are handed
    /// back through the three output references @p id, @p description and @p seq.
170  bool readEntry_(std::string& id, std::string& description, std::string& seq);
171 
    // Filestream for reading; initialised via readStart().
172  std::fstream infile_;
    // Filestream for writing; initialised via writeStart().
173  std::ofstream outfile_;
    // Starts at 0. NOTE(review): presumably counts entries read so far (for progress logging) -- confirm.
174  Size entries_read_{0};
    // Value-initialised. NOTE(review): presumably the size of the input file, used as progress end -- confirm.
175  std::streampos fileSize_{};
    // Sequence of the currently read protein.
176  std::string seq_;
    // Identifier of the currently read protein.
177  std::string id_;
    // Description of the currently read protein.
178  std::string description_;
179  };
180 
181 } // namespace OpenMS
This class serves for reading in and writing FASTA files. If the protein/gene sequence contains unusua...
Definition: FASTAFile.h:35
void writeEnd()
Closes the file (flush). Called implicitly when FASTAFile object goes out of scope.
void load(const String &filename, std::vector< FASTAEntry > &data) const
Loads a FASTA file given by 'filename' and stores the information in 'data'. This uses more RAM than r...
bool readNext(FASTAEntry &protein)
Reads the next FASTA entry from file. If you want to read all entries in one go, use load().
bool setPosition(const std::streampos &pos)
Seeks the stream to position 'pos'.
std::string seq_
sequence of currently read protein
Definition: FASTAFile.h:176
void readStart(const String &filename)
Prepares a FASTA file given by 'filename' for streamed reading using readNext().
void readStartWithProgress(const String &filename, const String &progress_label)
same as readStart(), but does internal progress logging whenever readNextWithProgress() is called
~FASTAFile() override=default
Destructor.
void writeStart(const String &filename)
Prepares a FASTA file given by 'filename' for streamed writing using writeNext().
std::ofstream outfile_
filestream for writing; init using FASTAFile::writeStart()
Definition: FASTAFile.h:173
bool readNextWithProgress(FASTAEntry &protein)
FASTAFile()=default
Default constructor.
bool atEnd()
is stream at EOF?
bool readEntry_(std::string &id, std::string &description, std::string &seq)
Reads a protein entry from the current file position and returns the ID, description and sequence.
std::string id_
identifier of currently read protein
Definition: FASTAFile.h:177
void writeNext(const FASTAEntry &protein)
Stores the data given by protein. Call writeStart() once before calling writeNext()....
std::fstream infile_
filestream for reading; init using FASTAFile::readStart()
Definition: FASTAFile.h:172
std::string description_
description of currently read protein
Definition: FASTAFile.h:178
void store(const String &filename, const std::vector< FASTAEntry > &data) const
stores the data given by 'data' at the file 'filename'
std::streampos position()
current stream position when reading a file
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
A more convenient string class.
Definition: String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
FASTA entry type (identifier, description and sequence). The first String corresponds to the identifie...
Definition: FASTAFile.h:46
bool headerMatches(const FASTAEntry &rhs) const
Definition: FASTAFile.h:80
String sequence
Definition: FASTAFile.h:49
String description
Definition: FASTAFile.h:48
FASTAEntry(const String &id, const String &desc, const String &seq)
Definition: FASTAFile.h:53
FASTAEntry(const FASTAEntry &rhs)=default
bool operator==(const FASTAEntry &rhs) const
Definition: FASTAFile.h:73
String identifier
Definition: FASTAFile.h:47
FASTAEntry & operator=(const FASTAEntry &rhs)=default
bool sequenceMatches(const FASTAEntry &rhs) const
Definition: FASTAFile.h:86
FASTAEntry(FASTAEntry &&rhs) noexcept
Definition: FASTAFile.h:62