OpenMS
FASTAFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow $
6 // $Authors: Chris Bielow, Nora Wild $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
14 
15 #include <fstream>
16 #include <utility>
17 #include <vector>
18 
19 namespace OpenMS
20 {
34  class OPENMS_DLLAPI FASTAFile : public ProgressLogger
35  {
36  public:
45  struct FASTAEntry
46  {
50 
51  FASTAEntry() = default;
52 
53  FASTAEntry(const String& id, const String& desc, const String& seq) :
54  identifier(id),
55  description(desc),
56  sequence(seq)
57  {
58  }
59 
60  FASTAEntry(const FASTAEntry& rhs) = default;
61 
62  FASTAEntry(FASTAEntry&& rhs) noexcept
63  :
64  identifier(::std::move(rhs.identifier)),
65  description(::std::move(rhs.description)),
66  sequence(::std::move(rhs.sequence))
67  {
68  }
69 
70 
71  FASTAEntry& operator=(const FASTAEntry& rhs) = default;
72 
73  bool operator==(const FASTAEntry& rhs) const
74  {
75  return identifier == rhs.identifier
76  && description == rhs.description
77  && sequence == rhs.sequence;
78  }
79 
80  bool headerMatches(const FASTAEntry& rhs) const
81  {
82  return identifier == rhs.identifier &&
83  description == rhs.description;
84  }
85 
86  bool sequenceMatches(const FASTAEntry& rhs) const
87  {
88  return sequence == rhs.sequence;
89  }
90  };
91 
93  FASTAFile() = default;
94 
96  ~FASTAFile() override = default;
97 
103  void readStart(const String& filename);
104 
112  bool readNext(FASTAEntry& protein);
113 
115  std::streampos position();
116 
118  bool atEnd();
119 
121  bool setPosition(const std::streampos& pos);
122 
127  void writeStart(const String& filename);
128 
134  void writeNext(const FASTAEntry& protein);
135 
139  void writeEnd();
140 
141 
148  void load(const String& filename, std::vector<FASTAEntry>& data) const;
149 
156  void store(const String& filename, const std::vector<FASTAEntry>& data) const;
157 
158  protected:
163  bool readEntry_(std::string& id, std::string& description, std::string& seq);
164 
165  std::fstream infile_;
166  std::ofstream outfile_;
167  Size entries_read_{0};
168  std::streampos fileSize_{};
169  std::string seq_;
170  std::string id_;
171  std::string description_;
172  };
173 
174 } // namespace OpenMS
This class serves for reading in and writing FASTA files. If the protein/gene sequence contains unusua...
Definition: FASTAFile.h:35
void writeEnd()
Closes the file (flush). Called implicitly when FASTAFile object goes out of scope.
void load(const String &filename, std::vector< FASTAEntry > &data) const
Loads a FASTA file given by 'filename' and stores the information in 'data'. This uses more RAM than r...
bool readNext(FASTAEntry &protein)
Reads the next FASTA entry from file. If you want to read all entries in one go, use load().
bool setPosition(const std::streampos &pos)
seek stream to pos
std::string seq_
sequence of currently read protein
Definition: FASTAFile.h:169
void readStart(const String &filename)
Prepares a FASTA file given by 'filename' for streamed reading using readNext().
~FASTAFile() override=default
Destructor.
void writeStart(const String &filename)
Prepares a FASTA file given by 'filename' for streamed writing using writeNext().
std::ofstream outfile_
filestream for writing; init using FASTAFile::writeStart()
Definition: FASTAFile.h:166
FASTAFile()=default
Default constructor.
bool atEnd()
is stream at EOF?
bool readEntry_(std::string &id, std::string &description, std::string &seq)
Reads a protein entry from the current file position and returns the ID, description and sequence.
std::string id_
identifier of currently read protein
Definition: FASTAFile.h:170
void writeNext(const FASTAEntry &protein)
Stores the data given by protein. Call writeStart() once before calling writeNext()....
std::fstream infile_
filestream for reading; init using FASTAFile::readStart()
Definition: FASTAFile.h:165
std::string description_
description of currently read protein
Definition: FASTAFile.h:171
void store(const String &filename, const std::vector< FASTAEntry > &data) const
stores the data given by 'data' at the file 'filename'
std::streampos position()
current stream position
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
A more convenient string class.
Definition: String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
FASTA entry type (identifier, description and sequence). The first String corresponds to the identifie...
Definition: FASTAFile.h:46
bool headerMatches(const FASTAEntry &rhs) const
Definition: FASTAFile.h:80
String sequence
Definition: FASTAFile.h:49
String description
Definition: FASTAFile.h:48
FASTAEntry(const String &id, const String &desc, const String &seq)
Definition: FASTAFile.h:53
FASTAEntry(const FASTAEntry &rhs)=default
bool operator==(const FASTAEntry &rhs) const
Definition: FASTAFile.h:73
String identifier
Definition: FASTAFile.h:47
FASTAEntry & operator=(const FASTAEntry &rhs)=default
bool sequenceMatches(const FASTAEntry &rhs) const
Definition: FASTAFile.h:86
FASTAEntry(FASTAEntry &&rhs) noexcept
Definition: FASTAFile.h:62