OpenMS  2.4.0
FASTAContainer.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
41 #include <functional>
42 #include <fstream>
43 #include <memory>
44 #include <utility>
45 #include <vector>
46 
47 namespace OpenMS
48 {
49 
// Tag type used as template argument to select the file-backed
// FASTAContainer specialization (only declared here; never defined in this view).
50  struct TFI_File;
// Tag type used as template argument to select the vector-backed
// FASTAContainer specialization (only declared here; never defined in this view).
51  struct TFI_Vector;
52 
// Primary template of FASTAContainer: declared but not defined.
// Functionality is provided only by the explicit specializations for the
// backend tag types (TFI_File and TFI_Vector) that follow in this header.
75 template<typename TBackend>
76 class FASTAContainer; // prototype
77 
86 template<>
87 class FASTAContainer<TFI_File>
88 {
89 public:
90  FASTAContainer() = delete;
91 
93  FASTAContainer(const String& FASTA_file)
94  : f_(),
95  offsets_(),
96  data_fg_(),
97  data_bg_(),
98  chunk_offset_(0)
99  {
100  f_.readStart(FASTA_file);
101  }
102 
104  size_t getChunkOffset() const
105  {
106  return chunk_offset_;
107  }
108 
116  {
117  chunk_offset_ += data_fg_.size();
118  data_fg_.swap(data_bg_);
119  data_bg_.clear(); // just in case someone calls activateCache() multiple times...
120  return !data_fg_.empty();
121  }
122 
129  bool cacheChunk(int suggested_size)
130  {
131  data_bg_.clear();
132  data_bg_.reserve(suggested_size);
134  for (int i = 0; i < suggested_size; ++i)
135  {
136  std::streampos spos = f_.position();
137  if (!f_.readNext(p)) break;
138  data_bg_.push_back(std::move(p));
139  offsets_.push_back(spos);
140  }
141  return !data_bg_.empty();
142  }
143 
145  size_t chunkSize() const
146  {
147  return data_fg_.size();
148  }
149 
157  const FASTAFile::FASTAEntry& chunkAt(size_t pos) const
158  {
159  return data_fg_[pos];
160  }
161 
175  bool readAt(FASTAFile::FASTAEntry& protein, size_t pos)
176  {
177  // check if position is currently cached...
178  if (chunk_offset_ <= pos && pos < chunk_offset_ + chunkSize())
179  {
180  protein = data_fg_[pos - chunk_offset_];
181  return true;
182  }
183  // read anew from disk...
184  if (pos >= offsets_.size())
185  {
186  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, pos, offsets_.size());
187  }
188  std::streampos spos = f_.position(); // save old position
189  if (!f_.setPosition(offsets_[pos])) return false;
190  bool r = f_.readNext(protein);
191  f_.setPosition(spos); // restore old position
192  return r;
193  }
194 
196  bool empty() const
197  { // trusting the FASTA file can be read...
198  return f_.atEnd() && offsets_.empty();
199  }
200 
202  void reset()
203  {
204  f_.setPosition(0);
205  offsets_.clear();
206  data_fg_.clear();
207  data_bg_.clear();
208  chunk_offset_ = 0;
209  }
210 
211 
217  size_t size() const
218  {
219  return offsets_.size();
220  }
221 
222 private:
224  std::vector<std::streampos> offsets_;
225  std::vector<FASTAFile::FASTAEntry> data_fg_;
226  std::vector<FASTAFile::FASTAEntry> data_bg_;
227  size_t chunk_offset_;
228 };
229 
236 template<>
237 class FASTAContainer<TFI_Vector>
238 {
239 public:
240  FASTAContainer() = delete;
241 
247  FASTAContainer(const std::vector<FASTAFile::FASTAEntry>& data)
248  : data_(data)
249  {
250  }
251 
253  size_t getChunkOffset() const
254  {
255  return 0;
256  }
257 
263  {
264  if (!activate_count_)
265  {
266  activate_count_ = 1;
267  return true;
268  }
269  return false;
270  }
271 
275  bool cacheChunk(int /*suggested_size*/)
276  {
277  if (!cache_count_)
278  {
279  cache_count_ = 1;
280  return true;
281  }
282  return false;
283  }
284 
289  size_t chunkSize() const
290  {
291  return data_.size();
292  }
293 
295  const FASTAFile::FASTAEntry& chunkAt(size_t pos) const
296  {
297  return data_[pos];
298  }
299 
301  bool readAt(FASTAFile::FASTAEntry& protein, size_t pos) const
302  {
303  protein = data_[pos];
304  return true;
305  }
306 
308  bool empty() const
309  {
310  return data_.empty();
311  }
312 
314  size_t size() const
315  {
316  return data_.size();
317  }
318 
320  void reset()
321  {
322  activate_count_ = 0;
323  cache_count_ = 0;
324  }
325 
326 private:
327  const std::vector<FASTAFile::FASTAEntry>& data_;
328  int activate_count_ = 0;
329  int cache_count_ = 0;
330 };
331 
332 } // namespace OpenMS
333 
std::vector< FASTAFile::FASTAEntry > data_bg_
prefetched (background) data; will become the next active data
Definition: FASTAContainer.h:226
A more convenient string class.
Definition: String.h:57
const std::vector< FASTAFile::FASTAEntry > & data_
reference to existing data
Definition: FASTAContainer.h:327
bool readAt(FASTAFile::FASTAEntry &protein, size_t pos) const
fast access to an entry
Definition: FASTAContainer.h:301
Int overflow exception.
Definition: Exception.h:254
size_t size() const
calls size() on underlying vector
Definition: FASTAContainer.h:314
size_t getChunkOffset() const
how many entries were read and got swapped out already
Definition: FASTAContainer.h:104
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
bool activateCache()
no-op (since data is already fully available as vector)
Definition: FASTAContainer.h:262
bool readAt(FASTAFile::FASTAEntry &protein, size_t pos)
Retrieve a FASTA entry at global position (must not be behind the currently active chunk...
Definition: FASTAContainer.h:175
void reset()
required for template parameters!
Definition: FASTAContainer.h:320
const FASTAFile::FASTAEntry & chunkAt(size_t pos) const
fast access to chunked (i.e. all) entries
Definition: FASTAContainer.h:295
std::vector< std::streampos > offsets_
internal byte offsets into FASTA file for random access reading of previous entries.
Definition: FASTAContainer.h:224
template parameter for vector-based FASTA access
Definition: FASTAContainer.h:76
FASTAContainer(const std::vector< FASTAFile::FASTAEntry > &data)
C'tor for already existing data (by reference).
Definition: FASTAContainer.h:247
bool cacheChunk(int suggested_size)
Prefetch a new cache in the background, with up to suggestedSize entries (or fewer upon reaching EOF)...
Definition: FASTAContainer.h:129
bool cacheChunk(int)
no-op (since data is already fully available as vector)
Definition: FASTAContainer.h:275
std::vector< FASTAFile::FASTAEntry > data_fg_
active (foreground) data
Definition: FASTAContainer.h:225
size_t chunk_offset_
number of entries before the current chunk
Definition: FASTAContainer.h:227
void reset()
resets reading of the FASTA file, enables fresh reading of the FASTA from the beginning ...
Definition: FASTAContainer.h:202
FASTAContainer(const String &FASTA_file)
C'tor with FASTA filename.
Definition: FASTAContainer.h:93
size_t size() const
NOT the number of entries in the FASTA file, but merely the number of already read entries (since we ...
Definition: FASTAContainer.h:217
size_t chunkSize() const
number of entries in active cache
Definition: FASTAContainer.h:145
const FASTAFile::FASTAEntry & chunkAt(size_t pos) const
Retrieve a FASTA entry at cache position pos (fast)
Definition: FASTAContainer.h:157
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
size_t getChunkOffset() const
always 0, since this specialization requires/supports no chunking
Definition: FASTAContainer.h:253
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
FASTAFile f_
FASTA file connection.
Definition: FASTAContainer.h:223
bool activateCache()
Swaps in the background cache of entries, read previously via cacheChunk()
Definition: FASTAContainer.h:115
bool empty() const
is the FASTA file empty?
Definition: FASTAContainer.h:196
bool empty() const
calls empty() on underlying vector
Definition: FASTAContainer.h:308
size_t chunkSize() const
active data spans the full range, i.e. size of container
Definition: FASTAContainer.h:289