OpenMS
Loading...
Searching...
No Matches
ParquetFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Justin Sing $
6// $Authors: Justin Sing $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/config.h>
14#include <OpenMS/SYSTEM/File.h>
15
16#include <arrow/api.h>
17#include <arrow/io/file.h>
18#include <parquet/arrow/writer.h>
19#include <parquet/arrow/reader.h>
20
21#include <memory>
22#include <string>
23#include <vector>
24
25namespace OpenMS
26{
27
/// Shared utilities for reading, writing, and packaging Parquet-based file formats.
/// All members are static; the class declares no data members.
47 class OPENMS_DLLAPI ParquetFile
48 {
49 public:
53
/// Append a value to an Arrow builder, throwing on failure.
/// Takes the arrow::Status to check plus a column name.
/// NOTE(review): presumably @p status is the result of the builder's append call and
/// @p column is only used to label the error -- confirm against the implementation.
62 static void appendOrThrow(const arrow::Status& status, const std::string& column);
63
/// Finish an Arrow builder and return the resulting Array.
/// @param builder Arrow builder whose Finish() is invoked.
/// @param name    Passed on to the error helper when Finish() reports a non-ok status.
/// @return The array produced by builder.Finish().
74 template <typename BuilderT>
75 static std::shared_ptr<arrow::Array> finishArray(BuilderT& builder, const std::string& name)
76 {
77 std::shared_ptr<arrow::Array> array;
78 auto status = builder.Finish(&array);
79 if (!status.ok())
80 {
// Delegate to the shared helper so finishArray failures are reported consistently.
81 throw_finish_error_(name, status.ToString());
82 }
// NOTE(review): assumes throw_finish_error_ never returns normally; otherwise a
// possibly empty array pointer would be returned here -- confirm.
83 return array;
84 }
85
87
91
/// Write an Arrow Table to a Parquet file.
/// @p row_group_size defaults to 262144.
/// NOTE(review): presumably the maximum number of rows per row group -- confirm.
102 static void writeTable(const std::shared_ptr<arrow::Table>& table, const String& filename,
103 int64_t row_group_size = 262144);
104
/// Read a Parquet file into an Arrow Table.
116 static std::shared_ptr<arrow::Table> readTable(const String& filename);
/// Read a Parquet file from an Arrow RandomAccessFile into an Arrow Table.
122 static std::shared_ptr<arrow::Table> readTable(const std::shared_ptr<arrow::io::RandomAccessFile>& infile);
123
125
129
/// Get a required column from an Arrow Table by name.
/// NOTE(review): behaviour for a missing column is not visible here (likely throws) -- confirm.
141 static std::shared_ptr<arrow::Array> getColumn(const std::shared_ptr<arrow::Table>& table,
142 const std::string& name);
143
/// Get an optional column from an Arrow Table by name.
/// NOTE(review): presumably returns an empty pointer when the column is absent -- confirm.
154 static std::shared_ptr<arrow::Array> getOptionalColumn(const std::shared_ptr<arrow::Table>& table,
155 const std::string& name);
156
158
162
/// Read an integer value from an Arrow Array with type coercion.
/// NOTE(review): presumably @p default_value is returned for a null entry when
/// @p allow_null is true -- confirm against the implementation.
178 static int64_t getInt64(const std::shared_ptr<arrow::Array>& array, int64_t row,
179 int64_t default_value, bool allow_null);
180
/// Read a floating-point value from an Arrow Array with type coercion.
/// NOTE(review): @p default_value / @p allow_null presumably control null handling -- confirm.
196 static double getDouble(const std::shared_ptr<arrow::Array>& array, int64_t row,
197 double default_value, bool allow_null);
198
/// Read a boolean value from an Arrow Array with type coercion.
/// NOTE(review): @p default_value / @p allow_null presumably control null handling -- confirm.
214 static bool getBool(const std::shared_ptr<arrow::Array>& array, int64_t row,
215 bool default_value, bool allow_null);
216
/// Read a string value from an Arrow Array.
229 static std::string getString(const std::shared_ptr<arrow::Array>& array, int64_t row);
230
/// Read a list of strings from an Arrow Array.
243 static std::vector<std::string> getStringList(const std::shared_ptr<arrow::Array>& array, int64_t row);
244
246
250
/// Escape a string for safe embedding into JSON values.
258 static std::string jsonEscape(const String& input);
259
/// Return the number of rows in a Parquet file using the low-level parquet reader metadata.
264 static int64_t rowCount(const String& filename);
265
267
268
269 private:
/// Internal helper to throw a consistent error from finishArray.
/// Receives the name given to finishArray and the text of the failed status.
271 static void throw_finish_error_(const std::string& name, const std::string& error);
272 };
273
274} // namespace OpenMS
Shared utilities for reading, writing, and packaging Parquet-based file formats.
Definition ParquetFile.h:48
static void throw_finish_error_(const std::string &name, const std::string &error)
Internal helper to throw a consistent error from finishArray.
static std::shared_ptr< arrow::Array > finishArray(BuilderT &builder, const std::string &name)
Finish an Arrow builder and return the resulting Array.
Definition ParquetFile.h:75
static void writeTable(const std::shared_ptr< arrow::Table > &table, const String &filename, int64_t row_group_size=262144)
Write an Arrow Table to a Parquet file.
static std::string jsonEscape(const String &input)
Escape a string for safe embedding into JSON values.
static int64_t rowCount(const String &filename)
Return the number of rows in a Parquet file using the low-level Parquet reader metadata....
static std::shared_ptr< arrow::Table > readTable(const String &filename)
Read a Parquet file into an Arrow Table.
static void appendOrThrow(const arrow::Status &status, const std::string &column)
Append a value to an Arrow builder, throwing on failure.
static std::vector< std::string > getStringList(const std::shared_ptr< arrow::Array > &array, int64_t row)
Read a list of strings from an Arrow Array.
static int64_t getInt64(const std::shared_ptr< arrow::Array > &array, int64_t row, int64_t default_value, bool allow_null)
Read an integer value from an Arrow Array with type coercion.
static double getDouble(const std::shared_ptr< arrow::Array > &array, int64_t row, double default_value, bool allow_null)
Read a floating-point value from an Arrow Array with type coercion.
static std::string getString(const std::shared_ptr< arrow::Array > &array, int64_t row)
Read a string value from an Arrow Array.
static std::shared_ptr< arrow::Array > getOptionalColumn(const std::shared_ptr< arrow::Table > &table, const std::string &name)
Get an optional column from an Arrow Table by name.
static bool getBool(const std::shared_ptr< arrow::Array > &array, int64_t row, bool default_value, bool allow_null)
Read a boolean value from an Arrow Array with type coercion.
static std::shared_ptr< arrow::Array > getColumn(const std::shared_ptr< arrow::Table > &table, const std::string &name)
Get a required column from an Arrow Table by name.
static std::shared_ptr< arrow::Table > readTable(const std::shared_ptr< arrow::io::RandomAccessFile > &infile)
Read a Parquet file from an Arrow RandomAccessFile into an Arrow Table.
A more convenient string class.
Definition String.h:32
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19