OpenMS
Loading...
Searching...
No Matches
ArrowIOHelpers.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/config.h>
13#include <OpenMS/FORMAT/MSExperimentArrowExport.h> // for ParquetWriteConfig
14
15#include <cstdint>
16#include <memory>
17#include <unordered_set>
18#include <vector>
19
20// Forward declarations
// Arrow types are only forward-declared here: every use of arrow::Array and
// arrow::Table in this header goes through std::shared_ptr in a function
// declaration, so the complete types are not needed at this point.
21namespace arrow
22{
23 class Array;
24 class Table;
25}
26
/// Main OpenMS namespace.
27namespace OpenMS
28{
/// Interface for classes that can store arbitrary meta information
/// (Type-Name-Value tuples). Forward-declared because this header only takes
/// it by reference (see readMetaValues).
29class MetaInfoInterface;
30
/// Free helper functions shared by Arrow/Parquet I/O code: a UUID generator,
/// helpers that write Arrow tables to Parquet files, and helpers that read
/// typed values out of Arrow arrays.
40namespace ArrowIOHelpers
41{
/// @brief Generate a lowercase hyphenated RFC 4122 version-4 UUID string.
/// @return The generated UUID as a std::string.
49 OPENMS_DLLAPI std::string generateUuidV4();
50
/// @brief Write an Arrow table to a Parquet file.
/// @param table    Arrow table to write.
/// @param filename Name of the Parquet file to write.
/// @param config   Configuration for Parquet file writing; a
///                 default-constructed ParquetWriteConfig is used if omitted.
/// @return NOTE(review): presumably true on success and false on failure --
///         the header does not show this; confirm against the implementation.
59 OPENMS_DLLAPI bool writeTableToParquet(
60 const std::shared_ptr<arrow::Table>& table,
61 const std::string& filename,
62 const ParquetWriteConfig& config = ParquetWriteConfig{});
63
/// @brief Concatenate a vector of Arrow tables and write the result to a
///        Parquet file.
/// @param tables   Tables to concatenate.
///                 NOTE(review): handling of an empty vector or of tables
///                 with differing schemas is not visible here -- confirm.
/// @param filename Name of the Parquet file to write.
/// @param config   Configuration for Parquet file writing; a
///                 default-constructed ParquetWriteConfig is used if omitted.
/// @return NOTE(review): presumably true on success and false on failure --
///         confirm against the implementation.
75 OPENMS_DLLAPI bool concatenateAndWriteToParquet(
76 const std::vector<std::shared_ptr<arrow::Table>>& tables,
77 const std::string& filename,
78 const ParquetWriteConfig& config = ParquetWriteConfig{});
79
80 // ---------------------------------------------------------------------------
81 // Read helpers
82 // ---------------------------------------------------------------------------
83
/// @brief Fetch a named column from a table, combining chunks if needed.
/// @param table    Table to look the column up in.
/// @param name     Name of the column to fetch.
/// @param required Defaults to true.
///                 NOTE(review): presumably selects between reporting an
///                 error and returning an empty pointer when the column is
///                 missing -- the header does not show which; confirm.
/// @return The column as a single arrow::Array.
90 OPENMS_DLLAPI std::shared_ptr<arrow::Array> getColumn(
91 const std::shared_ptr<arrow::Table>& table,
92 const std::string& name,
93 bool required = true);
94
/// Read a string at @p row, or "" if null/out-of-bounds.
96 OPENMS_DLLAPI std::string getStringValue(
97 const std::shared_ptr<arrow::Array>& array,
98 int64_t row);
99
/// Read a double at @p row, or @p default_val (0.0 unless overridden) if null.
101 OPENMS_DLLAPI double getDoubleValue(
102 const std::shared_ptr<arrow::Array>& array,
103 int64_t row,
104 double default_val = 0.0);
105
/// Read a float at @p row, or @p default_val (0.0f unless overridden) if null.
107 OPENMS_DLLAPI float getFloatValue(
108 const std::shared_ptr<arrow::Array>& array,
109 int64_t row,
110 float default_val = 0.0f);
111
/// Read an int32 at @p row, or @p default_val (0 unless overridden) if null.
113 OPENMS_DLLAPI int32_t getInt32Value(
114 const std::shared_ptr<arrow::Array>& array,
115 int64_t row,
116 int32_t default_val = 0);
117
/// Read an int64 at @p row, or @p default_val (0 unless overridden) if null.
119 OPENMS_DLLAPI int64_t getInt64Value(
120 const std::shared_ptr<arrow::Array>& array,
121 int64_t row,
122 int64_t default_val = 0);
123
/// Read a bool at @p row, or @p default_val (false unless overridden) if null.
125 OPENMS_DLLAPI bool getBoolValue(
126 const std::shared_ptr<arrow::Array>& array,
127 int64_t row,
128 bool default_val = false);
129
/// Whether @p array is null at @p row (or unset).
/// NOTE(review): "unset" presumably refers to an empty array pointer --
/// confirm against the implementation.
131 OPENMS_DLLAPI bool isNull(
132 const std::shared_ptr<arrow::Array>& array,
133 int64_t row);
134
/// @brief Read metavalues from a list<struct{name,value,value_type}> column.
/// @param array         Column holding the metavalue lists.
/// @param row           Row whose metavalues are read.
/// @param target        Taken by non-const reference; presumably receives the
///                      metavalues that were read -- confirm.
/// @param excluded_keys Defaults to an empty set.
///                      NOTE(review): presumably metavalue names to skip
///                      while reading -- confirm against the implementation.
141 OPENMS_DLLAPI void readMetaValues(
142 const std::shared_ptr<arrow::Array>& array,
143 int64_t row,
144 MetaInfoInterface& target,
145 const std::unordered_set<std::string>& excluded_keys = {});
146}
147
148} // namespace OpenMS
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition MetaInfoInterface.h:35
int32_t getInt32Value(const std::shared_ptr< arrow::Array > &array, int64_t row, int32_t default_val=0)
Read an int32 at row, or default_val if null.
bool writeTableToParquet(const std::shared_ptr< arrow::Table > &table, const std::string &filename, const ParquetWriteConfig &config=ParquetWriteConfig{})
Write an Arrow table to a Parquet file.
int64_t getInt64Value(const std::shared_ptr< arrow::Array > &array, int64_t row, int64_t default_val=0)
Read an int64 at row, or default_val if null.
bool getBoolValue(const std::shared_ptr< arrow::Array > &array, int64_t row, bool default_val=false)
Read a bool at row, or default_val if null.
float getFloatValue(const std::shared_ptr< arrow::Array > &array, int64_t row, float default_val=0.0f)
Read a float at row, or default_val if null.
double getDoubleValue(const std::shared_ptr< arrow::Array > &array, int64_t row, double default_val=0.0)
Read a double at row, or default_val if null.
std::string generateUuidV4()
Generate a lowercase hyphenated RFC 4122 version-4 UUID string.
void readMetaValues(const std::shared_ptr< arrow::Array > &array, int64_t row, MetaInfoInterface &target, const std::unordered_set< std::string > &excluded_keys={})
Read metavalues from a list<struct{name,value,value_type}> column.
std::shared_ptr< arrow::Array > getColumn(const std::shared_ptr< arrow::Table > &table, const std::string &name, bool required=true)
Fetch a named column from a table, combining chunks if needed.
std::string getStringValue(const std::shared_ptr< arrow::Array > &array, int64_t row)
Read a string at row, or "" if null/out-of-bounds.
bool isNull(const std::shared_ptr< arrow::Array > &array, int64_t row)
Check whether array is null at row (or unset).
bool concatenateAndWriteToParquet(const std::vector< std::shared_ptr< arrow::Table > > &tables, const std::string &filename, const ParquetWriteConfig &config=ParquetWriteConfig{})
Concatenate a vector of Arrow tables and write the result to a Parquet file.
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Definition ArrowIOHelpers.h:22
Configuration for Parquet file writing.
Definition MSExperimentArrowExport.h:136