OpenMS
Loading...
Searching...
No Matches
ArrowExport.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/config.h>
12
13#ifdef WITH_PARQUET
14
17
18#include <cstdint>
19#include <memory>
20#include <vector>
21#include <string>
22
// Arrow C Data Interface structs, forward-declared at global scope so this
// header can name them through pointers without pulling in <arrow/c/abi.h>.
// Per the original note, the complete definitions are included only in
// ArrowExport.cpp.
struct ArrowArray;
struct ArrowSchema;

// arrow::Table is likewise only forward-declared, so that no Arrow headers
// leak into code that includes this file.
namespace arrow { class Table; }
33
34namespace OpenMS
35{
36
/// Layout selector stored in the `format` member of the Arrow export
/// configuration structs.
///
/// NOTE(review): the exact table layout each enumerator produces is decided
/// by the implementation (ArrowExport.cpp), not by this header -- confirm
/// there before relying on a particular column arrangement.
enum class ArrowExportFormat
{
  Long = 0,     ///< First enumerator; used as the default by the config structs.
  SemiWide = 1  ///< Alternative layout.
};
57
69struct OPENMS_DLLAPI ArrowSpectraExportConfig
70{
72 ArrowExportFormat format = ArrowExportFormat::Long;
73
75 std::vector<UInt> ms_levels;
76
78 double min_rt = 0;
79
81 double max_rt = 0;
82
84 double min_mz = 0;
85
87 double max_mz = 0;
88
95 std::vector<std::string> columns;
96
98 bool include_precursor_info = true;
99
101 bool include_ion_mobility = true;
102};
103
104
113struct OPENMS_DLLAPI ArrowChromatogramExportConfig
114{
116 ArrowExportFormat format = ArrowExportFormat::Long;
117
119 double min_rt = 0;
120
122 double max_rt = 0;
123
125 std::vector<std::string> columns;
126};
127
128
144struct OPENMS_DLLAPI ParquetWriteConfig
145{
147 enum class Compression
148 {
149 NONE,
150 SNAPPY,
151 GZIP,
152 LZ4,
153 ZSTD
154 };
155
157 Compression compression = Compression::ZSTD;
158
163 int compression_level = 3;
164
168 int64_t row_group_size = 128 * 1024 * 1024;
169
173 bool write_statistics = true;
174
177 int64_t data_page_size = 1024 * 1024;
178};
179
180
193class OPENMS_DLLAPI ArrowExport
194{
195public:
236 static std::shared_ptr<arrow::Table> exportSpectraToArrow(
237 const MSExperiment& exp,
238 const ArrowSpectraExportConfig& config = ArrowSpectraExportConfig{});
239
240
251 static std::vector<std::string> getSpectraArrowColumnNames(
252 const MSExperiment& exp,
253 const ArrowSpectraExportConfig& config = ArrowSpectraExportConfig{});
254
255
281 static std::shared_ptr<arrow::Table> exportChromatogramsToArrow(
282 const MSExperiment& exp,
283 const ArrowChromatogramExportConfig& config = ArrowChromatogramExportConfig{});
284
285
293 static std::vector<std::string> getChromatogramArrowColumnNames(
294 const MSExperiment& exp,
295 const ArrowChromatogramExportConfig& config = ArrowChromatogramExportConfig{});
296
297
314 static bool exportSpectraToArrowCDataInterface(
315 const MSExperiment& exp,
316 const ArrowSpectraExportConfig& config,
317 ::ArrowSchema* out_schema,
318 ::ArrowArray* out_array);
319
320
330 static bool exportChromatogramsToArrowCDataInterface(
331 const MSExperiment& exp,
332 const ArrowChromatogramExportConfig& config,
333 ::ArrowSchema* out_schema,
334 ::ArrowArray* out_array);
335
336
382 static bool exportSpectraToParquet(
383 const MSExperiment& exp,
384 const String& filename,
385 const ArrowSpectraExportConfig& config = ArrowSpectraExportConfig{},
386 const ParquetWriteConfig& parquet_config = ParquetWriteConfig{});
387
388
401 static bool exportChromatogramsToParquet(
402 const MSExperiment& exp,
403 const String& filename,
404 const ArrowChromatogramExportConfig& config = ArrowChromatogramExportConfig{},
405 const ParquetWriteConfig& parquet_config = ParquetWriteConfig{});
406}; // class ArrowExport
407
408} // namespace OpenMS
409
410#endif // WITH_PARQUET
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19