OpenMS
Loading...
Searching...
No Matches
MSExperimentArrowExport.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/config.h>
12
13#ifdef WITH_PARQUET
14
17
18#include <cstdint>
19#include <vector>
20#include <string>
21
// The Arrow C Data Interface structs are only forward-declared in this header
// because they are handled purely through (opaque) pointers here. Their full
// definitions live in <arrow/c/abi.h>, which is included only in
// MSExperimentArrowExport.cpp.
struct ArrowArray;
struct ArrowSchema;
26
27namespace OpenMS
28{
29
/**
  @brief Selects the table layout used when exporting to Arrow / Parquet.

  NOTE(review): the concrete row/column layout behind each value is defined by
  the implementation, not by this header -- confirm there before relying on it.
*/
enum class ArrowExportFormat
{
  Long = 0,     ///< Default layout used by the export configuration structs
  SemiWide = 1  ///< Alternative layout
};
50
62struct OPENMS_DLLAPI ArrowSpectraExportConfig
63{
65 ArrowExportFormat format = ArrowExportFormat::Long;
66
68 std::vector<UInt> ms_levels;
69
71 double min_rt = 0;
72
74 double max_rt = 0;
75
77 double min_mz = 0;
78
80 double max_mz = 0;
81
88 std::vector<std::string> columns;
89
91 bool include_precursor_info = true;
92
94 bool include_ion_mobility = true;
95};
96
97
106struct OPENMS_DLLAPI ArrowChromatogramExportConfig
107{
109 ArrowExportFormat format = ArrowExportFormat::Long;
110
112 double min_rt = 0;
113
115 double max_rt = 0;
116
118 std::vector<std::string> columns;
119};
120
121
137struct OPENMS_DLLAPI ParquetWriteConfig
138{
140 enum class Compression
141 {
142 NONE,
143 SNAPPY,
144 GZIP,
145 LZ4,
146 ZSTD
147 };
148
150 Compression compression = Compression::ZSTD;
151
156 int compression_level = 3;
157
161 int64_t row_group_size = 128 * 1024 * 1024;
162
166 bool write_statistics = true;
167
170 int64_t data_page_size = 1024 * 1024;
171};
172
173
186class OPENMS_DLLAPI MSExperimentArrowExport
187{
188public:
199 static std::vector<std::string> getSpectraArrowColumnNames(
200 const MSExperiment& exp,
201 const ArrowSpectraExportConfig& config = ArrowSpectraExportConfig{});
202
203
211 static std::vector<std::string> getChromatogramArrowColumnNames(
212 const MSExperiment& exp,
213 const ArrowChromatogramExportConfig& config = ArrowChromatogramExportConfig{});
214
215
232 static bool exportSpectraToArrowCDataInterface(
233 const MSExperiment& exp,
234 const ArrowSpectraExportConfig& config,
235 ::ArrowSchema* out_schema,
236 ::ArrowArray* out_array);
237
238
248 static bool exportChromatogramsToArrowCDataInterface(
249 const MSExperiment& exp,
250 const ArrowChromatogramExportConfig& config,
251 ::ArrowSchema* out_schema,
252 ::ArrowArray* out_array);
253
254
326 static bool exportSpectraToParquet(
327 const MSExperiment& exp,
328 const String& filename,
329 const ArrowSpectraExportConfig& config = ArrowSpectraExportConfig{},
330 const ParquetWriteConfig& parquet_config = ParquetWriteConfig{});
331
332
345 static bool exportChromatogramsToParquet(
346 const MSExperiment& exp,
347 const String& filename,
348 const ArrowChromatogramExportConfig& config = ArrowChromatogramExportConfig{},
349 const ParquetWriteConfig& parquet_config = ParquetWriteConfig{});
350}; // class MSExperimentArrowExport
351
352} // namespace OpenMS
353
354#endif // WITH_PARQUET
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19