OpenMS
Loading...
Searching...
No Matches
XICParquetFile.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Justin Sing $
6// $Authors: Justin Sing $
7// --------------------------------------------------------------------------
8
9#pragma once
10
14
15#include <vector>
16
17namespace OpenMS
18{
/// Reader for OpenSWATH chromatogram Parquet files (.xic).
///
/// NOTE(review): this listing is a rendered view with its doc comments removed and
/// gaps in the embedded line numbering; the definition index for this header also
/// names members (source_file, modified_sequence, transition_type, annotation,
/// filename_, copy assignment) that are not visible in the lines below. Confirm
/// against the real header before relying on the member list shown here.
59 class OPENMS_DLLAPI XICParquetFile
60 {
61 public:
/// Lightweight chromatogram container for XIC parquet rows.
65 struct OPENMS_DLLAPI XICChromatogram
66 {
67 Int64 run_id{0};
69 Int64 ms_level{0};
70
// Each has_* flag is paired with the value field that follows it; both default
// to "unset" (false / 0).
// NOTE(review): presumably the flag records whether the value was present
// (non-null) in the parquet row -- confirm against the implementation.
71 bool has_precursor_id{false};
72 Int64 precursor_id{0};
73 bool has_transition_id{false};
74 Int64 transition_id{0};
76 bool has_precursor_charge{false};
77 Int64 precursor_charge{0};
78 bool has_product_charge{false};
79 Int64 product_charge{0};
80 bool has_detecting_transition{false};
81 Int64 detecting_transition{0};
82 bool has_precursor_decoy{false};
83 Int64 precursor_decoy{0};
84 bool has_product_decoy{false};
85 Int64 product_decoy{0};
86 bool has_transition_ordinal{false};
87 Int64 transition_ordinal{0};
90
// Chromatogram trace data.
// NOTE(review): presumably rt[i] and intensity[i] describe the same data point
// (equal-length arrays) -- not enforced by this declaration; verify in the reader.
91 std::vector<double> rt;
92 std::vector<double> intensity;
93 };
94
/// Unique run information (run_id, source_file).
98 struct OPENMS_DLLAPI XICRunInfo
99 {
100 Int64 run_id{0};
102 };
103
/// Analyte metadata container.
113 struct OPENMS_DLLAPI XICAnalyte
114 {
// Precursor-level fields; each has_* flag is paired with the value that follows.
115 bool has_precursor_id{false};
116 Int64 precursor_id{0};
118 bool has_precursor_charge{false};
119 Int64 precursor_charge{0};
120 bool has_precursor_decoy{false};
121 Int64 precursor_decoy{0};
122
// Transition-level scalar fields, again as flag/value pairs.
123 bool has_transition_id{false};
124 Int64 transition_id{0};
125 bool has_product_charge{false};
126 Int64 product_charge{0};
127 bool has_transition_ordinal{false};
128 Int64 transition_ordinal{0};
129 bool has_detecting_transition{false};
130 Int64 detecting_transition{0};
131 bool has_product_decoy{false};
132 Int64 product_decoy{0};
135
// Vector counterparts of the transition-level fields above.
// NOTE(review): presumably these are filled instead of the scalar fields when
// getAnalytes() is called with nest_transitions == true -- confirm.
136 std::vector<Int64> transition_ids;
137 std::vector<Int64> product_charges;
138 std::vector<Int64> transition_ordinals;
139 std::vector<Int64> detecting_transitions;
140 std::vector<Int64> product_decoys;
141 std::vector<String> transition_types;
142 std::vector<String> annotations;
143 };
144
/// Construct from a single .xic file.
150 explicit XICParquetFile(const String& filename);
151
/// Construct from multiple .xic files.
157 explicit XICParquetFile(const std::vector<String>& filenames);
// Compiler-generated copy constructor.
158 XICParquetFile(const XICParquetFile& rhs) = default;
160
/// Return the primary filename.
168 const String& getFilename() const;
169
/// Return all filenames associated with this instance.
175 const std::vector<String>& getFilenames() const;
176
/// Load all chromatograms from the file(s).
/// @param output Receives the loaded chromatograms.
182 void load(std::vector<XICChromatogram>& output) const;
183
/// Load chromatograms with optional filtering.
/// @param output Receives the matching chromatograms.
/// NOTE(review): the defaults (-1 for the integer criteria, "" for the string
/// criteria) presumably mean "do not filter on this field" -- confirm.
198 void getChromatograms(std::vector<XICChromatogram>& output,
199 Int64 precursor_id = -1,
200 Int64 transition_id = -1,
201 const String& modified_sequence = "",
202 Int64 precursor_charge = -1,
203 Int64 product_charge = -1,
204 Int64 ms_level = -1,
205 Int64 run_id = -1,
206 const String& filter = "") const;
207
/// Return chromatograms using a typed filter expression.
/// @param output Receives the matching chromatograms.
/// @param filter Typed filter (ParquetFilter) to apply.
214 void getChromatograms(std::vector<XICChromatogram>& output,
215 const ParquetFilter& filter) const;
216
/// Return chromatograms using a typed filter builder.
/// @param output Receives the matching chromatograms.
/// @param filter Fluent builder (ParquetFilterBuilder) describing the filter.
223 void getChromatograms(std::vector<XICChromatogram>& output,
224 const ParquetFilterBuilder& filter) const;
225
/// Return unique run metadata (run_id, source_file).
/// @param output Receives one entry per run.
232 void getRuns(std::vector<XICRunInfo>& output) const;
233
/// Return unique analyte metadata.
/// @param output Receives the analyte entries.
/// @param columns Column selection; defaults to an empty list.
///        NOTE(review): meaning of the empty default is not visible here -- confirm.
/// @param nest_transitions Defaults to true.
249 void getAnalytes(std::vector<XICAnalyte>& output,
250 const std::vector<String>& columns = {},
251 bool nest_transitions = true) const;
252
/// Return the parquet schema column names.
/// @param output Receives the column names.
258 void getColumns(std::vector<String>& output) const;
259
260 private:
// Private variant of getChromatograms() taking the scalar criteria plus an
// additional FilterExpression.
// NOTE(review): presumably the shared implementation behind the public
// getChromatograms() overloads -- confirm in the .cpp.
261 void getChromatograms_(std::vector<XICChromatogram>& output,
262 const FilterExpression& extra_filter,
263 Int64 precursor_id,
264 Int64 transition_id,
265 const String& modified_sequence,
266 Int64 precursor_charge,
267 Int64 product_charge,
268 Int64 ms_level,
269 Int64 run_id,
270 const String& filter) const;
271
// File names held by this instance.
273 std::vector<String> filenames_;
274 };
275
282} // namespace OpenMS
Fluent builder for ParquetFilter objects.
Definition ParquetFilter.h:240
Typed filter builder for parquet-backed datasets.
Definition ParquetFilter.h:74
A more convenient string class.
Definition String.h:34
Reader for OpenSWATH chromatogram Parquet files (.xic).
Definition XICParquetFile.h:60
String modified_sequence
Definition XICParquetFile.h:75
void getAnalytes(std::vector< XICAnalyte > &output, const std::vector< String > &columns={}, bool nest_transitions=true) const
Return unique analyte metadata.
void getChromatograms(std::vector< XICChromatogram > &output, Int64 precursor_id=-1, Int64 transition_id=-1, const String &modified_sequence="", Int64 precursor_charge=-1, Int64 product_charge=-1, Int64 ms_level=-1, Int64 run_id=-1, const String &filter="") const
Load chromatograms with optional filtering.
std::vector< Int64 > transition_ids
Definition XICParquetFile.h:136
XICParquetFile(const String &filename)
Construct from a single .xic file.
std::vector< String > filenames_
Definition XICParquetFile.h:273
String transition_type
Definition XICParquetFile.h:88
void getChromatograms(std::vector< XICChromatogram > &output, const ParquetFilter &filter) const
Return chromatograms using a typed filter expression.
std::vector< String > transition_types
Definition XICParquetFile.h:141
void getColumns(std::vector< String > &output) const
Return the parquet schema column names.
String filename_
Definition XICParquetFile.h:272
void getChromatograms_(std::vector< XICChromatogram > &output, const FilterExpression &extra_filter, Int64 precursor_id, Int64 transition_id, const String &modified_sequence, Int64 precursor_charge, Int64 product_charge, Int64 ms_level, Int64 run_id, const String &filter) const
std::vector< Int64 > transition_ordinals
Definition XICParquetFile.h:138
std::vector< Int64 > detecting_transitions
Definition XICParquetFile.h:139
const String & getFilename() const
Return the primary filename.
XICParquetFile & operator=(const XICParquetFile &rhs)=default
String source_file
Definition XICParquetFile.h:68
std::vector< double > rt
Definition XICParquetFile.h:91
XICParquetFile(const XICParquetFile &rhs)=default
void getChromatograms(std::vector< XICChromatogram > &output, const ParquetFilterBuilder &filter) const
Return chromatograms using a typed filter builder.
const std::vector< String > & getFilenames() const
Return all filenames associated with this instance.
String annotation
Definition XICParquetFile.h:89
void load(std::vector< XICChromatogram > &output) const
Load all chromatograms from the file(s).
std::vector< double > intensity
Definition XICParquetFile.h:92
XICParquetFile(const std::vector< String > &filenames)
Construct from multiple .xic files.
std::vector< String > annotations
Definition XICParquetFile.h:142
std::vector< Int64 > product_charges
Definition XICParquetFile.h:137
std::vector< Int64 > product_decoys
Definition XICParquetFile.h:140
void getRuns(std::vector< XICRunInfo > &output) const
Return unique run metadata (run_id, source_file).
Analyte metadata container.
Definition XICParquetFile.h:114
Lightweight chromatogram container for XIC parquet rows.
Definition XICParquetFile.h:66
Unique run information (run_id, source_file).
Definition XICParquetFile.h:99
int64_t Int64
Signed integer type (64bit)
Definition Types.h:40
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
XICParquetFile::XICAnalyte XICAnalyte
Convenience alias for the nested analyte type.
Definition XICParquetFile.h:281
XICParquetFile::XICChromatogram XICChromatogram
Convenience alias for the nested XIC chromatogram type.
Definition XICParquetFile.h:277
XICParquetFile::XICRunInfo XICRunInfo
Convenience alias for the nested run info type.
Definition XICParquetFile.h:279
Simple conjunction/disjunction of conditions.
Definition ParquetFilter.h:42