OpenMS
Loading...
Searching...
No Matches
OpenSwathOSWParquetReader.h
Go to the documentation of this file.
1
2// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
3// SPDX-License-Identifier: BSD-3-Clause
4//
5// --------------------------------------------------------------------------
6// $Maintainer: Justin Sing $
7// $Authors: Justin Sing $
8// --------------------------------------------------------------------------
9
10#pragma once
11
14#include <map>
15#include <vector>
16
17namespace OpenMS
18{
// Reader for OpenSwath OSW Parquet output.
// NOTE(review): this listing is not contiguous -- several original header lines
// (doc comments and some declarations) are absent. Gaps are flagged below.
28 class OPENMS_DLLAPI OpenSwathOSWParquetReader
29 {
30 public:
 // Single extracted row combining feature + precursor + run metadata.
32 struct Row
33 {
34 int64_t feature_id = 0;
35 int64_t run_id = 0;
36 int64_t precursor_id = 0;
37 double exp_rt = 0.0;
38 double ms2_area_intensity = 0.0;
39 double ms2_total_area_intensity = 0.0;
40 double ms2_apex_intensity = 0.0;
41 int precursor_charge = 0;
42 bool decoy = false;
43 int64_t transition_count = 0;
 // NOTE(review): listing line 44 is not shown; the member index lists
 // "String group_id" at that line -- confirm against the real header.
45 };
46
 // NOTE(review): the struct header for the body below (listing lines 47-49) is
 // missing. fetchPeakGroupFeatures() returns PeakGroupFeatureScoresResult, so
 // presumably this is that struct -- confirm against the real header.
 // Column-oriented layout: one vector per column, plus score-column names
 // paired index-by-index with their value vectors.
50 {
51 // Core per-feature columns (all length N)
52 std::vector<int64_t> feature_id;
53 std::vector<int64_t> run_id;
54 std::vector<int64_t> precursor_id;
55 std::vector<double> exp_rt;
56 std::vector<int> precursor_charge;
57 std::vector<bool> decoy;
58 std::vector<int64_t> transition_count;
59 std::vector<String> group_id;
60
61 // Discovered MS2 score columns and their column vectors (each vector length N)
62 // ms2_columns[i] corresponds to ms2_values[i]
63 std::vector<String> ms2_columns;
64 std::vector<std::vector<double>> ms2_values;
65
66 // Discovered MS1 score columns and their column vectors (only present if requested)
67 std::vector<String> ms1_columns;
68 std::vector<std::vector<double>> ms1_values;
69 };
70
 // Result container for transition-level features.
 // NOTE(review): the struct header (listing lines 71-72) is missing; the member
 // index shows fetchTransitionFeatures() returning TransitionFeaturesResult, so
 // presumably that is this struct's name -- confirm.
73 {
74 // Core per-transition columns (all length N)
75 std::vector<int64_t> feature_id;
76 std::vector<int64_t> run_id;
77 std::vector<int64_t> precursor_id;
78 std::vector<double> exp_rt;
79 std::vector<int> precursor_charge;
80
81 std::vector<int64_t> transition_id;
82 std::vector<int> product_charge;
83 std::vector<bool> decoy; // transition-level decoy flag
84
85 // basic transition-level peak metrics
86 std::vector<double> area_intensity;
87 std::vector<double> total_area_intensity;
88 std::vector<double> apex_intensity;
89 std::vector<double> apex_rt;
90 std::vector<double> rt_fwhm;
91 std::vector<double> masserror_ppm;
92 std::vector<double> total_mi;
93
94 // Discovered transition-level var_ columns and their column vectors
95 std::vector<String> transition_var_columns;
96 std::vector<std::vector<double>> transition_var_values;
97
98 // Group id (run_feature_precursor_transition style)
99 std::vector<String> group_id;
100 };
101
 // Result container for an unscored table.
 // NOTE(review): the struct header (listing lines 102-107) is missing;
 // fetchUnscoredData() returns UnscoredResult, so presumably that is this
 // struct's name -- confirm.
108 {
109 std::vector<int64_t> id_run;
110 std::vector<int64_t> id_peptide;
111 std::vector<int64_t> transition_group_id;
112 std::vector<bool> decoy;
113 std::vector<int64_t> run_id;
114 std::vector<String> filename;
115 std::vector<double> RT;
 // CAUTION: assay_rt/delta_rt and assay_RT/delta_RT differ only in letter case
 // but hold different quantities; see the per-field comments for each source.
116 std::vector<double> assay_rt; // FEATURE.EXP_RT - FEATURE.DELTA_RT
117 std::vector<double> delta_rt; // FEATURE.DELTA_RT
118 std::vector<double> assay_RT; // PRECURSOR.LIBRARY_RT
119 std::vector<double> delta_RT; // FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT
120 std::vector<int64_t> id; // FEATURE.ID
121 std::vector<int> Charge; // PRECURSOR.CHARGE
122 std::vector<double> mz; // PRECURSOR.PRECURSOR_MZ
123 std::vector<double> Intensity; // FEATURE_MS2.AREA_INTENSITY
124
125 // aggregated MS1 metrics
126 std::vector<double> aggr_prec_Peak_Area;
127 std::vector<double> aggr_prec_Peak_Apex;
128
129 std::vector<double> leftWidth;
130 std::vector<double> rightWidth;
131
132 // ion-mobility columns (may be NULL)
133 std::vector<double> EXP_IM;
134 std::vector<double> IM_leftWidth;
135 std::vector<double> IM_rightWidth;
136
137 // Discovered score columns (var_ms1_ and var_ms2_) and their values
138 std::vector<String> ms2_columns;
139 std::vector<std::vector<double>> ms2_values;
140 std::vector<String> ms1_columns;
141 std::vector<std::vector<double>> ms1_values;
142 };
143
 // NOTE(review): the member index lists a default constructor
 // (OpenSwathOSWParquetReader()=default) and a convenience constructor taking
 // the oswpq path; neither appears in this listing -- presumably they are
 // declared in the omitted lines around here.
146
159
 // Load and extract rows from an OSW Parquet directory or .oswpq archive.
165 void load(const String& oswpq_dir);
166
 // Return the originally provided oswpq path (may be empty).
168 const String& oswpqPath() const { return oswpq_dir_; }
169
 // Return extracted rows (const reference to the internal rows_ vector).
171 const std::vector<Row>& rows() const { return rows_; }
172
 // Extract MS2-level feature rows across all runs.
 // level defaults to "ms2" and main_score to ""; how other values are handled
 // is not visible from this declaration.
191 PeakGroupFeatureScoresResult fetchPeakGroupFeatures(const String& oswpq_dir, const String& level = "ms2", const String& main_score = "") const;
192
 // NOTE(review): the member index lists
 // "TransitionFeaturesResult fetchTransitionFeatures(const String& oswpq_dir) const"
 // (transition-level feature rows across all runs); its declaration is not
 // shown in this listing -- presumably in the omitted lines before line 221.
221
 // Read an "unscored" table and return a column-oriented result.
252 UnscoredResult fetchUnscoredData(const String& oswpq_dir) const;
253
254 private:
 // Rows exposed through rows().
255 std::vector<Row> rows_;
256 // store last-loaded path so Python-side code can call fetch methods without re-supplying the path
 // NOTE(review): listing line 257 is not shown; the member index lists
 // "String oswpq_dir_" there, which is what oswpqPath() returns.
258 };
259
260} // namespace OpenMS
Reader for OpenSwath OSW Parquet output.
Definition OpenSwathOSWParquetReader.h:29
std::vector< double > area_intensity
Definition OpenSwathOSWParquetReader.h:86
std::vector< int64_t > transition_id
Definition OpenSwathOSWParquetReader.h:81
std::vector< String > ms2_columns
Definition OpenSwathOSWParquetReader.h:63
std::vector< double > IM_leftWidth
Definition OpenSwathOSWParquetReader.h:134
std::vector< double > apex_rt
Definition OpenSwathOSWParquetReader.h:89
const std::vector< Row > & rows() const
Return extracted rows.
Definition OpenSwathOSWParquetReader.h:171
std::vector< String > transition_var_columns
Definition OpenSwathOSWParquetReader.h:95
OpenSwathOSWParquetReader(const String &oswpq_dir)
Convenience constructor that loads from the given oswpq path.
std::vector< double > delta_rt
Definition OpenSwathOSWParquetReader.h:117
std::vector< String > ms1_columns
Definition OpenSwathOSWParquetReader.h:67
std::vector< double > Intensity
Definition OpenSwathOSWParquetReader.h:123
std::vector< double > assay_RT
Definition OpenSwathOSWParquetReader.h:118
std::vector< String > filename
Definition OpenSwathOSWParquetReader.h:114
std::vector< std::vector< double > > ms2_values
Definition OpenSwathOSWParquetReader.h:64
String group_id
Definition OpenSwathOSWParquetReader.h:44
std::vector< int64_t > id_peptide
Definition OpenSwathOSWParquetReader.h:110
PeakGroupFeatureScoresResult fetchPeakGroupFeatures(const String &oswpq_dir, const String &level="ms2", const String &main_score="") const
Extract MS2-level feature rows across all runs.
std::vector< int64_t > transition_count
Definition OpenSwathOSWParquetReader.h:58
std::vector< double > RT
Definition OpenSwathOSWParquetReader.h:115
std::vector< Row > rows_
Definition OpenSwathOSWParquetReader.h:255
std::vector< double > total_area_intensity
Definition OpenSwathOSWParquetReader.h:87
std::vector< double > masserror_ppm
Definition OpenSwathOSWParquetReader.h:91
std::vector< bool > decoy
Definition OpenSwathOSWParquetReader.h:57
std::vector< int64_t > run_id
Definition OpenSwathOSWParquetReader.h:53
std::vector< int64_t > transition_group_id
Definition OpenSwathOSWParquetReader.h:111
std::vector< double > delta_RT
Definition OpenSwathOSWParquetReader.h:119
std::vector< int > precursor_charge
Definition OpenSwathOSWParquetReader.h:56
std::vector< double > mz
Definition OpenSwathOSWParquetReader.h:122
std::vector< double > leftWidth
Definition OpenSwathOSWParquetReader.h:129
std::vector< int64_t > id_run
Definition OpenSwathOSWParquetReader.h:109
std::vector< int64_t > precursor_id
Definition OpenSwathOSWParquetReader.h:54
std::vector< int64_t > feature_id
Definition OpenSwathOSWParquetReader.h:52
std::vector< std::vector< double > > ms1_values
Definition OpenSwathOSWParquetReader.h:68
UnscoredResult fetchUnscoredData(const String &oswpq_dir) const
Read an "unscored" table and return a column-oriented result.
const String & oswpqPath() const
Return the originally provided oswpq path (may be empty)
Definition OpenSwathOSWParquetReader.h:168
void load(const String &oswpq_dir)
Load and extract rows from an OSW Parquet directory or .oswpq archive.
std::vector< String > group_id
Definition OpenSwathOSWParquetReader.h:59
TransitionFeaturesResult fetchTransitionFeatures(const String &oswpq_dir) const
Extract transition-level feature rows across all runs (SOA)
std::vector< double > aggr_prec_Peak_Area
Definition OpenSwathOSWParquetReader.h:126
std::vector< int64_t > id
Definition OpenSwathOSWParquetReader.h:120
String oswpq_dir_
Definition OpenSwathOSWParquetReader.h:257
std::vector< double > IM_rightWidth
Definition OpenSwathOSWParquetReader.h:135
std::vector< double > total_mi
Definition OpenSwathOSWParquetReader.h:92
OpenSwathOSWParquetReader()=default
Default constructor.
std::vector< double > aggr_prec_Peak_Apex
Definition OpenSwathOSWParquetReader.h:127
std::vector< double > EXP_IM
Definition OpenSwathOSWParquetReader.h:133
std::vector< double > rightWidth
Definition OpenSwathOSWParquetReader.h:130
std::vector< double > exp_rt
Definition OpenSwathOSWParquetReader.h:55
std::vector< int > Charge
Definition OpenSwathOSWParquetReader.h:121
std::vector< double > rt_fwhm
Definition OpenSwathOSWParquetReader.h:90
std::vector< double > assay_rt
Definition OpenSwathOSWParquetReader.h:116
std::vector< double > apex_intensity
Definition OpenSwathOSWParquetReader.h:88
std::vector< int > product_charge
Definition OpenSwathOSWParquetReader.h:82
std::vector< std::vector< double > > transition_var_values
Definition OpenSwathOSWParquetReader.h:96
Single extracted row combining feature + precursor + run metadata.
Definition OpenSwathOSWParquetReader.h:33
Result container for transition-level features.
Definition OpenSwathOSWParquetReader.h:73
Result container for an unscored table.
Definition OpenSwathOSWParquetReader.h:108
A more convenient string class.
Definition String.h:34
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19