OpenMS
Loading...
Searching...
No Matches
RankData.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Justin Sing $
6// $Authors: Justin Sing $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <vector>
12#include <algorithm>
13#include <cmath>
14#include <limits>
15#include <stdexcept>
16#include <type_traits>
17
18namespace OpenMS {
19namespace Math {
20
41 struct RankData
42 {
43 enum class Method
44 {
45 Average,
46 Min,
47 Max,
48 Dense,
50 };
51
52 enum class NaNPolicy
53 {
55 Omit,
56 Raise
57 };
58
59 template <class T>
60 static std::vector<double> rankdata(const std::vector<T>& a,
61 Method method = Method::Average,
63 {
64 using D = double;
65 const std::size_t n = a.size();
66 std::vector<double> ranks(n, std::numeric_limits<double>::quiet_NaN());
67
68 if (n == 0) return ranks; // empty -> empty
69
70 // Detect NaNs (only meaningful for floating types)
71 auto is_nan_at = [&](std::size_t i) -> bool {
72 if constexpr (std::is_floating_point<T>::value) {
73 return std::isnan(a[i]);
74 } else {
75 (void)i; // suppress unused parameter warning
76 return false;
77 }
78 };
79
80 bool any_nan = false;
81 if constexpr (std::is_floating_point<T>::value)
82 {
83 for (std::size_t i = 0; i < n; ++i) { if (is_nan_at(i)) { any_nan = true; break; } }
84 }
85
86 // Handle nan_policy
87 if (any_nan)
88 {
89 switch (nan_policy)
90 {
92 // All-NaN output (already initialized as NaN)
93 return ranks;
95 throw std::invalid_argument("RankData::rankdata: NaNs present but nan_policy=Raise.");
96 case NaNPolicy::Omit:
97 // Continue; we'll rank only non-NaN elements and keep NaN at NaN positions.
98 break;
99 }
100 }
101
102 // Collect indices to consider (omit NaNs if requested)
103 std::vector<std::size_t> idx;
104 idx.reserve(n);
105 for (std::size_t i = 0; i < n; ++i)
106 {
107 if (nan_policy == NaNPolicy::Omit && is_nan_at(i)) continue;
108 idx.push_back(i);
109 }
110
111 if (idx.empty())
112 {
113 // All NaN with 'omit' -> return NaN vector (already NaN)
114 return ranks;
115 }
116
117 // Stable sort indices by value ascending to mimic SciPy's stable behavior
118 // (important for 'ordinal' and tie handling)
119 std::stable_sort(idx.begin(), idx.end(),
120 [&](std::size_t i, std::size_t j)
121 {
122 // Comparison uses promoted double to unify behavior across T
123 const D vi = static_cast<D>(a[i]);
124 const D vj = static_cast<D>(a[j]);
125 return vi < vj;
126 });
127
128 // Helper to assign a constant rank to a tied block [lo, hi) in the sorted index array
129 auto assign_block_rank = [&](std::size_t lo, std::size_t hi, double value)
130 {
131 for (std::size_t k = lo; k < hi; ++k)
132 {
133 ranks[idx[k]] = value;
134 }
135 };
136
137 // Main logic per tie method
138 switch (method)
139 {
140 case Method::Ordinal:
141 {
142 // Distinct ranks, breaking ties by original order due to stable sort.
143 // Rank is position in sorted order + 1
144 for (std::size_t pos = 0; pos < idx.size(); ++pos)
145 {
146 ranks[idx[pos]] = static_cast<double>(pos + 1);
147 }
148 break;
149 }
150
151 case Method::Dense:
152 {
153 // For each block of equal values, assign a compact rank 1..m
154 std::size_t lo = 0;
155 std::size_t dense_rank = 1;
156 while (lo < idx.size())
157 {
158 std::size_t hi = lo + 1;
159 const D v = static_cast<D>(a[idx[lo]]);
160 while (hi < idx.size() && static_cast<D>(a[idx[hi]]) == v) ++hi;
161
162 assign_block_rank(lo, hi, static_cast<double>(dense_rank));
163 ++dense_rank;
164 lo = hi;
165 }
166 break;
167 }
168
169 case Method::Min:
170 case Method::Max:
171 case Method::Average:
172 {
173 // For each block of equal values, compute min/max/average of standard ranks.
174 // "Standard" rank of sorted position p is (p + 1) in 1-based indexing.
175 std::size_t lo = 0;
176 while (lo < idx.size())
177 {
178 std::size_t hi = lo + 1;
179 const D v = static_cast<D>(a[idx[lo]]);
180 while (hi < idx.size() && static_cast<D>(a[idx[hi]]) == v) ++hi;
181
182 // 1-based ranks for this block span [lo+1, hi]
183 const double r_min = static_cast<double>(lo + 1);
184 const double r_max = static_cast<double>(hi);
185 double r_value = 0.0;
186 if (method == Method::Min) r_value = r_min;
187 else if (method == Method::Max) r_value = r_max;
188 else r_value = 0.5 * (r_min + r_max); // average
189
190 assign_block_rank(lo, hi, r_value);
191 lo = hi;
192 }
193 break;
194 }
195 }
196
197 return ranks;
198 }
199
200 // Concrete forwarders for Cython (avoid C++ templates on the Python side)
201 static std::vector<double> rankdata_double(const std::vector<double>& a,
204 { return rankdata<double>(a, m, p); }
205
206 static std::vector<double> rankdata_float(const std::vector<float>& a,
209 { return rankdata<float>(a, m, p); }
210
211 static std::vector<double> rankdata_int(const std::vector<int>& a,
214 { return rankdata<int>(a, m, p); }
215 };
216
218 inline std::vector<double>
219 rankdata_double(std::vector<double> a,
222 {
223 return RankData::rankdata<double>(a, m, p);
224 }
225
227 inline std::vector<double>
228 rankdata_float(std::vector<float> a,
231 {
232 return RankData::rankdata<float>(a, m, p);
233 }
234
236 inline std::vector<double>
237 rankdata_int(std::vector<int> a,
240 {
241 return RankData::rankdata<int>(a, m, p);
242 }
243
244} // namespace Math
245} // namespace OpenMS
std::vector< double > rankdata_double(std::vector< double > a, RankData::Method m=RankData::Method::Average, RankData::NaNPolicy p=RankData::NaNPolicy::Propagate)
Free function overloads (by-value) for double input.
Definition RankData.h:219
std::vector< double > rankdata_float(std::vector< float > a, RankData::Method m=RankData::Method::Average, RankData::NaNPolicy p=RankData::NaNPolicy::Propagate)
Free function overloads (by-value) for float input.
Definition RankData.h:228
std::vector< double > rankdata_int(std::vector< int > a, RankData::Method m=RankData::Method::Average, RankData::NaNPolicy p=RankData::NaNPolicy::Propagate)
Free function overloads (by-value) for int input.
Definition RankData.h:237
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Rank items (1-based) with SciPy-like tie handling.
Definition RankData.h:42
static std::vector< double > rankdata(const std::vector< T > &a, Method method=Method::Average, NaNPolicy nan_policy=NaNPolicy::Propagate)
Definition RankData.h:60
Method
Definition RankData.h:44
NaNPolicy
Definition RankData.h:53
static std::vector< double > rankdata_float(const std::vector< float > &a, Method m=Method::Average, NaNPolicy p=NaNPolicy::Propagate)
Definition RankData.h:206
static std::vector< double > rankdata_int(const std::vector< int > &a, Method m=Method::Average, NaNPolicy p=NaNPolicy::Propagate)
Definition RankData.h:211
static std::vector< double > rankdata_double(const std::vector< double > &a, Method m=Method::Average, NaNPolicy p=NaNPolicy::Propagate)
Definition RankData.h:201