OpenMS
Loading...
Searching...
No Matches
MultipleTesting.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Justin Sing $
6// $Authors: Justin Sing $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <algorithm>
12#include <cmath>
13#include <limits>
14#include <numeric>
15#include <stdexcept>
16#include <string>
17#include <type_traits>
18#include <vector>
19
20#include <OpenMS/config.h>
23
24namespace OpenMS
25{
26namespace Math
27{
52struct OPENMS_DLLAPI Pi0Result
53{
54 double pi0 = 1.0;
55 std::vector<double> pi0_lambda;
56 std::vector<double> lambda_;
57 bool pi0_smooth = false;
58};
59
75struct OPENMS_DLLAPI MultipleTesting
76{
78 enum class Pi0Method
79 {
80 Smoother,
81 Bootstrap
82 };
83
85 enum class LfdrTransform
86 {
87 Probit,
88 Logit
89 };
90
92 static std::string pi0MethodToString(Pi0Method m);
93
95 static Pi0Method toPi0Method(const std::string& s);
96
98 static std::string lfdrTransformToString(LfdrTransform t);
99
101 static LfdrTransform toLfdrTransform(const std::string& s);
102
114 static std::vector<double> qValue(const std::vector<double>& p_values, double pi0, bool pfdr = false);
115
126 static Pi0Result pi0Est(const std::vector<double>& p_values,
127 const std::vector<double>& lambda_ = std::vector<double>(),
128 Pi0Method method = Pi0Method::Smoother,
129 int smooth_df = 3,
130 bool smooth_log_pi0 = false);
131
146 static std::vector<double> lfdr(const std::vector<double>& p_values,
147 double pi0,
148 bool trunc = true,
149 bool monotone = true,
150 LfdrTransform transf = LfdrTransform::Probit,
151 double adj = 1.5,
152 double eps = 1e-8,
153 std::size_t gridsize = 512,
154 double cut = 3.0);
155
163 static std::vector<double> pNorm(const std::vector<double>& stat, const std::vector<double>& stat0);
164
171 template <class T>
172 static std::vector<double> computeModelFDR(const std::vector<T>& data_in);
173
181 template <class T>
182 static std::vector<double> pEmp(const std::vector<T>& stat, const std::vector<T>& stat0);
183};
184
185// Template implementation for MultipleTesting::computeModelFDR
186template <class T>
187inline std::vector<double> MultipleTesting::computeModelFDR(const std::vector<T>& data_in)
188{
189 using D = double;
190 const std::size_t n = data_in.size();
191 std::vector<double> fdr(n, std::numeric_limits<double>::quiet_NaN());
192
193 if (n == 0) return fdr;
194
195 auto is_nan_at = [&](std::size_t i) -> bool {
196 if constexpr (std::is_floating_point<T>::value) return std::isnan(data_in[i]);
197 else return false;
198 };
199
200 bool any_nan = false;
201 if constexpr (std::is_floating_point<T>::value)
202 {
203 for (std::size_t i = 0; i < n; ++i) { if (is_nan_at(i)) { any_nan = true; break; } }
204 }
205 if (any_nan)
206 {
207 return fdr; // propagate
208 }
209
210 // argsort (stable)
211 std::vector<std::size_t> order(n);
212 for (std::size_t i = 0; i < n; ++i) order[i] = i;
213 std::stable_sort(order.begin(), order.end(), [&](std::size_t i, std::size_t j)
214 {
215 return static_cast<D>(data_in[i]) < static_cast<D>(data_in[j]);
216 });
217
218 // sorted data
219 std::vector<D> data_sorted(n);
220 for (std::size_t i = 0; i < n; ++i) data_sorted[i] = static_cast<D>(data_in[order[i]]);
221
222 // ranks for sorted data using 'max' tie method
223 std::vector<double> ranks = RankData::rankdata<double>(data_sorted, RankData::Method::Max, RankData::NaNPolicy::Propagate);
224
225 // cumulative sum of sorted data
226 std::vector<D> cumsum(n);
227 D acc = 0.0;
228 for (std::size_t i = 0; i < n; ++i)
229 {
230 acc += data_sorted[i];
231 cumsum[i] = acc;
232 }
233
234 // populate fdr in original order
235 for (std::size_t i = 0; i < n; ++i)
236 {
237 const double r_d = ranks[i];
238 if (std::isnan(r_d))
239 {
240 fdr[order[i]] = std::numeric_limits<double>::quiet_NaN();
241 continue;
242 }
243 const std::size_t r_idx = static_cast<std::size_t>(static_cast<Int64>(r_d) - 1);
244 const double denom = r_d;
245 const double numer = cumsum[std::min(r_idx, cumsum.size() - 1)];
246 fdr[order[i]] = numer / denom;
247 }
248
249 return fdr;
250}
251
252// Template implementation for MultipleTesting::pEmp
253template <class T>
254inline std::vector<double> MultipleTesting::pEmp(const std::vector<T>& stat, const std::vector<T>& stat0)
255{
256 using D = double;
257 const std::size_t m = stat.size();
258 const std::size_t m0 = stat0.size();
259 if (m == 0 || m0 == 0) throw std::invalid_argument("pEmp: input arrays must be non-empty");
260
261 // concatenate
262 std::vector<D> statc;
263 statc.reserve(m + m0);
264 for (auto v : stat) statc.push_back(static_cast<D>(v));
265 for (auto v : stat0) statc.push_back(static_cast<D>(v));
266
267 // v flags: True for stat, False for stat0
268 std::vector<char> v;
269 v.reserve(m + m0);
270 v.insert(v.end(), m, 1);
271 v.insert(v.end(), m0, 0);
272
273 // argsort descending (stable)
274 const std::size_t N = statc.size();
275 std::vector<std::size_t> perm(N);
276 for (std::size_t i = 0; i < N; ++i) perm[i] = i;
277 std::stable_sort(perm.begin(), perm.end(), [&](std::size_t i, std::size_t j)
278 { return statc[i] > statc[j]; });
279
280 // apply permutation to v
281 std::vector<char> vperm(N);
282 for (std::size_t i = 0; i < N; ++i) vperm[i] = v[perm[i]];
283
284 // u: positions of True entries
285 std::vector<std::size_t> u;
286 for (std::size_t i = 0; i < N; ++i) if (vperm[i]) u.push_back(i);
287 if (u.size() != m) throw std::runtime_error("pemp: internal error, unexpected u size");
288
289 std::vector<double> p(m);
290 for (std::size_t i = 0; i < m; ++i)
291 {
292 p[i] = static_cast<double>(static_cast<Int64>(u[i]) - static_cast<Int64>(i)) / static_cast<double>(m0);
293 }
294
295 // ranks: floor(rankdata(-stat)) - 1
296 std::vector<D> neg_stat(m);
297 for (std::size_t i = 0; i < m; ++i) neg_stat[i] = -static_cast<D>(stat[i]);
298 std::vector<double> ranks = RankData::rankdata<double>(neg_stat, RankData::Method::Average, RankData::NaNPolicy::Propagate);
299
300 std::vector<double> out(m);
301 for (std::size_t i = 0; i < m; ++i)
302 {
303 double rf = std::floor(ranks[i]);
304 std::size_t idx = static_cast<std::size_t>(static_cast<Int64>(rf) - 1);
305 if (idx >= p.size()) idx = p.size() - 1;
306 out[i] = p[idx];
307 }
308
309 // enforce minimum 1/m0
310 const double minp = 1.0 / static_cast<double>(m0);
311 for (auto& vv : out) if (vv <= minp) vv = minp;
312
313 return out;
314}
315
316// -------------------------------------------------------------------------
317// Backward-compatible free function wrappers
318// -------------------------------------------------------------------------
319
321inline std::vector<double> qValue(const std::vector<double>& p_values, double pi0, bool pfdr = false)
322{
323 return MultipleTesting::qValue(p_values, pi0, pfdr);
324}
325
327OPENMS_DLLAPI Pi0Result pi0Est(const std::vector<double>& p_values,
328 const std::vector<double>& lambda_ = std::vector<double>(),
329 const std::string& pi0_method = "smoother",
330 int smooth_df = 3,
331 bool smooth_log_pi0 = false);
332
334OPENMS_DLLAPI std::vector<double> lfdr(const std::vector<double>& p_values,
335 double pi0,
336 bool trunc = true,
337 bool monotone = true,
338 const std::string& transf = "probit",
339 double adj = 1.5,
340 double eps = 1e-8,
341 std::size_t gridsize = 512,
342 double cut = 3.0);
343
345inline std::vector<double> pNorm(const std::vector<double>& stat, const std::vector<double>& stat0)
346{
347 return MultipleTesting::pNorm(stat, stat0);
348}
349
351template <class T>
352inline std::vector<double> computeModelFDR(const std::vector<T>& data_in)
353{
354 return MultipleTesting::computeModelFDR(data_in);
355}
356
358template <class T>
359inline std::vector<double> pEmp(const std::vector<T>& stat, const std::vector<T>& stat0)
360{
361 return MultipleTesting::pEmp(stat, stat0);
362}
363
364} // namespace Math
365} // namespace OpenMS
int64_t Int64
Signed integer type (64bit)
Definition Types.h:40
std::vector< double > lambda_
Definition MultipleTesting.h:56
std::vector< double > pNorm(const std::vector< double > &stat, const std::vector< double > &stat0)
Backward-compatible wrapper for MultipleTesting::pNorm.
Definition MultipleTesting.h:345
std::vector< double > qValue(const std::vector< double > &p_values, double pi0, bool pfdr=false)
Backward-compatible wrapper for MultipleTesting::qValue.
Definition MultipleTesting.h:321
Pi0Result pi0Est(const std::vector< double > &p_values, const std::vector< double > &lambda_=std::vector< double >(), const std::string &pi0_method="smoother", int smooth_df=3, bool smooth_log_pi0=false)
Backward-compatible wrapper for MultipleTesting::pi0Est (string-based API)
std::vector< double > pEmp(const std::vector< T > &stat, const std::vector< T > &stat0)
Backward-compatible wrapper for MultipleTesting::pEmp.
Definition MultipleTesting.h:359
std::vector< double > lfdr(const std::vector< double > &p_values, double pi0, bool trunc=true, bool monotone=true, const std::string &transf="probit", double adj=1.5, double eps=1e-8, std::size_t gridsize=512, double cut=3.0)
Backward-compatible wrapper for MultipleTesting::lfdr (string-based API)
std::vector< double > pi0_lambda
Definition MultipleTesting.h:55
std::vector< double > computeModelFDR(const std::vector< T > &data_in)
Backward-compatible wrapper for MultipleTesting::computeModelFDR.
Definition MultipleTesting.h:352
Result of pi0 estimation for multiple testing correction.
Definition MultipleTesting.h:53
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Statistical functions for multiple testing correction.
Definition MultipleTesting.h:76
static LfdrTransform toLfdrTransform(const std::string &s)
Convert string to LfdrTransform enum (throws if invalid)
static Pi0Result pi0Est(const std::vector< double > &p_values, const std::vector< double > &lambda_=std::vector< double >(), Pi0Method method=Pi0Method::Smoother, int smooth_df=3, bool smooth_log_pi0=false)
Estimate the proportion of true null hypotheses (pi0).
Pi0Method
Method for estimating proportion of true null hypotheses (pi0)
Definition MultipleTesting.h:79
static std::vector< double > pNorm(const std::vector< double > &stat, const std::vector< double > &stat0)
Compute tail probabilities under a fitted normal distribution.
static std::string pi0MethodToString(Pi0Method m)
Convert Pi0Method enum to string representation.
static std::vector< double > pEmp(const std::vector< T > &stat, const std::vector< T > &stat0)
Compute empirical p-values from test statistics and null distribution.
Definition MultipleTesting.h:254
static std::string lfdrTransformToString(LfdrTransform t)
Convert LfdrTransform enum to string representation.
static std::vector< double > lfdr(const std::vector< double > &p_values, double pi0, bool trunc=true, bool monotone=true, LfdrTransform transf=LfdrTransform::Probit, double adj=1.5, double eps=1e-8, std::size_t gridsize=512, double cut=3.0)
Estimate local false discovery rate (local FDR) from p-values.
LfdrTransform
Transformation for local FDR estimation.
Definition MultipleTesting.h:86
static Pi0Method toPi0Method(const std::string &s)
Convert string to Pi0Method enum (throws if invalid)
static std::vector< double > qValue(const std::vector< double > &p_values, double pi0, bool pfdr=false)
Calculate q-values (FDR-adjusted p-values) for multiple testing correction.
static std::vector< double > computeModelFDR(const std::vector< T > &data_in)
Compute model-based FDR estimates from posterior error probabilities.
Definition MultipleTesting.h:187