OpenMS
Loading...
Searching...
No Matches
MultipleTesting.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Justin Sing $
6// $Authors: Justin Sing $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <algorithm>
12#include <cmath>
13#include <limits>
14#include <numeric>
15#include <stdexcept>
16#include <string>
17#include <type_traits>
18#include <vector>
19
20#include <OpenMS/config.h>
23
24namespace OpenMS
25{
26namespace Math
27{
52struct OPENMS_DLLAPI Pi0Result
53{
54 double pi0 = 1.0;
55 std::vector<double> pi0_lambda;
56 std::vector<double> lambda_;
57 bool pi0_smooth = false;
58};
59
75struct OPENMS_DLLAPI MultipleTesting
76{
78 enum class Pi0Method
79 {
80 Smoother,
81 Bootstrap
82 };
83
85 enum class LfdrTransform
86 {
87 Probit,
88 Logit
89 };
90
92 static std::string pi0MethodToString(Pi0Method m);
93
95 static Pi0Method toPi0Method(const std::string& s);
96
98 static std::string lfdrTransformToString(LfdrTransform t);
99
101 static LfdrTransform toLfdrTransform(const std::string& s);
102
114 static std::vector<double> qValue(const std::vector<double>& p_values, double pi0, bool pfdr = false);
115
126 static Pi0Result pi0Est(const std::vector<double>& p_values,
127 const std::vector<double>& lambda_ = std::vector<double>(),
128 Pi0Method method = Pi0Method::Smoother,
129 int smooth_df = 3,
130 bool smooth_log_pi0 = false);
131
146 static std::vector<double> lfdr(const std::vector<double>& p_values,
147 double pi0,
148 bool trunc = true,
149 bool monotone = true,
150 LfdrTransform transf = LfdrTransform::Probit,
151 double adj = 1.5,
152 double eps = 1e-8,
153 std::size_t gridsize = 512,
154 double cut = 3.0);
155
163 static std::vector<double> pNorm(const std::vector<double>& stat, const std::vector<double>& stat0);
164
179 template <class T>
180 static std::vector<double> computeModelFDR(const std::vector<T>& data_in);
181
189 template <class T>
190 static std::vector<double> pEmp(const std::vector<T>& stat, const std::vector<T>& stat0);
191};
192
193// Template implementation for MultipleTesting::computeModelFDR
194template <class T>
195inline std::vector<double> MultipleTesting::computeModelFDR(const std::vector<T>& data_in)
196{
197 using D = double;
198 const std::size_t n = data_in.size();
199 std::vector<double> fdr(n, std::numeric_limits<double>::quiet_NaN());
200
201 if (n == 0) return fdr;
202
203 auto is_nan_at = [&](std::size_t i) -> bool {
204 if constexpr (std::is_floating_point<T>::value) return std::isnan(data_in[i]);
205 else return false;
206 };
207
208 bool any_nan = false;
209 if constexpr (std::is_floating_point<T>::value)
210 {
211 for (std::size_t i = 0; i < n; ++i) { if (is_nan_at(i)) { any_nan = true; break; } }
212 }
213 if (any_nan)
214 {
215 return fdr; // propagate
216 }
217
218 // argsort (stable)
219 std::vector<std::size_t> order(n);
220 for (std::size_t i = 0; i < n; ++i) order[i] = i;
221 std::stable_sort(order.begin(), order.end(), [&](std::size_t i, std::size_t j)
222 {
223 return static_cast<D>(data_in[i]) < static_cast<D>(data_in[j]);
224 });
225
226 // sorted data
227 std::vector<D> data_sorted(n);
228 for (std::size_t i = 0; i < n; ++i) data_sorted[i] = static_cast<D>(data_in[order[i]]);
229
230 // ranks for sorted data using 'max' tie method
231 std::vector<double> ranks = RankData::rankdata<double>(data_sorted, RankData::Method::Max, RankData::NaNPolicy::Propagate);
232
233 // cumulative sum of sorted data
234 std::vector<D> cumsum(n);
235 D acc = 0.0;
236 for (std::size_t i = 0; i < n; ++i)
237 {
238 acc += data_sorted[i];
239 cumsum[i] = acc;
240 }
241
242 // populate fdr in original order
243 for (std::size_t i = 0; i < n; ++i)
244 {
245 const double r_d = ranks[i];
246 if (std::isnan(r_d))
247 {
248 fdr[order[i]] = std::numeric_limits<double>::quiet_NaN();
249 continue;
250 }
251 const std::size_t r_idx = static_cast<std::size_t>(static_cast<Int64>(r_d) - 1);
252 const double denom = r_d;
253 const double numer = cumsum[std::min(r_idx, cumsum.size() - 1)];
254 fdr[order[i]] = numer / denom;
255 }
256
257 return fdr;
258}
259
260// Template implementation for MultipleTesting::pEmp
261template <class T>
262inline std::vector<double> MultipleTesting::pEmp(const std::vector<T>& stat, const std::vector<T>& stat0)
263{
264 using D = double;
265 const std::size_t m = stat.size();
266 const std::size_t m0 = stat0.size();
267 if (m == 0 || m0 == 0) throw std::invalid_argument("pEmp: input arrays must be non-empty");
268
269 // concatenate
270 std::vector<D> statc;
271 statc.reserve(m + m0);
272 for (auto v : stat) statc.push_back(static_cast<D>(v));
273 for (auto v : stat0) statc.push_back(static_cast<D>(v));
274
275 // v flags: True for stat, False for stat0
276 std::vector<char> v;
277 v.reserve(m + m0);
278 v.insert(v.end(), m, 1);
279 v.insert(v.end(), m0, 0);
280
281 // argsort descending (stable)
282 const std::size_t N = statc.size();
283 std::vector<std::size_t> perm(N);
284 for (std::size_t i = 0; i < N; ++i) perm[i] = i;
285 std::stable_sort(perm.begin(), perm.end(), [&](std::size_t i, std::size_t j)
286 { return statc[i] > statc[j]; });
287
288 // apply permutation to v
289 std::vector<char> vperm(N);
290 for (std::size_t i = 0; i < N; ++i) vperm[i] = v[perm[i]];
291
292 // u: positions of True entries
293 std::vector<std::size_t> u;
294 for (std::size_t i = 0; i < N; ++i) if (vperm[i]) u.push_back(i);
295 if (u.size() != m) throw std::runtime_error("pemp: internal error, unexpected u size");
296
297 std::vector<double> p(m);
298 for (std::size_t i = 0; i < m; ++i)
299 {
300 p[i] = static_cast<double>(static_cast<Int64>(u[i]) - static_cast<Int64>(i)) / static_cast<double>(m0);
301 }
302
303 // ranks: floor(rankdata(-stat)) - 1
304 std::vector<D> neg_stat(m);
305 for (std::size_t i = 0; i < m; ++i) neg_stat[i] = -static_cast<D>(stat[i]);
306 std::vector<double> ranks = RankData::rankdata<double>(neg_stat, RankData::Method::Average, RankData::NaNPolicy::Propagate);
307
308 std::vector<double> out(m);
309 for (std::size_t i = 0; i < m; ++i)
310 {
311 double rf = std::floor(ranks[i]);
312 std::size_t idx = static_cast<std::size_t>(static_cast<Int64>(rf) - 1);
313 if (idx >= p.size()) idx = p.size() - 1;
314 out[i] = p[idx];
315 }
316
317 // enforce minimum 1/m0
318 const double minp = 1.0 / static_cast<double>(m0);
319 for (auto& vv : out) if (vv <= minp) vv = minp;
320
321 return out;
322}
323
324// -------------------------------------------------------------------------
325// Backward-compatible free function wrappers
326// -------------------------------------------------------------------------
327
329inline std::vector<double> qValue(const std::vector<double>& p_values, double pi0, bool pfdr = false)
330{
331 return MultipleTesting::qValue(p_values, pi0, pfdr);
332}
333
335OPENMS_DLLAPI Pi0Result pi0Est(const std::vector<double>& p_values,
336 const std::vector<double>& lambda_ = std::vector<double>(),
337 const std::string& pi0_method = "smoother",
338 int smooth_df = 3,
339 bool smooth_log_pi0 = false);
340
342OPENMS_DLLAPI std::vector<double> lfdr(const std::vector<double>& p_values,
343 double pi0,
344 bool trunc = true,
345 bool monotone = true,
346 const std::string& transf = "probit",
347 double adj = 1.5,
348 double eps = 1e-8,
349 std::size_t gridsize = 512,
350 double cut = 3.0);
351
353inline std::vector<double> pNorm(const std::vector<double>& stat, const std::vector<double>& stat0)
354{
355 return MultipleTesting::pNorm(stat, stat0);
356}
357
359template <class T>
360inline std::vector<double> computeModelFDR(const std::vector<T>& data_in)
361{
362 return MultipleTesting::computeModelFDR(data_in);
363}
364
366template <class T>
367inline std::vector<double> pEmp(const std::vector<T>& stat, const std::vector<T>& stat0)
368{
369 return MultipleTesting::pEmp(stat, stat0);
370}
371
372} // namespace Math
373} // namespace OpenMS
int64_t Int64
Signed integer type (64bit)
Definition Types.h:40
std::vector< double > lambda_
Definition MultipleTesting.h:56
std::vector< double > pNorm(const std::vector< double > &stat, const std::vector< double > &stat0)
Backward-compatible wrapper for MultipleTesting::pNorm.
Definition MultipleTesting.h:353
std::vector< double > qValue(const std::vector< double > &p_values, double pi0, bool pfdr=false)
Backward-compatible wrapper for MultipleTesting::qValue.
Definition MultipleTesting.h:329
Pi0Result pi0Est(const std::vector< double > &p_values, const std::vector< double > &lambda_=std::vector< double >(), const std::string &pi0_method="smoother", int smooth_df=3, bool smooth_log_pi0=false)
Backward-compatible wrapper for MultipleTesting::pi0Est (string-based API)
std::vector< double > pEmp(const std::vector< T > &stat, const std::vector< T > &stat0)
Backward-compatible wrapper for MultipleTesting::pEmp.
Definition MultipleTesting.h:367
std::vector< double > lfdr(const std::vector< double > &p_values, double pi0, bool trunc=true, bool monotone=true, const std::string &transf="probit", double adj=1.5, double eps=1e-8, std::size_t gridsize=512, double cut=3.0)
Backward-compatible wrapper for MultipleTesting::lfdr (string-based API)
std::vector< double > pi0_lambda
Definition MultipleTesting.h:55
std::vector< double > computeModelFDR(const std::vector< T > &data_in)
Backward-compatible wrapper for MultipleTesting::computeModelFDR.
Definition MultipleTesting.h:360
Result of pi0 estimation for multiple testing correction.
Definition MultipleTesting.h:53
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Statistical functions for multiple testing correction.
Definition MultipleTesting.h:76
static LfdrTransform toLfdrTransform(const std::string &s)
Convert string to LfdrTransform enum (throws if invalid)
static Pi0Result pi0Est(const std::vector< double > &p_values, const std::vector< double > &lambda_=std::vector< double >(), Pi0Method method=Pi0Method::Smoother, int smooth_df=3, bool smooth_log_pi0=false)
Estimate the proportion of true null hypotheses (pi0).
Pi0Method
Method for estimating proportion of true null hypotheses (pi0)
Definition MultipleTesting.h:79
static std::vector< double > pNorm(const std::vector< double > &stat, const std::vector< double > &stat0)
Compute tail probabilities under a fitted normal distribution.
static std::string pi0MethodToString(Pi0Method m)
Convert Pi0Method enum to string representation.
static std::vector< double > pEmp(const std::vector< T > &stat, const std::vector< T > &stat0)
Compute empirical p-values from test statistics and null distribution.
Definition MultipleTesting.h:262
static std::string lfdrTransformToString(LfdrTransform t)
Convert LfdrTransform enum to string representation.
static std::vector< double > lfdr(const std::vector< double > &p_values, double pi0, bool trunc=true, bool monotone=true, LfdrTransform transf=LfdrTransform::Probit, double adj=1.5, double eps=1e-8, std::size_t gridsize=512, double cut=3.0)
Estimate local false discovery rate (local FDR) from p-values.
LfdrTransform
Transformation for local FDR estimation.
Definition MultipleTesting.h:86
static Pi0Method toPi0Method(const std::string &s)
Convert string to Pi0Method enum (throws if invalid)
static std::vector< double > qValue(const std::vector< double > &p_values, double pi0, bool pfdr=false)
Calculate q-values (FDR-adjusted p-values) for multiple testing correction.
static std::vector< double > computeModelFDR(const std::vector< T > &data_in)
Compute model-based FDR estimates from posterior error probabilities.
Definition MultipleTesting.h:195