nightly/html/MultipleTesting_8h_source.html

// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin

// SPDX-License-Identifier: BSD-3-Clause

//

// --------------------------------------------------------------------------

// $Maintainer: Justin Sing $

// $Authors: Justin Sing $

// --------------------------------------------------------------------------


#pragma once


#include <algorithm>

#include <cmath>

#include <limits>

#include <numeric>

#include <stdexcept>

#include <string>

#include <type_traits>

#include <vector>


#include <OpenMS/config.h>

#include <OpenMS/CONCEPT/Types.h>

#include <OpenMS/MATH/STATISTICS/RankData.h>


namespace OpenMS

{

namespace Math

{


/// Result of pi0 estimation for multiple testing correction.
///
/// Returned by value from MultipleTesting::pi0Est and from the free
/// function Math::pi0Est.
struct OPENMS_DLLAPI Pi0Result

{

  /// Estimate of pi0 (the proportion of true null hypotheses); defaults to 1.0.
  double pi0 = 1.0;

  /// NOTE(review): presumably the pi0 estimate obtained at each lambda value -- confirm against the pi0Est implementation.
  std::vector<double> pi0_lambda;

  /// NOTE(review): presumably the lambda values that were evaluated -- confirm against the pi0Est implementation.
  std::vector<double> lambda_;

  /// Defaults to false. NOTE(review): presumably records whether smoothing was applied -- confirm against the pi0Est implementation.
  bool pi0_smooth = false;

};


/// Statistical functions for multiple testing correction.
///
/// Every member is static; the struct only groups the functions and the
/// enums that parameterise them. The two member templates are defined in
/// this header, the remaining functions are only declared here.
struct OPENMS_DLLAPI MultipleTesting

{


  /// Method for estimating proportion of true null hypotheses (pi0)
  enum class Pi0Method

  {

    Smoother,

    Bootstrap

  };


  /// Transformation for local FDR estimation.
  enum class LfdrTransform

  {

    Probit,

    Logit

  };


  /// Convert Pi0Method enum to string representation.
  static std::string pi0MethodToString(Pi0Method m);


  /// Convert string to Pi0Method enum (throws if invalid)
  static Pi0Method toPi0Method(const std::string& s);


  /// Convert LfdrTransform enum to string representation.
  static std::string lfdrTransformToString(LfdrTransform t);


  /// Convert string to LfdrTransform enum (throws if invalid)
  static LfdrTransform toLfdrTransform(const std::string& s);


  /// Calculate q-values (FDR-adjusted p-values) for multiple testing correction.
  ///
  /// @param p_values input p-values
  /// @param pi0 proportion of true null hypotheses
  /// @param pfdr defaults to false. NOTE(review): presumably selects a positive-FDR variant -- confirm against the implementation.
  static std::vector<double> qValue(const std::vector<double>& p_values, double pi0, bool pfdr = false);


  /// Estimate the proportion of true null hypotheses (pi0).
  ///
  /// @param p_values input p-values
  /// @param lambda_ defaults to an empty vector. NOTE(review): how an empty vector is interpreted is decided by the implementation -- confirm there.
  /// @param method estimation method, defaults to Pi0Method::Smoother
  /// @param smooth_df defaults to 3. NOTE(review): presumably only relevant for the smoother method -- confirm.
  /// @param smooth_log_pi0 defaults to false. NOTE(review): presumably only relevant for the smoother method -- confirm.
  /// @return the estimate together with the additional fields of Pi0Result
  static Pi0Result pi0Est(const std::vector<double>& p_values,

                          const std::vector<double>& lambda_ = std::vector<double>(),

                          Pi0Method method = Pi0Method::Smoother,

                          int smooth_df = 3,

                          bool smooth_log_pi0 = false);


  /// Estimate local false discovery rate (local FDR) from p-values.
  ///
  /// @param p_values input p-values
  /// @param pi0 proportion of true null hypotheses
  /// @param transf transformation used for the estimation, defaults to LfdrTransform::Probit
  ///
  /// NOTE(review): the meaning of trunc, monotone, adj, eps, gridsize and cut
  /// is not visible from this header -- confirm against the implementation
  /// before relying on anything but the defaults shown in the signature.
  static std::vector<double> lfdr(const std::vector<double>& p_values,

                                  double pi0,

                                  bool trunc = true,

                                  bool monotone = true,

                                  LfdrTransform transf = LfdrTransform::Probit,

                                  double adj = 1.5,

                                  double eps = 1e-8,

                                  std::size_t gridsize = 512,

                                  double cut = 3.0);


  /// Compute tail probabilities under a fitted normal distribution.
  ///
  /// NOTE(review): presumably the normal distribution is fitted to stat0 and
  /// evaluated at stat -- confirm against the implementation.
  static std::vector<double> pNorm(const std::vector<double>& stat, const std::vector<double>& stat0);


  /// Compute model-based FDR estimates from posterior error probabilities.
  /// Defined further down in this header.
  template <class T>

  static std::vector<double> computeModelFDR(const std::vector<T>& data_in);


  /// Compute empirical p-values from test statistics and null distribution.
  /// Defined further down in this header.
  template <class T>

  static std::vector<double> pEmp(const std::vector<T>& stat, const std::vector<T>& stat0);

};


// Template implementation for MultipleTesting::computeModelFDR

/**
  @brief Compute model-based FDR estimates from posterior error probabilities.

  The input is argsorted ascending (stable), the sorted values are ranked
  with the 'max' tie method and accumulated into a running sum. The estimate
  for an element is the running sum at its rank divided by that rank, written
  back at the element's original position.

  @param data_in input values; converted to double for sorting and summation
  @return one value per input element, in input order. An empty input yields
          an empty vector; for floating-point @p T a single NaN in the input
          yields an all-NaN vector.
*/
template <class T>
inline std::vector<double> MultipleTesting::computeModelFDR(const std::vector<T>& data_in)
{
  constexpr double nan_value = std::numeric_limits<double>::quiet_NaN();

  const std::size_t count = data_in.size();
  std::vector<double> result(count, nan_value);
  if (count == 0)
  {
    return result;
  }

  // Only floating-point element types can carry NaN; one NaN poisons the whole result.
  if constexpr (std::is_floating_point<T>::value)
  {
    const bool has_nan = std::any_of(data_in.begin(), data_in.end(),
                                     [](const T& value) { return std::isnan(value); });
    if (has_nan)
    {
      return result;
    }
  }

  // Stable ascending argsort: sorted_idx[k] is the input position of the k-th smallest value.
  std::vector<std::size_t> sorted_idx(count);
  std::iota(sorted_idx.begin(), sorted_idx.end(), std::size_t{0});
  std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                   [&data_in](std::size_t lhs, std::size_t rhs)
                   {
                     return static_cast<double>(data_in[lhs]) < static_cast<double>(data_in[rhs]);
                   });

  // Sorted values and their running sum, filled in one pass.
  std::vector<double> ascending(count);
  std::vector<double> running_sum(count);
  double total = 0.0;
  for (std::size_t pos = 0; pos < count; ++pos)
  {
    ascending[pos] = static_cast<double>(data_in[sorted_idx[pos]]);
    total += ascending[pos];
    running_sum[pos] = total;
  }

  // 'max' tie method: tied values all receive the highest rank of their group.
  const std::vector<double> max_ranks =
    RankData::rankdata<double>(ascending, RankData::Method::Max, RankData::NaNPolicy::Propagate);

  // Scatter the estimates back to the original positions.
  const std::size_t last = count - 1;
  for (std::size_t pos = 0; pos < count; ++pos)
  {
    const double rank = max_ranks[pos];
    double& slot = result[sorted_idx[pos]];
    if (std::isnan(rank))
    {
      slot = nan_value;
    }
    else
    {
      // Ranks are 1-based; clamp so an unexpected rank cannot index past the end.
      const std::size_t wanted = static_cast<std::size_t>(static_cast<Int64>(rank) - 1);
      slot = running_sum[std::min(wanted, last)] / rank;
    }
  }

  return result;
}


// Template implementation for MultipleTesting::pEmp

/**
  @brief Compute empirical p-values from test statistics and null distribution.

  The observed statistics @p stat and the null statistics @p stat0 are pooled
  and stably sorted in descending order. For the k-th largest observed
  statistic the raw p-value is the number of null statistics sorted ahead of
  it, divided by the number of null statistics. Observed values are pooled
  first, so a null value equal to an observed value is not counted as being
  ahead of it. Each observed statistic then picks the raw p-value at index
  floor(average rank of -stat) - 1, and the result is floored at 1 / stat0.size().

  @param stat observed test statistics (larger values are ranked first)
  @param stat0 test statistics drawn from the null distribution
  @return one p-value per element of @p stat, in input order. For
          floating-point @p T, a NaN anywhere in @p stat or @p stat0 yields
          an all-NaN vector, matching computeModelFDR. Without this the
          descending comparator would not be a strict weak ordering and a NaN
          rank would be cast to an integer, both undefined behaviour.

  @throws std::invalid_argument if @p stat or @p stat0 is empty
*/
template <class T>
inline std::vector<double> MultipleTesting::pEmp(const std::vector<T>& stat, const std::vector<T>& stat0)
{
  const std::size_t m = stat.size();
  const std::size_t m0 = stat0.size();
  if (m == 0 || m0 == 0) throw std::invalid_argument("pEmp: input arrays must be non-empty");

  constexpr double nan_value = std::numeric_limits<double>::quiet_NaN();

  // Propagate NaN up front instead of feeding it into the sort and the rank cast.
  if constexpr (std::is_floating_point<T>::value)
  {
    const auto is_nan = [](const T& value) { return std::isnan(value); };
    if (std::any_of(stat.begin(), stat.end(), is_nan) || std::any_of(stat0.begin(), stat0.end(), is_nan))
    {
      return std::vector<double>(m, nan_value);
    }
  }

  // Pool the values: observed first, null second. A pooled index below m
  // therefore identifies an observed value, so no separate flag vector is needed.
  std::vector<double> pooled;
  pooled.reserve(m + m0);
  for (const T& value : stat) pooled.push_back(static_cast<double>(value));
  for (const T& value : stat0) pooled.push_back(static_cast<double>(value));

  // Stable descending argsort of the pooled values.
  const std::size_t total = pooled.size();
  std::vector<std::size_t> perm(total);
  std::iota(perm.begin(), perm.end(), std::size_t{0});
  std::stable_sort(perm.begin(), perm.end(),
                   [&pooled](std::size_t lhs, std::size_t rhs) { return pooled[lhs] > pooled[rhs]; });

  // p[k]: fraction of null values sorted ahead of the k-th largest observed value.
  // pos - p.size() cannot underflow: at most pos observed values precede position pos.
  // Exactly m pooled indices are below m, so p ends up with m entries.
  std::vector<double> p;
  p.reserve(m);
  for (std::size_t pos = 0; pos < total; ++pos)
  {
    if (perm[pos] < m)
    {
      p.push_back(static_cast<double>(pos - p.size()) / static_cast<double>(m0));
    }
  }

  // Average ranks of the negated statistics: the largest statistic gets rank 1.
  std::vector<double> neg_stat(m);
  for (std::size_t i = 0; i < m; ++i) neg_stat[i] = -static_cast<double>(stat[i]);
  const std::vector<double> ranks =
    RankData::rankdata<double>(neg_stat, RankData::Method::Average, RankData::NaNPolicy::Propagate);

  // Smallest reportable p-value.
  const double min_p = 1.0 / static_cast<double>(m0);

  std::vector<double> out(m);
  for (std::size_t i = 0; i < m; ++i)
  {
    const double rank = ranks[i];
    if (std::isnan(rank))
    {
      // Defensive: never cast a NaN rank to an integer.
      out[i] = nan_value;
      continue;
    }

    // Tied statistics have fractional average ranks; round down, then convert to a 0-based index.
    std::size_t idx = static_cast<std::size_t>(static_cast<Int64>(std::floor(rank)) - 1);
    if (idx >= p.size()) idx = p.size() - 1;

    out[i] = std::max(p[idx], min_p);
  }

  return out;
}


// -------------------------------------------------------------------------

// Backward-compatible free function wrappers

// -------------------------------------------------------------------------


/// Backward-compatible wrapper for MultipleTesting::qValue.
///
/// Forwards all three arguments unchanged and returns the result as-is.
inline std::vector<double> qValue(const std::vector<double>& p_values, double pi0, bool pfdr = false)

{

  return MultipleTesting::qValue(p_values, pi0, pfdr);

}


/// Backward-compatible wrapper for MultipleTesting::pi0Est (string-based API)
///
/// Takes the estimation method as a string (default "smoother") where
/// MultipleTesting::pi0Est takes a Pi0Method enum; the other parameters and
/// their defaults match that overload. Only declared here.
/// NOTE(review): presumably the string is converted with
/// MultipleTesting::toPi0Method, which throws on an invalid name -- confirm
/// against the implementation.
OPENMS_DLLAPI Pi0Result pi0Est(const std::vector<double>& p_values,

                               const std::vector<double>& lambda_ = std::vector<double>(),

                               const std::string& pi0_method = "smoother",

                               int smooth_df = 3,

                               bool smooth_log_pi0 = false);


/// Backward-compatible wrapper for MultipleTesting::lfdr (string-based API)
///
/// Takes the transformation as a string (default "probit") where
/// MultipleTesting::lfdr takes an LfdrTransform enum; the other parameters
/// and their defaults match that overload. Only declared here.
/// NOTE(review): presumably the string is converted with
/// MultipleTesting::toLfdrTransform, which throws on an invalid name --
/// confirm against the implementation.
OPENMS_DLLAPI std::vector<double> lfdr(const std::vector<double>& p_values,

                                       double pi0,

                                       bool trunc = true,

                                       bool monotone = true,

                                       const std::string& transf = "probit",

                                       double adj = 1.5,

                                       double eps = 1e-8,

                                       std::size_t gridsize = 512,

                                       double cut = 3.0);


/// Backward-compatible wrapper for MultipleTesting::pNorm.
///
/// Forwards both arguments unchanged and returns the result as-is.
inline std::vector<double> pNorm(const std::vector<double>& stat, const std::vector<double>& stat0)

{

  return MultipleTesting::pNorm(stat, stat0);

}


/// Backward-compatible wrapper for MultipleTesting::computeModelFDR.
///
/// Forwards @p data_in unchanged; the element type T is deduced from the argument.
template <class T>


inline std::vector<double> computeModelFDR(const std::vector<T>& data_in)

{

  return MultipleTesting::computeModelFDR(data_in);

}


/// Backward-compatible wrapper for MultipleTesting::pEmp.
///
/// Forwards both arguments unchanged; the element type T is deduced from
/// the arguments. MultipleTesting::pEmp throws std::invalid_argument when
/// either input is empty, and this wrapper does not catch it.
template <class T>


inline std::vector<double> pEmp(const std::vector<T>& stat, const std::vector<T>& stat0)

{

  return MultipleTesting::pEmp(stat, stat0);

}


} // namespace Math

} // namespace OpenMS

RankData.h

Types.h

OpenMS::Int64
int64_t Int64
Signed integer type (64bit)
Definition Types.h:40

OpenMS::Math::Pi0Result::lambda_
std::vector< double > lambda_
Definition MultipleTesting.h:56

OpenMS::Math::pNorm
std::vector< double > pNorm(const std::vector< double > &stat, const std::vector< double > &stat0)
Backward-compatible wrapper for MultipleTesting::pNorm.
Definition MultipleTesting.h:345

OpenMS::Math::qValue
std::vector< double > qValue(const std::vector< double > &p_values, double pi0, bool pfdr=false)
Backward-compatible wrapper for MultipleTesting::qValue.
Definition MultipleTesting.h:321

OpenMS::Math::pi0Est
Pi0Result pi0Est(const std::vector< double > &p_values, const std::vector< double > &lambda_=std::vector< double >(), const std::string &pi0_method="smoother", int smooth_df=3, bool smooth_log_pi0=false)
Backward-compatible wrapper for MultipleTesting::pi0Est (string-based API)

OpenMS::Math::pEmp
std::vector< double > pEmp(const std::vector< T > &stat, const std::vector< T > &stat0)
Backward-compatible wrapper for MultipleTesting::pEmp.
Definition MultipleTesting.h:359

OpenMS::Math::lfdr
std::vector< double > lfdr(const std::vector< double > &p_values, double pi0, bool trunc=true, bool monotone=true, const std::string &transf="probit", double adj=1.5, double eps=1e-8, std::size_t gridsize=512, double cut=3.0)
Backward-compatible wrapper for MultipleTesting::lfdr (string-based API)

OpenMS::Math::Pi0Result::pi0_lambda
std::vector< double > pi0_lambda
Definition MultipleTesting.h:55

OpenMS::Math::computeModelFDR
std::vector< double > computeModelFDR(const std::vector< T > &data_in)
Backward-compatible wrapper for MultipleTesting::computeModelFDR.
Definition MultipleTesting.h:352

OpenMS::Math::Pi0Result
Result of pi0 estimation for multiple testing correction.
Definition MultipleTesting.h:53

OpenMS
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19

OpenMS::Math::MultipleTesting
Statistical functions for multiple testing correction.
Definition MultipleTesting.h:76

OpenMS::Math::MultipleTesting::toLfdrTransform
static LfdrTransform toLfdrTransform(const std::string &s)
Convert string to LfdrTransform enum (throws if invalid)

OpenMS::Math::MultipleTesting::pi0Est
static Pi0Result pi0Est(const std::vector< double > &p_values, const std::vector< double > &lambda_=std::vector< double >(), Pi0Method method=Pi0Method::Smoother, int smooth_df=3, bool smooth_log_pi0=false)
Estimate the proportion of true null hypotheses (pi0).

OpenMS::Math::MultipleTesting::Pi0Method
Pi0Method
Method for estimating proportion of true null hypotheses (pi0)
Definition MultipleTesting.h:79

OpenMS::Math::MultipleTesting::pNorm
static std::vector< double > pNorm(const std::vector< double > &stat, const std::vector< double > &stat0)
Compute tail probabilities under a fitted normal distribution.

OpenMS::Math::MultipleTesting::pi0MethodToString
static std::string pi0MethodToString(Pi0Method m)
Convert Pi0Method enum to string representation.

OpenMS::Math::MultipleTesting::pEmp
static std::vector< double > pEmp(const std::vector< T > &stat, const std::vector< T > &stat0)
Compute empirical p-values from test statistics and null distribution.
Definition MultipleTesting.h:254

OpenMS::Math::MultipleTesting::lfdrTransformToString
static std::string lfdrTransformToString(LfdrTransform t)
Convert LfdrTransform enum to string representation.

OpenMS::Math::MultipleTesting::lfdr
static std::vector< double > lfdr(const std::vector< double > &p_values, double pi0, bool trunc=true, bool monotone=true, LfdrTransform transf=LfdrTransform::Probit, double adj=1.5, double eps=1e-8, std::size_t gridsize=512, double cut=3.0)
Estimate local false discovery rate (local FDR) from p-values.

OpenMS::Math::MultipleTesting::LfdrTransform
LfdrTransform
Transformation for local FDR estimation.
Definition MultipleTesting.h:86

OpenMS::Math::MultipleTesting::toPi0Method
static Pi0Method toPi0Method(const std::string &s)
Convert string to Pi0Method enum (throws if invalid)

OpenMS::Math::MultipleTesting::qValue
static std::vector< double > qValue(const std::vector< double > &p_values, double pi0, bool pfdr=false)
Calculate q-values (FDR-adjusted p-values) for multiple testing correction.

OpenMS::Math::MultipleTesting::computeModelFDR
static std::vector< double > computeModelFDR(const std::vector< T > &data_in)
Compute model-based FDR estimates from posterior error probabilities.
Definition MultipleTesting.h:187

OpenMS::Math::RankData::Method::Max
@ Max

OpenMS::Math::RankData::Method::Average
@ Average

OpenMS::Math::RankData::NaNPolicy::Propagate
@ Propagate