54 template <
typename IteratorType>
70 template <
typename IteratorType>
86 template <
typename IteratorType1,
typename IteratorType2>
88 IteratorType1 begin_b, IteratorType1 end_b,
89 IteratorType2 begin_a, IteratorType2 end_a)
91 if ((begin_b == end_b) ^ (begin_a == end_a))
102 template <
typename IteratorType>
103 static double sum(IteratorType begin, IteratorType end)
105 return std::accumulate(begin, end, 0.0);
115 template <
typename IteratorType>
116 static double mean(IteratorType begin, IteratorType end)
119 return sum(begin, end) / std::distance(begin, end);
133 template <
typename IteratorType>
134 static double median(IteratorType begin, IteratorType end,
140 std::sort(begin, end);
143 Size size = std::distance(begin, end);
146 IteratorType it1 = begin;
147 std::advance(it1, size / 2 - 1);
148 IteratorType it2 = it1;
149 std::advance(it2, 1);
150 return (*it1 + *it2) / 2.0;
154 IteratorType it = begin;
155 std::advance(it, (size - 1) / 2);
180 template <
typename IteratorType>
181 double MAD(IteratorType begin, IteratorType end,
double median_of_numbers)
183 std::vector<double> diffs;
184 diffs.reserve(std::distance(begin, end));
185 for (IteratorType it = begin; it != end; ++it)
187 diffs.push_back(fabs(*it - median_of_numbers));
189 return median(diffs.begin(), diffs.end(),
false);
210 template <
typename IteratorType>
213 double mean_value {0};
214 for (IteratorType it = begin; it != end; ++it)
216 mean_value += fabs(*it - mean_of_numbers);
218 return mean_value / std::distance(begin, end);
236 template <
typename IteratorType>
244 std::sort(begin, end);
247 Size size = std::distance(begin, end);
250 return static_cast<double>(*begin);
254 return median(begin, begin + (size/2)-1,
true);
256 return median(begin, begin + (size/2),
true);
274 template <
typename IteratorType>
276 IteratorType begin, IteratorType end,
bool sorted =
false)
281 std::sort(begin, end);
284 Size size = std::distance(begin, end);
287 return static_cast<double>(*(begin + (size - 1)));
289 return median(begin + (size/2)+1, end,
true);
317 template <
typename IteratorType>
318 static double quantile(IteratorType begin, IteratorType end,
double q)
321 "Math::quantile expects a sorted range. Sort before calling.");
325 const Size n = std::distance(begin, end);
330 if (q < 0.0 || q > 1.0)
335 if (n == 1)
return static_cast<double>(*begin);
337 const double pos = q *
static_cast<double>(n - 1);
338 const Size i =
static_cast<Size>(std::floor(pos));
339 const double frac = pos -
static_cast<double>(i);
341 const auto it_i = begin +
static_cast<typename std::iterator_traits<IteratorType>::difference_type
>(i);
342 if (frac == 0.0)
return static_cast<double>(*it_i);
344 const auto it_ip1 = it_i + 1;
345 return (1.0 - frac) *
static_cast<double>(*it_i) + frac *
static_cast<double>(*it_ip1);
362 template <
typename IteratorType>
365 std::vector<double> v;
366 v.reserve(std::distance(begin, end));
367 for (
auto it = begin; it != end; ++it)
369 if (std::isfinite(*it)) v.push_back(
static_cast<double>(*it));
371 if (v.size() < 4)
return std::numeric_limits<double>::infinity();
373 std::sort(v.begin(), v.end());
374 const double q1 =
quantile(v.begin(), v.end(), 0.25);
375 const double q3 =
quantile(v.begin(), v.end(), 0.75);
376 const double iqr = q3 - q1;
377 if (!(iqr > 0.0))
return std::numeric_limits<double>::infinity();
391 template <
typename IteratorType>
394 size_t n = 0, n_tail = 0;
395 for (
auto it = begin; it != end; ++it)
397 const double x =
static_cast<double>(*it);
398 if (!std::isfinite(x))
continue;
400 if (x > threshold) ++n_tail;
402 return (n == 0) ? 0.0 :
static_cast<double>(n_tail) /
static_cast<double>(n);
423 template <
typename IteratorType>
426 std::vector<double> v;
427 v.reserve(std::distance(begin, end));
428 for (
auto it = begin; it != end; ++it)
430 const double x =
static_cast<double>(*it);
431 if (!std::isfinite(x))
continue;
434 if (v.empty())
return 0.0;
436 if (std::isfinite(upper_fence))
440 if (x > upper_fence) x = upper_fence;
441 if (x < 0.0) x = 0.0;
444 std::sort(v.begin(), v.end());
445 return quantile(v.begin(), v.end(), q);
482 template <
typename IteratorType>
485 double r_sparse = 0.01,
486 double r_dense = 0.10)
491 std::vector<double> v;
492 v.reserve(std::distance(begin, end));
493 for (
auto it = begin; it != end; ++it)
495 if (std::isfinite(*it)) v.push_back(
static_cast<double>(*it));
502 std::sort(v.begin(), v.end());
503 const double half_raw =
quantile(v.begin(), v.end(), q);
507 const double r = std::isfinite(uf) ?
tailFractionAbove(v.begin(), v.end(), uf) : 0.0;
512 if (r_dense <= r_sparse)
514 w = (r > r_sparse) ? 1.0 : 0.0;
518 const double t = (r - r_sparse) / (r_dense - r_sparse);
519 w = std::max(0.0, std::min(1.0, t));
527 res.
blended = (1.0 - w) * half_rob + w * half_raw;
540 template <
typename IteratorType>
541 static double variance(IteratorType begin, IteratorType end,
542 double mean = std::numeric_limits<double>::max())
545 double sum_value = 0.0;
546 if (
mean == std::numeric_limits<double>::max())
550 for (IteratorType iter=begin; iter!=end; ++iter)
552 double diff = *iter -
mean;
553 sum_value += diff * diff;
555 return sum_value / (std::distance(begin, end)-1);
567 template <
typename IteratorType>
568 static double sd(IteratorType begin, IteratorType end,
569 double mean = std::numeric_limits<double>::max())
582 template <
typename IteratorType>
583 static double absdev(IteratorType begin, IteratorType end,
584 double mean = std::numeric_limits<double>::max())
587 if (
mean == std::numeric_limits<double>::max())
603 template <
typename IteratorType1,
typename IteratorType2>
604 static double covariance(IteratorType1 begin_a, IteratorType1 end_a,
605 IteratorType2 begin_b, IteratorType2 end_b)
610 double sum_value = 0.0;
613 IteratorType1 iter_a = begin_a;
614 IteratorType2 iter_b = begin_b;
615 for (; iter_a != end_a; ++iter_a, ++iter_b)
619 sum_value += (*iter_a - mean_a) * (*iter_b - mean_b);
623 Size n = std::distance(begin_a, end_a);
624 return sum_value / (n-1);
636 template <
typename IteratorType1,
typename IteratorType2>
638 IteratorType2 begin_b, IteratorType2 end_b)
643 SignedSize dist = std::distance(begin_a, end_a);
645 IteratorType1 iter_a = begin_a;
646 IteratorType2 iter_b = begin_b;
647 for (; iter_a != end_a; ++iter_a, ++iter_b)
652 double tmp(*iter_a - *iter_b);
673 template <
typename IteratorType1,
typename IteratorType2>
675 IteratorType2 begin_b, IteratorType2 end_b)
689 template <
typename IteratorType1,
typename IteratorType2>
691 IteratorType2 begin_b, IteratorType2 end_b)
696 SignedSize dist = std::distance(begin_a, end_a);
698 IteratorType1 iter_a = begin_a;
699 IteratorType2 iter_b = begin_b;
700 for (; iter_a != end_a; ++iter_a, ++iter_b)
704 if ((*iter_a < 0 && *iter_b >= 0) || (*iter_a >= 0 && *iter_b < 0))
713 return double(correct) / dist;
728 template <
typename IteratorType1,
typename IteratorType2>
730 IteratorType1 begin_a, IteratorType1 end_a,
731 IteratorType2 begin_b, IteratorType2 end_b)
740 IteratorType1 iter_a = begin_a;
741 IteratorType2 iter_b = begin_b;
742 for (; iter_a != end_a; ++iter_a, ++iter_b)
747 if (*iter_a < 0 && *iter_b >= 0)
751 else if (*iter_a < 0 && *iter_b < 0)
755 else if (*iter_a >= 0 && *iter_b >= 0)
759 else if (*iter_a >= 0 && *iter_b < 0)
767 return (tp * tn - fp * fn) / std::sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
781 template <
typename IteratorType1,
typename IteratorType2>
783 IteratorType1 begin_a, IteratorType1 end_a,
784 IteratorType2 begin_b, IteratorType2 end_b)
790 SignedSize dist = std::distance(begin_a, end_a);
791 double avg_a = std::accumulate(begin_a, end_a, 0.0) / dist;
792 double avg_b = std::accumulate(begin_b, end_b, 0.0) / dist;
794 double numerator = 0;
795 double denominator_a = 0;
796 double denominator_b = 0;
797 IteratorType1 iter_a = begin_a;
798 IteratorType2 iter_b = begin_b;
799 for (; iter_a != end_a; ++iter_a, ++iter_b)
803 double temp_a = *iter_a - avg_a;
804 double temp_b = *iter_b - avg_b;
805 numerator += (temp_a * temp_b);
806 denominator_a += (temp_a * temp_a);
807 denominator_b += (temp_b * temp_b);
811 return numerator / std::sqrt(denominator_a * denominator_b);
815 template <
typename Value>
821 Size n = (w.size() - 1);
823 std::vector<std::pair<Size, Value> > w_idx;
824 for (
Size j = 0; j < w.size(); ++j)
826 w_idx.push_back(std::make_pair(j, w[j]));
829 std::sort(w_idx.begin(), w_idx.end(),
830 [](
const auto& pair1,
const auto& pair2) { return pair1.second < pair2.second; });
835 if (fabs(w_idx[i + 1].second - w_idx[i].second) > 0.0000001 * fabs(w_idx[i + 1].second))
837 w_idx[i].second = Value(i + 1);
843 for (z = i + 1; (z <= n) && fabs(w_idx[z].second - w_idx[i].second) <= 0.0000001 * fabs(w_idx[z].second); ++z)
847 rank = 0.5 * (i + z + 1);
849 for (
Size v = i; v <= z - 1; ++v)
851 w_idx[v].second = rank;
857 w_idx[n].second = Value(n + 1);
859 for (
Size j = 0; j < w.size(); ++j)
861 w[w_idx[j].first] = w_idx[j].second;
876 template <
typename IteratorType1,
typename IteratorType2>
878 IteratorType1 begin_a, IteratorType1 end_a,
879 IteratorType2 begin_b, IteratorType2 end_b)
885 SignedSize dist = std::distance(begin_a, end_a);
886 std::vector<double> ranks_data;
887 ranks_data.reserve(dist);
888 std::vector<double> ranks_model;
889 ranks_model.reserve(dist);
890 IteratorType1 iter_a = begin_a;
891 IteratorType2 iter_b = begin_b;
892 for (; iter_a != end_a; ++iter_a, ++iter_b)
897 ranks_model.push_back(*iter_a);
898 ranks_data.push_back(*iter_b);
907 double mu = double(ranks_data.size() + 1) / 2.;
911 double sum_model_data = 0;
912 double sqsum_data = 0;
913 double sqsum_model = 0;
915 for (
Int i = 0; i < dist; ++i)
917 sum_model_data += (ranks_data[i] - mu) * (ranks_model[i] - mu);
918 sqsum_data += (ranks_data[i] - mu) * (ranks_data[i] - mu);
919 sqsum_model += (ranks_model[i] - mu) * (ranks_model[i] - mu);
923 if (!sqsum_data || !sqsum_model)
928 return sum_model_data / (std::sqrt(sqsum_data) * std::sqrt(sqsum_model));
948 sort(data.begin(), data.end());
Invalid range exception.
Definition Exception.h:257
Invalid value exception.
Definition Exception.h:306
int Int
Signed integer type.
Definition Types.h:72
ptrdiff_t SignedSize
Signed Size type, e.g. used as pointer difference.
Definition Types.h:104
size_t Size
Size type, e.g. used as a variable which can hold the result of size()
Definition Types.h:97
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition openms/include/OpenMS/CONCEPT/Macros.h:91
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition StatisticFunctions.h:782
double MAD(IteratorType begin, IteratorType end, double median_of_numbers)
Median absolute deviation (MAD)
Definition StatisticFunctions.h:181
static void checkIteratorsAreValid(IteratorType1 begin_b, IteratorType1 end_b, IteratorType2 begin_a, IteratorType2 end_a)
Helper function checking if an iterator and a co-iterator both have a next element.
Definition StatisticFunctions.h:87
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition StatisticFunctions.h:116
static double matthewsCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Matthews correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition StatisticFunctions.h:729
static void checkIteratorsEqual(IteratorType begin, IteratorType end)
Helper function checking if two iterators are equal.
Definition StatisticFunctions.h:71
static double classificationRate(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the classification rate for the values in [begin_a, end_a) and [begin_b,...
Definition StatisticFunctions.h:690
static double rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition StatisticFunctions.h:877
static double quantile3rd(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the third quantile of a range of values.
Definition StatisticFunctions.h:275
static double covariance(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the covariance of two ranges of values.
Definition StatisticFunctions.h:604
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition StatisticFunctions.h:103
static void checkIteratorsNotNULL(IteratorType begin, IteratorType end)
Helper function checking if two iterators are not equal.
Definition StatisticFunctions.h:55
static double median(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the median of a range of values.
Definition StatisticFunctions.h:134
static double quantile1st(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the first quantile of a range of values.
Definition StatisticFunctions.h:237
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition StatisticFunctions.h:637
static double rootMeanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the root mean square error (RMSE) for the values in [begin_a, end_a) and [begin_b,...
Definition StatisticFunctions.h:674
double MeanAbsoluteDeviation(IteratorType begin, IteratorType end, double mean_of_numbers)
Mean absolute deviation (MeanAbsoluteDeviation)
Definition StatisticFunctions.h:211
static double sd(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the standard deviation of a range of values.
Definition StatisticFunctions.h:568
static double absdev(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the absolute deviation of a range of values.
Definition StatisticFunctions.h:583
double tailFractionAbove(IteratorType begin, IteratorType end, double threshold)
Fraction of values above a threshold.
Definition StatisticFunctions.h:392
double half_raw
Definition StatisticFunctions.h:40
double tail_fraction
Definition StatisticFunctions.h:43
double blended
Definition StatisticFunctions.h:39
T1::value_type quantile(const T1 &x, double q)
Returns the value of the q-th quantile (q in the range 0-1) of a sorted, non-empty vector x.
Definition MathFunctions.h:460
double upper_fence
Definition StatisticFunctions.h:42
double tukeyUpperFence(IteratorType begin, IteratorType end, double k=1.5)
Tukey upper fence (UF) for outlier detection.
Definition StatisticFunctions.h:363
double weight
Definition StatisticFunctions.h:44
static void computeRank(std::vector< Value > &w)
Replaces the elements in vector w by their ranks.
Definition StatisticFunctions.h:816
double winsorizedQuantile(IteratorType begin, IteratorType end, double q, double upper_fence)
Quantile after winsorizing at an upper fence.
Definition StatisticFunctions.h:424
static double variance(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Definition StatisticFunctions.h:541
double half_rob
Definition StatisticFunctions.h:41
AdaptiveQuantileResult adaptiveQuantile(IteratorType begin, IteratorType end, double q, double k=1.5, double r_sparse=0.01, double r_dense=0.10)
Adaptive quantile that blends RAW and IQR-winsorized quantiles based on tail density beyond the Tukey...
Definition StatisticFunctions.h:483
Result of adaptiveQuantile computation.
Definition StatisticFunctions.h:38
std::string toStr(int i)
Definition StringUtils.h:101
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Helper class to gather (and dump) some statistics from a container, e.g. a vector<double>.
Definition StatisticFunctions.h:934
double lowerq
Definition StatisticFunctions.h:960
double variance
Definition StatisticFunctions.h:960
SummaryStatistics()=default
T::value_type max
Definition StatisticFunctions.h:961
SummaryStatistics(T &data)
Definition StatisticFunctions.h:938
double median
Definition StatisticFunctions.h:960
size_t count
Definition StatisticFunctions.h:962
double mean
Definition StatisticFunctions.h:960
double upperq
Definition StatisticFunctions.h:960
T::value_type min
Definition StatisticFunctions.h:961