32 template <
typename IteratorType>
48 template <
typename IteratorType>
// checkIteratorsAreValid (fragments): helper that takes two iterator ranges.
// Note the naming: the first pair is the *_b range, the second pair the *_a range.
64 template <
typename IteratorType1,
typename IteratorType2>
66 IteratorType1 begin_b, IteratorType1 end_b,
67 IteratorType2 begin_a, IteratorType2 end_a)
// True when range b still has elements while range a is already exhausted.
// NOTE(review): the statement guarded by this condition is not visible in this
// excerpt - presumably an error is raised; confirm against the full header.
69 if (begin_b != end_b && begin_a == end_a)
// sum: adds up all values in [begin, end).
// @param begin  start of the range
// @param end    past-the-end of the range
// @return the total as a double
80 template <
typename IteratorType>
81 static double sum(IteratorType begin, IteratorType end)
// The 0.0 seed makes std::accumulate carry the running total as a double,
// whatever the iterator's value type is; for an empty range the seed is
// returned unchanged.
83 return std::accumulate(begin, end, 0.0);
// mean: arithmetic mean of the values in [begin, end).
// @param begin  start of the range
// @param end    past-the-end of the range
// @return sum of the values divided by the number of elements
93 template <
typename IteratorType>
94 static double mean(IteratorType begin, IteratorType end)
// The range is traversed by sum() and measured by std::distance().
// NOTE(review): an empty range gives 0.0 / 0 here unless a guard on the
// lines not shown in this excerpt rejects it first - confirm.
97 return sum(begin, end) / std::distance(begin, end);
// median: middle value of [begin, end), returned as a double.
// NOTE(review): the rest of the signature and the branch conditions are not
// visible in this excerpt; the comments below describe only the statements shown.
111 template <
typename IteratorType>
112 static double median(IteratorType begin, IteratorType end,
// Sorts the caller's range in place, so the input order is not preserved
// whenever this statement runs.
118 std::sort(begin, end);
121 Size size = std::distance(begin, end);
// Two-middle-elements case: it1 is moved to index size/2 - 1, it2 to the
// element right after it, and the average of both is returned.
124 IteratorType it1 = begin;
125 std::advance(it1, size / 2 - 1);
126 IteratorType it2 = it1;
127 std::advance(it2, 1);
128 return (*it1 + *it2) / 2.0;
// Single-middle-element case: move to index (size - 1) / 2.
132 IteratorType it = begin;
133 std::advance(it, (size - 1) / 2);
// MAD: median absolute deviation of [begin, end) around a given median.
// @param begin              start of the range
// @param end                past-the-end of the range
// @param median_of_numbers  centre value the deviations are measured from
// @return median of |x - median_of_numbers| over all x in the range
158 template <
typename IteratorType>
159 double MAD(IteratorType begin, IteratorType end,
double median_of_numbers)
// Deviations are collected in a private buffer, so the input range itself is
// not modified by this function.
161 std::vector<double> diffs;
162 diffs.reserve(std::distance(begin, end));
163 for (IteratorType it = begin; it != end; ++it)
165 diffs.push_back(fabs(*it - median_of_numbers));
// `false` is passed as median()'s `sorted` argument because the deviations
// are in input order, not sorted.
167 return median(diffs.begin(), diffs.end(),
false);
// MeanAbsoluteDeviation (signature line not part of this excerpt): average
// of |x - mean_of_numbers| over all x in [begin, end).
188 template <
typename IteratorType>
// Despite its name, mean_value first serves as the running sum of the
// absolute deviations; it only becomes a mean in the final division.
191 double mean_value {0};
192 for (IteratorType it = begin; it != end; ++it)
194 mean_value += fabs(*it - mean_of_numbers);
// NOTE(review): divides by the element count; an empty range yields 0.0 / 0
// unless a guard on the lines not shown rejects it - confirm.
196 return mean_value / std::distance(begin, end);
// quantile1st (signature line not part of this excerpt): first quartile,
// computed as the median of the lower part of the range.
212 template <
typename IteratorType>
// Sorts the caller's range in place whenever this statement runs.
220 std::sort(begin, end);
223 Size size = std::distance(begin, end);
// Both returns pass `true` because the sub-range is already sorted.
// `begin + k` requires random-access iterators.
// NOTE(review): the condition choosing between the two returns is not visible.
// The first form ends the sub-range one element earlier (size/2 - 1 elements
// instead of size/2); verify that this is the intended lower half.
226 return median(begin, begin + (size/2)-1,
true);
228 return median(begin, begin + (size/2),
true);
// quantile3rd (signature line with the function name is not part of this
// excerpt): third quartile, computed as the median of the upper part of the range.
// @param begin   start of the range
// @param end     past-the-end of the range
// @param sorted  defaults to false
244 template <
typename IteratorType>
246 IteratorType begin, IteratorType end,
bool sorted =
false)
// Sorts the caller's range in place whenever this statement runs.
// NOTE(review): presumably guarded by `sorted`; the condition is not visible.
251 std::sort(begin, end);
254 Size size = std::distance(begin, end);
// Upper part starts at index size/2 + 1, i.e. the element at index size/2 is
// skipped; `begin + k` requires random-access iterators.
255 return median(begin + (size/2)+1, end,
true);
// variance: sum of squared deviations from `mean`, divided by (n - 1).
// @param begin  start of the range
// @param end    past-the-end of the range
// @param mean   centre value; the default numeric_limits<double>::max() is
//               used as a "not supplied" marker
267 template <
typename IteratorType>
268 static double variance(IteratorType begin, IteratorType end,
269 double mean = std::numeric_limits<double>::max())
272 double sum_value = 0.0;
// Detects the "not supplied" marker by exact comparison.
// NOTE(review): the guarded statement is not visible - presumably the mean is
// then computed from the range; confirm.
273 if (
mean == std::numeric_limits<double>::max())
277 for (IteratorType iter=begin; iter!=end; ++iter)
279 double diff = *iter -
mean;
280 sum_value += diff * diff;
// Divisor is n - 1, so a single-element range divides by zero here.
282 return sum_value / (std::distance(begin, end)-1);
// sd: only the signature is visible in this excerpt; the body is not shown.
// Takes the same parameters as variance(), including the
// numeric_limits<double>::max() default for `mean`.
294 template <
typename IteratorType>
295 static double sd(IteratorType begin, IteratorType end,
296 double mean = std::numeric_limits<double>::max())
// absdev: average deviation of the values in [begin, end) from `mean`.
// @param begin  start of the range
// @param end    past-the-end of the range
// @param mean   centre value; the default numeric_limits<double>::max() is
//               used as a "not supplied" marker
309 template <
typename IteratorType>
310 static double absdev(IteratorType begin, IteratorType end,
311 double mean = std::numeric_limits<double>::max())
314 double sum_value = 0.0;
// Detects the "not supplied" marker by exact comparison.
// NOTE(review): the guarded statement is not visible - presumably the mean is
// then computed from the range; confirm.
315 if (
mean == std::numeric_limits<double>::max())
319 for (IteratorType iter=begin; iter!=end; ++iter)
// NOTE(review): the deviation is accumulated with its sign (no fabs), so
// positive and negative deviations cancel; when `mean` is the range's own
// mean the total is ~0. An absolute value looks intended - confirm.
321 sum_value += *iter -
mean;
323 return sum_value / std::distance(begin, end);
// covariance: sum of (a_i - mean_a) * (b_i - mean_b) over paired elements of
// the two ranges, divided by (n - 1).
// mean_a and mean_b are defined on lines not shown in this excerpt.
335 template <
typename IteratorType1,
typename IteratorType2>
336 static double covariance(IteratorType1 begin_a, IteratorType1 end_a,
337 IteratorType2 begin_b, IteratorType2 end_b)
342 double sum_value = 0.0;
345 IteratorType1 iter_a = begin_a;
346 IteratorType2 iter_b = begin_b;
// Only range a terminates the loop; iter_b is advanced in lock-step and is
// never compared against end_b in the visible statements.
347 for (; iter_a != end_a; ++iter_a, ++iter_b)
351 sum_value += (*iter_a - mean_a) * (*iter_b - mean_b);
// n is an unsigned Size: n - 1 is 0 for a single element (division by zero)
// and wraps around for an empty range.
355 Size n = std::distance(begin_a, end_a);
356 return sum_value / (n-1);
// meanSquareError (fragments; the signature line with the name and the first
// range is not part of this excerpt).
368 template <
typename IteratorType1,
typename IteratorType2>
370 IteratorType2 begin_b, IteratorType2 end_b)
// Element count is taken from range a only.
375 SignedSize dist = std::distance(begin_a, end_a);
377 IteratorType1 iter_a = begin_a;
378 IteratorType2 iter_b = begin_b;
// Both ranges are walked in lock-step; only range a terminates the loop.
379 for (; iter_a != end_a; ++iter_a, ++iter_b)
// Per-pair difference a_i - b_i.
// NOTE(review): how tmp is accumulated and the final return are not visible.
384 double tmp(*iter_a - *iter_b);
// classificationRate (fragments; the signature line with the name and the
// first range is not part of this excerpt).
// Returns `correct` divided by the number of elements in range a.
402 template <
typename IteratorType1,
typename IteratorType2>
404 IteratorType2 begin_b, IteratorType2 end_b)
409 SignedSize dist = std::distance(begin_a, end_a);
411 IteratorType1 iter_a = begin_a;
412 IteratorType2 iter_b = begin_b;
413 for (; iter_a != end_a; ++iter_a, ++iter_b)
// True when the paired values fall on different sides of zero (zero itself
// counts as non-negative), i.e. the two classifications disagree.
// NOTE(review): the statement guarded by this condition and the declaration of
// `correct` are not visible in this excerpt.
417 if ((*iter_a < 0 && *iter_b >= 0) || (*iter_a >= 0 && *iter_b < 0))
// The cast forces floating-point division.
426 return double(correct) / dist;
// matthewsCorrelationCoefficient (fragments; the signature line with the name
// is not part of this excerpt). Values are classified by sign, with zero
// counted as non-negative.
441 template <
typename IteratorType1,
typename IteratorType2>
443 IteratorType1 begin_a, IteratorType1 end_a,
444 IteratorType2 begin_b, IteratorType2 end_b)
453 IteratorType1 iter_a = begin_a;
454 IteratorType2 iter_b = begin_b;
455 for (; iter_a != end_a; ++iter_a, ++iter_b)
// The four branches cover every sign combination of the pair (a, b).
// NOTE(review): which of tp/tn/fp/fn each branch updates, and how those
// counters are declared, is not visible in this excerpt.
460 if (*iter_a < 0 && *iter_b >= 0)
464 else if (*iter_a < 0 && *iter_b < 0)
468 else if (*iter_a >= 0 && *iter_b >= 0)
472 else if (*iter_a >= 0 && *iter_b < 0)
// (tp*tn - fp*fn) / sqrt((tp+fp)(tp+fn)(tn+fp)(tn+fn)).
// The denominator is zero whenever one of the four sums is zero.
480 return (tp * tn - fp * fn) / std::sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
// pearsonCorrelationCoefficient (the signature line with the name is not part
// of this excerpt): sum of products of centred values divided by the square
// root of the product of the two sums of squared centred values.
494 template <
typename IteratorType1,
typename IteratorType2>
496 IteratorType1 begin_a, IteratorType1 end_a,
497 IteratorType2 begin_b, IteratorType2 end_b)
// Both averages are divided by the length of range a, so the two ranges are
// treated as having the same number of elements.
503 SignedSize dist = std::distance(begin_a, end_a);
504 double avg_a = std::accumulate(begin_a, end_a, 0.0) / dist;
505 double avg_b = std::accumulate(begin_b, end_b, 0.0) / dist;
507 double numerator = 0;
508 double denominator_a = 0;
509 double denominator_b = 0;
510 IteratorType1 iter_a = begin_a;
511 IteratorType2 iter_b = begin_b;
// Single pass in lock-step; only range a terminates the loop.
512 for (; iter_a != end_a; ++iter_a, ++iter_b)
516 double temp_a = *iter_a - avg_a;
517 double temp_b = *iter_b - avg_b;
518 numerator += (temp_a * temp_b);
519 denominator_a += (temp_a * temp_a);
520 denominator_b += (temp_b * temp_b);
// If either range is constant its sum of squares is 0 and this divides by zero.
524 return numerator / std::sqrt(denominator_a * denominator_b);
// computeRank (the signature line is not part of this excerpt): overwrites the
// elements of vector `w` with their 1-based ranks; tied values receive the
// mean of the ranks they span.
// NOTE(review): the outer loop and the declarations of i, z and rank are not
// visible; the comments below describe only the statements shown.
528 template <
typename Value>
// n is the last valid index. Size is unsigned, so this wraps around for an
// empty vector.
534 Size n = (w.size() - 1);
// Pair every value with its original position so the ranks can be written
// back in input order after sorting.
536 std::vector<std::pair<Size, Value> > w_idx;
537 for (
Size j = 0; j < w.size(); ++j)
539 w_idx.push_back(std::make_pair(j, w[j]));
// Ascending by value (the pair's second member).
542 std::sort(w_idx.begin(), w_idx.end(),
543 [](
const auto& pair1,
const auto& pair2) { return pair1.second < pair2.second; });
// Neighbours count as different when they differ by more than a relative
// tolerance of 1e-7 (scaled by the magnitude of the larger-index value).
548 if (fabs(w_idx[i + 1].second - w_idx[i].second) > 0.0000001 * fabs(w_idx[i + 1].second))
// No tie: position i in sorted order gets rank i + 1.
550 w_idx[i].second = Value(i + 1);
// Tie: advance z to the first position that no longer ties with position i
// (same tolerance), never going past n.
556 for (z = i + 1; (z <= n) && fabs(w_idx[z].second - w_idx[i].second) <= 0.0000001 * fabs(w_idx[z].second); ++z)
// Mean of the ranks i + 1 .. z covered by the tied run.
560 rank = 0.5 * (i + z + 1);
562 for (
Size v = i; v <= z - 1; ++v)
564 w_idx[v].second = rank;
// Last position gets rank n + 1.
// NOTE(review): the condition guarding this assignment is not visible.
570 w_idx[n].second = Value(n + 1);
// Write each rank back to the slot its value originally came from.
572 for (
Size j = 0; j < w.size(); ++j)
574 w[w_idx[j].first] = w_idx[j].second;
// rankCorrelationCoefficient (the signature line with the name is not part of
// this excerpt): correlation computed on two value sequences copied from the
// input ranges.
// NOTE(review): the step that converts the copied values into ranks is not
// visible here - presumably computeRank() is applied to both vectors; confirm.
589 template <
typename IteratorType1,
typename IteratorType2>
591 IteratorType1 begin_a, IteratorType1 end_a,
592 IteratorType2 begin_b, IteratorType2 end_b)
// Element count is taken from range a; both buffers are pre-sized with it.
598 SignedSize dist = std::distance(begin_a, end_a);
599 std::vector<double> ranks_data;
600 ranks_data.reserve(dist);
601 std::vector<double> ranks_model;
602 ranks_model.reserve(dist);
603 IteratorType1 iter_a = begin_a;
604 IteratorType2 iter_b = begin_b;
// Range a is copied into ranks_model and range b into ranks_data, so the
// inputs themselves are not modified by the visible statements.
605 for (; iter_a != end_a; ++iter_a, ++iter_b)
610 ranks_model.push_back(*iter_a);
611 ranks_data.push_back(*iter_b);
// (n + 1) / 2 is the mean of the ranks 1..n, used as the centre for both vectors.
620 double mu = double(ranks_data.size() + 1) / 2.;
624 double sum_model_data = 0;
625 double sqsum_data = 0;
626 double sqsum_model = 0;
628 for (
Int i = 0; i < dist; ++i)
630 sum_model_data += (ranks_data[i] - mu) * (ranks_model[i] - mu);
631 sqsum_data += (ranks_data[i] - mu) * (ranks_data[i] - mu);
632 sqsum_model += (ranks_model[i] - mu) * (ranks_model[i] - mu);
// Guards the division below against a zero sum of squares.
// NOTE(review): the statement executed in that case is not visible.
636 if (!sqsum_data || !sqsum_model)
641 return sum_model_data / (std::sqrt(sqsum_data) * std::sqrt(sqsum_model));
// Fragment of the SummaryStatistics(T &data) constructor: sorts the caller's
// container in place.
661 sort(data.begin(), data.end());
Invalid range exception.
Definition: Exception.h:252
int Int
Signed integer type.
Definition: Types.h:76
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:108
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
static double classificationRate(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the classification rate for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:403
static double median(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the median of a range of values.
Definition: StatisticFunctions.h:112
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:94
static double covariance(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the covariance of two ranges of values.
Definition: StatisticFunctions.h:336
static double quantile3rd(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the third quantile of a range of values.
Definition: StatisticFunctions.h:245
static void checkIteratorsNotNULL(IteratorType begin, IteratorType end)
Helper function checking if two iterators are not equal.
Definition: StatisticFunctions.h:33
static double matthewsCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Matthews correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:442
double MeanAbsoluteDeviation(IteratorType begin, IteratorType end, double mean_of_numbers)
mean absolute deviation (MeanAbsoluteDeviation)
Definition: StatisticFunctions.h:189
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:81
static double absdev(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the absolute deviation of a range of values.
Definition: StatisticFunctions.h:310
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:495
static double sd(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the standard deviation of a range of values.
Definition: StatisticFunctions.h:295
double MAD(IteratorType begin, IteratorType end, double median_of_numbers)
median absolute deviation (MAD)
Definition: StatisticFunctions.h:159
static double rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:590
static void checkIteratorsAreValid(IteratorType1 begin_b, IteratorType1 end_b, IteratorType2 begin_a, IteratorType2 end_a)
Helper function checking if an iterator and a co-iterator both have a next element.
Definition: StatisticFunctions.h:65
static double quantile1st(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the first quantile of a range of values.
Definition: StatisticFunctions.h:213
static void checkIteratorsEqual(IteratorType begin, IteratorType end)
Helper function checking if two iterators are equal.
Definition: StatisticFunctions.h:49
static double variance(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the variance of a range of values.
Definition: StatisticFunctions.h:268
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:369
static void computeRank(std::vector< Value > &w)
Replaces the elements in vector w by their ranks.
Definition: StatisticFunctions.h:529
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
Helper class to gather (and dump) some statistics from a e.g. vector<double>.
Definition: StatisticFunctions.h:647
double lowerq
Definition: StatisticFunctions.h:672
double variance
Definition: StatisticFunctions.h:672
SummaryStatistics()=default
T::value_type max
Definition: StatisticFunctions.h:673
SummaryStatistics(T &data)
Definition: StatisticFunctions.h:651
double median
Definition: StatisticFunctions.h:672
size_t count
Definition: StatisticFunctions.h:674
double mean
Definition: StatisticFunctions.h:672
double upperq
Definition: StatisticFunctions.h:672
T::value_type min
Definition: StatisticFunctions.h:673