34 #ifndef OPENMS_MATH_STATISTICS_STATISTICFUNCTIONS_H 35 #define OPENMS_MATH_STATISTICS_STATISTICFUNCTIONS_H 43 #if OPENMS_BOOST_VERSION_MINOR >= 64 44 #include <boost/serialization/array_wrapper.hpp> 46 #include <boost/accumulators/accumulators.hpp> 47 #include <boost/accumulators/statistics/covariance.hpp> 48 #include <boost/accumulators/statistics/mean.hpp> 49 #include <boost/accumulators/statistics/stats.hpp> 50 #include <boost/accumulators/statistics/variance.hpp> 51 #include <boost/accumulators/statistics/variates/covariate.hpp> 52 #include <boost/function/function_base.hpp> 53 #include <boost/lambda/casts.hpp> 54 #include <boost/lambda/lambda.hpp> 59 using std::iterator_traits;
73 template <
typename IteratorType>
89 template <
typename IteratorType>
105 template <
typename IteratorType1,
typename IteratorType2>
107 IteratorType1 begin_b, IteratorType1 end_b,
108 IteratorType2 begin_a, IteratorType2 end_a)
110 if (begin_b != end_b && begin_a == end_a)
121 template <
typename IteratorType>
122 static double sum(IteratorType begin, IteratorType end)
124 return std::accumulate(begin, end, 0.0);
134 template <
typename IteratorType>
135 static double mean(IteratorType begin, IteratorType end)
138 return sum(begin, end) / std::distance(begin, end);
152 template <
typename IteratorType>
153 static double median(IteratorType begin, IteratorType end,
159 std::sort(begin, end);
162 Size size = std::distance(begin, end);
165 IteratorType it1 = begin;
166 std::advance(it1, size / 2 - 1);
167 IteratorType it2 = it1;
168 std::advance(it2, 1);
169 return (*it1 + *it2) / 2.0;
173 IteratorType it = begin;
174 std::advance(it, (size - 1) / 2);
199 template <
typename IteratorType>
200 double MAD(IteratorType begin, IteratorType end,
double median_of_numbers)
202 std::vector<double> diffs;
203 diffs.reserve(std::distance(begin, end));
204 for (IteratorType it = begin; it != end; ++it)
206 diffs.push_back(fabs(*it - median_of_numbers));
208 return median(diffs.begin(), diffs.end(),
false);
224 template <
typename IteratorType>
232 std::sort(begin, end);
235 Size size = std::distance(begin, end);
238 return median(begin, begin + (size/2)-1,
true);
240 return median(begin, begin + (size/2),
true);
256 template <
typename IteratorType>
258 IteratorType begin, IteratorType end,
bool sorted =
false)
263 std::sort(begin, end);
266 Size size = std::distance(begin, end);
267 return median(begin + (size/2)+1, end,
true);
279 template <
typename IteratorType>
280 static double variance(IteratorType begin, IteratorType end,
281 double mean = std::numeric_limits<double>::max())
285 if (
mean == std::numeric_limits<double>::max())
289 for (IteratorType iter=begin; iter!=end; ++iter)
291 double diff = *iter -
mean;
294 return sum / (std::distance(begin, end)-1);
306 template <
typename IteratorType>
307 static double sd(IteratorType begin, IteratorType end,
308 double mean = std::numeric_limits<double>::max())
321 template <
typename IteratorType>
322 static double absdev(IteratorType begin, IteratorType end,
323 double mean = std::numeric_limits<double>::max())
327 if (
mean == std::numeric_limits<double>::max())
331 for (IteratorType iter=begin; iter!=end; ++iter)
335 return sum / std::distance(begin, end);
347 template <
typename IteratorType1,
typename IteratorType2>
348 static double covariance(IteratorType1 begin_a, IteratorType1 end_a,
349 IteratorType2 begin_b, IteratorType2 end_b)
357 IteratorType1 iter_a = begin_a;
358 IteratorType2 iter_b = begin_b;
359 for (; iter_a != end_a; ++iter_a, ++iter_b)
363 sum += (*iter_a - mean_a) * (*iter_b - mean_b);
367 Size n = std::distance(begin_a, end_a);
383 template <
typename IteratorType1,
typename IteratorType2>
385 IteratorType2 begin_b, IteratorType2 end_b)
390 SignedSize dist = std::distance(begin_a, end_a);
392 IteratorType1 iter_a = begin_a;
393 IteratorType2 iter_b = begin_b;
394 for (; iter_a != end_a; ++iter_a, ++iter_b)
399 double tmp(*iter_a - *iter_b);
417 template <
typename IteratorType1,
typename IteratorType2>
419 IteratorType2 begin_b, IteratorType2 end_b)
424 SignedSize dist = std::distance(begin_a, end_a);
426 IteratorType1 iter_a = begin_a;
427 IteratorType2 iter_b = begin_b;
428 for (; iter_a != end_a; ++iter_a, ++iter_b)
432 if ((*iter_a < 0 && *iter_b >= 0) || (*iter_a >= 0 && *iter_b < 0))
441 return double(correct) / dist;
456 template <
typename IteratorType1,
typename IteratorType2>
458 IteratorType1 begin_a, IteratorType1 end_a,
459 IteratorType2 begin_b, IteratorType2 end_b)
468 IteratorType1 iter_a = begin_a;
469 IteratorType2 iter_b = begin_b;
470 for (; iter_a != end_a; ++iter_a, ++iter_b)
475 if (*iter_a < 0 && *iter_b >= 0)
479 else if (*iter_a < 0 && *iter_b < 0)
483 else if (*iter_a >= 0 && *iter_b >= 0)
487 else if (*iter_a >= 0 && *iter_b < 0)
495 return (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
509 template <
typename IteratorType1,
typename IteratorType2>
511 IteratorType1 begin_a, IteratorType1 end_a,
512 IteratorType2 begin_b, IteratorType2 end_b)
518 SignedSize dist = std::distance(begin_a, end_a);
519 double avg_a = std::accumulate(begin_a, end_a, 0.0) / dist;
520 double avg_b = std::accumulate(begin_b, end_b, 0.0) / dist;
522 double numerator = 0;
523 double denominator_a = 0;
524 double denominator_b = 0;
525 IteratorType1 iter_a = begin_a;
526 IteratorType2 iter_b = begin_b;
527 for (; iter_a != end_a; ++iter_a, ++iter_b)
531 double temp_a = *iter_a - avg_a;
532 double temp_b = *iter_b - avg_b;
533 numerator += (temp_a * temp_b);
534 denominator_a += (temp_a * temp_a);
535 denominator_b += (temp_b * temp_b);
539 return numerator / sqrt(denominator_a * denominator_b);
543 template <
typename Value>
549 Size n = (w.size() - 1);
551 std::vector<std::pair<Size, Value> > w_idx;
552 for (
Size j = 0; j < w.size(); ++j)
554 w_idx.push_back(std::make_pair(j, w[j]));
557 std::sort(w_idx.begin(), w_idx.end(),
558 boost::lambda::ret<bool>((&boost::lambda::_1->*& std::pair<Size, Value>::second) <
559 (&boost::lambda::_2->*& std::pair<Size, Value>::second)));
564 if (fabs(w_idx[i + 1].second - w_idx[i].second) > 0.0000001 * fabs(w_idx[i + 1].second))
566 w_idx[i].second = Value(i + 1);
572 for (z = i + 1; (z <= n) && fabs(w_idx[z].second - w_idx[i].second) <= 0.0000001 * fabs(w_idx[z].second); ++z)
576 rank = 0.5 * (i + z + 1);
578 for (
Size v = i; v <= z - 1; ++v)
580 w_idx[v].second = rank;
586 w_idx[n].second = Value(n + 1);
588 for (
Size j = 0; j < w.size(); ++j)
590 w[w_idx[j].first] = w_idx[j].second;
605 template <
typename IteratorType1,
typename IteratorType2>
607 IteratorType1 begin_a, IteratorType1 end_a,
608 IteratorType2 begin_b, IteratorType2 end_b)
614 SignedSize dist = std::distance(begin_a, end_a);
615 std::vector<double> ranks_data;
616 ranks_data.reserve(dist);
617 std::vector<double> ranks_model;
618 ranks_model.reserve(dist);
619 IteratorType1 iter_a = begin_a;
620 IteratorType2 iter_b = begin_b;
621 for (; iter_a != end_a; ++iter_a, ++iter_b)
626 ranks_model.push_back(*iter_a);
627 ranks_data.push_back(*iter_b);
636 double mu =
double(ranks_data.size() + 1) / 2.;
640 double sum_model_data = 0;
641 double sqsum_data = 0;
642 double sqsum_model = 0;
644 for (
Int i = 0; i < dist; ++i)
646 sum_model_data += (ranks_data[i] - mu) * (ranks_model[i] - mu);
647 sqsum_data += (ranks_data[i] - mu) * (ranks_data[i] - mu);
648 sqsum_model += (ranks_model[i] - mu) * (ranks_model[i] - mu);
652 if (!sqsum_data || !sqsum_model)
657 return sum_model_data / (sqrt(sqsum_data) * sqrt(sqsum_model));
680 sort(data.begin(), data.end());
699 #endif // OPENMS_MATH_STATISTICS_STATISTICFUNCTIONS_H SummaryStatistics()
Definition: StatisticFunctions.h:664
double variance
Definition: StatisticFunctions.h:691
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b) ...
Definition: StatisticFunctions.h:384
T::value_type max
Definition: StatisticFunctions.h:692
double MAD(IteratorType begin, IteratorType end, double median_of_numbers)
Median absolute deviation (MAD).
Definition: StatisticFunctions.h:200
double lowerq
Definition: StatisticFunctions.h:691
static double variance(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the variance of a range of values.
Definition: StatisticFunctions.h:280
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:122
static void checkIteratorsAreValid(IteratorType1 begin_b, IteratorType1 end_b, IteratorType2 begin_a, IteratorType2 end_a)
Helper function checking if an iterator and a co-iterator both have a next element.
Definition: StatisticFunctions.h:106
static double quantile1st(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the first quantile of a range of values.
Definition: StatisticFunctions.h:225
static void computeRank(std::vector< Value > &w)
Replaces the elements in vector w by their ranks.
Definition: StatisticFunctions.h:544
static double covariance(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the covariance of two ranges of values.
Definition: StatisticFunctions.h:348
ptrdiff_t SignedSize
Signed Size type, e.g. used as a pointer difference.
Definition: Types.h:135
static void checkIteratorsEqual(IteratorType begin, IteratorType end)
Helper function checking if two iterators are equal.
Definition: StatisticFunctions.h:90
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:135
static double sd(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the standard deviation of a range of values.
Definition: StatisticFunctions.h:307
static double matthewsCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Matthews correlation coefficient for the values in [begin_a, end_a) and [begin_b...
Definition: StatisticFunctions.h:457
static double absdev(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the absolute deviation of a range of values.
Definition: StatisticFunctions.h:322
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b...
Definition: StatisticFunctions.h:510
Helper class to gather (and dump) some statistics from, e.g., a vector<double>.
Definition: StatisticFunctions.h:662
static void checkIteratorsNotNULL(IteratorType begin, IteratorType end)
Helper function checking if two iterators are not equal.
Definition: StatisticFunctions.h:74
double upperq
Definition: StatisticFunctions.h:691
static double quantile3rd(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the third quantile of a range of values.
Definition: StatisticFunctions.h:257
double median
Definition: StatisticFunctions.h:691
static double median(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the median of a range of values.
Definition: StatisticFunctions.h:153
SummaryStatistics(T &data)
Definition: StatisticFunctions.h:670
Invalid range exception.
Definition: Exception.h:286
static double rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b...
Definition: StatisticFunctions.h:606
size_t count
Definition: StatisticFunctions.h:693
size_t Size
Size type, e.g. used for a variable that can hold the result of size().
Definition: Types.h:128
T::value_type min
Definition: StatisticFunctions.h:692
static double classificationRate(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the classification rate for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:418
double mean
Definition: StatisticFunctions.h:691
int Int
Signed integer type.
Definition: Types.h:103