OpenMS  2.5.0
LinearRegression.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
41 
42 #include "Wm5Vector2.h"
43 #include "Wm5ApprLineFit2.h"
44 #include "Wm5LinearSystem.h"
45 
46 #include <cmath>
47 #include <vector>
48 
49 namespace OpenMS
50 {
51  namespace Math
52  {
69  class OPENMS_DLLAPI LinearRegression
70  {
71 public:
72 
75  intercept_(0),
76  slope_(0),
77  x_intercept_(0),
78  lower_(0),
79  upper_(0),
80  t_star_(0),
81  r_squared_(0),
82  stand_dev_residuals_(0),
83  mean_residuals_(0),
84  stand_error_slope_(0),
85  chi_squared_(0),
86  rsd_(0)
87  {
88  }
89 
92  {
93  }
94 
116  template <typename Iterator>
117  void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness = true);
118 
141  template <typename Iterator>
142  void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness = true);
143 
145  double getIntercept() const;
147  double getSlope() const;
149  double getXIntercept() const;
151  double getLower() const;
153  double getUpper() const;
155  double getTValue() const;
157  double getRSquared() const;
159  double getStandDevRes() const;
161  double getMeanRes() const;
163  double getStandErrSlope() const;
165  double getChiSquared() const;
167  double getRSD() const;
168 
169 protected:
170 
172  double intercept_;
174  double slope_;
176  double x_intercept_;
178  double lower_;
180  double upper_;
182  double t_star_;
184  double r_squared_;
192  double chi_squared_;
194  double rsd_;
195 
196 
198  void computeGoodness_(const std::vector<Wm5::Vector2d>& points, double confidence_interval_P);
199 
201  template <typename Iterator>
202  double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept);
203 
205  template <typename Iterator>
206  double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept);
207 
208 private:
209 
213  LinearRegression& operator=(const LinearRegression& arg);
214 
215  }; //class
216 
217 
218  namespace
219  {
220  //given x compute y = slope * x + intercept
221  double computePointY(double x, double slope, double intercept)
222  {
223  return slope * x + intercept;
224  }
225 
226  } //namespace
227 
228  //x, y, w must be of same size
229  template <typename Iterator>
230  double LinearRegression::computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
231  {
232  double chi_squared = 0.0;
233  Iterator xIter = x_begin;
234  Iterator yIter = y_begin;
235  for (; xIter != x_end; ++xIter, ++yIter)
236  {
237  chi_squared += std::pow(*yIter - computePointY(*xIter, slope, intercept), 2);
238  }
239 
240  return chi_squared;
241  }
242 
243  //x, y, w must be of same size
244  template <typename Iterator>
245  double LinearRegression::computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
246  {
247  double chi_squared = 0.0;
248  Iterator xIter = x_begin;
249  Iterator yIter = y_begin;
250  Iterator wIter = w_begin;
251  for (; xIter != x_end; ++xIter, ++yIter, ++wIter)
252  {
253  chi_squared += *wIter * std::pow(*yIter - computePointY(*xIter, slope, intercept), 2);
254  }
255 
256  return chi_squared;
257  }
258 
259  template <typename Iterator>
260  void LinearRegression::computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness)
261  {
262  std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);
263 
264  // Compute the unweighted linear fit.
265  // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
266  // and the value of Chi squared (sum( (y - evel(x))^2)
267  bool pass = Wm5::HeightLineFit2<double>(static_cast<int>(points.size()), &points.front(), slope_, intercept_);
268  chi_squared_ = computeChiSquare(x_begin, x_end, y_begin, slope_, intercept_);
269 
270  if (pass)
271  {
272  if (compute_goodness && points.size() > 2) computeGoodness_(points, confidence_interval_P);
273  }
274  else
275  {
276  throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
277  "UnableToFit-LinearRegression", String("Could not fit a linear model to the data (") + points.size() + " points).");
278  }
279  }
280 
281  template <typename Iterator>
282  void LinearRegression::computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness)
283  {
284  // Compute the weighted linear fit.
285  // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
286  // and the value of Chi squared, the covariances of the intercept and the slope
287  std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);
288  // Compute sums for linear system. copy&paste from GeometricTools Wm5ApprLineFit2.cpp
289  // and modified to allow weights
290  int numPoints = static_cast<int>(points.size());
291  double sumX = 0, sumY = 0;
292  double sumXX = 0, sumXY = 0;
293  double sumW = 0;
294  Iterator wIter = w_begin;
295 
296  for (int i = 0; i < numPoints; ++i, ++wIter)
297  {
298  sumX += (*wIter) * points[i].X();
299  sumY += (*wIter) * points[i].Y();
300  sumXX += (*wIter) * points[i].X() * points[i].X();
301  sumXY += (*wIter) * points[i].X() * points[i].Y();
302  sumW += (*wIter);
303  }
304  //create matrices to solve Ax = B
305  double A[2][2] =
306  {
307  {sumXX, sumX},
308  {sumX, sumW}
309  };
310  double B[2] =
311  {
312  sumXY,
313  sumY
314  };
315  double X[2];
316 
317  bool nonsingular = Wm5::LinearSystem<double>().Solve2(A, B, X);
318  if (nonsingular)
319  {
320  slope_ = X[0];
321  intercept_ = X[1];
322  }
323  chi_squared_ = computeWeightedChiSquare(x_begin, x_end, y_begin, w_begin, slope_, intercept_);
324 
325  if (nonsingular)
326  {
327  if (compute_goodness && points.size() > 2) computeGoodness_(points, confidence_interval_P);
328  }
329  else
330  {
331  throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
332  "UnableToFit-LinearRegression", "Could not fit a linear model to the data");
333  }
334  }
335 
336  } // namespace Math
337 } // namespace OpenMS
338 
339 
OpenMS::Math::LinearRegression::t_star_
double t_star_
The value of the t-statistic.
Definition: LinearRegression.h:182
OpenMS::Math::LinearRegression::chi_squared_
double chi_squared_
The value of the Chi Squared statistic.
Definition: LinearRegression.h:192
OpenMS::Math::LinearRegression::computeRegression
void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness=true)
This function computes the best-fit linear regression coefficients of the model for the dataset .
Definition: LinearRegression.h:260
OpenMS::Math::LinearRegression::x_intercept_
double x_intercept_
The intercept of the fitted line with the x-axis.
Definition: LinearRegression.h:176
Types.h
OpenMS::Math::LinearRegression::computeChiSquare
double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
Compute the chi squared of a linear fit.
Definition: LinearRegression.h:230
OpenMS::Math::LinearRegression::mean_residuals_
double mean_residuals_
Mean of residuals.
Definition: LinearRegression.h:188
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::Math::LinearRegression::stand_error_slope_
double stand_error_slope_
The standard error of the slope.
Definition: LinearRegression.h:190
OpenMS::Math::LinearRegression::computeWeightedChiSquare
double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
Compute the chi squared of a weighted linear fit.
Definition: LinearRegression.h:245
OpenMS::Math::LinearRegression::r_squared_
double r_squared_
The squared correlation coefficient (Pearson)
Definition: LinearRegression.h:184
OpenMS::Math::LinearRegression::stand_dev_residuals_
double stand_dev_residuals_
The standard deviation of the residuals.
Definition: LinearRegression.h:186
OpenMS::Math::LinearRegression::computeGoodness_
void computeGoodness_(const std::vector< Wm5::Vector2d > &points, double confidence_interval_P)
Computes the goodness of the fitted regression line.
OpenMS::Math::LinearRegression::lower_
double lower_
The lower bound of the confidence interval.
Definition: LinearRegression.h:178
RegressionUtils.h
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::Math::LinearRegression
This class offers functions to perform least-squares fits to a straight line model,...
Definition: LinearRegression.h:69
OpenMS::Math::LinearRegression::computeRegressionWeighted
void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness=true)
This function computes the best-fit linear regression coefficients of the model for the weighted da...
Definition: LinearRegression.h:282
Exception.h
OpenMS::Math::LinearRegression::~LinearRegression
virtual ~LinearRegression()
Destructor.
Definition: LinearRegression.h:91
OpenMS::Math::LinearRegression::intercept_
double intercept_
The intercept of the fitted line with the y-axis.
Definition: LinearRegression.h:172
OpenMS::Math::iteratorRange2Wm5Vectors
std::vector< Wm5::Vector2d > iteratorRange2Wm5Vectors(Iterator x_begin, Iterator x_end, Iterator y_begin)
Copies the distance(x_begin,x_end) elements starting at x_begin and y_begin into the Wm5::Vector.
Definition: RegressionUtils.h:44
Iterator
String.h
OpenMS::Math::LinearRegression::LinearRegression
LinearRegression()
Constructor.
Definition: LinearRegression.h:74
OpenMS::Exception::UnableToFit
Exception used if an error occurred while fitting a model to a given dataset.
Definition: Exception.h:676
OpenMS::Math::LinearRegression::slope_
double slope_
The slope of the fitted line.
Definition: LinearRegression.h:174
OpenMS::Math::LinearRegression::rsd_
double rsd_
the relative standard deviation
Definition: LinearRegression.h:194
OpenMS::Math::LinearRegression::upper_
double upper_
The upper bound of the confidence interval.
Definition: LinearRegression.h:180