Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
LinearRegression.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_MATH_STATISTICS_LINEARREGRESSION_H
36 #define OPENMS_MATH_STATISTICS_LINEARREGRESSION_H
37 
38 #include <OpenMS/CONCEPT/Types.h>
42 
43 #include "Wm5Vector2.h"
44 #include "Wm5ApprLineFit2.h"
45 #include "Wm5LinearSystem.h"
46 
47 #include <cmath>
48 #include <vector>
49 
50 namespace OpenMS
51 {
52  namespace Math
53  {
70  class OPENMS_DLLAPI LinearRegression
71  {
72 public:
73 
76  intercept_(0),
77  slope_(0),
78  x_intercept_(0),
79  lower_(0),
80  upper_(0),
81  t_star_(0),
82  r_squared_(0),
83  stand_dev_residuals_(0),
84  mean_residuals_(0),
85  stand_error_slope_(0),
86  chi_squared_(0),
87  rsd_(0)
88  {
89  }
90 
93  {
94  }
95 
117  template <typename Iterator>
118  void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness = true);
119 
142  template <typename Iterator>
143  void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness = true);
144 
146  double getIntercept() const;
148  double getSlope() const;
150  double getXIntercept() const;
152  double getLower() const;
154  double getUpper() const;
156  double getTValue() const;
158  double getRSquared() const;
160  double getStandDevRes() const;
162  double getMeanRes() const;
164  double getStandErrSlope() const;
166  double getChiSquared() const;
168  double getRSD() const;
169 
170 protected:
171 
173  double intercept_;
175  double slope_;
177  double x_intercept_;
179  double lower_;
181  double upper_;
183  double t_star_;
185  double r_squared_;
193  double chi_squared_;
195  double rsd_;
196 
197 
199  void computeGoodness_(const std::vector<Wm5::Vector2d>& points, double confidence_interval_P);
200 
202  template <typename Iterator>
203  double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept);
204 
206  template <typename Iterator>
207  double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept);
208 
209 private:
210 
214  LinearRegression& operator=(const LinearRegression& arg);
215 
216  }; //class
217 
218 
219  namespace
220  {
221  //given x compute y = slope * x + intercept
222  double computePointY(double x, double slope, double intercept)
223  {
224  return slope * x + intercept;
225  }
226 
227  } //namespace
228 
229  //x, y, w must be of same size
230  template <typename Iterator>
231  double LinearRegression::computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
232  {
233  double chi_squared = 0.0;
234  Iterator xIter = x_begin;
235  Iterator yIter = y_begin;
236  for (; xIter != x_end; ++xIter, ++yIter)
237  {
238  chi_squared += std::pow(*yIter - computePointY(*xIter, slope, intercept), 2);
239  }
240 
241  return chi_squared;
242  }
243 
244  //x, y, w must be of same size
245  template <typename Iterator>
246  double LinearRegression::computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
247  {
248  double chi_squared = 0.0;
249  Iterator xIter = x_begin;
250  Iterator yIter = y_begin;
251  Iterator wIter = w_begin;
252  for (; xIter != x_end; ++xIter, ++yIter, ++wIter)
253  {
254  chi_squared += *wIter * std::pow(*yIter - computePointY(*xIter, slope, intercept), 2);
255  }
256 
257  return chi_squared;
258  }
259 
260  template <typename Iterator>
261  void LinearRegression::computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness)
262  {
263  std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);
264 
265  // Compute the unweighted linear fit.
266  // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
267  // and the value of Chi squared (sum( (y - evel(x))^2)
268  bool pass = Wm5::HeightLineFit2<double>(static_cast<int>(points.size()), &points.front(), slope_, intercept_);
269  chi_squared_ = computeChiSquare(x_begin, x_end, y_begin, slope_, intercept_);
270 
271  if (pass)
272  {
273  if (compute_goodness && points.size() > 2) computeGoodness_(points, confidence_interval_P);
274  }
275  else
276  {
277  throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
278  "UnableToFit-LinearRegression", String("Could not fit a linear model to the data (") + points.size() + " points).");
279  }
280  }
281 
282  template <typename Iterator>
283  void LinearRegression::computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness)
284  {
285  // Compute the weighted linear fit.
286  // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
287  // and the value of Chi squared, the covariances of the intercept and the slope
288  std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);
289  // Compute sums for linear system. copy&paste from GeometricTools Wm5ApprLineFit2.cpp
290  // and modified to allow weights
291  int numPoints = static_cast<int>(points.size());
292  double sumX = 0, sumY = 0;
293  double sumXX = 0, sumXY = 0;
294  double sumW = 0;
295  Iterator wIter = w_begin;
296 
297  for (int i = 0; i < numPoints; ++i, ++wIter)
298  {
299  sumX += (*wIter) * points[i].X();
300  sumY += (*wIter) * points[i].Y();
301  sumXX += (*wIter) * points[i].X() * points[i].X();
302  sumXY += (*wIter) * points[i].X() * points[i].Y();
303  sumW += (*wIter);
304  }
305  //create matrices to solve Ax = B
306  double A[2][2] =
307  {
308  {sumXX, sumX},
309  {sumX, sumW}
310  };
311  double B[2] =
312  {
313  sumXY,
314  sumY
315  };
316  double X[2];
317 
318  bool nonsingular = Wm5::LinearSystem<double>().Solve2(A, B, X);
319  if (nonsingular)
320  {
321  slope_ = X[0];
322  intercept_ = X[1];
323  }
324  chi_squared_ = computeWeightedChiSquare(x_begin, x_end, y_begin, w_begin, slope_, intercept_);
325 
326  if (nonsingular)
327  {
328  if (compute_goodness && points.size() > 2) computeGoodness_(points, confidence_interval_P);
329  }
330  else
331  {
332  throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
333  "UnableToFit-LinearRegression", "Could not fit a linear model to the data");
334  }
335  }
336 
337  } // namespace Math
338 } // namespace OpenMS
339 
340 
341 #endif
A more convenient string class.
Definition: String.h:57
virtual ~LinearRegression()
Destructor.
Definition: LinearRegression.h:92
std::vector< Wm5::Vector2d > iteratorRange2Wm5Vectors(Iterator x_begin, Iterator x_end, Iterator y_begin)
Copies the distance(x_begin,x_end) elements starting at x_begin and y_begin into the Wm5::Vector...
Definition: RegressionUtils.h:45
LinearRegression()
Constructor.
Definition: LinearRegression.h:75
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
double rsd_
the relative standard deviation
Definition: LinearRegression.h:195
double upper_
The upper bound of the confidence interval.
Definition: LinearRegression.h:181
double chi_squared_
The value of the Chi Squared statistic.
Definition: LinearRegression.h:193
double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
Compute the chi squared of a linear fit.
Definition: LinearRegression.h:231
This class offers functions to perform least-squares fits to a straight line model, .
Definition: LinearRegression.h:70
double x_intercept_
The intercept of the fitted line with the x-axis.
Definition: LinearRegression.h:177
double t_star_
The value of the t-statistic.
Definition: LinearRegression.h:183
double stand_error_slope_
The standard error of the slope.
Definition: LinearRegression.h:191
double mean_residuals_
Mean of residuals.
Definition: LinearRegression.h:189
double slope_
The slope of the fitted line.
Definition: LinearRegression.h:175
void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness=true)
This function computes the best-fit linear regression coefficients of the model for the dataset ...
Definition: LinearRegression.h:261
Exception used if an error occurred while fitting a model to a given dataset.
Definition: Exception.h:677
double stand_dev_residuals_
The standard deviation of the residuals.
Definition: LinearRegression.h:187
double lower_
The lower bound of the confidence interval.
Definition: LinearRegression.h:179
double r_squared_
The squared correlation coefficient (Pearson)
Definition: LinearRegression.h:185
double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
Compute the chi squared of a weighted linear fit.
Definition: LinearRegression.h:246
double intercept_
The intercept of the fitted line with the y-axis.
Definition: LinearRegression.h:173
void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness=true)
This function computes the best-fit linear regression coefficients of the model for the weighted da...
Definition: LinearRegression.h:283

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:01 using doxygen 1.8.13