OpenMS  2.5.0
RANSAC.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: George Rosenberger $
32 // $Authors: George Rosenberger, Hannes Roest, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/config.h>
38 
40 
44 
45 #include <algorithm> // std::random_shuffle
46 #include <limits> // std::numeric_limits
47 #include <vector> // std::vector
48 #include <sstream> // stringstream
49 
50 namespace OpenMS
51 {
52 
53  namespace Math
54  {
58  struct RANSACParam
59  {
62  : n(0), k(0), t(0), d(0), relative_d(false), rng(nullptr)
63  {
64  }
66  RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d = false, int (*p_rng)(int) = nullptr)
67  : n(p_n), k(p_k), t(p_t), d(p_d), relative_d(p_relative_d), rng(p_rng)
68  {
69  if (relative_d)
70  {
71  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
72  }
73  }
74 
75  std::string toString() const
76  {
77  std::stringstream r;
78  r << "RANSAC param:\n n: " << n << "\n k: " << k << " iterations\n t: " << t << " threshold\n d: " << d << " inliers\n\n";
79  return r.str();
80  }
81 
82  size_t n;
83  size_t k;
84  double t;
85  size_t d;
86  bool relative_d;
87  int (*rng)(int);
88  };
89 
95  template<typename TModelType = RansacModelLinear>
96  class RANSAC
97  {
98 public:
99 
101  static std::vector<std::pair<double, double> > ransac(
102  const std::vector<std::pair<double, double> >& pairs,
103  const RANSACParam& p)
104  {
105  return ransac(pairs, p.n, p.k, p.t, p.d, p.relative_d, p.rng);
106  }
107 
139  static std::vector<std::pair<double, double> > ransac(
140  const std::vector<std::pair<double, double> >& pairs,
141  size_t n,
142  size_t k,
143  double t,
144  size_t d,
145  bool relative_d = false,
146  int (*rng)(int) = nullptr)
147  {
148  // translate relative percentages into actual numbers
149  if (relative_d)
150  {
151  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
152  d = pairs.size() * d / 100;
153  }
154 
155  // implementation of the RANSAC algorithm according to http://wiki.scipy.org/Cookbook/RANSAC.
156 
157  if (pairs.size() <= n)
158  {
159  throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
160  String("RANSAC: Number of total data points (") + String(pairs.size()) + ") must be larger than number of initial points (n=" + String(n) + ").");
161  }
162 
163  TModelType model;
164 
165  std::vector< std::pair<double, double> > alsoinliers, betterdata, bestdata;
166  std::vector<std::pair<double, double> > pairs_shuffled = pairs; // mutable data. will be shuffled in every iteration
167  double besterror = std::numeric_limits<double>::max();
168  typename TModelType::ModelParameters coeff;
169  #ifdef DEBUG_RANSAC
170  std::pair<double, double > bestcoeff;
171  double betterrsq = 0;
172  double bestrsq = 0;
173  #endif
174 
175  for (size_t ransac_int=0; ransac_int<k; ransac_int++)
176  {
177  // check if the model already includes all points
178  if (bestdata.size() == pairs.size()) break;
179 
180  if (rng != nullptr)
181  { // use portable RNG in test mode
182  std::random_shuffle(pairs_shuffled.begin(), pairs_shuffled.end(), rng);
183  } else {
184  std::random_shuffle(pairs_shuffled.begin(), pairs_shuffled.end());
185  }
186 
187  // test 'maybeinliers'
188  try
189  { // fitting might throw UnableToFit if points are 'unfortunate'
190  coeff = model.rm_fit(pairs_shuffled.begin(), pairs_shuffled.begin()+n);
191  }
192  catch (...)
193  {
194  continue;
195  }
196  // apply model to remaining data; pick inliers
197  alsoinliers = model.rm_inliers(pairs_shuffled.begin()+n, pairs_shuffled.end(), coeff, t);
198  // ... and add data
199  if (alsoinliers.size() > d
200  || alsoinliers.size() >= (pairs_shuffled.size()-n)) // maximum number of inliers we can possibly have (i.e. remaining data)
201  {
202  betterdata.clear();
203  std::copy( pairs_shuffled.begin(), pairs_shuffled.begin()+n, back_inserter(betterdata) );
204  betterdata.insert( betterdata.end(), alsoinliers.begin(), alsoinliers.end() );
205  typename TModelType::ModelParameters bettercoeff = model.rm_fit(betterdata.begin(), betterdata.end());
206  double bettererror = model.rm_rss(betterdata.begin(), betterdata.end(), bettercoeff);
207  #ifdef DEBUG_RANSAC
208  betterrsq = model.rm_rsq(betterdata);
209  #endif
210 
211  // If the current model explains more points, we assume its better (these points pass the error threshold 't', so they should be ok);
212  // If the number of points is equal, we trust rss.
213  // E.g. imagine gaining a zillion more points (which pass the threshold!) -- then rss will automatically be worse, no matter how good
214  // these points fit, since its a simple absolute SUM() of residual error over all points.
215  if (betterdata.size() > bestdata.size() || (betterdata.size() == bestdata.size() && (bettererror < besterror)))
216  {
217  besterror = bettererror;
218  bestdata = betterdata;
219  #ifdef DEBUG_RANSAC
220  bestcoeff = bettercoeff;
221  bestrsq = betterrsq;
222  std::cout << "RANSAC " << ransac_int << ": Points: " << betterdata.size() << " RSQ: " << bestrsq << " Error: " << besterror << " c0: " << bestcoeff.first << " c1: " << bestcoeff.second << std::endl;
223  #endif
224  }
225  }
226  }
227 
228  #ifdef DEBUG_RANSAC
229  std::cout << "=======STARTPOINTS=======" << std::endl;
230  for (std::vector<std::pair<double, double> >::iterator it = bestdata.begin(); it != bestdata.end(); ++it)
231  {
232  std::cout << it->first << "\t" << it->second << std::endl;
233  }
234  std::cout << "=======ENDPOINTS=======" << std::endl;
235  #endif
236 
237  return(bestdata);
238  } // ransac()
239 
240  }; // class
241 
242  } // namespace Math
243 
244 
245 } // namespace OpenMS
OpenMS::Math::RANSAC
This class provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:96
RANSACModelLinear.h
OpenMS::Constants::k
const double k
OpenMS::Math::RANSACParam
A simple struct to carry all the parameters required for a RANSAC run.
Definition: RANSAC.h:58
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::Exception::Precondition
Precondition failed exception.
Definition: Exception.h:166
OpenMS::Math::RANSAC::ransac
static std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, const RANSACParam &p)
alias for ransac() with full params
Definition: RANSAC.h:101
OpenMS::Math::RANSACParam::k
size_t k
iterations: The maximum number of iterations allowed in the algorithm
Definition: RANSAC.h:83
OpenMS::Math::RANSACParam::relative_d
bool relative_d
Should 'd' be interpreted as percentages (0-100) of data input size.
Definition: RANSAC.h:86
OpenMS::Math::RANSACParam::rng
int(* rng)(int)
Optional RNG function (useful for testing with fixed seeds)
Definition: RANSAC.h:87
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::Math::RANSACParam::toString
std::string toString() const
Definition: RANSAC.h:75
Exception.h
OpenMS::Math::RANSACParam::n
size_t n
data points: The minimum number of data points required to fit the model
Definition: RANSAC.h:82
int
OpenMS::Math::RANSACParam::RANSACParam
RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d=false, int(*p_rng)(int)=nullptr)
Full constructor.
Definition: RANSAC.h:66
OpenMS::Math::RANSAC::ransac
static std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, size_t n, size_t k, double t, size_t d, bool relative_d=false, int(*rng)(int)=nullptr)
This function provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:139
String.h
RANSACModel.h
OpenMS::Math::RANSACParam::RANSACParam
RANSACParam()
Default constructor.
Definition: RANSAC.h:61
OpenMS::Math::RANSACParam::d
size_t d
The number of close data values (according to 't') required to assert that a model fits well to data.
Definition: RANSAC.h:85
OpenMS::Math::RANSACParam::t
double t
Threshold value: for determining when a data point fits a model. Corresponds to the maximal squared d...
Definition: RANSAC.h:84