Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
RANSAC.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: George Rosenberger $
32 // $Authors: George Rosenberger, Hannes Roest, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_MATH_MISC_RANSAC_H
36 #define OPENMS_MATH_MISC_RANSAC_H
37 
38 #include <OpenMS/config.h>
39 
41 
45 
46 #include <algorithm> // std::random_shuffle
47 #include <limits> // std::numeric_limits
48 #include <vector> // std::vector
49 #include <sstream> // stringstream
50 
51 namespace OpenMS
52 {
53 
54  namespace Math
55  {
59  struct RANSACParam
60  {
63  : n(0), k(0), t(0), d(0), relative_d(false), rng(NULL)
64  {
65  }
67  RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d = false, int (*p_rng)(int) = NULL)
68  : n(p_n), k(p_k), t(p_t), d(p_d), relative_d(p_relative_d), rng(p_rng)
69  {
70  if (relative_d)
71  {
72  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
73  }
74  }
75 
76  std::string toString() const
77  {
78  std::stringstream r;
79  r << "RANSAC param:\n n: " << n << "\n k: " << k << " iterations\n t: " << t << " threshold\n d: " << d << " inliers\n\n";
80  return r.str();
81  }
82 
83  size_t n; //< data points: The minimum number of data points required to fit the model
84  size_t k; //< iterations: The maximum number of iterations allowed in the algorithm
85  double t; //< Threshold value: for determining when a data point fits a model. Corresponds to the maximal squared deviation in units of the _second_ dimension (dim2).
86  size_t d; //< The number of close data values (according to 't') required to assert that a model fits well to data
87  bool relative_d; //< Should 'd' be interpreted as percentages (0-100) of data input size.
88  int (*rng)(int); //< Optional RNG function (useful for testing with fixed seeds)
89  };
90 
96  template<typename TModelType = RansacModelLinear>
97  class RANSAC
98  {
99 public:
100 
102  static std::vector<std::pair<double, double> > ransac(
103  const std::vector<std::pair<double, double> >& pairs,
104  const RANSACParam& p)
105  {
106  return ransac(pairs, p.n, p.k, p.t, p.d, p.relative_d, p.rng);
107  }
108 
140  static std::vector<std::pair<double, double> > ransac(
141  const std::vector<std::pair<double, double> >& pairs,
142  size_t n,
143  size_t k,
144  double t,
145  size_t d,
146  bool relative_d = false,
147  int (*rng)(int) = NULL)
148  {
149  // translate relative percentages into actual numbers
150  if (relative_d)
151  {
152  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
153  d = pairs.size() * d / 100;
154  }
155 
156  // implementation of the RANSAC algorithm according to http://wiki.scipy.org/Cookbook/RANSAC.
157 
158  if (pairs.size() <= n)
159  {
160  throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
161  String("RANSAC: Number of total data points (") + String(pairs.size()) + ") must be larger than number of initial points (n=" + String(n) + ").");
162  }
163 
164  TModelType model;
165 
166  std::vector< std::pair<double, double> > alsoinliers, betterdata, bestdata;
167  std::vector<std::pair<double, double> > pairs_shuffled = pairs; // mutable data. will be shuffled in every iteration
168  double besterror = std::numeric_limits<double>::max();
169  typename TModelType::ModelParameters coeff;
170  #ifdef DEBUG_RANSAC
171  std::pair<double, double > bestcoeff;
172  double betterrsq = 0;
173  double bestrsq = 0;
174  #endif
175 
176  for (size_t ransac_int=0; ransac_int<k; ransac_int++)
177  {
178  // check if the model already includes all points
179  if (bestdata.size() == pairs.size()) break;
180 
181  if (rng != NULL)
182  { // use portable RNG in test mode
183  std::random_shuffle(pairs_shuffled.begin(), pairs_shuffled.end(), rng);
184  } else {
185  std::random_shuffle(pairs_shuffled.begin(), pairs_shuffled.end());
186  }
187 
188  // test 'maybeinliers'
189  try
190  { // fitting might throw UnableToFit if points are 'unfortunate'
191  coeff = model.rm_fit(pairs_shuffled.begin(), pairs_shuffled.begin()+n);
192  }
193  catch (...)
194  {
195  continue;
196  }
197  // apply model to remaining data; pick inliers
198  alsoinliers = model.rm_inliers(pairs_shuffled.begin()+n, pairs_shuffled.end(), coeff, t);
199  // ... and add data
200  if (alsoinliers.size() > d
201  || alsoinliers.size() >= (pairs_shuffled.size()-n)) // maximum number of inliers we can possibly have (i.e. remaining data)
202  {
203  betterdata.clear();
204  std::copy( pairs_shuffled.begin(), pairs_shuffled.begin()+n, back_inserter(betterdata) );
205  betterdata.insert( betterdata.end(), alsoinliers.begin(), alsoinliers.end() );
206  typename TModelType::ModelParameters bettercoeff = model.rm_fit(betterdata.begin(), betterdata.end());
207  double bettererror = model.rm_rss(betterdata.begin(), betterdata.end(), bettercoeff);
208  #ifdef DEBUG_RANSAC
209  betterrsq = model.rm_rsq(betterdata);
210  #endif
211 
212  // If the current model explains more points, we assume its better (these points pass the error threshold 't', so they should be ok);
213  // If the number of points is equal, we trust rss.
214  // E.g. imagine gaining a zillion more points (which pass the threshold!) -- then rss will automatically be worse, no matter how good
215  // these points fit, since its a simple absolute SUM() of residual error over all points.
216  if (betterdata.size() > bestdata.size() || (betterdata.size() == bestdata.size() && (bettererror < besterror)))
217  {
218  besterror = bettererror;
219  bestdata = betterdata;
220  #ifdef DEBUG_RANSAC
221  bestcoeff = bettercoeff;
222  bestrsq = betterrsq;
223  std::cout << "RANSAC " << ransac_int << ": Points: " << betterdata.size() << " RSQ: " << bestrsq << " Error: " << besterror << " c0: " << bestcoeff.first << " c1: " << bestcoeff.second << std::endl;
224  #endif
225  }
226  }
227  }
228 
229  #ifdef DEBUG_RANSAC
230  std::cout << "=======STARTPOINTS=======" << std::endl;
231  for (std::vector<std::pair<double, double> >::iterator it = bestdata.begin(); it != bestdata.end(); ++it)
232  {
233  std::cout << it->first << "\t" << it->second << std::endl;
234  }
235  std::cout << "=======ENDPOINTS=======" << std::endl;
236  #endif
237 
238  return(bestdata);
239  } // ransac()
240 
241  }; // class
242 
243  } // namespace Math
244 
245 
246 } // namespace OpenMS
247 #endif // OPENMS_MATH_MISC_RANSAC_H
A more convenient string class.
Definition: String.h:57
bool relative_d
Definition: RANSAC.h:87
static std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, const RANSACParam &p)
alias for ransac() with full params
Definition: RANSAC.h:102
static std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, size_t n, size_t k, double t, size_t d, bool relative_d=false, int(*rng)(int)=NULL)
This function provides a generic implementation of the RANSAC outlier detection algorithm. Is implemented and tested after the SciPy reference: http://wiki.scipy.org/Cookbook/RANSAC.
Definition: RANSAC.h:140
size_t d
Definition: RANSAC.h:86
size_t k
Definition: RANSAC.h:84
A simple struct to carry all the parameters required for a RANSAC run.
Definition: RANSAC.h:59
std::string toString() const
Definition: RANSAC.h:76
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Precondition failed exception.
Definition: Exception.h:167
int(* rng)(int)
Definition: RANSAC.h:88
RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d=false, int(*p_rng)(int)=NULL)
Full constructor.
Definition: RANSAC.h:67
RANSACParam()
Default constructor.
Definition: RANSAC.h:62
double t
Definition: RANSAC.h:85
size_t n
Definition: RANSAC.h:83
This class provides a generic implementation of the RANSAC outlier detection algorithm. Is implemented and tested after the SciPy reference: http://wiki.scipy.org/Cookbook/RANSAC.
Definition: RANSAC.h:97

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:03 using doxygen 1.8.13