OpenMS
RANSAC.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: George Rosenberger $
32 // $Authors: George Rosenberger, Hannes Roest, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/config.h>
38 
40 
45 
46 #include <limits> // std::numeric_limits
47 #include <vector> // std::vector
48 #include <sstream> // stringstream
49 
50 namespace OpenMS
51 {
52 
53  namespace Math
54  {
58  struct RANSACParam
59  {
62  : n(0), k(0), t(0), d(0), relative_d(false)
63  {
64  }
66  RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d = false)
67  : n(p_n), k(p_k), t(p_t), d(p_d), relative_d(p_relative_d)
68  {
69  if (relative_d)
70  {
71  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
72  }
73  }
74 
75  [[nodiscard]] std::string toString() const
76  {
77  std::stringstream r;
78  r << "RANSAC param:\n n: " << n << "\n k: " << k << " iterations\n t: " << t << " threshold\n d: " << d << " inliers\n\n";
79  return r.str();
80  }
81 
82  size_t n;
83  size_t k;
84  double t;
85  size_t d;
86  bool relative_d;
87  };
88 
94  template<typename TModelType = RansacModelLinear>
95  class RANSAC
96  {
97 public:
98 
99  explicit RANSAC(uint64_t seed = time(nullptr)):
100  shuffler_(seed)
101  {}
102 
103  ~RANSAC() = default;
104 
105 
107  void setSeed(uint64_t seed)
108  {
109  shuffler_.seed(seed);
110  }
111 
113  std::vector<std::pair<double, double> > ransac(
114  const std::vector<std::pair<double, double> >& pairs,
115  const RANSACParam& p)
116  {
117  return ransac(pairs, p.n, p.k, p.t, p.d, p.relative_d);
118  }
119 
150  std::vector<std::pair<double, double> > ransac(
151  const std::vector<std::pair<double, double> >& pairs,
152  size_t n,
153  size_t k,
154  double t,
155  size_t d,
156  bool relative_d = false)
157  {
158  // translate relative percentages into actual numbers
159  if (relative_d)
160  {
161  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
162  d = pairs.size() * d / 100;
163  }
164 
165  // implementation of the RANSAC algorithm according to http://wiki.scipy.org/Cookbook/RANSAC.
166 
167  if (pairs.size() <= n)
168  {
169  throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
170  String("RANSAC: Number of total data points (") + String(pairs.size()) + ") must be larger than number of initial points (n=" + String(n) + ").");
171  }
172 
173  TModelType model;
174 
175  std::vector< std::pair<double, double> > alsoinliers, betterdata, bestdata;
176  std::vector<std::pair<double, double> > pairs_shuffled = pairs; // mutable data. will be shuffled in every iteration
177  double besterror = std::numeric_limits<double>::max();
178  typename TModelType::ModelParameters coeff;
179  #ifdef DEBUG_RANSAC
180  std::pair<double, double > bestcoeff;
181  double betterrsq = 0;
182  double bestrsq = 0;
183  #endif
184 
185  for (size_t ransac_int=0; ransac_int<k; ransac_int++)
186  {
187  // check if the model already includes all points
188  if (bestdata.size() == pairs.size()) break;
189 
190  // use portable RNG in test mode
191  shuffler_.portable_random_shuffle(pairs_shuffled.begin(), pairs_shuffled.end());
192 
193  // test 'maybeinliers'
194  try
195  { // fitting might throw UnableToFit if points are 'unfortunate'
196  coeff = model.rm_fit(pairs_shuffled.begin(), pairs_shuffled.begin()+n);
197  }
198  catch (...)
199  {
200  continue;
201  }
202  // apply model to remaining data; pick inliers
203  alsoinliers = model.rm_inliers(pairs_shuffled.begin()+n, pairs_shuffled.end(), coeff, t);
204  // ... and add data
205  if (alsoinliers.size() > d
206  || alsoinliers.size() >= (pairs_shuffled.size()-n)) // maximum number of inliers we can possibly have (i.e. remaining data)
207  {
208  betterdata.clear();
209  std::copy( pairs_shuffled.begin(), pairs_shuffled.begin()+n, back_inserter(betterdata) );
210  betterdata.insert( betterdata.end(), alsoinliers.begin(), alsoinliers.end() );
211  typename TModelType::ModelParameters bettercoeff = model.rm_fit(betterdata.begin(), betterdata.end());
212  double bettererror = model.rm_rss(betterdata.begin(), betterdata.end(), bettercoeff);
213  #ifdef DEBUG_RANSAC
214  betterrsq = model.rm_rsq(betterdata);
215  #endif
216 
217  // If the current model explains more points, we assume its better (these points pass the error threshold 't', so they should be ok);
218  // If the number of points is equal, we trust rss.
219  // E.g. imagine gaining a zillion more points (which pass the threshold!) -- then rss will automatically be worse, no matter how good
220  // these points fit, since its a simple absolute SUM() of residual error over all points.
221  if (betterdata.size() > bestdata.size() || (betterdata.size() == bestdata.size() && (bettererror < besterror)))
222  {
223  besterror = bettererror;
224  bestdata = betterdata;
225  #ifdef DEBUG_RANSAC
226  bestcoeff = bettercoeff;
227  bestrsq = betterrsq;
228  std::cout << "RANSAC " << ransac_int << ": Points: " << betterdata.size() << " RSQ: " << bestrsq << " Error: " << besterror << " c0: " << bestcoeff.first << " c1: " << bestcoeff.second << std::endl;
229  #endif
230  }
231  }
232  }
233 
234  #ifdef DEBUG_RANSAC
235  std::cout << "=======STARTPOINTS=======" << std::endl;
236  for (std::vector<std::pair<double, double> >::iterator it = bestdata.begin(); it != bestdata.end(); ++it)
237  {
238  std::cout << it->first << "\t" << it->second << std::endl;
239  }
240  std::cout << "=======ENDPOINTS=======" << std::endl;
241  #endif
242 
243  return(bestdata);
244  } // ransac()
245 
246  private:
248  }; // class
249 
250  } // namespace Math
251 
252 
253 } // namespace OpenMS
Precondition failed exception.
Definition: Exception.h:159
This class provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:96
void setSeed(uint64_t seed)
set seed for random shuffle
Definition: RANSAC.h:107
Math::RandomShuffler shuffler_
Definition: RANSAC.h:247
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, size_t n, size_t k, double t, size_t d, bool relative_d=false)
This function provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:150
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, const RANSACParam &p)
alias for ransac() with full params
Definition: RANSAC.h:113
RANSAC(uint64_t seed=time(nullptr))
Definition: RANSAC.h:99
Definition: MathFunctions.h:408
void seed(uint64_t val)
Definition: MathFunctions.h:432
void portable_random_shuffle(RandomAccessIterator first, RandomAccessIterator last)
Definition: MathFunctions.h:423
A more convenient string class.
Definition: String.h:60
const double k
Definition: Constants.h:158
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
A simple struct to carry all the parameters required for a RANSAC run.
Definition: RANSAC.h:59
std::string toString() const
Definition: RANSAC.h:75
size_t d
The number of close data values (according to 't') required to assert that a model fits well to data.
Definition: RANSAC.h:85
size_t n
data points: The minimum number of data points required to fit the model
Definition: RANSAC.h:82
double t
Threshold value: for determining when a data point fits a model. Corresponds to the maximal squared d...
Definition: RANSAC.h:84
size_t k
iterations: The maximum number of iterations allowed in the algorithm
Definition: RANSAC.h:83
RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d=false)
Full constructor.
Definition: RANSAC.h:66
bool relative_d
Should 'd' be interpreted as percentages (0-100) of data input size.
Definition: RANSAC.h:86
RANSACParam()
Default constructor.
Definition: RANSAC.h:61