OpenMS  2.8.0
GridBasedClustering.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Lars Nilse $
32 // $Authors: Lars Nilse, Johannes Veit $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 
44 #include <cmath>
45 #include <limits>
46 #include <map>
47 #include <set>
48 #include <queue>
49 #include <vector>
50 #include <algorithm>
51 #include <iostream>
52 #include <unordered_map>
53 
54 namespace OpenMS
55 {
56 
60  class OPENMS_DLLAPI MinimumDistance
61  {
62 public:
63 
67  MinimumDistance(const int& cluster_index, const int& nearest_neighbour_index, const double& distance);
68 
72  int getClusterIndex() const;
73 
78 
83  bool operator<(const MinimumDistance& other) const;
84  bool operator>(const MinimumDistance& other) const;
85  bool operator==(const MinimumDistance& other) const;
86 
87 private:
88 
91 
96 
101 
105  double distance_;
106 
107  };
108 
124  template <typename Metric>
126  public ProgressLogger
127  {
128 public:
132  typedef GridBasedCluster::Point Point; // DPosition<2>
133  typedef GridBasedCluster::Rectangle Rectangle; // DBoundingBox<2>
134  typedef ClusteringGrid::CellIndex CellIndex; // std::pair<int,int>
135  typedef std::multiset<MinimumDistance>::const_iterator MultisetIterator;
136  typedef std::unordered_multimap<int, MultisetIterator>::const_iterator NNIterator;
137 
148  GridBasedClustering(Metric metric, const std::vector<double>& data_x,
149  const std::vector<double>& data_y, const std::vector<int>& properties_A,
150  const std::vector<int>& properties_B, std::vector<double> grid_spacing_x,
151  std::vector<double> grid_spacing_y) :
152  metric_(metric),
153  grid_(grid_spacing_x, grid_spacing_y)
154  {
155  init_(data_x, data_y, properties_A, properties_B);
156  }
157 
166  GridBasedClustering(Metric metric, const std::vector<double>& data_x,
167  const std::vector<double>& data_y, std::vector<double> grid_spacing_x,
168  std::vector<double> grid_spacing_y) :
169  metric_(metric),
170  grid_(grid_spacing_x, grid_spacing_y)
171  {
172  // set properties A and B to -1, i.e. ignore properties when clustering
173  std::vector<int> properties_A(data_x.size(), -1);
174  std::vector<int> properties_B(data_x.size(), -1);
175  init_(data_x, data_y, properties_A, properties_B);
176  }
177 
182  void cluster()
183  {
184  // progress logger
185  // NOTE: for some reason, gcc7 chokes if we remove the OpenMS::String
186  // below, so lets just not change it.
187  Size clusters_start = clusters_.size();
188  startProgress(0, clusters_start, OpenMS::String("clustering"));
189 
190  MinimumDistance zero_distance(-1, -1, 0);
191 
192  // combine clusters until all have been moved to the final list
193  while (!clusters_.empty())
194  {
195  setProgress(clusters_start - clusters_.size());
196 
197  MultisetIterator smallest_distance_it = distances_.lower_bound(zero_distance);
198 
199  int cluster_index1 = smallest_distance_it->getClusterIndex();
200  int cluster_index2 = smallest_distance_it->getNearestNeighbourIndex();
201 
202  eraseMinDistance_(smallest_distance_it);
203 
204  // update cluster list
205  std::map<int, GridBasedCluster>::iterator cluster1_it = clusters_.find(cluster_index1);
206  std::map<int, GridBasedCluster>::iterator cluster2_it = clusters_.find(cluster_index2);
207  const GridBasedCluster& cluster1 = cluster1_it->second;
208  const GridBasedCluster& cluster2 = cluster2_it->second;
209  const std::vector<int>& points1 = cluster1.getPoints();
210  const std::vector<int>& points2 = cluster2.getPoints();
211  std::vector<int> new_points;
212  new_points.reserve(points1.size() + points2.size());
213  new_points.insert(new_points.end(), points1.begin(), points1.end());
214  new_points.insert(new_points.end(), points2.begin(), points2.end());
215 
216  double new_x = (cluster1.getCentre().getX() * points1.size() + cluster2.getCentre().getX() * points2.size()) / (points1.size() + points2.size());
217  double new_y = (cluster1.getCentre().getY() * points1.size() + cluster2.getCentre().getY() * points2.size()) / (points1.size() + points2.size());
218 
219  // update grid
220  CellIndex cell_for_cluster1 = grid_.getIndex(cluster1.getCentre());
221  CellIndex cell_for_cluster2 = grid_.getIndex(cluster2.getCentre());
222  CellIndex cell_for_new_cluster = grid_.getIndex(DPosition<2>(new_x, new_y));
223  grid_.removeCluster(cell_for_cluster1, cluster_index1);
224  grid_.removeCluster(cell_for_cluster2, cluster_index2);
225  grid_.addCluster(cell_for_new_cluster, cluster_index1);
226 
227  // merge clusters
228  const Rectangle& box1 = cluster1.getBoundingBox();
229  const Rectangle& box2 = cluster2.getBoundingBox();
230  Rectangle new_box(box1);
231  new_box.enlarge(box2.minPosition());
232  new_box.enlarge(box2.maxPosition());
233 
234  // Properties A of both clusters should by now be the same. The merge veto has been checked
235  // when a new entry to the minimum distance list was added, @see findNearestNeighbour_.
236  if (cluster1.getPropertyA() != cluster2.getPropertyA())
237  {
238  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Property A of both clusters not the same. ", "A");
239  }
240  int new_A = cluster1.getPropertyA();
241 
242  const std::vector<int>& B1 = cluster1.getPropertiesB();
243  const std::vector<int>& B2 = cluster2.getPropertiesB();
244  std::vector<int> new_B;
245  new_B.reserve(B1.size() + B2.size());
246  new_B.insert(new_B.end(), B1.begin(), B1.end());
247  new_B.insert(new_B.end(), B2.begin(), B2.end());
248 
249  GridBasedCluster new_cluster(DPosition<2>(new_x, new_y), new_box, new_points, new_A, new_B);
250 
251  clusters_.erase(cluster1_it);
252  clusters_.erase(cluster2_it);
253  clusters_.insert(std::make_pair(cluster_index1, new_cluster));
254 
255  std::set<int> clusters_to_be_updated;
256  clusters_to_be_updated.insert(cluster_index1);
257 
258  // erase distance object of cluster with cluster_index2 without updating (does not exist anymore!)
259  // (the one with cluster_index1 has already been erased at the top of the while loop)
261 
262  // find out which clusters need to be updated
263  std::pair<NNIterator, NNIterator> nn_range = reverse_nns_.equal_range(cluster_index1);
264  for (NNIterator nn_it = nn_range.first; nn_it != nn_range.second;)
265  {
266  clusters_to_be_updated.insert(nn_it->second->getClusterIndex());
267  eraseMinDistance_((nn_it++)->second);
268  }
269  nn_range = reverse_nns_.equal_range(cluster_index2);
270  for (NNIterator nn_it = nn_range.first; nn_it != nn_range.second;)
271  {
272  clusters_to_be_updated.insert(nn_it->second->getClusterIndex());
273  eraseMinDistance_((nn_it++)->second);
274  }
275 
276  // update clusters
277  for (std::set<int>::const_iterator cluster_index = clusters_to_be_updated.begin(); cluster_index != clusters_to_be_updated.end(); ++cluster_index)
278  {
279  std::map<int, GridBasedCluster>::iterator c_it = clusters_.find(*cluster_index);
280  const GridBasedCluster& c = c_it->second;
281  if (findNearestNeighbour_(c, *cluster_index))
282  {
283  grid_.removeCluster(grid_.getIndex(c.getCentre()), *cluster_index); // remove from grid
284  clusters_.erase(c_it); // remove from cluster list
285  }
286  }
287  }
288 
289  endProgress();
290  }
291 
297  {
298 
299  // construct new grid (grid only in x-direction, single cell in y-direction)
300  std::vector<double> grid_spacing_x = grid_.getGridSpacingX();
301  std::vector<double> grid_spacing_y = grid_.getGridSpacingY();
302  std::vector<double> grid_spacing_y_new;
303  grid_spacing_y_new.push_back(grid_spacing_y.front());
304  grid_spacing_y_new.push_back(grid_spacing_y.back());
305  ClusteringGrid grid_x_only(grid_spacing_x, grid_spacing_y_new);
306 
307  // register final clusters on the new grid
308  for (std::map<int, GridBasedCluster>::const_iterator it = clusters_final_.begin(); it != clusters_final_.end(); ++it)
309  {
310  int cluster_index = it->first;
311  GridBasedCluster cluster = it->second;
312  grid_x_only.addCluster(grid_x_only.getIndex(cluster.getCentre()), cluster_index);
313  }
314 
315 
316  // scan through x on the grid
317  for (unsigned cell = 0; cell < grid_spacing_x.size(); ++cell)
318  {
319  CellIndex grid_index(cell, 1);
320  if (grid_x_only.isNonEmptyCell(grid_index))
321  {
322  std::list<int> cluster_indices = grid_x_only.getClusters(grid_index); // indices of clusters in this x-range
323  if (cluster_indices.size() > 1)
324  {
325  // First, order the clusters in ascending y.
326  std::list<GridBasedCluster> cluster_list; // list to order clusters in y
327  std::map<GridBasedCluster, int> index_list; // allows us to keep track of cluster indices after sorting
328  for (std::list<int>::const_iterator it = cluster_indices.begin(); it != cluster_indices.end(); ++it)
329  {
330  cluster_list.push_back(clusters_final_.find(*it)->second);
331  index_list.insert(std::make_pair(clusters_final_.find(*it)->second, *it));
332  }
333  cluster_list.sort();
334 
335  // Now check if two adjacent clusters c1 and c2 can be merged.
336  std::list<GridBasedCluster>::const_iterator c1 = cluster_list.begin();
337  std::list<GridBasedCluster>::const_iterator c2 = cluster_list.begin();
338  ++c1;
339  while (c1 != cluster_list.end())
340  {
341  double centre1x = (*c1).getCentre().getX();
342  double centre1y = (*c1).getCentre().getY();
343  double centre2x = (*c2).getCentre().getX();
344  double centre2y = (*c2).getCentre().getY();
345 
346  double box1x_min = (*c1).getBoundingBox().minX();
347  double box1x_max = (*c1).getBoundingBox().maxX();
348  double box1y_min = (*c1).getBoundingBox().minY();
349  double box1y_max = (*c1).getBoundingBox().maxY();
350  double box2x_min = (*c2).getBoundingBox().minX();
351  double box2x_max = (*c2).getBoundingBox().maxX();
352  double box2y_min = (*c2).getBoundingBox().minY();
353  double box2y_max = (*c2).getBoundingBox().maxY();
354 
355  //double y_range1 = box1y_max - box1y_min;
356  //double y_range2 = box2y_max - box2y_min;
357  //double y_gap = box1y_min - box2y_max;
358 
359  // Is there an overlap of the two clusters in x?
360  bool overlap = (box1x_min <= box2x_max && box1x_min >= box2x_min) || (box1x_max >= box2x_min && box1x_max <= box2x_max);
361 
362  // Is the x-centre of one cluster in the x-range of the other?
363  //bool centre_in_range1 = (box2x_min <= centre1x && centre1x <= box2x_max);
364  //bool centre_in_range2 = (box1x_min <= centre2x && centre2x <= box1x_max);
365 
366  // Is the y-gap between the two clusters smaller than 1/s of their average y-range?
367  //double s = 6; // scaling factor
368  //bool clusters_close = (y_gap * s <= (y_range1 - y_range2)/2);
369 
370  // Shall we merge the two adjacent clusters?
371  //if ((centre_in_range1 || centre_in_range2) && clusters_close)
372  if (overlap)
373  {
374  std::vector<int> points1 = (*c1).getPoints();
375  std::vector<int> points2 = (*c2).getPoints();
376  std::vector<int> new_points;
377  new_points.reserve(points1.size() + points2.size());
378  new_points.insert(new_points.end(), points1.begin(), points1.end());
379  new_points.insert(new_points.end(), points2.begin(), points2.end());
380 
381  double new_x = (centre1x * points1.size() + centre2x * points2.size()) / (points1.size() + points2.size());
382  double new_y = (centre1y * points1.size() + centre2y * points2.size()) / (points1.size() + points2.size());
383 
384  double min_x = std::min(box1x_min, box2x_min);
385  double min_y = std::min(box1y_min, box2y_min);
386  double max_x = std::max(box1x_max, box2x_max);
387  double max_y = std::max(box1y_max, box2y_max);
388 
389  Point new_centre(new_x, new_y);
390  Point position_min(min_x, min_y);
391  Point position_max(max_x, max_y);
392  Rectangle new_bounding_box(position_min, position_max);
393 
394  GridBasedCluster new_cluster(new_centre, new_bounding_box, new_points);
395 
396  // update final cluster list
397  clusters_final_.erase(clusters_final_.find(index_list.find(*c1)->second));
398  clusters_final_.erase(clusters_final_.find(index_list.find(*c2)->second));
399  clusters_final_.insert(std::make_pair(index_list.find(*c1)->second, new_cluster));
400 
401  // update grid
402  CellIndex cell_for_cluster1 = grid_x_only.getIndex((*c1).getCentre());
403  CellIndex cell_for_cluster2 = grid_x_only.getIndex((*c2).getCentre());
404  CellIndex cell_for_new_cluster = grid_x_only.getIndex(new_centre);
405 
406  grid_x_only.removeCluster(cell_for_cluster1, index_list.find(*c1)->second);
407  grid_x_only.removeCluster(cell_for_cluster2, index_list.find(*c2)->second);
408  grid_x_only.addCluster(cell_for_new_cluster, index_list.find(*c1)->second);
409  }
410  ++c1;
411  ++c2;
412  }
413  }
414  }
415  }
416 
417  }
418 
423  void removeSmallClustersY(double threshold_y)
424  {
425  std::map<int, GridBasedCluster>::iterator it = clusters_final_.begin();
426  while (it != clusters_final_.end())
427  {
428  Rectangle box = it->second.getBoundingBox();
429  if (box.maxY() - box.minY() < threshold_y)
430  {
431  clusters_final_.erase(it++);
432  }
433  else
434  {
435  ++it;
436  }
437  }
438  }
439 
443  std::map<int, GridBasedCluster> getResults() const
444  {
445  return clusters_final_;
446  }
447 
448 private:
449 
453  Metric metric_;
454 
460 
465  std::map<int, GridBasedCluster> clusters_;
466 
471  std::map<int, GridBasedCluster> clusters_final_;
472 
477  std::multiset<MinimumDistance> distances_;
478 
483  std::unordered_multimap<int, std::multiset<MinimumDistance>::const_iterator> reverse_nns_;
484 
489  std::unordered_map<int, std::multiset<MinimumDistance>::const_iterator> distance_it_for_cluster_idx_;
490 
499  void init_(const std::vector<double>& data_x, const std::vector<double>& data_y,
500  const std::vector<int>& properties_A, const std::vector<int>& properties_B)
501  {
502  // fill the grid with points to be clustered (initially each cluster contains a single point)
503  for (unsigned i = 0; i < data_x.size(); ++i)
504  {
505  Point position(data_x[i], data_y[i]);
506  Rectangle box(position, position);
507 
508  std::vector<int> pi; // point indices
509  pi.push_back(i);
510  std::vector<int> pb; // properties B
511  pb.push_back(properties_B[i]);
512 
513  // add to cluster list
514  GridBasedCluster cluster(position, box, pi, properties_A[i], pb);
515  clusters_.insert(std::make_pair(i, cluster));
516 
517  // register on grid
518  grid_.addCluster(grid_.getIndex(position), i);
519  }
520 
521  // fill list of minimum distances
522  std::map<int, GridBasedCluster>::iterator iterator = clusters_.begin();
523  while (iterator != clusters_.end())
524  {
525  int cluster_index = iterator->first;
526  const GridBasedCluster& cluster = iterator->second;
527 
528  if (findNearestNeighbour_(cluster, cluster_index))
529  {
530  // remove from grid
531  grid_.removeCluster(grid_.getIndex(cluster.getCentre()), cluster_index);
532  // remove from cluster list
533  clusters_.erase(iterator++);
534  }
535  else
536  {
537  ++iterator;
538  }
539  }
540  }
541 
555  bool mergeVeto_(const GridBasedCluster& c1, const GridBasedCluster& c2) const
556  {
557  int A1 = c1.getPropertyA();
558  int A2 = c2.getPropertyA();
559 
560  // check if properties A of both clusters is set or not (not set := -1)
561  if (A1 == -1 || A2 == -1)
562  {
563  return false;
564  }
565 
566  // Will the merged cluster have the same properties A?
567  if (A1 != A2) return true;
568 
569  std::vector<int> B1 = c1.getPropertiesB();
570  std::vector<int> B2 = c2.getPropertiesB();
571 
572  // check if properties B of both clusters is set or not (not set := -1)
573  if (std::find(B1.begin(), B1.end(), -1) != B1.end() || std::find(B2.begin(), B2.end(), -1) != B2.end())
574  {
575  return false;
576  }
577 
578  // Will the merged cluster have different properties B?
579  // (Hence the intersection of properties B of cluster 1 and cluster 2 should be empty.)
580  std::vector<int> B_intersection;
581  sort(B1.begin(), B1.end());
582  sort(B2.begin(), B2.end());
583  set_intersection(B1.begin(), B1.end(), B2.begin(), B2.end(), back_inserter(B_intersection));
584 
585  return !B_intersection.empty();
586  }
587 
601  bool findNearestNeighbour_(const GridBasedCluster& cluster, int cluster_index)
602  {
603  const Point& centre = cluster.getCentre();
604  const CellIndex& cell_index = grid_.getIndex(centre);
605  double min_dist = 0;
606  int nearest_neighbour = -1;
607 
608  // search in the grid cell and its 8 neighbouring cells for the nearest neighbouring cluster
609  for (int i = -1; i <= 1; ++i)
610  {
611  for (int j = -1; j <= 1; ++j)
612  {
613  CellIndex cell_index2(cell_index);
614  cell_index2.first += i;
615  cell_index2.second += j;
616  if (grid_.isNonEmptyCell(cell_index2))
617  {
618  const std::list<int>& cluster_indices = grid_.getClusters(cell_index2);
619  for (std::list<int>::const_iterator cluster_index2 = cluster_indices.begin(); cluster_index2 != cluster_indices.end(); ++cluster_index2)
620  {
621  if (*cluster_index2 != cluster_index)
622  {
623  const GridBasedCluster& cluster2 = clusters_.find(*cluster_index2)->second;
624  const Point& centre2 = cluster2.getCentre();
625  double distance = metric_(centre, centre2);
626 
627  if (distance < min_dist || nearest_neighbour == -1)
628  {
629  bool veto = mergeVeto_(cluster, cluster2); // If clusters cannot be merged anyhow, they are no nearest neighbours.
630  if (!veto)
631  {
632  min_dist = distance;
633  nearest_neighbour = *cluster_index2;
634  }
635  }
636  }
637  }
638  }
639  }
640  }
641 
642  if (nearest_neighbour == -1)
643  {
644  // no other cluster nearby, hence move the cluster to the final results
645  clusters_final_.insert(std::make_pair(cluster_index, clusters_.find(cluster_index)->second));
646  return true;
647  }
648 
649  // add to the list of minimal distances
650  std::multiset<MinimumDistance>::const_iterator it = distances_.insert(MinimumDistance(cluster_index, nearest_neighbour, min_dist));
651  // add to reverse nearest neighbor lookup table
652  reverse_nns_.insert(std::make_pair(nearest_neighbour, it));
653  // add to cluster index -> distance lookup table
654  distance_it_for_cluster_idx_[cluster_index] = it;
655 
656  return false;
657  }
658 
668  void eraseMinDistance_(const std::multiset<MinimumDistance>::const_iterator it)
669  {
670  // remove corresponding entries from nearest neighbor lookup table
671  std::pair<NNIterator, NNIterator> nn_range = reverse_nns_.equal_range(it->getNearestNeighbourIndex());
672  for (NNIterator nn_it = nn_range.first; nn_it != nn_range.second; ++nn_it)
673  {
674  if (nn_it->second == it)
675  {
676  reverse_nns_.erase(nn_it);
677  break;
678  }
679  }
680 
681  // remove corresponding entry from cluster index -> distance lookup table
682  distance_it_for_cluster_idx_.erase(it->getClusterIndex());
683 
684  // remove from distances_
685  distances_.erase(it);
686  }
687  };
688 }
data structure to store 2D data to be clustered e.g. (m/z, retention time) coordinates from multiplex...
Definition: ClusteringGrid.h:57
std::vector< double > getGridSpacingX() const
returns grid spacing in x direction
CellIndex getIndex(const Point &position) const
returns grid cell index (i,j) for the positions (x,y)
void removeCluster(const CellIndex &cell_index, const int &cluster_index)
removes a cluster from this grid cell and removes the cell if no other cluster left
std::list< int > getClusters(const CellIndex &cell_index) const
returns clusters in this grid cell
std::pair< int, int > CellIndex
Definition: ClusteringGrid.h:62
std::vector< double > getGridSpacingY() const
returns grid spacing in y direction
bool isNonEmptyCell(const CellIndex &cell_index) const
checks if there are clusters at this cell index
void addCluster(const CellIndex &cell_index, const int &cluster_index)
adds a cluster to this grid cell
void enlarge(const PositionType &p)
Enlarges the bounding box such that it contains a position.
Definition: DBoundingBox.h:121
CoordinateType getY() const
Name accessor for the second dimension. Only for DPosition<2>, for visualization.
Definition: DPosition.h:164
CoordinateType getX() const
Name accessor for the first dimension. Only for DPosition<2>, for visualization.
Definition: DPosition.h:157
Invalid value exception.
Definition: Exception.h:329
basic data structure for clustering
Definition: GridBasedCluster.h:50
const Point & getCentre() const
returns cluster centre
const std::vector< int > & getPoints() const
returns indices of points in cluster
const std::vector< int > & getPropertiesB() const
returns properties B of all points
const Rectangle & getBoundingBox() const
returns bounding box
int getPropertyA() const
returns property A
2D hierarchical clustering implementation optimized for large data sets containing many small cluster...
Definition: GridBasedClustering.h:127
void removeSmallClustersY(double threshold_y)
removes clusters with bounding box dimension in y-direction below certain threshold
Definition: GridBasedClustering.h:423
bool findNearestNeighbour_(const GridBasedCluster &cluster, int cluster_index)
determines the nearest neighbour for each cluster
Definition: GridBasedClustering.h:601
std::unordered_map< int, std::multiset< MinimumDistance >::const_iterator > distance_it_for_cluster_idx_
cluster index to distance iterator lookup table for finding out which clusters need to be updated fas...
Definition: GridBasedClustering.h:489
bool mergeVeto_(const GridBasedCluster &c1, const GridBasedCluster &c2) const
checks if two clusters can be merged Each point in a cluster can (optionally) have two properties A a...
Definition: GridBasedClustering.h:555
void init_(const std::vector< double > &data_x, const std::vector< double > &data_y, const std::vector< int > &properties_A, const std::vector< int > &properties_B)
initialises all data structures
Definition: GridBasedClustering.h:499
ClusteringGrid grid_
grid on which the position of the clusters are registered used in cluster method
Definition: GridBasedClustering.h:459
void extendClustersY()
extends clusters in y-direction if possible (merges clusters further in y-direction,...
Definition: GridBasedClustering.h:296
Metric metric_
metric for measuring the distance between points in the 2D plane
Definition: GridBasedClustering.h:453
GridBasedClustering(Metric metric, const std::vector< double > &data_x, const std::vector< double > &data_y, std::vector< double > grid_spacing_x, std::vector< double > grid_spacing_y)
initialises all data structures
Definition: GridBasedClustering.h:166
GridBasedCluster::Rectangle Rectangle
Definition: GridBasedClustering.h:133
std::multiset< MinimumDistance > distances_
list of minimum distances stores the smallest of the distances in the head
Definition: GridBasedClustering.h:477
GridBasedClustering(Metric metric, const std::vector< double > &data_x, const std::vector< double > &data_y, const std::vector< int > &properties_A, const std::vector< int > &properties_B, std::vector< double > grid_spacing_x, std::vector< double > grid_spacing_y)
initialises all data structures
Definition: GridBasedClustering.h:148
void cluster()
performs the hierarchical clustering (merges clusters until their dimension exceeds that of cell)
Definition: GridBasedClustering.h:182
GridBasedCluster::Point Point
cluster centre, cluster bounding box, grid index
Definition: GridBasedClustering.h:132
std::unordered_multimap< int, std::multiset< MinimumDistance >::const_iterator > reverse_nns_
reverse nearest neighbor lookup table for finding out which clusters need to be updated faster
Definition: GridBasedClustering.h:483
std::multiset< MinimumDistance >::const_iterator MultisetIterator
Definition: GridBasedClustering.h:135
std::map< int, GridBasedCluster > clusters_
list of clusters maps cluster indices to clusters
Definition: GridBasedClustering.h:465
std::unordered_multimap< int, MultisetIterator >::const_iterator NNIterator
Definition: GridBasedClustering.h:136
std::map< int, GridBasedCluster > clusters_final_
list of final clusters i.e. clusters that are no longer merged
Definition: GridBasedClustering.h:471
void eraseMinDistance_(const std::multiset< MinimumDistance >::const_iterator it)
remove minimum distance object and its related data
Definition: GridBasedClustering.h:668
ClusteringGrid::CellIndex CellIndex
Definition: GridBasedClustering.h:134
std::map< int, GridBasedCluster > getResults() const
returns final results (mapping of cluster indices to clusters)
Definition: GridBasedClustering.h:443
PositionType const & maxPosition() const
Accessor to maximum position.
Definition: DIntervalBase.h:130
CoordinateType maxY() const
Accessor for max_ coordinate maximum.
Definition: DIntervalBase.h:261
CoordinateType minY() const
Accessor for max_ coordinate minimum.
Definition: DIntervalBase.h:249
PositionType const & minPosition() const
Accessor to minimum position.
Definition: DIntervalBase.h:124
basic data structure for distances between clusters
Definition: GridBasedClustering.h:61
MinimumDistance()
hide default constructor
bool operator>(const MinimumDistance &other) const
int getClusterIndex() const
returns cluster index
MinimumDistance(const int &cluster_index, const int &nearest_neighbour_index, const double &distance)
constructor
int nearest_neighbour_index_
index of the nearest neighbour of the above cluster
Definition: GridBasedClustering.h:100
int getNearestNeighbourIndex() const
returns index of nearest cluster
double distance_
distance between cluster and its nearest neighbour
Definition: GridBasedClustering.h:105
bool operator==(const MinimumDistance &other) const
int cluster_index_
index in the cluster list
Definition: GridBasedClustering.h:95
bool operator<(const MinimumDistance &other) const
operators for comparisons (for multiset)
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress() const
Ends the progress display.
A more convenient string class.
Definition: String.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
const double c
Definition: Constants.h:209
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47