Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
GridBasedClustering.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Lars Nilse $
32 // $Authors: Lars Nilse, Johannes Veit $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_COMPARISON_CLUSTERING_GRIDBASEDCLUSTERING_H
36 #define OPENMS_COMPARISON_CLUSTERING_GRIDBASEDCLUSTERING_H
37 
44 
45 #include <cmath>
46 #include <limits>
47 #include <map>
48 #include <set>
49 #include <queue>
50 #include <vector>
51 #include <algorithm>
52 #include <iostream>
53 #include <boost/unordered_map.hpp>
54 
55 namespace OpenMS
56 {
57 
61  class OPENMS_DLLAPI MinimumDistance
62  {
63 public:
64 
68  MinimumDistance(const int& cluster_index, const int& nearest_neighbour_index, const double& distance);
69 
73  int getClusterIndex() const;
74 
78  int getNearestNeighbourIndex() const;
79 
84  bool operator<(const MinimumDistance& other) const;
85  bool operator>(const MinimumDistance& other) const;
86  bool operator==(const MinimumDistance& other) const;
87 
88 private:
89 
92 
97 
102 
106  double distance_;
107 
108  };
109 
125  template <typename Metric>
127  public ProgressLogger
128  {
129 public:
133  typedef GridBasedCluster::Point Point; // DPosition<2>
134  typedef GridBasedCluster::Rectangle Rectangle; // DBoundingBox<2>
135  typedef ClusteringGrid::CellIndex CellIndex; // std::pair<int,int>
136  typedef std::multiset<MinimumDistance>::const_iterator MultisetIterator;
137  typedef boost::unordered::unordered_multimap<int, MultisetIterator>::const_iterator NNIterator;
138 
149  GridBasedClustering(Metric metric, const std::vector<double>& data_x,
150  const std::vector<double>& data_y, const std::vector<int>& properties_A,
151  const std::vector<int>& properties_B, std::vector<double> grid_spacing_x,
152  std::vector<double> grid_spacing_y) :
153  metric_(metric),
154  grid_(grid_spacing_x, grid_spacing_y)
155  {
156  init_(data_x, data_y, properties_A, properties_B);
157  }
158 
167  GridBasedClustering(Metric metric, const std::vector<double>& data_x,
168  const std::vector<double>& data_y, std::vector<double> grid_spacing_x,
169  std::vector<double> grid_spacing_y) :
170  metric_(metric),
171  grid_(grid_spacing_x, grid_spacing_y)
172  {
173  // set properties A and B to -1, i.e. ignore properties when clustering
174  std::vector<int> properties_A(data_x.size(), -1);
175  std::vector<int> properties_B(data_x.size(), -1);
176  init_(data_x, data_y, properties_A, properties_B);
177  }
178 
183  void cluster()
184  {
185  // progress logger
186  // NOTE: for some reason, gcc7 chokes if we remove the OpenMS::String
187  // below, so lets just not change it.
188  Size clusters_start = clusters_.size();
189  startProgress(0, clusters_start, OpenMS::String("clustering"));
190 
191  MinimumDistance zero_distance(-1, -1, 0);
192 
193  // combine clusters until all have been moved to the final list
194  while (clusters_.size() > 0)
195  {
196  setProgress(clusters_start - clusters_.size());
197 
198  MultisetIterator smallest_distance_it = distances_.lower_bound(zero_distance);
199 
200  int cluster_index1 = smallest_distance_it->getClusterIndex();
201  int cluster_index2 = smallest_distance_it->getNearestNeighbourIndex();
202 
203  eraseMinDistance_(smallest_distance_it);
204 
205  // update cluster list
206  std::map<int, GridBasedCluster>::iterator cluster1_it = clusters_.find(cluster_index1);
207  std::map<int, GridBasedCluster>::iterator cluster2_it = clusters_.find(cluster_index2);
208  const GridBasedCluster& cluster1 = cluster1_it->second;
209  const GridBasedCluster& cluster2 = cluster2_it->second;
210  const std::vector<int>& points1 = cluster1.getPoints();
211  const std::vector<int>& points2 = cluster2.getPoints();
212  std::vector<int> new_points;
213  new_points.reserve(points1.size() + points2.size());
214  new_points.insert(new_points.end(), points1.begin(), points1.end());
215  new_points.insert(new_points.end(), points2.begin(), points2.end());
216 
217  double new_x = (cluster1.getCentre().getX() * points1.size() + cluster2.getCentre().getX() * points2.size()) / (points1.size() + points2.size());
218  double new_y = (cluster1.getCentre().getY() * points1.size() + cluster2.getCentre().getY() * points2.size()) / (points1.size() + points2.size());
219 
220  // update grid
221  CellIndex cell_for_cluster1 = grid_.getIndex(cluster1.getCentre());
222  CellIndex cell_for_cluster2 = grid_.getIndex(cluster2.getCentre());
223  CellIndex cell_for_new_cluster = grid_.getIndex(DPosition<2>(new_x, new_y));
224  grid_.removeCluster(cell_for_cluster1, cluster_index1);
225  grid_.removeCluster(cell_for_cluster2, cluster_index2);
226  grid_.addCluster(cell_for_new_cluster, cluster_index1);
227 
228  // merge clusters
229  const Rectangle& box1 = cluster1.getBoundingBox();
230  const Rectangle& box2 = cluster2.getBoundingBox();
231  Rectangle new_box(box1);
232  new_box.enlarge(box2.minPosition());
233  new_box.enlarge(box2.maxPosition());
234 
235  // Properties A of both clusters should by now be the same. The merge veto has been checked
236  // when a new entry to the minimum distance list was added, @see findNearestNeighbour_.
237  if (cluster1.getPropertyA() != cluster2.getPropertyA())
238  {
239  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Property A of both clusters not the same. ", "A");
240  }
241  int new_A = cluster1.getPropertyA();
242 
243  const std::vector<int>& B1 = cluster1.getPropertiesB();
244  const std::vector<int>& B2 = cluster2.getPropertiesB();
245  std::vector<int> new_B;
246  new_B.reserve(B1.size() + B2.size());
247  new_B.insert(new_B.end(), B1.begin(), B1.end());
248  new_B.insert(new_B.end(), B2.begin(), B2.end());
249 
250  GridBasedCluster new_cluster(DPosition<2>(new_x, new_y), new_box, new_points, new_A, new_B);
251 
252  clusters_.erase(cluster1_it);
253  clusters_.erase(cluster2_it);
254  clusters_.insert(std::make_pair(cluster_index1, new_cluster));
255 
256  std::set<int> clusters_to_be_updated;
257  clusters_to_be_updated.insert(cluster_index1);
258 
259  // erase distance object of cluster with cluster_index2 without updating (does not exist anymore!)
260  // (the one with cluster_index1 has already been erased at the top of the while loop)
261  eraseMinDistance_(distance_it_for_cluster_idx_[cluster_index2]);
262 
263  // find out which clusters need to be updated
264  std::pair<NNIterator, NNIterator> nn_range = reverse_nns_.equal_range(cluster_index1);
265  for (NNIterator nn_it = nn_range.first; nn_it != nn_range.second;)
266  {
267  clusters_to_be_updated.insert(nn_it->second->getClusterIndex());
268  eraseMinDistance_((nn_it++)->second);
269  }
270  nn_range = reverse_nns_.equal_range(cluster_index2);
271  for (NNIterator nn_it = nn_range.first; nn_it != nn_range.second;)
272  {
273  clusters_to_be_updated.insert(nn_it->second->getClusterIndex());
274  eraseMinDistance_((nn_it++)->second);
275  }
276 
277  // update clusters
278  for (std::set<int>::const_iterator cluster_index = clusters_to_be_updated.begin(); cluster_index != clusters_to_be_updated.end(); ++cluster_index)
279  {
280  std::map<int, GridBasedCluster>::iterator c_it = clusters_.find(*cluster_index);
281  const GridBasedCluster& c = c_it->second;
282  if (findNearestNeighbour_(c, *cluster_index))
283  {
284  grid_.removeCluster(grid_.getIndex(c.getCentre()), *cluster_index); // remove from grid
285  clusters_.erase(c_it); // remove from cluster list
286  }
287  }
288  }
289 
290  endProgress();
291  }
292 
298  {
299 
300  // construct new grid (grid only in x-direction, single cell in y-direction)
301  std::vector<double> grid_spacing_x = grid_.getGridSpacingX();
302  std::vector<double> grid_spacing_y = grid_.getGridSpacingY();
303  std::vector<double> grid_spacing_y_new;
304  grid_spacing_y_new.push_back(grid_spacing_y.front());
305  grid_spacing_y_new.push_back(grid_spacing_y.back());
306  ClusteringGrid grid_x_only(grid_spacing_x, grid_spacing_y_new);
307 
308  // register final clusters on the new grid
309  for (std::map<int, GridBasedCluster>::const_iterator it = clusters_final_.begin(); it != clusters_final_.end(); ++it)
310  {
311  int cluster_index = it->first;
312  GridBasedCluster cluster = it->second;
313  grid_x_only.addCluster(grid_x_only.getIndex(cluster.getCentre()), cluster_index);
314  }
315 
316 
317  // scan through x on the grid
318  for (unsigned cell = 0; cell < grid_spacing_x.size(); ++cell)
319  {
320  CellIndex grid_index(cell, 1);
321  if (grid_x_only.isNonEmptyCell(grid_index))
322  {
323  std::list<int> cluster_indices = grid_x_only.getClusters(grid_index); // indices of clusters in this x-range
324  if (cluster_indices.size() > 1)
325  {
326  // First, order the clusters in ascending y.
327  std::list<GridBasedCluster> cluster_list; // list to order clusters in y
328  std::map<GridBasedCluster, int> index_list; // allows us to keep track of cluster indices after sorting
329  for (std::list<int>::const_iterator it = cluster_indices.begin(); it != cluster_indices.end(); ++it)
330  {
331  cluster_list.push_back(clusters_final_.find(*it)->second);
332  index_list.insert(std::make_pair(clusters_final_.find(*it)->second, *it));
333  }
334  cluster_list.sort();
335 
336  // Now check if two adjacent clusters c1 and c2 can be merged.
337  std::list<GridBasedCluster>::const_iterator c1 = cluster_list.begin();
338  std::list<GridBasedCluster>::const_iterator c2 = cluster_list.begin();
339  ++c1;
340  while (c1 != cluster_list.end())
341  {
342  double centre1x = (*c1).getCentre().getX();
343  double centre1y = (*c1).getCentre().getY();
344  double centre2x = (*c2).getCentre().getX();
345  double centre2y = (*c2).getCentre().getY();
346 
347  double box1x_min = (*c1).getBoundingBox().minX();
348  double box1x_max = (*c1).getBoundingBox().maxX();
349  double box1y_min = (*c1).getBoundingBox().minY();
350  double box1y_max = (*c1).getBoundingBox().maxY();
351  double box2x_min = (*c2).getBoundingBox().minX();
352  double box2x_max = (*c2).getBoundingBox().maxX();
353  double box2y_min = (*c2).getBoundingBox().minY();
354  double box2y_max = (*c2).getBoundingBox().maxY();
355 
356  //double y_range1 = box1y_max - box1y_min;
357  //double y_range2 = box2y_max - box2y_min;
358  //double y_gap = box1y_min - box2y_max;
359 
360  // Is there an overlap of the two clusters in x?
361  bool overlap = (box1x_min <= box2x_max && box1x_min >= box2x_min) || (box1x_max >= box2x_min && box1x_max <= box2x_max);
362 
363  // Is the x-centre of one cluster in the x-range of the other?
364  //bool centre_in_range1 = (box2x_min <= centre1x && centre1x <= box2x_max);
365  //bool centre_in_range2 = (box1x_min <= centre2x && centre2x <= box1x_max);
366 
367  // Is the y-gap between the two clusters smaller than 1/s of their average y-range?
368  //double s = 6; // scaling factor
369  //bool clusters_close = (y_gap * s <= (y_range1 - y_range2)/2);
370 
371  // Shall we merge the two adjacent clusters?
372  //if ((centre_in_range1 || centre_in_range2) && clusters_close)
373  if (overlap)
374  {
375  std::vector<int> points1 = (*c1).getPoints();
376  std::vector<int> points2 = (*c2).getPoints();
377  std::vector<int> new_points;
378  new_points.reserve(points1.size() + points2.size());
379  new_points.insert(new_points.end(), points1.begin(), points1.end());
380  new_points.insert(new_points.end(), points2.begin(), points2.end());
381 
382  double new_x = (centre1x * points1.size() + centre2x * points2.size()) / (points1.size() + points2.size());
383  double new_y = (centre1y * points1.size() + centre2y * points2.size()) / (points1.size() + points2.size());
384 
385  double min_x = std::min(box1x_min, box2x_min);
386  double min_y = std::min(box1y_min, box2y_min);
387  double max_x = std::max(box1x_max, box2x_max);
388  double max_y = std::max(box1y_max, box2y_max);
389 
390  Point new_centre(new_x, new_y);
391  Point position_min(min_x, min_y);
392  Point position_max(max_x, max_y);
393  Rectangle new_bounding_box(position_min, position_max);
394 
395  GridBasedCluster new_cluster(new_centre, new_bounding_box, new_points);
396 
397  // update final cluster list
398  clusters_final_.erase(clusters_final_.find(index_list.find(*c1)->second));
399  clusters_final_.erase(clusters_final_.find(index_list.find(*c2)->second));
400  clusters_final_.insert(std::make_pair(index_list.find(*c1)->second, new_cluster));
401 
402  // update grid
403  CellIndex cell_for_cluster1 = grid_x_only.getIndex((*c1).getCentre());
404  CellIndex cell_for_cluster2 = grid_x_only.getIndex((*c2).getCentre());
405  CellIndex cell_for_new_cluster = grid_x_only.getIndex(new_centre);
406 
407  grid_x_only.removeCluster(cell_for_cluster1, index_list.find(*c1)->second);
408  grid_x_only.removeCluster(cell_for_cluster2, index_list.find(*c2)->second);
409  grid_x_only.addCluster(cell_for_new_cluster, index_list.find(*c1)->second);
410  }
411  ++c1;
412  ++c2;
413  }
414  }
415  }
416  }
417 
418  }
419 
424  void removeSmallClustersY(double threshold_y)
425  {
426  std::map<int, GridBasedCluster>::iterator it = clusters_final_.begin();
427  while (it != clusters_final_.end())
428  {
429  Rectangle box = it->second.getBoundingBox();
430  if (box.maxY() - box.minY() < threshold_y)
431  {
432  clusters_final_.erase(it++);
433  }
434  else
435  {
436  ++it;
437  }
438  }
439  }
440 
444  std::map<int, GridBasedCluster> getResults() const
445  {
446  return clusters_final_;
447  }
448 
449 private:
450 
454  Metric metric_;
455 
461 
466  std::map<int, GridBasedCluster> clusters_;
467 
472  std::map<int, GridBasedCluster> clusters_final_;
473 
478  std::multiset<MinimumDistance> distances_;
479 
484  boost::unordered::unordered_multimap<int, std::multiset<MinimumDistance>::const_iterator> reverse_nns_;
485 
490  boost::unordered::unordered_map<int, std::multiset<MinimumDistance>::const_iterator> distance_it_for_cluster_idx_;
491 
500  void init_(const std::vector<double>& data_x, const std::vector<double>& data_y,
501  const std::vector<int>& properties_A, const std::vector<int>& properties_B)
502  {
503  // fill the grid with points to be clustered (initially each cluster contains a single point)
504  for (unsigned i = 0; i < data_x.size(); ++i)
505  {
506  Point position(data_x[i], data_y[i]);
507  Rectangle box(position, position);
508 
509  std::vector<int> pi; // point indices
510  pi.push_back(i);
511  std::vector<int> pb; // properties B
512  pb.push_back(properties_B[i]);
513 
514  // add to cluster list
515  GridBasedCluster cluster(position, box, pi, properties_A[i], pb);
516  clusters_.insert(std::make_pair(i, cluster));
517 
518  // register on grid
519  grid_.addCluster(grid_.getIndex(position), i);
520  }
521 
522  // fill list of minimum distances
523  std::map<int, GridBasedCluster>::iterator iterator = clusters_.begin();
524  while (iterator != clusters_.end())
525  {
526  int cluster_index = iterator->first;
527  const GridBasedCluster& cluster = iterator->second;
528 
529  if (findNearestNeighbour_(cluster, cluster_index))
530  {
531  // remove from grid
532  grid_.removeCluster(grid_.getIndex(cluster.getCentre()), cluster_index);
533  // remove from cluster list
534  clusters_.erase(iterator++);
535  }
536  else
537  {
538  ++iterator;
539  }
540  }
541  }
542 
556  bool mergeVeto_(const GridBasedCluster& c1, const GridBasedCluster& c2) const
557  {
558  int A1 = c1.getPropertyA();
559  int A2 = c2.getPropertyA();
560 
561  // check if properties A of both clusters is set or not (not set := -1)
562  if (A1 == -1 || A2 == -1)
563  {
564  return false;
565  }
566 
567  // Will the merged cluster have the same properties A?
568  if (A1 != A2) return true;
569 
570  std::vector<int> B1 = c1.getPropertiesB();
571  std::vector<int> B2 = c2.getPropertiesB();
572 
573  // check if properties B of both clusters is set or not (not set := -1)
574  if (std::find(B1.begin(), B1.end(), -1) != B1.end() || std::find(B2.begin(), B2.end(), -1) != B2.end())
575  {
576  return false;
577  }
578 
579  // Will the merged cluster have different properties B?
580  // (Hence the intersection of properties B of cluster 1 and cluster 2 should be empty.)
581  std::vector<int> B_intersection;
582  sort(B1.begin(), B1.end());
583  sort(B2.begin(), B2.end());
584  set_intersection(B1.begin(), B1.end(), B2.begin(), B2.end(), back_inserter(B_intersection));
585 
586  return !B_intersection.empty();
587  }
588 
602  bool findNearestNeighbour_(const GridBasedCluster& cluster, int cluster_index)
603  {
604  const Point& centre = cluster.getCentre();
605  const CellIndex& cell_index = grid_.getIndex(centre);
606  double min_dist = 0;
607  int nearest_neighbour = -1;
608 
609  // search in the grid cell and its 8 neighbouring cells for the nearest neighbouring cluster
610  for (int i = -1; i <= 1; ++i)
611  {
612  for (int j = -1; j <= 1; ++j)
613  {
614  CellIndex cell_index2(cell_index);
615  cell_index2.first += i;
616  cell_index2.second += j;
617  if (grid_.isNonEmptyCell(cell_index2))
618  {
619  const std::list<int>& cluster_indices = grid_.getClusters(cell_index2);
620  for (std::list<int>::const_iterator cluster_index2 = cluster_indices.begin(); cluster_index2 != cluster_indices.end(); ++cluster_index2)
621  {
622  if (*cluster_index2 != cluster_index)
623  {
624  const GridBasedCluster& cluster2 = clusters_.find(*cluster_index2)->second;
625  const Point& centre2 = cluster2.getCentre();
626  double distance = metric_(centre, centre2);
627 
628  if (distance < min_dist || nearest_neighbour == -1)
629  {
630  bool veto = mergeVeto_(cluster, cluster2); // If clusters cannot be merged anyhow, they are no nearest neighbours.
631  if (!veto)
632  {
633  min_dist = distance;
634  nearest_neighbour = *cluster_index2;
635  }
636  }
637  }
638  }
639  }
640  }
641  }
642 
643  if (nearest_neighbour == -1)
644  {
645  // no other cluster nearby, hence move the cluster to the final results
646  clusters_final_.insert(std::make_pair(cluster_index, clusters_.find(cluster_index)->second));
647  return true;
648  }
649 
650  // add to the list of minimal distances
651  std::multiset<MinimumDistance>::const_iterator it = distances_.insert(MinimumDistance(cluster_index, nearest_neighbour, min_dist));
652  // add to reverse nearest neighbor lookup table
653  reverse_nns_.insert(std::make_pair(nearest_neighbour, it));
654  // add to cluster index -> distance lookup table
655  distance_it_for_cluster_idx_[cluster_index] = it;
656 
657  return false;
658  }
659 
669  void eraseMinDistance_(const std::multiset<MinimumDistance>::const_iterator it)
670  {
671  // remove corresponding entries from nearest neighbor lookup table
672  std::pair<NNIterator, NNIterator> nn_range = reverse_nns_.equal_range(it->getNearestNeighbourIndex());
673  for (NNIterator nn_it = nn_range.first; nn_it != nn_range.second; ++nn_it)
674  {
675  if (nn_it->second == it)
676  {
677  reverse_nns_.erase(nn_it);
678  break;
679  }
680  }
681 
682  // remove corresponding entry from cluster index -> distance lookup table
683  distance_it_for_cluster_idx_.erase(it->getClusterIndex());
684 
685  // remove from distances_
686  distances_.erase(it);
687  }
688  };
689 }
690 #endif /* OPENMS_COMPARISON_CLUSTERING_GRIDBASEDCLUSTERING_H */
void removeCluster(const CellIndex &cell_index, const int &cluster_index)
removes a cluster from this grid cell and removes the cell if no other cluster left ...
boost::unordered::unordered_multimap< int, MultisetIterator >::const_iterator NNIterator
Definition: GridBasedClustering.h:137
std::pair< int, int > CellIndex
Definition: ClusteringGrid.h:63
double distance_
distance between cluster and its nearest neighbour
Definition: GridBasedClustering.h:106
A more convenient string class.
Definition: String.h:57
std::map< int, GridBasedCluster > clusters_final_
list of final clusters i.e. clusters that are no longer merged
Definition: GridBasedClustering.h:472
void enlarge(const PositionType &p)
Enlarges the bounding box such that it contains a position.
Definition: DBoundingBox.h:122
boost::unordered::unordered_multimap< int, std::multiset< MinimumDistance >::const_iterator > reverse_nns_
reverse nearest neighbor lookup table for finding out which clusters need to be updated faster ...
Definition: GridBasedClustering.h:484
std::multiset< MinimumDistance > distances_
list of minimum distances stores the smallest of the distances in the head
Definition: GridBasedClustering.h:478
GridBasedCluster::Point Point
cluster centre, cluster bounding box, grid index
Definition: GridBasedClustering.h:133
void removeSmallClustersY(double threshold_y)
removes clusters with bounding box dimension in y-direction below certain threshold ...
Definition: GridBasedClustering.h:424
bool operator==(_Iterator< _Val, _Ref, _Ptr > const &, _Iterator< _Val, _Ref, _Ptr > const &)
Definition: KDTree.h:806
void cluster()
performs the hierarchical clustering (merges clusters until their dimension exceeds that of cell) ...
Definition: GridBasedClustering.h:183
CoordinateType getY() const
Name accessor for the second dimension. Only for DPosition<2>, for visualization. ...
Definition: DPosition.h:158
const double c
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
const std::vector< int > & getPropertiesB() const
returns properties B of all points
ClusteringGrid grid_
grid on which the position of the clusters are registered used in cluster method
Definition: GridBasedClustering.h:460
GridBasedCluster::Rectangle Rectangle
Definition: GridBasedClustering.h:134
bool operator<(const MultiplexDeltaMasses &dm1, const MultiplexDeltaMasses &dm2)
std::map< int, GridBasedCluster > clusters_
list of clusters maps cluster indices to clusters
Definition: GridBasedClustering.h:466
PositionType const & maxPosition() const
Accessor to maximum position.
Definition: DIntervalBase.h:128
void extendClustersY()
extends clusters in y-direction if possible (merges clusters further in y-direction, i.e. clusters can now span multiple cells)
Definition: GridBasedClustering.h:297
GridBasedClustering(Metric metric, const std::vector< double > &data_x, const std::vector< double > &data_y, std::vector< double > grid_spacing_x, std::vector< double > grid_spacing_y)
initialises all data structures
Definition: GridBasedClustering.h:167
2D hierarchical clustering implementation optimized for large data sets containing many small cluster...
Definition: GridBasedClustering.h:126
CoordinateType getX() const
Name accessor for the first dimension. Only for DPosition<2>, for visualization.
Definition: DPosition.h:151
bool mergeVeto_(const GridBasedCluster &c1, const GridBasedCluster &c2) const
checks if two clusters can be merged Each point in a cluster can (optionally) have two properties A a...
Definition: GridBasedClustering.h:556
bool findNearestNeighbour_(const GridBasedCluster &cluster, int cluster_index)
determines the nearest neighbour for each cluster
Definition: GridBasedClustering.h:602
void addCluster(const CellIndex &cell_index, const int &cluster_index)
adds a cluster to this grid cell
data structure to store 2D data to be clustered e.g. (m/z, retention time) coordinates from multiplex...
Definition: ClusteringGrid.h:57
int cluster_index_
index in the cluster list
Definition: GridBasedClustering.h:96
void init_(const std::vector< double > &data_x, const std::vector< double > &data_y, const std::vector< int > &properties_A, const std::vector< int > &properties_B)
initialises all data structures
Definition: GridBasedClustering.h:500
bool isNonEmptyCell(const CellIndex &cell_index) const
checks if there are clusters at this cell index
GridBasedClustering(Metric metric, const std::vector< double > &data_x, const std::vector< double > &data_y, const std::vector< int > &properties_A, const std::vector< int > &properties_B, std::vector< double > grid_spacing_x, std::vector< double > grid_spacing_y)
initialises all data structures
Definition: GridBasedClustering.h:149
Metric metric_
metric for measuring the distance between points in the 2D plane
Definition: GridBasedClustering.h:454
CoordinateType minY() const
Accessor for max_ coordinate minimum.
Definition: DIntervalBase.h:247
const Rectangle & getBoundingBox() const
returns bounding box
CoordinateType maxY() const
Accessor for max_ coordinate maximum.
Definition: DIntervalBase.h:259
Invalid value exception.
Definition: Exception.h:336
const Point & getCentre() const
returns cluster centre
std::list< int > getClusters(const CellIndex &cell_index) const
returns clusters in this grid cell
const std::vector< int > & getPoints() const
returns indices of points in cluster
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
CellIndex getIndex(const Point &position) const
returns grid cell index (i,j) for the positions (x,y)
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
void eraseMinDistance_(const std::multiset< MinimumDistance >::const_iterator it)
remove minimum distance object and its related data
Definition: GridBasedClustering.h:669
ClusteringGrid::CellIndex CellIndex
Definition: GridBasedClustering.h:135
boost::unordered::unordered_map< int, std::multiset< MinimumDistance >::const_iterator > distance_it_for_cluster_idx_
cluster index to distance iterator lookup table for finding out which clusters need to be updated fas...
Definition: GridBasedClustering.h:490
basic data structure for distances between clusters
Definition: GridBasedClustering.h:61
std::multiset< MinimumDistance >::const_iterator MultisetIterator
Definition: GridBasedClustering.h:136
basic data structure for clustering
Definition: GridBasedCluster.h:55
int nearest_neighbour_index_
index of the nearest neighbour of the above cluster
Definition: GridBasedClustering.h:101
std::map< int, GridBasedCluster > getResults() const
returns final results (mapping of cluster indices to clusters)
Definition: GridBasedClustering.h:444
int getPropertyA() const
returns property A
PositionType const & minPosition() const
Accessor to minimum position.
Definition: DIntervalBase.h:122

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:00 using doxygen 1.8.13