I have been searching around for an implementation of DBSCAN for 3 dimensional points without much luck. Does anyone know I library that handles this or has any experience with doing this? I am assuming that the DBSCAN algorithm can handle 3 dimensions, by having the e value be a radius metric and the distance between points measured by euclidean separation. If anyone has tried implementing this and would like to share that would also be greatly appreciated, thanks.
You can use sklearn for DBSCAN. Here is some code that works for me-
from sklearn.cluster import DBSCAN
import numpy as np
data = np.random.rand(500,3)
db = DBSCAN(eps=0.12, min_samples=1).fit(data)
labels = db.labels_
from collections import Counter
Counter(labels)
The output I got was-
Counter({1: 342, 10: 30, 31: 13, 13: 11, 30: 10, 24: 5, 29: 5, 2: 4, 18: 4,
19: 4, 28: 4, 49: 4, 3: 3, 17: 3, 23: 3, 32: 3, 7: 2, 9: 2, 12: 2, 14: 2, 15: 2,
16: 2, 20: 2, 21: 2, 26: 2, 39: 2, 41: 2, 46: 2, 0: 1, 4: 1, 5: 1, 6: 1, 8: 1, 11:
1, 22: 1, 25: 1, 27: 1, 33: 1, 34: 1, 35: 1, 36: 1, 37: 1, 38: 1, 40: 1, 42: 1,
43: 1, 44: 1, 45: 1, 47: 1, 48: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1})
So, the clustering identifies 55 clusters with the count of the number of points in each cluster as shown above.
So this is what I came up with, I know it is not the most efficient implementation but it works; for example the region query, which is the main time eater of the algorithm computes the distance between two points more than once, instead of just storing it for use later.
class DBSCAN(object):
def __init__(self, eps=0, min_points=2):
self.eps = eps
self.min_points = min_points
self.visited = []
self.noise = []
self.clusters = []
self.dp = []
def cluster(self, data_points):
self.visited = []
self.dp = data_points
c = 0
for point in data_points:
if point not in self.visited:
self.visited.append(point)
neighbours = self.region_query(point)
if len(neighbours) < self.min_points:
self.noise.append(point)
else:
c += 1
self.expand_cluster(c, neighbours)
def expand_cluster(self, cluster_number, p_neighbours):
cluster = ("Cluster: %d" % cluster_number, [])
self.clusters.append(cluster)
new_points = p_neighbours
while new_points:
new_points = self.pool(cluster, new_points)
def region_query(self, p):
result = []
for d in self.dp:
distance = (((d[0] - p[0])**2 + (d[1] - p[1])**2 + (d[2] - p[2])**2)**0.5)
if distance <= self.eps:
result.append(d)
return result
def pool(self, cluster, p_neighbours):
new_neighbours = []
for n in p_neighbours:
if n not in self.visited:
self.visited.append(n)
n_neighbours = self.region_query(n)
if len(n_neighbours) >= self.min_points:
new_neighbours = self.unexplored(p_neighbours, n_neighbours)
for c in self.clusters:
if n not in c[1] and n not in cluster[1]:
cluster[1].append(n)
return new_neighbours
@staticmethod
def unexplored(x, y):
z = []
for p in y:
if p not in x:
z.append(p)
return z
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With