I am trying to cluster rectangles when the number of clusters is unknown.
Below is an image generated programmatically, as well as the coordinates of the rectangles.

Below is code in Python to reproduce the output:
from PIL import Image, ImageDraw
# Canvas size in pixels.
width = 1024
height = 768

# White canvas plus a single drawing context that is reused for every shape.
im = Image.new('RGBA', (width, height), (255, 255, 255))
draw = ImageDraw.Draw(im)

# Each entry is one rectangle given as [x1, y1, x2, y2].
coords = [[392, 0, 441, 62], [889, 106, 958, 146], [144, 215, 184, 299], [224, 44, 288, 62], [219, 243, 243, 277], [760, 450, 847, 547], [390, 311, 399, 351], [439, 232, 520, 274], [72, 438, 129, 506], [64, 177, 151, 207], [758, 160, 828, 228], [310, 543, 314, 587], [300, 215, 310, 271], [787, 310, 850, 332], [63, 198, 86, 276], [372, 432, 409, 448], [235, 121, 302, 217], [140, 614, 216, 643], [160, 203, 168, 244], [364, 645, 394, 708], [294, 219, 299, 294], [101, 215, 119, 251], [776, 65, 792, 151], [421, 608, 447, 659], [127, 183, 189, 196], [350, 114, 439, 140], [134, 600, 207, 686], [55, 126, 148, 224], [159, 130, 239, 219], [705, 114, 743, 164], [824, 601, 893, 620], [916, 541, 966, 557], [82, 251, 178, 324], [216, 317, 235, 349], [806, 443, 898, 483], [297, 618, 320, 659], [887, 213, 929, 256], [17, 117, 109, 182], [755, 233, 818, 287], [165, 392, 215, 463], [202, 256, 248, 308], [61, 499, 103, 595], [163, 182, 200, 268], [167, 143, 257, 243], [3, 468, 99, 549], [126, 51, 129, 112], [24, 63, 113, 67], [830, 555, 924, 585], [881, 297, 918, 356], [917, 226, 998, 295], [59, 148, 59, 156], [56, 16, 82, 113]]

# Draw every rectangle in light grey with a darker grey outline.
# The drawing context created above stays valid, so it is not re-created here.
for x1, y1, x2, y2 in coords:
    draw.rectangle((x1, y1, x2, y2), fill=(192, 192, 192), outline=(122, 122, 122))

im.save('./1.png')
Is it possible to detect the number of clusters automatically?
If so, could you please provide a minimal working example for clustering such data?
Are you looking for something like this?
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
# Canvas size in pixels.
width = 1024
height = 768

# Each entry is one rectangle given as [x1, y1, x2, y2].
coords = [[392, 0, 441, 62], [889, 106, 958, 146], [144, 215, 184, 299], [224, 44, 288, 62], [219, 243, 243, 277], [760, 450, 847, 547], [390, 311, 399, 351], [439, 232, 520, 274], [72, 438, 129, 506], [64, 177, 151, 207], [758, 160, 828, 228], [310, 543, 314, 587], [300, 215, 310, 271], [787, 310, 850, 332], [63, 198, 86, 276], [372, 432, 409, 448], [235, 121, 302, 217], [140, 614, 216, 643], [160, 203, 168, 244], [364, 645, 394, 708], [294, 219, 299, 294], [101, 215, 119, 251], [776, 65, 792, 151], [421, 608, 447, 659], [127, 183, 189, 196], [350, 114, 439, 140], [134, 600, 207, 686], [55, 126, 148, 224], [159, 130, 239, 219], [705, 114, 743, 164], [824, 601, 893, 620], [916, 541, 966, 557], [82, 251, 178, 324], [216, 317, 235, 349], [806, 443, 898, 483], [297, 618, 320, 659], [887, 213, 929, 256], [17, 117, 109, 182], [755, 233, 818, 287], [165, 392, 215, 463], [202, 256, 248, 308], [61, 499, 103, 595], [163, 182, 200, 268], [167, 143, 257, 243], [3, 468, 99, 549], [126, 51, 129, 112], [24, 63, 113, 67], [830, 555, 924, 585], [881, 297, 918, 356], [917, 226, 998, 295], [59, 148, 59, 156], [56, 16, 82, 113]]

# Colour used for each k-means label. Any label that is not listed falls back
# to blue, which is what the final ``else`` branch of the original chain did.
LABEL_COLORS = {0: (255, 0, 0), 1: (0, 255, 0)}
DEFAULT_COLOR = (0, 0, 255)
OUTLINE_GREY = (122, 122, 122)


def rect_center(rect):
    """Return the center ``[cx, cy]`` of a rectangle ``[x1, y1, x2, y2]``."""
    x1, y1, x2, y2 = rect
    return [(x1 + x2) / 2, (y1 + y2) / 2]


def label_color(label):
    """Return the RGB colour assigned to a cluster label."""
    # int() so that NumPy integer labels and plain ints are looked up alike.
    return LABEL_COLORS.get(int(label), DEFAULT_COLOR)


def _new_canvas():
    """Create a white canvas and return ``(image, drawing_context)``."""
    image = Image.new('RGBA', (width, height), (255, 255, 255))
    return image, ImageDraw.Draw(image)


def main():
    """Draw the rectangles, cluster their centers and save four images.

    * ``1.png`` - the rectangles in grey
    * ``2.png`` - the rectangle centers
    * ``3.png`` - the centers, circled in their cluster colour
    * ``4.png`` - the rectangles, filled with their cluster colour
    """
    im, draw = _new_canvas()
    im2, draw2 = _new_canvas()
    im3, draw3 = _new_canvas()
    im4, draw4 = _new_canvas()

    centers = [rect_center(rect) for rect in coords]

    for (x1, y1, x2, y2), (cx, cy) in zip(coords, centers):
        draw.rectangle((x1, y1, x2, y2), fill=(192, 192, 192), outline=OUTLINE_GREY)
        draw2.point((cx, cy), fill=0)
        draw2.ellipse((cx - 2, cy - 2, cx + 2, cy + 2), outline=(255, 0, 0))
    im.save('./1.png')
    im2.save('./2.png')

    # Cluster on the center points only; rectangle size is ignored.
    x = np.array(centers)
    # Fixed seed so the labels (and therefore the colours) are reproducible
    # from run to run.
    kmeans = KMeans(n_clusters=3, random_state=10)
    kmeans.fit(x)
    print(kmeans.cluster_centers_)
    print(kmeans.labels_)

    for (cx, cy), label, (x1, y1, x2, y2) in zip(centers, kmeans.labels_, coords):
        color = label_color(label)
        draw3.point((cx, cy), fill=0)
        draw3.ellipse((cx - 5, cy - 5, cx + 5, cy + 5), outline=color)
        draw4.rectangle((x1, y1, x2, y2), fill=color, outline=OUTLINE_GREY)
    im3.save("./3.png")
    im4.save("./4.png")


if __name__ == "__main__":
    main()
I computed the center point of each rectangle and used those centers as the rectangles' positions, then ran k-means on them to group the rectangles. The resulting labels can also be used to color the rectangles.
With k-means you choose the number of clusters to create yourself (`n_clusters`).
The code also generates images showing the center points (not attached here).

[Update] Added Silhouette Score as metric for optimizing cluster count
I added a Silhouette Score calculation. It runs k-means for several cluster counts and, for each, gives a value between -1 and 1 describing how well separated the clusters are (higher is better).
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Each entry is one rectangle given as [x1, y1, x2, y2].
coords = [[392, 0, 441, 62], [889, 106, 958, 146], [144, 215, 184, 299], [224, 44, 288, 62], [219, 243, 243, 277], [760, 450, 847, 547], [390, 311, 399, 351], [439, 232, 520, 274], [72, 438, 129, 506], [64, 177, 151, 207], [758, 160, 828, 228], [310, 543, 314, 587], [300, 215, 310, 271], [787, 310, 850, 332], [63, 198, 86, 276], [372, 432, 409, 448], [235, 121, 302, 217], [140, 614, 216, 643], [160, 203, 168, 244], [364, 645, 394, 708], [294, 219, 299, 294], [101, 215, 119, 251], [776, 65, 792, 151], [421, 608, 447, 659], [127, 183, 189, 196], [350, 114, 439, 140], [134, 600, 207, 686], [55, 126, 148, 224], [159, 130, 239, 219], [705, 114, 743, 164], [824, 601, 893, 620], [916, 541, 966, 557], [82, 251, 178, 324], [216, 317, 235, 349], [806, 443, 898, 483], [297, 618, 320, 659], [887, 213, 929, 256], [17, 117, 109, 182], [755, 233, 818, 287], [165, 392, 215, 463], [202, 256, 248, 308], [61, 499, 103, 595], [163, 182, 200, 268], [167, 143, 257, 243], [3, 468, 99, 549], [126, 51, 129, 112], [24, 63, 113, 67], [830, 555, 924, 585], [881, 297, 918, 356], [917, 226, 998, 295], [59, 148, 59, 156], [56, 16, 82, 113]]


def rect_centers(rects):
    """Return the center ``[cx, cy]`` of every ``[x1, y1, x2, y2]`` rectangle."""
    return [[(x1 + x2) / 2, (y1 + y2) / 2] for x1, y1, x2, y2 in rects]


def best_cluster_count(points, candidates=range(2, 10), random_state=10):
    """Pick the k-means cluster count with the highest average silhouette score.

    Args:
        points: Array of shape (n_samples, 2) holding the points to cluster.
        candidates: Cluster counts to try.
        random_state: Seed passed to ``KMeans`` so results are reproducible.

    Returns:
        ``(best_k, best_score)``. Both are ``-1`` when no candidate could be
        evaluated.
    """
    best_score = -1  # lowest value a silhouette score can take
    best_k = -1
    for k in candidates:
        # The silhouette score is only defined for 2 <= k <= n_samples - 1;
        # skip counts that cannot be evaluated instead of crashing.
        if k < 2 or k >= len(points):
            continue
        kmeans = KMeans(n_clusters=k, random_state=random_state)
        kmeans.fit(points)
        silhouette_avg = silhouette_score(points, kmeans.labels_)
        print("For n_clusters =", k,
              "The average silhouette_score is :", silhouette_avg)
        if silhouette_avg > best_score:
            best_score = silhouette_avg
            best_k = k
    return best_k, best_score


if __name__ == "__main__":
    # Vector with center coordinates
    x = np.array(rect_centers(coords))
    idx, bestScore = best_cluster_count(x)
    print("Best score :", bestScore, " Number Of Clusters:", idx)
Output:
For n_clusters = 2 The average silhouette_score is : 0.5998398985890973
For n_clusters = 3 The average silhouette_score is : 0.5537982490021037
For n_clusters = 4 The average silhouette_score is : 0.533919415288406
For n_clusters = 5 The average silhouette_score is : 0.460641507207192
For n_clusters = 6 The average silhouette_score is : 0.4685516097102802
For n_clusters = 7 The average silhouette_score is : 0.42313268667935106
For n_clusters = 8 The average silhouette_score is : 0.43283812579237735
For n_clusters = 9 The average silhouette_score is : 0.41602365463779123
Best score : 0.5998398985890973 Number Of Clusters: 2
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With