-
Notifications
You must be signed in to change notification settings - Fork 1
6. Clustering problems
Marsha Gómez edited this page May 25, 2023
·
4 revisions
Exercise 6.1. Consider the k-means algorithm, with k = 3, for the set of patterns given:
- a) Run the algorithm starting from centroids x1 = (5, 7), x2 = (6, 3), x3 = (4, 3).
- b) Run the algorithm starting from centroids x1 = (5, 7), x2 = (6, 3), x3 = (4, 4).
- c) Is it possible to improve the solutions obtained in a) and b)?
% Start from a clean session: close open figures, clear workspace
% variables and clear the command window.
close all;
clear;
clc;
% The pattern matrix and the pattern count are shared with
% exercise_kmeans through global variables.
global data l;
% Pattern set used by every part of the exercise: 60 rows, one 2-D
% pattern per row (column 1 and column 2 are the two coordinates that
% exercise_kmeans plots on the horizontal and vertical axes).
data = [1.2734 6.2721
2.7453 7.4345
1.6954 8.6408
1.1044 8.6364
4.8187 7.3664
2.7224 6.3303
4.8462 8.4123
4.0497 6.7696
1.0294 8.6174
3.7202 5.1327
3.8238 7.1297
3.5805 7.8660
3.2092 5.7172
1.8724 6.3461
4.0895 5.7509
1.9121 6.2877
2.4835 6.6154
4.5637 7.1943
4.4255 5.1950
2.6097 7.2109
6.0992 8.0496
5.9660 7.3042
5.9726 7.9907
5.6166 7.5821
8.8257 5.4929
8.7426 7.0176
8.2749 6.3890
7.9130 5.3686
5.7032 5.5914
6.4415 5.7927
5.7552 7.6891
5.0048 6.7260
6.2657 7.7776
7.7985 6.0271
7.5010 5.0390
7.1722 7.1291
6.7561 6.1176
6.1497 8.7849
7.0066 8.6258
8.0462 6.5707
3.0994 1.7722
5.6857 2.3666
6.3487 4.7316
6.8860 2.5627
3.2277 2.0929
4.8013 1.6078
5.3299 2.5884
5.7466 2.4989
5.8777 1.5245
5.6002 2.7402
5.9077 1.3661
4.4954 3.4585
5.3263 1.0439
3.4645 3.2930
3.2306 4.1589
6.9191 1.9415
4.1393 2.7921
5.3799 3.2774
6.8486 1.2456
3.7431 2.9852];
% Number of clusters and number of patterns (rows of data).
k = 3;
l = size(data, 1);

% Part a): starting centroids, one per row.
x = [5 7; 6 3; 4 3];
fprintf("Exercise A. Centroids \n");
% fprintf reuses the format for each pair of values; the transpose
% feeds it one centroid (row of x) at a time.
fprintf("%.2f %.2f \n", x(1:k, :).');
exercise_kmeans(x, k);

% Part b): same first two centroids, third one moved to (4, 4).
x = [5 7; 6 3; 4 4];
Output

% Part b): print the starting centroids currently held in x, then run
% the step-by-step k-means from them.
fprintf("Exercise B. Centroids \n");
% fprintf reuses the format for each pair of values; the transpose
% feeds it one centroid (row of x) at a time.
fprintf("%.2f %.2f \n", x(1:k, :).');
exercise_kmeans(x, k);
Output

function exercise_kmeans(centroid, k)
% EXERCISE_KMEANS  Step-by-step k-means with a plot after every step.
%
%   exercise_kmeans(centroid, k) clusters the rows of the global matrix
%   `data` into k groups, starting from the centroids given as the rows
%   of `centroid`. The figure is redrawn after every centroid update and
%   every reassignment, and `pause` waits for a key press between steps.
%   The loop stops when the objective (sum of squared distances of each
%   pattern to its assigned centroid) improves by less than 1e-5.
%
%   Inputs:
%     centroid - k-by-2 matrix, one starting centroid per row
%     k        - number of clusters
%
%   Reads the globals `data` (patterns, one per row) and `l` (number of
%   patterns). Returns nothing; the final objective value is echoed to
%   the command window.
global data l;

% One marker color per cluster; the palette is cycled when k exceeds its
% length. Black is left out because the raw patterns are drawn in black.
palette = 'brgmcy';
colorOf = @(j) palette(mod(j - 1, numel(palette)) + 1);

% plot patterns
plot(data(:,1), data(:,2), 'ko');
axis([0 10 0 10])
title('k-means algorithm');
hold on

x = centroid;

% plot the starting centroids
for j = 1 : k
    plot(x(j,1), x(j,2), [colorOf(j) '^']);
end
pause

cluster = zeros(l, 1);
v = inf;   % objective of the previous pass; inf forces at least one update
while true
    % assign every pattern to its nearest centroid (the lowest index wins
    % on ties) and accumulate the objective in the same sweep
    vnew = 0;
    for i = 1 : l
        dmin = inf;
        for j = 1 : k
            dij = norm(data(i,:) - x(j,:));
            if dij < dmin
                dmin = dij;
                cluster(i) = j;
            end
        end
        vnew = vnew + dmin^2;
    end

    % plot clusters
    for j = 1 : k
        members = data(cluster == j, :);
        plot(members(:,1), members(:,2), [colorOf(j) 'o']);
    end
    title(['k-means algoritm: objective function = ', num2str(vnew)]);
    pause

    % stopping criterion: record the newest objective before leaving so
    % the value echoed below matches the one shown in the title
    converged = (v - vnew < 1e-5);
    v = vnew;
    if converged
        break
    end

    % hide the old centroids by overplotting them in white
    for j = 1 : k
        plot(x(j,1), x(j,2), 'w^');
    end

    % update centroids; an empty cluster keeps its previous centroid
    for j = 1 : k
        ind = find(cluster == j);
        if ~isempty(ind)
            % average along dimension 1 explicitly: without it, a cluster
            % with a single member would be averaged across its two
            % coordinates and collapse to one scalar
            x(j,:) = mean(data(ind,:), 1);
        end
    end

    % plot new centroids
    for j = 1 : k
        plot(x(j,1), x(j,2), [colorOf(j) '^']);
    end
    pause
end
v
hold off
end
% Part c): multistart. Run the built-in kmeans from many random starting
% centroids and keep the run with the lowest total objective.
vbest = inf;        % lowest total objective found so far
xbest = [];         % cluster index of every pattern in the best run
clusterbest = [];   % centroids (one per row) of the best run
maxiter = 100;      % number of random restarts
for iter = 1 : maxiter
    % random starting centroids with both coordinates between 0 and 10
    InitialCentroids = 10*rand(k,2);
    % kmeans returns the cluster indices, the centroids and one
    % within-cluster sum of distances per cluster (see its documentation)
    [best, bestcluster, sumd] = kmeans(data, k, 'Start', InitialCentroids);
    % compare runs on the total over all clusters; comparing the
    % per-cluster vector directly would only accept a run in which
    % every single cluster improved
    v = sum(sumd);
    if v < vbest
        xbest = best;
        clusterbest = bestcluster;
        vbest = v   % no semicolon: echo each improvement
    end
end
fprintf("Exercise C. Centroids \n");
for i = 1:k
    fprintf("%.2f %.2f \n", clusterbest(i, 1), clusterbest(i, 2));
end
exercise_kmeans(clusterbest, k);
Output

Exercise 6.2. Consider the k-median algorithm, with k = 3, for the set of patterns given:
- a) Run the algorithm starting from centroids x1 = (5, 7), x2 = (6, 3), x3 = (4, 3).
- b) Run the algorithm starting from centroids x1 = (5, 7), x2 = (6, 3), x3 = (4, 4).
- c) Is it possible to improve the solutions obtained in a) and b)?