Skip to content

Commit f52a01c

Browse files
Solve unsupervised learning quiz
1 parent e03f543 commit f52a01c

File tree

8 files changed

+96
-0
lines changed

8 files changed

+96
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ __Instructor__: Andrew Ng.
108108

109109
## Week 8
110110
### Quizzes
111+
- [Unsupervised Learning](week8/unsupervised-learning-quiz.md)
111112

112113
### Programming Exercises
113114

Loading
Loading
Loading
Loading
Loading

week8/unsupervised-learning-quiz.md

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Unsupervised Learning Quiz
2+
3+
![Question 1](assets/unsupervised-learning/question-1.PNG)
4+
![Question 2](assets/unsupervised-learning/question-2.PNG)
5+
![Question 3](assets/unsupervised-learning/question-3.PNG)
6+
![Question 4](assets/unsupervised-learning/question-4.PNG)
7+
![Question 5](assets/unsupervised-learning/question-5.PNG)

week8/week8.m

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
clc;
2+
clear;
3+
4+
function costs = clusterVariationCosts(X)
5+
K = size(X, 1);
6+
for i = 1:K
7+
[~, minCost] = multipleKMeans(X, i, 5);
8+
costs(i) = minCost;
9+
endfor
10+
endfunction
11+
12+
function [kMeans, minCost] = multipleKMeans(X, K, iters)
13+
n = size(X, 2);
14+
kMeans = zeros(iters, K, n);
15+
cost = zeros(iters, 1);
16+
for i = 1:iters
17+
[clusters, J] = kMeansClustering(X, K);
18+
kMeans(i, :, :) = clusters;
19+
cost(i) = J;
20+
endfor
21+
[minCost, index] = min(cost);
22+
kMeans = reshape(kMeans(index, :, :), K, n);
23+
endfunction
24+
25+
function [clusters, minCost] = kMeansClustering(X, K)
26+
m = size(X, 1);
27+
n = size(X, 2);
28+
29+
% randomly generate K clusters and positions
30+
randomizedDataSet = randperm(m);
31+
clusters = X(randomizedDataSet(1:K), :);
32+
33+
% Run the K-means clustering algorithm
34+
for iter = 1:10
35+
pointGroup = distanceFromClusterPoints(X, K, clusters);
36+
[minDistace, index] = min(pointGroup, [], 2);
37+
cost = (1 / m) * sum(minDistace);
38+
clusters = centroids(X, K, index, clusters);
39+
endfor
40+
minCost = cost;
41+
endfunction
42+
43+
function mat = distanceFromClusterPoints(X, K, clusters)
44+
m = size(X, 1);
45+
mat = zeros(m, K);
46+
for i = 1:K
47+
mat(:, i) = euclideanDistanceSquare(X, clusters(i, :));
48+
endfor
49+
endfunction
50+
51+
function c = centroids(X, K, clusterData, clusters)
52+
n = size(X, 2);
53+
m = size(X, 1);
54+
c = zeros(K, n);
55+
frequency = zeros(K, 1);
56+
for i = 1:m
57+
frequency(clusterData(i))++;
58+
c(clusterData(i), :) += X(i, :);
59+
endfor
60+
mask = c == 0;
61+
frequency = maskZeroAsOne(frequency);
62+
c = c ./ frequency;
63+
c += mask .* clusters;
64+
endfunction
65+
66+
function mat = maskZeroAsOne(mat)
67+
mask = mat == 0;
68+
mat += mask;
69+
endfunction
70+
71+
function d = euclideanDistanceSquare(X, cluster)
72+
trainingDataSize = size(X, 1);
73+
similarityMatrix = repelem(cluster, trainingDataSize, 1);
74+
difference = X - similarityMatrix;
75+
d = sum(difference .^ 2, 2);
76+
endfunction
77+
78+
labels = 5;
79+
data = [-10 1 ; 11 2 ; 45 3 ; 4 4 ; 7 5 ; 100 100 ; 5 -8 ; -89 23];
80+
% disp('K means clustering - single');
81+
% disp(kMeansClustering(data, labels));
82+
83+
disp('multiple k means');
84+
[kMeans, minCost] = multipleKMeans(data, labels, 10);
85+
disp('min cost'); disp(minCost);
86+
disp('k means'); disp(kMeans);
87+
88+
plot(clusterVariationCosts(data));

0 commit comments

Comments
 (0)