
Commit d6d3d37

Author mlinderman committed: more updates
1 parent 2ebd73b commit d6d3d37

File tree

5 files changed: +36 −22 lines


.DS_Store

0 Bytes
Binary file not shown.

machine-learning-ex1/ex1/ex1_multi.m

+30 −20
@@ -82,49 +82,58 @@
 fprintf('Running gradient descent ...\n');

 % Choose some alpha value
-alpha_histories = zeros(10,1)
+alpha_histories = []
 alpha = .01;
-num_iters = 150;
-theta = zeros(3, 1)
+num_iters = 200;

 % Init Theta and Run Gradient Descent 10 times with varying alphas, saving results
-J_histories = [10, 2]
-for i = length(J_histories)
-  J_histories(i, 1:2) = gradientDescentMulti(X, y, theta, alpha, num_iters); % returns [theta, J_history]
-  alpha_histories(i) = alpha
-  alpha = alpha * 3
+J_histories = {};
+ans_thetas = {};
+for i = 1:8
+  theta = zeros(3, 1);
+  [t, h] = gradientDescentMultiVectorized(X, y, theta, alpha, num_iters); % returns [theta, J_history]
+  ans_thetas{i} = t;
+  J_histories{i} = h;
+  alpha_histories = [alpha_histories; alpha];
+  alpha = alpha * 2;
 end

 % Plot the convergence graph
 figure;
 xlabel('Number of iterations');
 ylabel('Cost J');

-for hist = 1:length(J_histories)
-  x = [1:length(J_histories(hist, 2))]
-  y = J_histories(hist, 2)
-  plot(1:numel(J_histories(hist)()), J_history(hist), '-b', 'LineWidth', 2);
+for i = 1:length(J_histories)
+  if sum(isinf(J_histories{i})) > 0
+    break
+  endif
+  x = [1:length(J_histories{i})];
+  y = J_histories{i};
+  plot(x, y, '-'+i, 'LineWidth', 2);
+  xlabel('Number of iterations');
+  ylabel('Cost J');
   hold on;
 end

 % Display gradient descent's result
-fprintf('Theta computed from gradient descent: \n');
-fprintf(' %f \n', theta);
-fprintf('\n');
+fprintf('Thetas computed from gradient descent: \n');
+for i = 1:length(ans_thetas)
+  fprintf('run #%i: %.0f, %.0f, %.0f \n', i, ans_thetas{i});
+endfor

 % Estimate the price of a 1650 sq-ft, 3 br house
 % ====================== YOUR CODE HERE ======================
 % Recall that the first column of X is all-ones. Thus, it does
 % not need to be normalized.

-testX = [1650, 3]
+testX = [1650, 3];

 % to normalize a single row of features, you want to use the values you found
 % earlier for all samples for mean and standard deviation
 testX = (testX - mu) ./ sigma

-testX = [1, testX]
-theta
+testX = [1, testX];
+theta = ans_thetas{length(ans_thetas)}; % the last thetas calculated in the iteration above with different alphas
 % two equally sized vectors, theta and testX (one column vector, one row vector)
 % remember that theta1*x1 + theta2*x2 + theta3*x3... is your hypothesis
 % and now you have thetas calculated using gradient descent
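One caveat on the plotting loop above: in Octave, `'-'+i` performs character arithmetic and yields a number rather than a linespec string, so `plot` will reject it. A minimal sketch of a working alternative (the `colors` palette string is an assumption, not part of the commit):

colors = 'bgrkmc';                      % assumed palette, one color per learning-rate run
for i = 1:length(J_histories)
  if any(isinf(J_histories{i}))         % stop at the first diverged run (alpha too large), as the commit does
    break
  end
  style = ['-' colors(mod(i - 1, length(colors)) + 1)];
  plot(1:length(J_histories{i}), J_histories{i}, style, 'LineWidth', 2);
  hold on;
end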
@@ -165,7 +174,7 @@
 X = [ones(m, 1) X];

 % Calculate the parameters from the normal equation
-theta = normalEqn(X, y);
+theta = normalEqn(X, y)

 % Display normal equation's result
 fprintf('Theta computed from the normal equations: \n');
@@ -175,7 +184,8 @@

 % Estimate the price of a 1650 sq-ft, 3 br house
 % ====================== YOUR CODE HERE ======================
-price = 0; % You should change this
+
+price = sum(theta' .* [1, 1650, 3]);


 % ============================================================
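Since the `theta` returned by `normalEqn` is a column vector, `sum(theta' .* [1, 1650, 3])` is just an inner product written element-wise; a sketch of the equivalent, more idiomatic form:

% hypothesis h(x) = x * theta; the single matrix product replaces the
% transpose-multiply-sum sequence in the committed line
price = [1, 1650, 3] * theta;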

machine-learning-ex1/ex1/gradientDescentMulti.m

+4
@@ -19,8 +19,11 @@

 newThetas = zeros([size(theta), 1]);
 for idx = 1:size(theta) % size of the largest dimension, the row count, in this instance
+  % using X*theta, not theta'*X, because the samples of X are rows, not columns, and to do this
+  % vector/matrix multiplication we want the features in each row of X multiplied by the corresponding thetas
   newThetas(idx, 1) = theta(idx) - alpha * 1/m * sum((X * theta - y) .* X(:, idx));
 end
+
 theta = [newThetas];

 % ============================================================
@@ -31,3 +34,4 @@
 end

 end
+
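ex1_multi.m now calls `gradientDescentMultiVectorized`, whose source is not included in this commit. Assuming it keeps the samples-as-rows convention described in the comment above, the per-feature loop collapses into a single matrix product; a hypothetical sketch, not the committed source:

function [theta, J_history] = gradientDescentMultiVectorized(X, y, theta, alpha, num_iters)
  % hypothetical reconstruction matching the "% returns [theta, J_history]" comment in ex1_multi.m
  m = length(y);
  J_history = zeros(num_iters, 1);
  for iter = 1:num_iters
    theta = theta - (alpha / m) * (X' * (X * theta - y));  % update all thetas at once
    J_history(iter) = computeCostMulti(X, y, theta);       % cost helper provided by the exercise
  end
end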

machine-learning-ex1/ex1/normalEqn.m

+1 −1
@@ -12,7 +12,7 @@

 % ---------------------- Sample Solution ----------------------

-
+theta = pinv(X'*X)*(X'*y);


 % -------------------------------------------------------------
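For reference, `pinv(X'*X)*(X'*y)` is the closed-form normal-equation solution, with `pinv` in place of a plain inverse so a near-singular X'X still yields a least-squares answer:

$$\theta = (X^\top X)^{-1} X^\top y$$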

machine-learning-ex1/ex1/token.mat

+1 −1
@@ -1,4 +1,4 @@
-# Created by Octave 4.4.1, Sun Sep 09 15:55:20 2018 EDT <mlinderm@Mark-Lindermans-MacBook-Pro-2.local>
+# Created by Octave 4.4.0, Sat Sep 15 15:09:19 2018 EDT <marklinderman@Mark Linderman's MacBook Pro>
 # name: email
 # type: sq_string
 # elements: 1
