机器学习实战：单变量线性回归的实现

2012-09-07

ComputeCost函数：

[plain]
function J = computeCost(X, y, theta)
% COMPUTECOST Compute the cost for linear regression.
%   J = COMPUTECOST(X, y, theta) returns the half mean squared error of
%   using theta as the parameter vector for linear regression on the
%   training data X (m x (n+1) design matrix, first column ones) and
%   y (m x 1 target vector).

m = length(y);            % number of training examples

predictions = X * theta;  % m x 1 vector of hypothesis values
errors = predictions - y; % residuals
% Inner product of the residual vector with itself equals the sum of
% squared errors (the source had the transpose operator ' garbled into
% an HTML entity, which does not parse).
J = (errors' * errors) / (2 * m);

end

[plain]
function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
% GRADIENTDESCENT Run batch gradient descent to fit linear regression.
%   [theta, J_history] = GRADIENTDESCENT(X, y, theta, alpha, num_iters)
%   takes num_iters gradient steps with learning rate alpha and returns
%   the learned parameters together with the cost after each iteration.
%
%   X         - m x (n+1) design matrix (first column of ones)
%   y         - m x 1 vector of targets
%   theta     - (n+1) x 1 initial parameter vector
%   alpha     - learning rate (scalar)
%   num_iters - number of iterations to run
%
%   J_history(iter) records computeCost after iteration iter, so the
%   convergence behaviour can be plotted against the iteration number.

m = length(y);                   % number of training examples
J_history = zeros(num_iters, 1); % cost after each iteration

for iter = 1:num_iters
    % Vectorized simultaneous update of all parameters. This replaces
    % the original hard-coded theta(1)/theta(2) update so the function
    % also works for multivariate regression (e.g. theta = zeros(3,1),
    % as used later in this article); for n = 2 it computes exactly the
    % same values as the original temp1/temp2 update.
    theta = theta - (alpha / m) * (X' * (X * theta - y));
    J_history(iter) = computeCost(X, y, theta);
end

end

(1)画出训练集的散点图+拟合后的直线；
(2)画出J(theta)为z轴，theta0为x轴，theta1为y轴的三维曲线；
(3)画出(2)的三维曲线的等高线图；

1.画散点图+拟合的直线

6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
........

(2)X = data( : , 1 ); Y = data( : , 2); %将两列分别赋予X和Y
(3)X = [ones(size(X,1),1),X]; %在X的左边添加一列1
(4)plot(X,Y,'rx','MarkerSize', 4); %画图，将X向量作为X轴，Y向量作为Y轴，每个点用"x"表示，'r'表示红点，每个点的大小为4;
(5)axis([4 24 -5 25]); %调整x和y轴的起始坐标和最高坐标；
(6)xlabel('x'); %给x轴标号为'x';
(7)ylabel('y'); %给y轴标号为'y';

[theta,J_history] = gradientDescent(X, y, theta, alpha, num_iters);

plot(X(:,2), X*theta); %画出最后拟合的直线

2.Surface Plot

function J = computeCost(X, y, theta)
% Cost function for linear regression: half the mean squared error of
% the predictions X*theta against the targets y.
%   X     - m x (n+1) design matrix
%   y     - m x 1 target vector
%   theta - (n+1) x 1 parameter vector

m = length(y);             % number of training examples
residuals = X * theta - y; % prediction errors
% residuals' * residuals is the sum of squared errors.
J = (residuals' * residuals) / (2 * m);

end

(1)theta0_vals = linspace(-10, 10, 100); %从-10到10之间取100个数组成一个向量
(2)theta1_vals = linspace(-1, 4, 100); %从-1到4之间取100个数组成一个向量
(3)J_vals = zeros(length(theta0_vals), length(theta1_vals)); %初始化J_vals矩阵，对于某个theta0和theta1，J_vals都有对应的cost function值；
(4)计算每个（theta0，theta1）所对应的J_vals；
for i = 1:length(theta0_vals)
for j = 1:length(theta1_vals)
t = [theta0_vals(i); theta1_vals(j)];
J_vals(i,j) = computeCost(X, y, t);
end
end
(5)figure; %创建一个图
(6)surf(theta0_vals,theta1_vals,J_vals); %x轴为theta0_vals，y轴为theta1_vals，z轴为J_vals；
(7)xlabel('\theta_0'); %添加x轴标志
(8)ylabel('\theta_1'); %添加y轴标志

3.Contour Plot

(1)theta0_vals = linspace(-10, 10, 100); %从-10到10之间取100个数组成一个向量
(2)theta1_vals = linspace(-1, 4, 100); %从-1到4之间取100个数组成一个向量
(3)J_vals = zeros(length(theta0_vals), length(theta1_vals)); %初始化J_vals矩阵，对于某个theta0和theta1，J_vals都有对应的cost function值；
(4)计算每个（theta0，theta1）所对应的J_vals；
for i = 1:length(theta0_vals)
for j = 1:length(theta1_vals)
t = [theta0_vals(i); theta1_vals(j)];
J_vals(i,j) = computeCost(X, y, t);
end
end
(5)figure; %创建一个图
(6)contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20)); %画等高线图
(7)xlabel('\theta_0'); ylabel('\theta_1');

plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2);

4.画图查看Learning Rate是否合理

(1)[theta,J_history] = gradientDescent(X, y, theta, alpha, num_iters);
(2)figure;
(3)plot(1:length(J_history), J_history, '-b', 'LineWidth', 2);
(4)xlabel('Number of iterations');
(5)ylabel('Cost J');

(1)alpha=0.01;
(2)[theta,J1] = gradientDescent(X, y, zeros(3,1), alpha, num_iters);
(3)alpha=0.03;
(4)[theta,J2] = gradientDescent(X, y, zeros(3,1), alpha, num_iters);
(5)alpha=0.1;
(6)[theta,J3] = gradientDescent(X, y, zeros(3,1), alpha, num_iters);
(7)plot(1:numel(J1), J1, '-b', 'LineWidth', 2);
(8)plot(1:numel(J2), J2, '-r', 'LineWidth', 2);
(9)plot(1:numel(J3), J3, '-k', 'LineWidth', 2);