% Illustration of 1D linear regression
% Accompanies CS195-5 Lecture 2
% (c) Greg Shakhnarovich, Brown University, 2006
%
% Fits y = w(1) + w(2)*x by least squares on the first n training points
% for n in ns, printing and plotting the empirical (training) loss and the
% test error for each n. Press any key to advance between fits.
% Figure 1 shows the true line, the current estimate and the training
% points used; figure 2 shows both losses as a function of n.

% it's a script so it will run in our current environment.
% best to make sure to clear some things
clear w empLoss testError

sigma = 3; % std. deviation of noise

% Ask whether to draw fresh data. The prompt advertises 'n' as the
% default, so an empty reply means 'n'. Whole-string comparison is used so
% that multi-character replies (e.g. 'yes') are rejected and re-prompted
% instead of being compared character by character.
genNew = '';
while isempty(genNew)
  reply = strtrim(input('Generate new random data? (y/[n])','s'));
  if isempty(reply)
    genNew = 'n';
  elseif any(strcmpi(reply,{'y','n'}))
    genNew = lower(reply);
  end
end
makeNew = strcmp(genNew,'y');

if makeNew
  % Uniform draws on [-5,5] using base rand, so no Statistics Toolbox
  % (unifrnd) is needed. Draw order matches the original script:
  % x, noise, xtst, noiseTest.
  x = -5 + 10*rand(1,100);
  noise = randn(size(x,2),1)*sigma; % white Gaussian additive noise,
                                    % var=sigma^2
  % also generate test set
  xtst = -5 + 10*rand(1,500);
  noiseTest = randn(size(xtst,2),1)*sigma;
else
  % this will load four variables: the pre-drawn (random) x and xtst,
  % and the noise vectors. Note: once those are fixed, there is no source
  % of randomness in the simulation.
  load XY_Lecture2;
end

wtrue = [.75; -1.5]; % y=.75-1.5x

% noise-free targets; the column of ones carries the intercept term
ytrue = [ones(size(x,2),1) x']*wtrue;
Xtst = [ones(size(xtst,2),1) xtst'];
ytrueTest = Xtst*wtrue;

% create (corrupted) observations, train and test
y = ytrue + noise;
ytst = ytrueTest + noiseTest;

% will maintain figure axis stable (prevent visually unpleasant "jerking"
% of the axis as we plot new things)
axisbox = [-5 5 min(y)-.5 max(y)+.5];

% prepare for plotting
figure(1); clf;

% a line is drawn through its values at the two ends of the x range
xEnds = [-5 5];
XEnds = [1 -5; 1 5]; % design matrix for the two end points

% plot the "true" function
hTrue = line(xEnds, XEnds*wtrue);
set(hTrue,'Color','r','LineWidth',4);
hold on;

% "handle" for the current line estimate. It is created once (with NaN
% data, so nothing is drawn yet) and updated in the loop; deleting and
% re-creating it would disturb the legend entries.
hLine = line(xEnds,[NaN NaN]);
set(hLine,'Color','k','LineWidth',4,'LineStyle','--');

% similar handle to the training data points
hX = plot(NaN,NaN,'bo','MarkerSize',10,'LineWidth',2);

% explicit handles: the legend lists only the two lines, not the markers
legend([hTrue hLine],'True (unknown)','Estimated');
axis(axisbox);

ns = [2 3 5 10 15 25 50];

% preallocate the per-n results instead of growing them in the loop
w = cell(1,length(ns));
empLoss = zeros(1,length(ns));
testError = zeros(1,length(ns));

for i = 1:length(ns)
  n = ns(i);

  % create X and Y as in Lecture 2. Note adding the ones and transposition
  X = [ones(n,1) x(:,1:n)'];
  Y = y(1:n);

  % calculate LSQ estimate of w
  w{i} = pinv(X)*Y;

  % predict labels for training data and calculate empirical loss
  yhatTrain = X*w{i};
  empLoss(i) = mean((Y-yhatTrain).^2);

  % what's the test error (a better proxy for expected loss)?
  yhatTest = Xtst*w{i};
  testError(i) = mean((ytst-yhatTest).^2);

  % file identifier 2 is standard error
  fprintf(2,'%d examples: empirical loss %.4f, test %.4f\n',n,empLoss(i),testError(i));

  % move the estimate line and show the n training points used for it
  set(hLine,'YData',(XEnds*w{i})');
  set(hX,'XData',x(1:n),'YData',y(1:n)');
  axis(axisbox);
  title(sprintf('%d training examples, loss %.4f/ test %.4f',n,empLoss(i),testError(i)));

  pause; % wait for a key press before the next fit
end

% summary: both losses as a function of the training set size
figure(2); clf;
plot(ns,empLoss,'b','LineWidth',6);
hold on;
plot(ns,testError,'r','LineWidth',6);
hlegend = legend('Empirical loss','Test error');
set(hlegend,'FontSize',24);
% cap the y range at 20 so that one very large error does not flatten the plot
axis([0 max(ns) 0 min(20,max([testError,empLoss])+1)]);
set(gca,'XTick',ns);