# TEST SCRIPT FOR ASSIGNMENT #1.
#
# Fits two models (functions lr.est/lr.pred from "lr.r" and lrb.est/lrb.pred
# from "lrb.r") on two training sets over a grid of lambda values, prints the
# estimates together with the test error rate and mean squared error for each
# lambda, and writes one plot per (data set, model) pair to "a1plts1.ps".

source("lr.r")
source("lrb.r")

# Run one sweep over lambda for a given estimator / predictor pair, print the
# per-lambda results, and draw the error-rate / squared-error plot.
#
# Arguments:
#   est        - function called as est(trn.y, trn.x, lambda); its result is
#                printed with round(., 4) and handed unchanged to pred.
#   pred       - function called as pred(tst.x, e); its result is thresholded
#                at 0.5 and compared with tst.y, so it is presumably a vector
#                of predicted probabilities, one per test case (confirm
#                against lr.r / lrb.r).
#   trn.y, trn.x - training responses and training input matrix.
#   tst.x, tst.y - test input matrix and test responses.
#   lambda.seq - lambda values to try; the first one is plotted at
#                10^(-1.25) on the log axis rather than at its own value
#                (the callers below pass 0 first, which cannot be shown on a
#                log scale).
#   ylim       - y-axis limits for the plot.
#   main       - plot title.
#
# Returns (invisibly) a list with the lambda values, error rates and squared
# errors, in the order of lambda.seq.
run.lambda.tests <- function(est, pred, trn.y, trn.x, tst.x, tst.y,
                             lambda.seq, ylim, main) {
  n.lambda <- length(lambda.seq)

  # Preallocate instead of growing the vectors with c() inside the loop.
  err.rate.seq <- numeric(n.lambda)
  sq.err.seq <- numeric(n.lambda)

  for (i in seq_len(n.lambda)) {
    lambda <- lambda.seq[i]

    e <- est(trn.y, trn.x, lambda)
    p <- pred(tst.x, e)

    # Classification error when predicting 1 for p > 0.5, and mean squared
    # difference between the response and the prediction.
    err.rate <- mean(tst.y != (p > 0.5))
    sq.err <- mean((tst.y - p)^2)

    cat("lambda =", lambda, "\n")
    cat(" estimates:", round(e, 4), "\n")
    cat(" error rate:", round(err.rate, 5),
        " sq. error:", round(sq.err, 5), "\n\n")

    err.rate.seq[i] <- err.rate
    sq.err.seq[i] <- sq.err
  }

  # Horizontal plotting positions: the first lambda is replaced by a stand-in
  # value just left of the remaining ones.
  x.pos <- c(10^(-1.25), lambda.seq[-1])

  # Empty frame first (type = "n"), so both curves share the same fixed axes.
  plot(x.pos, rep(0, n.lambda), log = "x",
       xlab = "lambda (log scale, first point is for lambda=0)",
       ylab = "green: error rate, blue: squared error",
       ylim = ylim, type = "n")
  lines(x.pos, err.rate.seq, col = "green", type = "b", pch = 20)
  lines(x.pos, sq.err.seq, col = "blue", type = "b", pch = 20)

  # Dotted reference lines at the best value reached by each curve.
  abline(h = min(err.rate.seq), col = "green", lty = 3)
  abline(h = min(sq.err.seq), col = "blue", lty = 3)

  title(main)

  invisible(list(lambda = lambda.seq,
                 err.rate = err.rate.seq,
                 sq.err = sq.err.seq))
}

# Read a headerless whitespace-separated table of inputs as a matrix.
read.x.matrix <- function(file) {
  as.matrix(read.table(file, header = FALSE))
}

# Lambda grid shared by all four sweeps: 0, then 10^-1 ... 10^3 in steps of
# a quarter of a decade.
lambda.seq <- c(0, 10^seq(-1, 3, by = 1/4))

postscript("a1plts1.ps", horizontal = FALSE)
ps.dev <- dev.cur()

# Make sure the PostScript device is closed even if a fit, a prediction or a
# file read fails part-way through; otherwise the device is left open.
tryCatch({

  par(mfrow = c(2, 2))

  cat("\nTESTS ON DATA SET 1\n\n")

  trn1x <- read.x.matrix("a1trn1x")
  trn1y <- scan("a1trn1y")

  tst1x <- read.x.matrix("a1tst1x")
  tst1y <- scan("a1tst1y")

  # Enable this to see a plot of the training cases.
  # plot(trn1x[,1],trn1x[,2],col=c("blue","red")[trn1y+1],pch=20,xlab="",ylab="")

  cat("\nOrdinary logistic regression\n\n")

  # Enable this to check lambda=0 results against R's built-in glm function.
  # print((m = glm(trn1y~trn1x,family=binomial))); cat("\n")

  run.lambda.tests(lr.est, lr.pred, trn1y, trn1x, tst1x, tst1y,
                   lambda.seq, ylim = c(0.15, 0.3),
                   main = "Data set 1, Ordinary logistic regression")

  cat("\nBounded logistic regression\n\n")

  run.lambda.tests(lrb.est, lrb.pred, trn1y, trn1x, tst1x, tst1y,
                   lambda.seq, ylim = c(0.15, 0.3),
                   main = "Data set 1, Bounded logistic regression")

  cat("\nTESTS ON DATA SET 2\n\n")

  trn2x <- read.x.matrix("a1trn2x")
  trn2y <- scan("a1trn2y")

  tst2x <- read.x.matrix("a1tst2x")
  tst2y <- scan("a1tst2y")

  cat("\nOrdinary logistic regression\n\n")

  run.lambda.tests(lr.est, lr.pred, trn2y, trn2x, tst2x, tst2y,
                   lambda.seq, ylim = c(0.1, 0.25),
                   main = "Data set 2, Ordinary logistic regression")

  cat("\nBounded logistic regression\n\n")

  run.lambda.tests(lrb.est, lrb.pred, trn2y, trn2x, tst2x, tst2y,
                   lambda.seq, ylim = c(0.1, 0.25),
                   main = "Data set 2, Bounded logistic regression")

}, finally = dev.off(ps.dev))