options(echo = FALSE) #################################################################### ##Execute either of following on dos prompt (omit the "##"): ##"C:\Program Files\R\rw2001\bin\Rterm.exe" -q --no-restore --no-save < linear_reg.r > R.out ##"S:\R\rw2001\bin\Rterm.exe" -q --no-restore --no-save < linear_reg.r > R.out #################################################################### library(MASS); #library(car); # Read in data file: # xydata <- scan("/Users/fmasci/ABS/R_language/Mvt1_Mvt2.txt", list(0,0)); xydata <- scan("P:\\My Documents\\FrankMasci\\LinearRegress_underR\\Mvt1_Mvt2.txt", list(0,0)); # Assign variables to columns: x <- xydata[[1]]; y <- xydata[[2]]; #------------------------------------------------------------- # Simple linear regression 1: (y = mx + c; estimate m and c by LLS) lin <- lm(y ~ x + 1); par <- coefficients(summary.lm(lin)); intercept1 <- par[1,1]; slope1 <- par[2,1]; seint1 <- par[1,2]; seslope1 <- par[2,2]; tint1 <- par[1,3]; tslope1 <- par[2,3]; pint1 <- par[1,4]; pslope1 <- par[2,4]; write(paste("\nSLOPE1=",slope1), file=""); write(paste("SESLOPE=",seslope1), file=""); write(paste("TSLOPE=",tslope1), file=""); write(paste("PSLOPE=",pslope1), file=""); write(paste("INTERCEPT1=",intercept1), file=""); write(paste("SEINT=",seint1), file=""); write(paste("TINT=",tint1), file=""); write(paste("PINT=",pint1), file=""); rm(lin,par); #------------------------------------------------------------- # Robust linear regression 2: using "lqs" function (outlier resistant) set.seed(123); lin <- lqs(x, y, intercept = TRUE, method="lts"); par <- coefficients(lin); intercept2 <- par[1]; slope2 <- par[2]; write(paste("\nSLOPE2=",slope2), file=""); write(paste("INTERCEPT2=",intercept2), file=""); rm(lin,par); #------------------------------------------------------------- # Robust linear regression 3: using "rlm" function (outlier resistant) lin <- rlm(y ~ x + 1, method="MM"); par <- coefficients(summary(lin)); intercept3 <- par[1,1]; slope3 <- par[2,1]; seint3 <- par[1,2]; seslope3 <- par[2,2]; tint3 <- par[1,3]; tslope3 <- par[2,3]; slopemax <- slope3 + seslope3; slopemin <- slope3 - seslope3; intmax <- intercept3 + seint3; intmin <- intercept3 - seint3; write(paste("\nSLOPE3=",slope3), file=""); write(paste("SESLOPE=",seslope3), file=""); write(paste("TSLOPE=",tslope3), file=""); write(paste("INTERCEPT3=",intercept3), file=""); write(paste("SEINT=",seint3), file=""); write(paste("TINT=",tint3,"\n"), file=""); resid <- residuals(lin); rm(lin,par); #------------------------------------------------------------- # Find outliers in residual distribution using MAD method and report. # Also store original data with outliers removed for robust cov. matrix. threshold <- 3.0; MADvalue <- mad(resid); zscore <- ( resid - median(resid) ) / MADvalue; #initialise. outx <- 0; outy <- 0; nooutx <- 0; noouty <- 0; pdiff <- 0; j <- 1; k <- 1; for( i in 1:length(zscore) ) { if( abs(zscore[[i]]) > threshold ) { outx[[j]] <- x[[i]]; outy[[j]] <- y[[i]]; j = j + 1; } else { nooutx[[k]] <- x[[i]]; noouty[[k]] <- y[[i]]; k = k + 1; } } #------------------------------------------------------------- # Using good data (no outliers), compute robust (outlier corrected) covariances # for mahalanobis confidence ellipse plot. covmat <- cov(cbind(nooutx, noouty), use = "all.obs", method = "pearson"); cormat <- cov2cor(covmat); write(paste("\nCORRGOODDATA",cormat[1,2]), file=""); write("\n",file=""); #------------------------------------------------------------- # Mahalanobis distance measures and P-values for the outliers. # Report outlier mvt1, mvt2, sqrt[mahala dists], p-values: mahala <- mahalanobis(cbind(outx, outy), colMeans(cbind(nooutx, noouty)), covmat); pvalue <- pchisq(mahala,2,lower.tail=FALSE); for( i in 1:length(outx) ) { write( paste("OUTLIER",outx[[i]],outy[[i]],sqrt(mahala[[i]]),pvalue[[i]]), file="" ); } #------------------------------------------------------------- # plots #bitmap(file="myplot1.png", width=5, height=5, res=400) png(file="myplot1.png", width = 680, height = 450, pointsize = 12, bg="transparent", res = 200); plot(x, y, main="linear regress.", xlab="mvt series1", ylab="mvt series2", pch=" ") points(x, y, col="red", pch=24, bg="red") points(outx, outy, pch=4, cex=2.5) abline(0, 1, col="black") abline(intmax, slopemax, col="blue", lty=2) abline(intmin, slopemin, col="blue", lty=2) #ellipse(colMeans(cbind(nooutx, noouty)), covmat, 6, center.pch=1, center.cex=0, col="black", lwd=2) # ??????????? sqrt(qchisq(0.95,2))) #data.ellipse(nooutx, noouty, levels=0.95, center.pch=1, center.cex=0, plot.points=FALSE, robust=TRUE, col="green", lwd=2) rm(x, y) #bitmap(file="myplot2.png", width=5, height=5, res=400) png(file="myplot2.png", width = 680, height = 450, pointsize = 12, bg="transparent", res = 200); hist(resid, 40, main="Histogram of Residuals", xlab="residual", ylab="Number", col="red"); q() #--------------------END OF linear_reg.in---------------------