diff --git a/07_RegressionModels/01_06_residualVariation/index.Rmd b/07_RegressionModels/01_06_residualVariation/index.Rmd index f4c090164..90390b7d4 100644 --- a/07_RegressionModels/01_06_residualVariation/index.Rmd +++ b/07_RegressionModels/01_06_residualVariation/index.Rmd @@ -83,44 +83,77 @@ for (i in 1 : n) lines(c(x[i], x[i]), c(y[i], yhat[i]), col = "red" , lwd = 2) ``` ---- -## Residuals versus X -```{r, echo = FALSE, fig.height=5, fig.width=5} -plot(diamond$carat, e, - xlab = "Mass (carats)", - ylab = "Residuals (SIN $)", - bg = "lightblue", - col = "black", cex = 1.1, pch = 21,frame = FALSE) -abline(h = 0, lwd = 2) -for (i in 1 : n) - lines(c(x[i], x[i]), c(e[i], 0), col = "red" , lwd = 2) -``` - ---- +--- ## Non-linear data ```{r, echo = TRUE, fig.height=5, fig.width=5} x <- runif(100, -3, 3); y <- x + sin(x) + rnorm(100, sd = .2); -plot(x, y); abline(lm(y ~ x)) +library(ggplot2) +g = ggplot(data.frame(x = x, y = y), aes(x = x, y = y)) +g = g + geom_smooth(method="lm", colour="black") +g = g + geom_point(size = 7, colour="black", alpha = 0.4) +g = g + geom_point(size = 5, colour="red", alpha = 0.4) +g ``` --- ```{r, echo = TRUE, fig.height=5, fig.width=5} -plot(x, resid(lm(y ~ x))); -abline(h = 0) +g = ggplot(data.frame(x = x, y = resid(lm(y ~ x))), + aes(x = x, y = y)) +g = g + geom_hline(yintercept = 0, size=2); +g = g + geom_point(size = 7, colour = "black", alpha = 0.4) +g = g + geom_point(size = 5, colour = "red", alpha = 0.4) +g = g + xlab("X") + ylab("Residual") +g ``` --- ## Heteroskedasticity ```{r, echo = TRUE, fig.height=4.5, fig.width=4.5} x <- runif(100, 0, 6); y <- x + rnorm(100, mean = 0, sd = .001 * x); -plot(x, y); abline(lm(y ~ x)) +g = ggplot(data.frame(x = x, y = y), aes(x = x, y = y)) +g = g + geom_smooth(method = "lm", colour = "black") +g = g + geom_point(size = 7, colour = "black", alpha = 0.4) +g = g + geom_point(size = 5, colour = "red", alpha = 0.4) +g ``` --- ## Getting rid of the blank space can be helpful ```{r, echo = TRUE, 
fig.height=4.5, fig.width=4.5} -plot(x, resid(lm(y ~ x))); -abline(h = 0) +g = ggplot(data.frame(x = x, y = resid(lm(y ~ x))), + aes(x = x, y = y)) +g = g + geom_hline(yintercept = 0, size = 2); +g = g + geom_point(size = 7, colour = "black", alpha = 0.4) +g = g + geom_point(size = 5, colour = "red", alpha = 0.4) +g = g + xlab("X") + ylab("Residual") +g +``` +--- +## Running Residual Plot on Diamond Data +```{r, echo = FALSE, fig.height=4.5, fig.width=4.5} + +diamond$e <- resid(lm(price ~ carat, data=diamond)) +g = ggplot(diamond, aes(x = carat, y = e)) +g = g + xlab("Mass (carats)") +g = g + ylab("Residual price (SIN $)") +g = g + geom_hline(yintercept = 0, size =2) +g = g + geom_point(size = 7, colour = "black", alpha=0.5) +g = g + geom_point(size = 5, colour = "blue", alpha=0.2) +g +``` + +--- +## Diamond data residual plot +```{r, echo = FALSE, fig.height=4.5, fig.width=4.5} +e = c(resid(lm(price ~ 1, data = diamond)), + resid(lm(price ~ carat, data = diamond))) +fit = factor(c(rep("Itc", nrow(diamond)), + rep("Itc, slope", nrow(diamond)))) +g = ggplot(data.frame(e = e, fit = fit), aes(y = e, x = fit, fill = fit)) +g = g + geom_dotplot(binaxis = "y", dotsize = 2, stackdir = "center", binwidth = 15) +g = g + xlab("Fitting approach") +g = g + ylab("Residual price") +g ``` ---