# Exercise session 4, exercise 1
#
# Simulate four responses on a common predictor, fit a simple linear
# regression to each, and inspect the standard diagnostic plots and
# influence measures.

source("resplot.R")

# Setup ----
set.seed(1)
n <- 100
p <- 2 # number of regression coefficients (intercept + slope)
xx <- seq_len(n)

# Simulated responses. The order of the rnorm() calls is kept fixed so the
# draws are reproducible under the seed above.
yy.a <- 2 + 1 * xx + rnorm(n)                  # linear mean, unit-variance noise
yy.b <- 2 + 1 * xx + rnorm(n) * xx             # linear mean, noise scaled by xx
yy.c <- 2 + 1 * xx + rnorm(n) * (1 + xx / n)   # linear mean, noise scaled by 1 + xx / n
yy.d <- cos(xx * pi / (n / 2)) + rnorm(n)      # cosine mean, unit-variance noise

# Rule-of-thumb cutoffs ----
leverage.cutoff <- 2 * p / n                                      # Montgomery p. 213
cooks.cutoff <- qf(0.5, df1 = p, df2 = n - p, lower.tail = FALSE) # Montgomery p. 215
studres.cutoff <- qt(0.05 / 2, df = n - p, lower.tail = FALSE)    # Montgomery p. 135

leverage.cutoff
cooks.cutoff
studres.cutoff

# a)

# Model a ----
par(mfrow = c(2, 3))
plot(yy.a ~ xx, pch = 20)
fit.a <- lm(yy.a ~ xx)
abline(fit.a, col = "red")
plot(fit.a, which = 1:5, pch = 20)
# Normal assumption ok.
# No leverage points.
# Some observations (14, 24, 61) have large (in absolute value) residuals, but
# judging from the residuals vs. fitted plot this seems ok. Note that these
# values lie beyond the 5% cutoff of the t(n - p) distribution, but since we
# have 100 observations we expect about 5 observations to be this "extreme".
infl.a <- influence.measures(fit.a)
summary(infl.a)

# Model b ----
par(mfrow = c(2, 3))
plot(yy.b ~ xx, pch = 20)
fit.b <- lm(yy.b ~ xx)
abline(fit.b, col = "red")
plot(fit.b, which = 1:5, pch = 20)
# Variance is not homogeneous; the residuals seem to depend on x.
# The normal assumption seems to be violated. (Note: it is not actually
# violated, but for the Q-Q plot to work the variance has to be constant.)
# Observations 96, 98, 100 seem to have high leverage and high standardized
# residuals, so they may be influential, but looking at the x-y plot no point
# is influential.

# Model c ----
par(mfrow = c(2, 3))
plot(yy.c ~ xx, pch = 20)
fit.c <- lm(yy.c ~ xx)
abline(fit.c, col = "red")
plot(fit.c, which = 1:5, pch = 20)
infl.c <- influence.measures(fit.c)
summary(infl.c)
# As in the previous model, from the residuals vs. fitted plot we conclude
# that there might be a dependence between x and e. This dependence also
# explains the non-normality indicated by the Q-Q plot.
# Model d ----
par(mfrow = c(2, 3))
plot(yy.d ~ xx, pch = 20)
fit.d <- lm(yy.d ~ xx)
abline(fit.d, col = "red")
plot(fit.d, which = 1:5, pch = 20)
# From y vs. x we see that the model is wrong: the dependence is not linear.
# From the residuals vs. fitted (Tukey-Anscombe) plot we see that the
# homogeneous variance assumption is violated.
# Note that the Q-Q plot seems ok, but from the residuals vs. fitted plot we
# see that the distribution of the residuals depends on x.
# obs 12, 37, 42
infl.d <- influence.measures(fit.d)
summary(infl.d)

# b)
# Residual plots from the sourced resplot() helper, one call per fitted model.
resplot(fit.a)
resplot(fit.b)
resplot(fit.c)
resplot(fit.d)

# c)