library(faraway) data(gala) gala <- gala[,-2] modl <- lm(Species ~ . , gala) plot(predict(modl),residuals(modl),xlab="Fitted",ylab="Residuals") #looks a bit heteroscedastic library(MASS) boxcox(Species ~ . , data = gala) #we try square root transformation? modt <- lm(sqrt(Species) ~ . , gala) plot(predict(modt),residuals(modt),xlab="Fitted",ylab="Residuals") #much better summary(modt) #good fit (R^2=0.78), difficult interpretation, overall: not bad #let us try Poisson regression modp <- glm(Species ~ .,family=poisson,gala) summary(modp) #look at the residual deviance, not a good fit #half normal plot halfnorm(residuals(modp)) plot(log(fitted(modp)),log((gala$Species-fitted(modp))^2),xlab=expression(hat(mu)),ylab=expression((y-hat(mu))^2)) abline(0,1) #it seems that the variance is larger than the Poisson regression implies # -> -> - overdispersion #we estimate the dispersion parameter, Poisson regression assumes this is one (dp <- sum(residuals(modp,type="pearson")^2)/modp$df.res) #not quite one, much more like 31.7 summary(modp,dispersion=dp) #we adjust the standard errors, Nearest and Scruz are no longer significant drop1(modp,test="F") #we even get a warning #note that with overdispersion we use F-test for comparing models #effect of gamma radiation on chromosomal abnormalities data(dicentric) head(dicentric) summary(dicentric) #look at the table of data round(xtabs(ca/cells ~ doseamt+doserate, dicentric),2) #plot it for different levels of doserate with(dicentric,interaction.plot(doseamt,doserate,ca/cells)) #doserate has probably a multiplicative effect on #look at the linear model with multiplicative doserate lmod <- lm(ca/cells ~ log(doserate)*factor(doseamt), dicentric) summary(lmod)$adj #quite a good fit summary(lmod) plot(residuals(lmod) ~ fitted(lmod),xlab="Fitted",ylab="Residuals") abline(h=0) dicentric$dosef <- factor(dicentric$doseamt) pmod <- glm(ca ~ log(cells)+log(doserate)*dosef, family=poisson,dicentric) summary(pmod) rmod <- glm(ca ~ offset(log(cells))+log(doserate)*dosef, family=poisson,dicentric) summary(rmod) data(solder) modp <- glm(skips ~ . , family=poisson, data=solder) deviance(modp) df.residual(modp) modp2 <- glm(skips ~ (Opening +Solder + Mask + PadType + Panel)^2 , family=poisson, data=solder) deviance(modp2) pchisq(deviance(modp2),df.residual(modp2),lower=FALSE) library(MASS) modn <- glm(skips ~ .,negative.binomial(1),solder) modn modn <- glm.nb(skips ~ .,solder) summary(modn)