library(foreign) home1data <- read.dta("c:/data.avc/teaching/vhm882/exam/home1.dta",convert.factors=F) home1data$lnscc <- log(home1data$scc) is.na(home1data$lnscc)[477] <- T home1data$id <- rep(c(1:29),rep(28,29)) home1data.quart <- home1data[home1data$day==1,,] library(nlme) home1 <- groupedData(lnscc~day|cowid/side/qtr,home1data) home1data$count <- rep(gapply(home1,"lnscc",function(x) sum(!is.na(x))),rep(7,116)) home1data$count4 <- rep(gapply(home1[home1$day<=4,,],"lnscc",function(x) sum(!is.na(x))),rep(7,116)) # see Pinheiro & Bates, p.127 top for this method of counting non-missing observations # initial analyses separately for each day home1.day1 <- lme(lnscc~as.factor(ts)*cul+qopen,random=~1|cowid/side,data=home1,subset=day==1,na.action=na.omit) summary(home1.day1) anova(home1.day1) home1.day2 <- lme(lnscc~as.factor(ts)*cul+qopen,random=~1|cowid/side,data=home1,subset=day==2,na.action=na.omit) summary(home1.day2) anova(home1.day2) home1.day3 <- lme(lnscc~as.factor(ts)*cul+qopen,random=~1|cowid/side,data=home1,subset=day==3,na.action=na.omit) summary(home1.day3) anova(home1.day3) home1.day4 <- lme(lnscc~as.factor(ts)*cul+qopen,random=~1|cowid/side,data=home1,subset=day==4,na.action=na.omit) summary(home1.day4) anova(home1.day4) home1.day5 <- lme(lnscc~as.factor(ts)*cul+qopen,random=~1|cowid/side,data=home1,subset=day==5,na.action=na.omit) summary(home1.day5) anova(home1.day5) home1.day6 <- lme(lnscc~as.factor(ts)*cul+qopen,random=~1|cowid/side,data=home1,subset=day==6,na.action=na.omit) summary(home1.day6) anova(home1.day6) home1.day7 <- lme(lnscc~as.factor(ts)*cul+qopen,random=~1|cowid/side,data=home1,subset=day==7,na.action=na.omit) summary(home1.day7) anova(home1.day7) # effect of ts only seen at days 1-2; first days have lowest variation # (a) # note: lmList does not return objects that are easy to work with so the analysis is done manually # allowing easier control of the order of vector of summary statistics # summary statistic: slope slope <- c() for (i in 1:29) for (j in 1:0) for (k in 1:2) { if (home1data$count[home1data$id==i & home1data$side==j & home1data$qtr==(2*j+k)& home1data$day==1]>1) slope <- c(slope,lm(lnscc~day, data=home1data, subset=(id==i & side==j & qtr==(2*j+k)))$"coefficients"[2]) else slope <- c(slope,NA) } home1data.quart$slope <- slope home1.quart <- groupedData(slope~ts*qopen|cowid/side, data=home1data.quart) home1data.quart$slope <- slope home1.quart <- groupedData(slope~ts|cowid/side, data=home1data.quart) # random effects model analysis of slopes home1.quart.lme <- lme(slope~as.factor(ts)*cul*qopen, random=~1|cowid/side, home1.quart, na.action=na.omit) summary(home1.quart.lme) anova(home1.quart.lme) # marginal model analysis of slopes home1.quart.gls <- gls(slope~as.factor(ts)*cul*qopen, home1data.quart, correlation=corSymm(form=~1|cowid), weights=varIdent(form=~1|cowid), na.action=na.omit) summary(home1.quart.gls) home1.quart.gls2 <- gls(slope~as.factor(ts)*cul*qopen, home1data.quart, correlation=corSymm(form=~1|cowid), na.action=na.omit) summary(home1.quart.gls2) anova(home1.quart.gls2) # (b) # dealing with cows by fixed effects: # simple analysis but abandons cow level variables (cul) home1.wide <- reshape(home1data, direction="wide", v.names="lnscc", timevar="day", idvar=c("cowid","side","qtr"), drop="scc") home1.wide.nonmiss <- home1.wide[home1.wide$count==7 & !is.na(home1.wide$qopen),,] attach(home1.wide.nonmiss) y <- matrix(c(lnscc.1,lnscc.2,lnscc.3,lnscc.4,lnscc.5,lnscc.6,lnscc.7),ncol=7,byrow=F) # simplest model (note that only ts and cul:ts can be estimated) m <- manova(y~ts+cul:ts+qopen*ts+qopen:ts:cul+as.factor(cowid)) summary(m,test="Wilk") summary.aov(m) cov<-crossprod(m$"residuals")/m$"df.residual" cov corr<-cov/sqrt(crossprod(t(diag(cov)))) corr # extensions summary(manova(y~ts*qopen*as.factor(cowid)-ts:qopen:as.factor(cowid)),test="Wilk") summary(manova(y~ts+cul:ts+qopen*ts*side+cul:ts:side+as.factor(cowid)),test="Wilk") # estimate full correlation within cows, but infeasible for full vector of 28 obs. => reduce to days 1-4 # unbalanced within-cow predictors like qopen cannot (easily) be assessed home1data$qtr.day <- 10*home1data$ts + home1data$qtr-2*as.numeric(home1data$qtr>2)+home1data$day*0.1 home1.wwide <- reshape(home1data[home1data$day<=4,,], direction="wide", v.names="lnscc", timevar="qtr.day", idvar="cowid", drop=c("scc","count","qtr","side","ts","qopen","day","tx","qid","id","count4")) nmis <- rep(0,29) for (i in 1:29) nmis[i] <- sum(is.na(home1.wwide[i,3:18])) home1.wwide.nonmiss <- home1.wwide[nmis==0,,] attach(home1.wwide.nonmiss) y <-matrix(c(lnscc.1.1,lnscc.1.2,lnscc.1.3,lnscc.1.4,lnscc.2.1,lnscc.2.2,lnscc.2.3,lnscc.2.4,lnscc.11.1,lnscc.11.2,lnscc.11.3,lnscc.11.4,lnscc.12.1,lnscc.12.2,lnscc.12.3,lnscc.12.4),ncol=16,byrow=F) diffqtr <- matrix(c(y[,1]-y[,5],y[,2]-y[,6],y[,3]-y[,7],y[,4]-y[,8],y[,9]-y[,13],y[,10]-y[,14],y[,11]-y[,15],y[,12]-y[,16]),ncol=8,byrow=F) summary(manova(diffqtr~1),intercept=TRUE) diffts <- matrix(c(y[,1]-y[,9],y[,2]-y[,10],y[,3]-y[,11],y[,4]-y[,12],y[,5]-y[,13],y[,6]-y[,14],y[,7]-y[,15],y[,8]-y[,16]),ncol=8,byrow=F) summary(manova(diffts~as.factor(cul)-1),test="Wilk") summary.aov(manova(diffts~as.factor(cul)-1)) summary(manova(diffts~1),intercept=T) diffday <- matrix(c(y[,1]-y[,2],y[,2]-y[,3],y[,3]-y[,4],y[,5]-y[,6],y[,6]-y[,7],y[,7]-y[,8],y[,9]-y[,10],y[,10]-y[,11],y[,11]-y[,12],y[,13]-y[,14],y[,14]-y[,15],y[,15]-y[,16]),ncol=12,byrow=F) summary(manova(diffday~1),intercept=T) difftsmean <- matrix(c(y[,1]+y[,5]-y[,9]-y[,13],y[,2]+y[,6]-y[,10]-y[,14],y[,3]+y[,7]-y[,11]-y[,15],y[,4]+y[,8]-y[,12]-y[,16]),ncol=4,byrow=F) summary(manova(difftsmean~as.factor(cul)-1),test="Wilk") summary.aov(manova(difftsmean~as.factor(cul)-1)) summary(manova(difftsmean~1),intercept=T) m <- manova(y~cul) summary(m,test="Wilk") cov<-crossprod(m$"residuals")/m$"df.residual" cov corr<-cov/sqrt(crossprod(t(diag(cov)))) corr # (c) # for repeated measures anova, we need a dataset without missing values # we can deal with cow dependencies in two ways (as in (b)): taking cows as fixed effects # or analyse at the cow level # part 1: quarter level, cows as fixed effects, abandons cul-effects home1data$qid <- home1$cowid+0.1*home1$qtr home2 <- groupedData(lnscc~day|qid, home1data[home1data$count==7 & !is.na(home1data$qopen),,]) home2.lme <- lme(lnscc~as.factor(day)*ts+ts:cul+as.factor(day):ts:cul+qopen*as.factor(day)+as.factor(cowid), random=~1|qid, data=home2) anova(home2.lme) library(MASS) tr <- function(M) sum(diag(M)) ff <- lnscc~ts+ts:cul+qopen+as.factor(cowid) x <- model.matrix(ff, model.frame(ff, home2[home2$day==1,])) home2.wide <- reshape(home2, direction="wide", v.names="lnscc", timevar="day", idvar=c("qid"), drop="scc") y2 <- matrix(c(home2.wide$lnscc.1,home2.wide$lnscc.2,home2.wide$lnscc.3,home2.wide$lnscc.4,home2.wide$lnscc.5,home2.wide$lnscc.6,home2.wide$lnscc.7),nrow=80,ncol=7) bhat <- ginv(t(x) %*% x) %*% crossprod(x,y2) # t(x) %*% y2 n <- 80 # no. of subjects p <- 7-1 # no. of time points -1 rX <- 24 # rank of X s <- crossprod(y - x %*% bhat)/(n-rX) c <- t(poly(1:7,p)) r <- c %*% s %*% t(c) w <- det(r)/(abs(tr(r)/p))^p rho <- 1-(2*p*p+p+2)/(6*p*(n-rX)) # chi-square approx from SAS/SPSS x2 <- -rho*(n-rX)*log(w) # df=p*(p+1)/2-1, but maybe more complex formula 1-pchisq(x2,p*(p+1)/2-1) eps.GG <- tr(r)^2/(p*tr(r %*% r)) eps.HF <- (n*p*eps.GG-2)/(p*(n-rX-p*eps.GG)) # strong significance, but moderate epsilon values # values agree with SAS analysis # part 2: cow level, to keep the dimension down, we restrict the analysis to days 1-4 # unbalanced within-cow predictors like qopen cannot easily be included home24 <- groupedData(lnscc~day|qid, home1data[home1data$day<=4 & home1data$count4==4,,]) home24.lme <- lme(lnscc~as.factor(day)*ts*cul, random=~1|cowid, data=home24) anova(home24.lme) home24.lme.red <- lme(lnscc~as.factor(day)+ts*cul, random=~1|cowid, data=home24) anova(home24.lme.red) home24.lme.add <- lme(lnscc~as.factor(day)+ts+cul, random=~1|cowid, data=home24) anova(home24.lme.add) x <- matrix(c(rep(1,20),home1.wwide.nonmiss$cul),nrow=20,ncol=2) rX <- 2 # rank of X bhat <- ginv(t(x) %*% x) %*% crossprod(x,y) # t(x) %*% y n <- 20 # no. of subjects # sphericity on days only, same as in Crowder & Hand p.57-58 p <- 4-1 # no. of time points -1 s <- crossprod(y - x %*% bhat)/(n-rX) c <- t(poly(rep(1:4,4),p)) r <- c %*% s %*% t(c) w <- det(r)/(abs(tr(r)/p))^p rho <- 1-(2*p*p+p+2)/(6*p*(n-rX)) # chi-square approx from SAS/SPSS x2 <- -rho*(n-rX)*log(w) # df=p*(p+1)/2-1, but maybe more complex formula 1-pchisq(x2,p*(p+1)/2-1) eps.GG <- tr(r)^2/(p*tr(r %*% r)) eps.HF <- (n*p*eps.GG-2)/(p*(n-rX-p*eps.GG)) # significance, but moderate epsilon values # sphericity on power*eye interaction, same as in Crowder & Hand p.57-58 c <- t(poly(rep(1:4,4),p))*matrix(c(rep(1,24),rep(-1,24)),nrow=3,ncol=16,byrow=FALSE) r <- c %*% s %*% t(c) w <- det(r)/(abs(tr(r)/p))^p rho <- 1-(2*p*p+p+2)/(6*p*(n-rX)) # chi-square approx from SAS/SPSS x2 <- -rho*(n-rX)*log(w) # df=p*(p+1)/2-1, but maybe more complex formula 1-pchisq(x2,p*(p+1)/2-1) eps.GG <- tr(r)^2/(p*tr(r %*% r)) eps.HF <- (n*p*eps.GG-2)/(p*(n-rX-p*eps.GG)) # no significance # (d) home1.lme.uns <- lme(lnscc~ts*cul*as.factor(day)+qopen*ts*cul+qopen:as.factor(day),random=~1|cowid/side, correlation=corSymm(form=~day|cowid/side/qtr),weights=varIdent(form=~1|day),data=home1,na.action=na.omit) # not useful: home1.lme.altuns <- update(home1.lme.uns,random=~1|cowid/side/quarter) summary(home1.lme.uns) anova(home1.lme.uns) home1.lme.unshom <- update(home1.lme.uns, weights=NULL) summary(home1.lme.unshom) anova(home1.lme.uns,home1.lme.unshom) # clear evidence of different variances across days home1.lme.arma11 <- update(home1.lme.uns,correlation=corARMA(c(0.5,0),form=~day|cowid/side/qtr,p=1,q=1)) home1.lme.arma21 <- update(home1.lme.uns,correlation=corARMA(c(0.5,0,0),form=~day|cowid/side/qtr,p=2,q=1)) home1.lme.arma12 <- update(home1.lme.uns,correlation=corARMA(c(0.5,0,0),form=~day|cowid/side/qtr,p=1,q=2)) anova(home1.lme.uns,home1.lme.arma21,home1.lme.arma12,home1.lme.arma11) # no simple reduction of the correlation structure home1.lme.ranslp <- update(home1.lme.uns, random=~day|cowid/side/qtr) # weak evidence only of random slopes home1.lme1.uns <- update(home1.lme.uns,lnscc~ts*cul+ts*as.factor(day)+qopen*ts+qopen*cul+qopen:as.factor(day)) anova(home1.lme1.uns) home1.lme2.uns <- update(home1.lme.uns,lnscc~ts+ts*as.factor(day)+qopen*as.factor(day)) anova(home1.lme2.uns) home1.lme2.arma11 <- update(home1.lme2.uns,correlation=corARMA(c(0.5,0),form=~day|cowid/side/qtr,p=1,q=1)) anova(home1.lme2.arma11) # neither of the final models give any evidence for ts*day, contradicting the univariate analyses