---
title: "Example generalized linear mixed model analysis with different packages"
author: "Kevin Wright"
output:
  rmarkdown::html_vignette:
    md_extensions: -autolink_bare_uris
vignette: >
  %\VignetteIndexEntry{Example generalized linear mixed model analysis}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, setup}
# The same data object `dat` is used by every model chunk below.
library(agridat)
data(crowder.seeds)
dat <- crowder.seeds
```

```{r, plot}
# Proportion of seeds germinated on each plate, by genotype within extract.
libs(lattice)
dotplot(germ/n~gen|extract, dat, main="crowder.seeds", xlab="gen")
```

The response variable on the vertical axis is the germination percent.
There are two predictive factors, `gen` (O73, O75) and `extract` (bean/cucumber).
Each dot is for a separate plate.

# GLM (not mixed--no random plate)

### brms

It takes a minute or so to compile the Stan program...

Note, in Emacs brms ends the R process for some reason!

```{r,brms, eval=FALSE}
if(require(brms)){
  m1.brms <- brms::brm( germ|trials(n)~ gen*extract,
                        data = dat,
                        family = binomial,
                        chains=3, iter=3000, warmup=1000)
  summary(m1.brms)
  # round( summary(m1.brms)$fixed[,1:4] , 2)
  #                        Estimate Est.Error l-95% CI u-95% CI
  # Intercept                 -0.42      0.18    -0.77    -0.06
  # genO75                    -0.14      0.22    -0.56     0.29
  # extractcucumber            0.55      0.25     0.07     1.05
  # genO75:extractcucumber     0.77      0.30     0.18     1.36
}
```

### glm

```{r,glm,eval=FALSE}
# ----- GLM.
# family=binomial() fixes dispersion at 1
# family=quasibinomial() estimates dispersion, had larger std errors
m1.glm <- glm(cbind(germ,n-germ) ~ gen*extract,
              data=dat,
              #family="binomial",
              family=quasibinomial()
              )
summary(m1.glm)
## round(summary(m1.glm)$coef,2)
##                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)               -0.41       0.25   -1.64     0.12
## genO75                    -0.15       0.30   -0.48     0.64
## extractcucumber            0.54       0.34    1.58     0.13
## genO75:extractcucumber     0.78       0.42    1.86     0.08
```

### rstan

```{r, eval=FALSE}
# ----- Stan using pre-built models from rstanarm
libs(tidyverse, rstan, rstanarm,bayesplot)
set.seed(42)
m1.stan <- stan_glm( cbind(germ,n-germ) ~ gen*extract,
                     data=dat,
                     family = binomial(link="logit") )
summary(m1.stan)
## round(posterior_interval(m1.stan, prob=.90),3)
#                            5%    95%
# (Intercept)            -0.728 -0.115
# genO75                 -0.506  0.243
# extractcucumber         0.133  0.947
# genO75:extractcucumber  0.255  1.267

libs(bayesplot)
# prob = 0.9 shades the central 90% interval, so the subtitle must say 90.
mcmc_areas(m1.stan, prob = 0.9) +
  ggtitle("Posterior distributions",
          "with medians and 90 pct intervals")
```

# Generalized Linear Mixed Model

### asreml

```{r,asreml,eval=FALSE}
if(require(asreml)){
  m1.asreml <- asreml(germ ~ gen*extract, data=dat, random= ~ plate,
                      family=asr_binomial(dispersion=1, total=n))
  summary(m1.asreml)
  ##
  ##                          effect
  ## (Intercept)               -0.47
  ## gen_O73                    0.00
  ## gen_O75                   -0.08
  ## extract_bean               0.00
  ## extract_cucumber           0.51
  ## gen_O73:extract_bean       0.00
  ## gen_O73:extract_cucumber   0.00
  ## gen_O75:extract_bean       0.00
  ## gen_O75:extract_cucumber   0.83
}
```

### MASS::glmmPQL

```{r,glmmpql,eval=FALSE}
# --- GLMM. Assumes Gaussian random effects
libs(MASS)
m1.glmm <- glmmPQL(cbind(germ, n-germ) ~ gen*extract, random= ~1|plate,
                   family=binomial(), data=dat)
summary(m1.glmm)
## round(summary(m1.glmm)$tTable,2)
##                        Value Std.Error DF t-value p-value
## (Intercept)            -0.44      0.25 17   -1.80    0.09
## genO75                 -0.10      0.31 17   -0.34    0.74
## extractcucumber         0.52      0.34 17    1.56    0.14
## genO75:extractcucumber  0.80      0.42 17    1.88    0.08
```

### glmmTMB

```{r,glmmtmb,eval=FALSE}
libs(glmmTMB)
m1.glmmtmb <- glmmTMB(cbind(germ, n-germ) ~ gen*extract + (1|plate),
                      data=dat,
                      family=binomial)
round(summary(m1.glmmtmb)$coefficients$cond , 2)
##                        Estimate Std. Error z value Pr(>|z|)
## (Intercept)               -0.45       0.22   -2.03     0.04
## genO75                    -0.10       0.28   -0.35     0.73
## extractcucumber            0.53       0.30    1.74     0.08
## genO75:extractcucumber     0.81       0.38    2.11     0.04
```

### hglm

```{r,hglm,eval=FALSE}
# ----- hglm package. Beta-binomial with beta-distributed random effects
if(require(hglm)){
  m1.hglm <- hglm(fixed= germ/n ~ I(gen=="O75")*extract, weights=n, data=dat,
                  random=~1|plate, family=binomial(), rand.family=Beta(),
                  fix.disp=1)
  summary(m1.hglm)
  # round(summary(m1.hglm)$FixCoefMat,2)
  ##                                     Estimate Std. Error t-value Pr(>|t|)
  ## (Intercept)                            -0.47       0.24   -1.92     0.08
  ## I(gen == "O75")TRUE                    -0.08       0.31   -0.25     0.81
  ## extractcucumber                         0.51       0.33    1.53     0.16
  ## I(gen == "O75")TRUE:extractcucumber     0.83       0.43    1.92     0.08
}
```

### INLA

See: https://haakonbakka.bitbucket.io/btopic102.html

```{r,inla,eval=FALSE}
if(require(INLA)){
  #gen,extract are fixed. plate is a random effect
  #Priors for hyper parameters. See: inla.doc("pc.prec")
  hyper1 <- list(theta = list(prior="pc.prec", param=c(1,0.01)))
  m1.inla <- inla(germ ~ gen*extract + f(plate, model="iid", hyper=hyper1),
                  data=crowder.seeds,
                  family="binomial", Ntrials=n,
                  control.family=list(control.link=list(model="logit")))
  round( summary(m1.inla)$fixed, 2)
  ##                         mean   sd 0.025quant 0.5quant 0.975quant  mode kld
  ## (Intercept)            -0.47 0.24      -0.96    -0.46       0.00 -0.46   0
  ## genO75                 -0.08 0.31      -0.68    -0.09       0.54 -0.09   0
  ## extractcucumber         0.53 0.33      -0.13     0.53       1.18  0.53   0
  ## genO75:extractcucumber  0.82 0.43      -0.01     0.82       1.69  0.82   0
}
```

### rjags

Requires JAGS to be installed.

```{r,rjags,eval=FALSE}
# JAGS/BUGS. See https://mathstat.helsinki.fi/openbugs/Examples/Seeds.html
# Germination rate depends on p, which is a logit of a linear predictor
# based on genotype and extract, plus random deviation to intercept

# To match the output on the BUGS web page, use: dat$gen=="O73".
# We use dat$gen=="O75" to compare with the parameterization above.
jdat <- list(germ = dat$germ, n = dat$n,
             root = as.numeric(dat$extract=="cucumber"),
             gen = as.numeric(dat$gen=="O75"),
             nobs = nrow(dat))

jinit <- list(int = 0, genO75 = 0, extcuke = 0, g75ecuke = 0, tau = 10)

# Use logical names (unlike BUGS documentation)
mod.bug <-
"model {
  for(i in 1:nobs) {
    germ[i] ~ dbin(p[i], n[i])
    b[i] ~ dnorm(0.0, tau)
    logit(p[i]) <- int + genO75 * gen[i] + extcuke * root[i] +
                   g75ecuke * gen[i] * root[i] + b[i]
  }
  int ~ dnorm(0.0, 1.0E-6)
  genO75 ~ dnorm(0.0, 1.0E-6)
  extcuke ~ dnorm(0.0, 1.0E-6)
  g75ecuke ~ dnorm(0.0, 1.0E-6)
  tau ~ dgamma(0.001, 0.001)
  sigma <- 1 / sqrt(tau)
}"

libs(rjags)
# jags.model() reads the model text through a connection; close it afterwards.
oo <- textConnection(mod.bug)
j1 <- jags.model(oo, data=jdat, inits=jinit, n.chains=1)
close(oo)

c1 <- coda.samples(j1, c("int","genO75","g75ecuke","extcuke","sigma"),
                   n.iter=20000)
summary(c1) # Medians are very similar to estimates from hglm
# libs(lucid)
# print(vc(c1),3)
##             Mean    SD    2.5%  Median   97.5%
## extcuke    0.543 0.331  -0.118   0.542     1.2
## g75ecuke   0.807 0.436 -0.0586   0.802     1.7
## genO75   -0.0715 0.309  -0.665 -0.0806   0.581
## int       -0.479 0.241  -0.984  -0.473 -0.0299
## sigma      0.289 0.142  0.0505   0.279   0.596

# Plot observed data with HPD intervals for germination probability
c2 <- coda.samples(j1, c("p"), n.iter=20000)
hpd <- HPDinterval(c2)[[1]]
med <- summary(c2, quantiles=.5)$quantiles
fit <- data.frame(med, hpd)

libs(latticeExtra)
obs <- dotplot(1:21 ~ germ/n, dat,
               main="crowder.seeds", ylab="plate",
               col=as.numeric(dat$gen), pch=substring(dat$extract,1))
obs + segplot(1:21 ~ lower + upper, data=fit, centers=med)
```

### R2jags

```{r,R2jags,eval=FALSE}
libs("agridat")
libs("R2jags")
dat <- crowder.seeds

# To match the output on the BUGS web page, use: dat$gen=="O73".
# We use dat$gen=="O75" to compare with the parameterization above.
jdat <- list(germ = dat$germ, n = dat$n,
             root = as.numeric(dat$extract=="cucumber"),
             gen = as.numeric(dat$gen=="O75"),
             nobs = nrow(dat))

# One list of initial values per chain (n.chains=1 below).
jinit <- list(list(int = 0, genO75 = 0, extcuke = 0, g75ecuke = 0, tau = 10))

# The model is given as a function whose body is BUGS code,
# passed to jags() through model.file.
mod.bug <- function() {
  for(i in 1:nobs) {
    germ[i] ~ dbin(p[i], n[i])
    b[i] ~ dnorm(0.0, tau)
    logit(p[i]) <- int + genO75 * gen[i] + extcuke * root[i] +
      g75ecuke * gen[i] * root[i] + b[i]
  }
  int ~ dnorm(0.0, 1.0E-6)
  genO75 ~ dnorm(0.0, 1.0E-6)
  extcuke ~ dnorm(0.0, 1.0E-6)
  g75ecuke ~ dnorm(0.0, 1.0E-6)
  tau ~ dgamma(0.001, 0.001)
  sigma <- 1 / sqrt(tau)
}

parms <- c("int","genO75","g75ecuke","extcuke","sigma")

j1 <- jags(data=jdat, inits=jinit, parms, model.file=mod.bug,
           n.iter=20000, n.chains=1)
print(j1)
##          mu.vect sd.vect   2.5%    25%     50%     75%   97.5%
## extcuke    0.519   0.325 -0.140  0.325   0.531   0.728   1.158
## g75ecuke   0.834   0.429 -0.019  0.552   0.821   1.101   1.710
## genO75    -0.096   0.305 -0.670 -0.295  -0.115   0.089   0.552
## int       -0.461   0.236 -0.965 -0.603  -0.455  -0.312   0.016
## sigma      0.255   0.148  0.033  0.140   0.240   0.352   0.572
## deviance 103.319   7.489 90.019 98.010 102.770 108.689 117.288

traceplot(as.mcmc(j1))
densityplot(as.mcmc(j1))
HPDinterval(as.mcmc(j1))
```