This document (nearly) duplicates the examples from Johnson and Kuhn (2014). Minor differences are due to the use of R Markdown.
library(ltbayes)
library(ggplot2)
library(gridExtra)
library(reshape2)
Trace plot (a) and estimated posterior density (b) for \(\zeta\) based on 5000 simulated realizations from the posterior distribution.
samp <- 5000
burn <- 1000
alph <- c(1.27,1.34,1.14,1,0.67)
beta <- c(1.19,0.59,0.15,-0.59,-2)
gamm <- c(0.1,0.15,0.15,0.2,0.1)
zeta <- postsamp(fmodel3pl, c(0,0,1,1,1),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn, scale = 3))
zeta <- data.frame(sample = 1:samp,
zeta = zeta$batch[(burn + 1):(samp + burn)])
p <- ggplot(zeta, aes(x = sample, y = zeta))
p <- p + geom_line() + ylab(expression(zeta)) + xlab("Sample")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + ggtitle("(a)\n")
p1 <- p
p <- ggplot(zeta, aes(x = zeta)) + geom_density(adjust = 2) + coord_flip()
p <- p + xlab(expression(zeta)) + ylab("Density")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + ggtitle("(b)\n")
p2 <- p
grid.arrange(p1, p2, nrow = 1)
Violin plots for 5000 simulated realizations from the posterior distribution of \(\zeta\) given each of 32 possible response patterns, stratified by sum score. Points and line segments within each violin plot represent the posterior mean and 95% credibility interval, respectively.
y <- patterns(5, 2, 0:5)
zeta <- vector("list", length = 32)
for (i in 1:32) {
zeta[[i]] <- postsamp(fmodel3pl, y[i,], apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))$batch[(burn + 1):(samp + burn)]
}
zeta <- data.frame(zeta = unlist(zeta))
zeta$pattern <- rep(apply(y, 1, paste, collapse = ""), each = samp)
zeta$sum <- rep(apply(y, 1, sum), each = samp)
p <- ggplot(zeta, aes(x = pattern, y = zeta))
p <- p + geom_violin(trim = FALSE)
p <- p + facet_grid(. ~ sum, scales = "free_x", space = "free_x")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15)) + xlab("Response Pattern")
p <- p + theme(axis.text.x = element_text(size = 10, angle = 45,
vjust = 0.5, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + ylab(expression(zeta)) + xlab("Reponse Pattern")
p <- p + stat_summary(fun.y = mean,
fun.ymin = function(x) quantile(x, probs = 0.025),
fun.ymax = function(x) quantile(x, probs = 0.975), size = 0.5)
plot(p)
Violin plots (a) and empirical cumulative distribution functions (b) for 5000 simulated realizations from the posterior distribution of \(\zeta\) conditional on sum score. Points and line segments within each violin plot represent the posterior mean and 95% credibility interval, respectively.
zeta <- vector("list", length = 6)
for (i in 1:6) {
y <- patterns(5, 2, i-1)
zeta[[i]] <- postsamp(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zeta[[i]] <- zeta[[i]]$batch[(burn+1):(samp + burn)]
}
zeta <- data.frame(zeta = unlist(zeta))
zeta$sum <- factor(rep(0:5, each = samp))
p <- ggplot(zeta, aes(x = zeta, color = sum, linetype = sum)) + stat_ecdf()
p <- p + ylab("") + xlim(c(-3,3)) + guides(color = guide_legend(title = "Sum"))
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + ylab("") + xlab(expression(zeta))
p <- p + guides(linetype = guide_legend(title = "Sum"))
p <- p + ggtitle("(b)\n")
p1 <- p
p <- ggplot(zeta, aes(x = sum, y = zeta))
p <- p + geom_violin(trim = FALSE)
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15)) + xlab("Sum Score")
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black")) + ylab(expression(zeta))
p <- p + stat_summary(fun.y = mean,
fun.ymin = function(x) quantile(x, probs = 0.025),
fun.ymax = function(x) quantile(x, probs = 0.975), size = 0.5)
p <- p + ggtitle("(a)\n")
p2 <- p
grid.arrange(p2, p1, nrow = 1)
## Warning: Removed 190 rows containing non-finite values (stat_ecdf).
Estimated density functions and empirical cumulative distribution functions for the posterior distributions of \(\zeta\) given \(\tilde{Y} < \tilde{y}\) (lower interval) versus \(\tilde{Y} \ge \tilde{y}\) (upper interval) for \(c = 1, 2, \dots, 5\).
zetal <- vector("list", length = 5)
zetau <- vector("list", length = 5)
for (i in 1:5) {
zetal[[i]] <- postsamp(fmodel3pl, patterns(5, 2, 0:(i-1)),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zetau[[i]] <- postsamp(fmodel3pl, patterns(5, 2, i:5),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zetal[[i]] <- zetal[[i]]$batch[(burn+1):(samp + burn)]
zetau[[i]] <- zetau[[i]]$batch[(burn+1):(samp + burn)]
}
zetal <- data.frame(zeta = unlist(zetal))
zetau <- data.frame(zeta = unlist(zetau))
zetal$interval <- "Lower"
zetau$interval <- "Upper"
zetal$cut <- rep(c("y1","y2","y3","y4","y5"), each = samp)
zetau$cut <- rep(c("y1","y2","y3","y4","y5"), each = samp)
zeta <- rbind(zetal, zetau)
p <- ggplot(zeta, aes(x = zeta)) + geom_density(aes(linetype = interval),
adjust = 2, show.legend = FALSE)
p <- p + facet_wrap(~ cut, ncol = 5) + xlim(c(-4,4)) + ylab("") + xlab(expression(zeta))
p <- p + stat_ecdf(aes(linetype = interval))
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + scale_linetype_manual(values = c(6,1), name = "Interval")
plot(p)
Posterior probabilities, conditional on sum score (\(\tilde{y}\)), of \(\zeta\) falling into one of four consecutive intervals defined as \(R_1 = \{\zeta | \zeta < Q_1\}\), \(R_2 = \{\zeta|Q_1 < \zeta < Q_2\}\), \(R_3 = \{\zeta|Q_2 < \zeta < Q_3\}\), and \(R_4 = \{\zeta|\zeta > Q_4\}\) where \(Q_1\), \(Q_2\), and \(Q_3\) are the quartiles of the prior distribution of \(\zeta\).
zeta <- vector("list", length = 6)
for (i in 1:6) {
y <- patterns(5, 2, i-1)
zeta[[i]] <- postsamp(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zeta[[i]] <- zeta[[i]]$batch[(burn + 1):(samp + burn)]
zeta[[i]] <- table(cut(zeta[[i]], c(-Inf, qnorm(c(0.25,0.5,0.75)), Inf)))
}
zeta <- data.frame(zeta = unlist(zeta))
zeta$quartile = factor(rep(1:4, 6))
levels(zeta$quartile) <- c("First","Second","Third","Fourth")
zeta$zeta <- zeta$zeta/samp * 100
zeta$sum <- factor(rep(0:5, each = 4))
zeta$text <- paste(round(zeta$zeta, 1), "%", sep = "")
zeta$side <- factor(ifelse(zeta$zeta > 50, 1, 0))
p <- ggplot(zeta, aes(x = sum, y = quartile, fill = zeta)) + geom_tile()
p <- p + scale_fill_gradient(name = "Probability (%)", low = grey(0.9),
high = grey(0.1), breaks = c(0, 50, 100), limits = c(0, 100))
p <- p + geom_text(aes(label = text, color = side))
p <- p + scale_color_manual(values = c("black","white"), guide = FALSE)
p <- p + theme_bw() + coord_fixed()
p <- p + theme(text = element_text(size = 10))
p <- p + theme(axis.text = element_text(color = "black"))
p <- p + ylab("") + xlab("Sum")
plot(p)
Posterior probabilities of the form \(P(\zeta_A > \zeta_B|\tilde{Y}_A = \tilde{y}_A, \tilde{Y}_B = \tilde{y}_B)\) — i.e., the posterior probability that examinee A has a larger latent score than examinee B given their sum scores.
prob <- matrix(NA, 6, 6)
for (j in 1:6) {
for (i in j:6) {
if (i == j) {
prob[i,j] <- 0.5
}
else {
zeta1 <- postsamp(fmodel3pl, patterns(5, 2, i-1),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zeta2 <- postsamp(fmodel3pl, patterns(5, 2, j-1),
apar = alph, bpar = beta, cpar = gamm,
control = list(nbatch = samp + burn))
zeta1 <- zeta1$batch[(burn + 1):(samp + burn)]
zeta2 <- zeta2$batch[(burn + 1):(samp + burn)]
prob[i,j] <- mean(zeta1 > zeta2)
prob[j,i] <- 1 - prob[i,j]
}
}
}
prob <- data.frame(p = as.vector(prob), sumr = factor(rep(0:5, 6)),
sumc = factor(rep(0:5, each = 6)))
prob$p <- prob$p * 100
prob$text <- paste(round(prob$p, 1), "%", sep = "")
prob$side <- factor(ifelse(prob$p > 50, 1, 0))
p <- ggplot(prob, aes(x = sumc, y = sumr, fill = p)) + geom_tile()
p <- p + scale_fill_gradient(name = "Probability (%)", low = grey(0.9),
high = grey(0.1), breaks = c(0, 50, 100), limits = c(0, 100))
p <- p + geom_text(aes(label = text, color = side))
p <- p + scale_color_manual(values = c("black","white"), guide = FALSE)
p <- p + theme_bw() + coord_fixed()
p <- p + theme(text = element_text(size = 10))
p <- p + theme(axis.text = element_text(color = "black"))
p <- p + ylab("Examinee A Sum") + xlab("Examinee B Sum")
plot(p)
Profiles of the unnormalized log-posterior distribution of \(\zeta\) given \(\tilde{Y} = 0, 1, \dots, 5\) with normal and improper uniform prior distributions.
post <- vector("list", length = 6)
for (i in 1:6) {
y <- patterns(5, 2, i-1)
post[[i]] <- posttrace(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
zmin = -5, zmax = 5, length = 100)
post[[i]] <- cbind(post[[i]]$zeta, post[[i]]$post)
}
post <- data.frame(do.call("rbind", post))
names(post) <- c("zeta","post")
post$sum <- factor(rep(0:5, each = 100))
post.norm <- post
post.norm$prior <- "normal"
post <- vector("list", length = 6)
for (i in 1:6) {
y <- patterns(5, 2, i-1)
post[[i]] <- posttrace(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
zmin = -5, zmax = 5, prior = function(z) 1, length = 100)
post[[i]] <- cbind(post[[i]]$zeta, post[[i]]$post)
}
post <- data.frame(do.call("rbind", post))
names(post) <- c("zeta","post")
post$sum <- factor(rep(0:5, each = 100))
post.unif <- post
post.unif$prior = "uniform"
post <- rbind(post.norm, post.unif)
names(post)[4] <- "Prior"
p <- ggplot(post, aes(x = zeta, y = post, linetype = Prior))
p <- p + geom_line() + facet_wrap(~ sum)
p <- p + xlab(expression(zeta)) + ylab("Log-Posterior Density")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
plot(p)
Profile likelihoods for \(\zeta\) given \(\tilde{Y} = 2,3,4\) with maximum likelihood estimates and profile likelihood confidence intervals.
post <- post[post$Prior == "uniform",]
post <- post[post$sum %in% c(2,3,4),]
y2 <- profileci(fmodel3pl, patterns(5, 2, 2), apar = alph,
bpar = beta, cpar = gamm, lower = FALSE)
y3 <- profileci(fmodel3pl, patterns(5, 2, 3), apar = alph,
bpar = beta, cpar = gamm)
y4 <- profileci(fmodel3pl, patterns(5, 2, 4), apar = alph,
bpar = beta, cpar = gamm)
names(post)[3] <- "Sum"
p <- ggplot(post, aes(x = zeta, y = post, linetype = Sum))
p <- p + geom_line() + ylim(c(-14,0))
p <- p + xlab(expression(zeta)) + ylab("Log-Posterior Density")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 20))
p <- p + theme(axis.text.x = element_text(size = 15, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + annotate("segment", x = y2$zeta, xend = y2$zeta,
y = -14, yend = y2$post, linetype = 1)
p <- p + annotate("segment", x = y2$upper, xend = y2$upper,
y = -14, yend = y2$f.upper, linetype = 1)
p <- p + annotate("segment", x = y3$zeta, xend = y3$zeta,
y = -14, yend = y3$post, linetype = 3)
p <- p + annotate("segment", x = y3$lower, xend = y3$lower,
y = -14, yend = y3$f.lower, linetype = 3)
p <- p + annotate("segment", x = y3$upper, xend = y3$upper,
y = -14, yend = y3$f.upper, linetype = 3)
p <- p + annotate("segment", x = y4$zeta, xend = y4$zeta,
y = -14, yend = y4$post, linetype = 2)
p <- p + annotate("segment", x = y4$lower, xend = y4$lower,
y = -14, yend = y4$f.lower, linetype = 2)
p <- p + annotate("segment", x = y4$upper, xend = y4$upper,
y = -14, yend = y4$f.upper, linetype = 2)
plot(p)
Item information functions for five items of a 3-parameter binary logistic model.
zeta <- seq(-3, 3, length = 100)
info <- matrix(NA, 100, 5)
for (j in 1:100) {
info[j,] <- information(fmodel3pl, c(0,1,1,1,1), zeta = zeta[j],
apar = alph, bpar = beta, cpar = gamm)$item
}
matplot(zeta, info, type = "l", ylab = "Information", bty = "n", xlab = expression(zeta))
legend(-3, 0.3, paste("Item", 1:5), lty = 1:5, col = 1:5)
Posterior distribution of \(\zeta\) for the hyperbolic cosine model given \(\tilde{y} = 1\) (a), \(y' = (1,0,0,0,0)\) (b), \(y' = (1,1,1,0,0)\) ©, and \(y' = (0,1,1,1,0)\) (d).
fmodelhcm <- function(zeta, y, alph, beta, prior = dnorm, ...) {
if (is.vector(y)) y <- matrix(y, 1, length(y))
m <- ncol(y)
n <- nrow(y)
prob <- matrix(NA, m, 2)
yprb <- matrix(NA, n, m)
prob[,1] <- 2*cosh(zeta - beta)
prob[,2] <- exp(alph)
prob <- sweep(prob, 1, apply(prob, 1, sum), "/")
for (i in 1:n) {
yprb[i,] <- prob[row(prob) == 1:m & col(prob) == y[i,] + 1]
}
return(list(post = log(sum(apply(yprb, 1, prod)))
+ log(prior(zeta, ...)), prob = prob))
}
dmixnorm <- function(z, pi, m1, m2, s1, s2) {
return(pi*dnorm(z, m1, s1) + (1 - pi)*dnorm(z, m2, s2))
}
apar <- c(1,1,1,1,1)
bpar <- c(-2,-1,0,1,2)
n <- 10000
tmp.1 <- postsamp(fmodelhcm, patterns(5,2,1), alph = apar, beta = bpar, prior = dmixnorm,
m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n))
tmp.2 <- postsamp(fmodelhcm, c(1,0,0,0,0), alph = apar, beta = bpar, prior = dmixnorm,
m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n))
tmp.3 <- postsamp(fmodelhcm, c(1,1,1,0,0), alph = apar, beta = bpar, prior = dmixnorm,
m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n))
tmp.4 <- postsamp(fmodelhcm, c(0,1,1,1,0), alph = apar, beta = bpar, prior = dmixnorm,
m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n))
tmp <- data.frame(zeta = c(tmp.1$batch, tmp.2$batch, tmp.3$batch, tmp.4$batch),
Pattern = rep(letters[1:4], times = c(n,n,n,n*5)))
p <- ggplot(tmp, aes(x = zeta, color = Pattern, linetype = Pattern))
p <- p + geom_density(adjust = 2)
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 20))
p <- p + theme(axis.text.x = element_text(size = 15, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + xlab(expression(zeta)) + ylab("Posterior Density")
plot(p)
Item and test Fisher information functions for five items of a hyperbolic cosine model.
zeta <- seq(-3, 3, length = 100)
info <- matrix(NA, 100, 5+1)
for (j in 1:100) {
tmp <- information(fmodelhcm, c(0,0,0,0,0), zeta = zeta[j], alph = apar, beta = bpar)
info[j,] <- c(tmp$test, tmp$item)
}
info <- data.frame(cbind(zeta, info))
names(info) <- c("zeta","test",paste("item", 1:5))
info <- melt(info, id.vars = "zeta", variable.name = "Type", value.name = "information")
p <- ggplot(info, aes(x = zeta, y = information, color = Type, linetype = Type))
p <- p + geom_line() + ylab("Information") + xlab(expression(zeta))
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text = element_text(size = 15))
plot(p)
Johnson, T. R. & Kuhn, K. M. (2015). Simulation-Based Bayesian Inference for Latent Traits of Item Response Models: Introduction to the ltbayes Package for R. Behavior Research Methods, 47, 1309–1327.