Skip to content

Commit c776c9a

Browse files
committed
Add sbc_hist
#146
1 parent 39e36f5 commit c776c9a

File tree

2 files changed

+107
-0
lines changed

2 files changed

+107
-0
lines changed

Diff for: NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ export(ppc_stat_freqpoly_grouped)
126126
export(ppc_stat_grouped)
127127
export(ppc_violin_grouped)
128128
export(rhat)
129+
export(sbc_hist)
129130
export(scatter_style_np)
130131
export(theme_default)
131132
export(trace_style_np)

Diff for: R/sbc.R

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
klUniform <- function(v, numPriorDraws, samplesPerPrior) {
2+
# D_{KL}(v || uniform)
3+
# https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
4+
expectedPr <- 1.0/samplesPerPrior
5+
observedPr <- table(v) / numPriorDraws
6+
sum(observedPr * log(observedPr/expectedPr))
7+
}
8+
9+
#' Histograms for Simulation Based Calibration
10+
#'
11+
#' @param ranks A list of sampling realizations.
12+
#' @param thin An integer vector of length one indicating the thinning interval
13+
#' when plotting
14+
#' @param perBin Number of histogram entries to combine into a single bar.
15+
#' @param worst If NA, plots all parameters. Otherwise how many parameters to show.
16+
#' Parameters are ordered by the degree of non-uniformity.
17+
#' @param alpha Uncertainty interval probability for a false positive (alpha level).
18+
#' @param hideAxes Whether to hide the plot axes.
19+
#'
20+
#' Each list element of \code{ranks} should be a matrix of rank
21+
#' comparison results (encoded as 0 or 1) associated with a single
22+
#' draw from the prior distribution. Each draw from the posterior is
23+
#' in the row and parameters are in columns. The matrix should have
24+
#' column names to correctly label the parameters.
25+
#'
26+
#' So that the histograms consist of independent realizations,
27+
#' draws from the posterior should be thinned to remove
28+
#' autocorrelation. Set \code{thin} such that the number of
29+
#' draws approximately matches the effective sample size.
30+
#'
31+
#' For best results, one plus the number of draws from the posterior
32+
#' should be evenly divisible by the number of histogram bins after
33+
#' thinning. For example, 511 draws after thinning results in 128
34+
#' draws. If perBin is set to 4 then 32 histogram bars are drawn.
35+
#'
36+
#' @template return-ggplot
37+
#'
38+
#' @references
39+
#' Talts, S., Betancourt, M., Simpson, D., Vehtari, A., and Gelman, A. (2018).
40+
#' Validating Bayesian Inference Algorithms with Simulation-Based Calibration.
41+
#' arXiv preprint arXiv:1804.06788. \url{https://arxiv.org/abs/1804.06788}
42+
#' @seealso
43+
#' \link[rstan]{sbc}
44+
#' @examples
45+
#' pars <- paste0('parameter',1:2)
46+
#' samplesPerPrior <- 511
47+
#' ranks <- list()
48+
#' for (px in 1:500) {
49+
#' r1 <- matrix(0, nrow=samplesPerPrior, ncol=length(pars),
50+
#' dimnames=list(NULL, pars))
51+
#' for (p1 in 1:length(pars)) {
52+
#' r1[sample.int(samplesPerPrior,
53+
#' floor(runif(1, 0, samplesPerPrior))), p1] <- 1
54+
#' }
55+
#' ranks[[px]] <- r1
56+
#' }
57+
#' sbc_hist(ranks)
58+
#' @export
59+
60+
sbc_hist <- function(ranks, thin = 4, perBin=4, worst=16, ...,
61+
alpha = 0.01, hideAxes=TRUE) {
62+
numPriorDraws <- length(ranks)
63+
thinner <- seq(from = 1, to = nrow(ranks[[1]]), by = thin)
64+
samplesPerPrior <- length(thinner)
65+
u <- t(sapply(ranks, FUN = function(r) 1 + colSums(r[thinner, , drop = FALSE])))
66+
if (ncol(ranks[[1]]) == 1) {
67+
u <- t(u)
68+
dimnames(u) <- list(NULL, colnames(ranks[[1]]))
69+
}
70+
71+
if (!is.na(worst)) {
72+
kl <- apply(u, 2, function(v) klUniform(v, numPriorDraws, samplesPerPrior))
73+
filter <- order(-kl)[1:min(worst,ncol(u))]
74+
# print(filter)
75+
u <- u[, filter, drop=FALSE ]
76+
}
77+
78+
parameter <- ordered(rep(colnames(u), each = nrow(u)),
79+
levels=colnames(u))
80+
d <- data.frame(u = c(u), parameter)
81+
if (samplesPerPrior %% perBin != 0) {
82+
warning(paste("perBin (", perBin, ") does not evenly divide the",
83+
"number of samples per prior (",samplesPerPrior,")"))
84+
}
85+
numBins <- samplesPerPrior/perBin
86+
CI <- qbinom(c(alpha/2,0.5,1-alpha/2), numPriorDraws, numBins^-1) + c(-.5,0,.5)
87+
offset <- perBin*2
88+
pl <- ggplot(d, aes(x = u)) +
89+
geom_polygon(data=data.frame(x=c(-offset,0,-offset,samplesPerPrior + offset,
90+
samplesPerPrior, samplesPerPrior + offset,-offset),
91+
y=c(CI[1],CI[2],CI[3],CI[3],CI[2],CI[1],CI[1])),
92+
aes(x=x,y=y),fill="grey45",color="grey25",alpha=0.5) +
93+
geom_histogram(bins=numBins, na.rm=TRUE) +
94+
# xlim(1,samplesPerPrior) +
95+
# https://github.com/tidyverse/ggplot2/issues/3332
96+
facet_wrap("parameter") +
97+
geom_hline(yintercept=CI[1], color='green', linetype="dotted", alpha=.5) +
98+
geom_hline(yintercept=CI[3], color='green', linetype="dotted", alpha=.5)
99+
if (hideAxes) {
100+
pl <- pl + theme(axis.text.x=element_blank(),
101+
axis.text.y=element_blank(),axis.ticks=element_blank(),
102+
axis.title.x=element_blank(),
103+
axis.title.y=element_blank())
104+
}
105+
pl
106+
}

0 commit comments

Comments
 (0)