Skip to content

Commit e0c69f5

Browse files
base -> ggplot2
1 parent 8582011 commit e0c69f5

15 files changed

+416
-608
lines changed

02-data-design.Rmd

+8-8
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ for (i in 1:nrow(s)) {
183183
sub[j] <- TRUE
184184
}
185185
}
186-
points(S, col = COL[1, 4 - 2 * sub], pch = 20)
186+
points(S, col = IMSCOL["blue", 4 - 2 * sub], pch = 20)
187187
text(0.5, 1, "all graduates", pos = 3, cex = 1.3)
188188
lines(
189189
(x - 0.5) * 2 * sqrt(0.07) + 0.55,
@@ -199,14 +199,14 @@ lines((x - 0.5) / 2 + 1.5,
199199
200200
SS <- (S[N, ] - 0.5) / 2 + 0.5
201201
these <- c(2, 5, 10, 12, 20, 21, 22, 23, 1, 8)
202-
points(SS[these, 1] + 1, SS[these, 2], col = COL[4, 2], pch = 20, cex = 1.5)
202+
points(SS[these, 1] + 1, SS[these, 2], col = IMSCOL["red", "f1"], pch = 20, cex = 1.5)
203203
text(1.5, 0.75, "sample", pos = 3, cex = 1.3)
204204
205205
for (i in these) {
206206
arrows(S[N[i], 1], S[N[i], 2],
207207
SS[i, 1] + 1 - 0.03, SS[i, 2],
208208
length = 0.08,
209-
col = COL[5],
209+
col = IMSCOL["black", "full"],
210210
lwd = 1.5
211211
)
212212
}
@@ -272,7 +272,7 @@ for (i in 1:nrow(s)) {
272272
sub[j] <- TRUE
273273
}
274274
}
275-
points(S, col = COL[1, 4 - 2 * sub], pch = 20)
275+
points(S, col = IMSCOL["blue", 4 - 2 * sub], pch = 20)
276276
text(0.5, 1, 'population of interest', pos = 3, cex = 1.3)
277277
lines((x - 0.5) * 2 * sqrt(0.115) + 0.35,
278278
(y - 0.5) * 2 * sqrt(0.115) + 0.55)
@@ -287,7 +287,7 @@ SS <- (S[N, ] - 0.5) / 2 + 0.5
287287
these <- c(2, 5, 6, 7, 15)
288288
points(SS[these, 1] + 1,
289289
SS[these, 2],
290-
col = COL[4, 2],
290+
col = IMSCOL["red", "f1"],
291291
pch = 20,
292292
cex = 1.5)
293293
text(1.5, 0.75, 'sample', pos=3, cex = 1.3)
@@ -298,7 +298,7 @@ for(i in these){
298298
SS[i, 1] + 1 - 0.03,
299299
SS[i, 2],
300300
length=0.08,
301-
col=COL[5],
301+
col=IMSCOL["black", "full"],
302302
lwd=1.5)
303303
}
304304
rect(0.145, 0.195, 0.775, 0.11,
@@ -381,9 +381,9 @@ arrows(0.67, 0.78, 0.8, 0.62,
381381
length = 0.08, lwd = 1.5)
382382
383383
arrows(0.5, 0.5, 0.69, 0.5,
384-
length = 0.08, col = COL[6,2])
384+
length = 0.08, col = IMSCOL["gray", "f1"])
385385
text(0.595, 0.565, "?",
386-
cex = 1.5, col = COL[4])
386+
cex = 1.5, col = IMSCOL["red", "full"])
387387
par(par_og) # restore original par
388388
```
389389

05-explore-numerical.Rmd

+27-19
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Looking at Figure \@ref(fig:loan50-amount-income), we see that there are many bo
4343
ggplot(county, aes(x = poverty/100, y = median_hh_income)) +
4444
geom_point(alpha = 0.3, fill = IMSCOL["black", "full"],
4545
shape = 21, size = 3) +
46-
geom_smooth(linetype = "dashed", color = IMSCOL["red", "full"], se = FALSE) +
46+
geom_smooth(linetype = "dashed", color = IMSCOL["red", "full"], se = FALSE) +
4747
labs(x = "Poverty rate",y = "Median household income") +
4848
scale_x_continuous(labels = percent_format(accuracy = 1)) +
4949
scale_y_continuous(labels = dollar_format(scale = 0.001, suffix = "K"))
@@ -101,7 +101,7 @@ ggplot(loan50, aes(x = interest_rate)) +
101101
data = data.frame(x = c(loan50_interest_rate_mean - 1, loan50_interest_rate_mean + 1, loan50_interest_rate_mean),
102102
y = c(-0.1, -0.1, 0)),
103103
aes(x = x, y = y),
104-
fill = COL["red", "full"]
104+
fill = IMSCOL["red", "full"]
105105
)
106106
```
107107
@@ -462,9 +462,9 @@ ggplot(loan50, aes(x = interest_rate)) +
462462
labs(x = "Interest rate", y = "Count") +
463463
geom_histogram(breaks = seq(5, 27.5, 2.5)) +
464464
scale_x_continuous(breaks = seq(-5, 25, 5), labels = label_percent(scale = 1, accuracy = 1)) +
465-
geom_polygon(data = box_sd1, aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3) +
466-
geom_polygon(data = box_sd2, aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3) +
467-
geom_polygon(data = box_sd3, aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3)
465+
geom_polygon(data = box_sd1, aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3) +
466+
geom_polygon(data = box_sd2, aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3) +
467+
geom_polygon(data = box_sd3, aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3)
468468
```
469469
470470
```{r severalDiffDistWithSdOf1, fig.cap = "Three very different population distributions with the same mean (0) and standard deviation (1)."}
@@ -483,21 +483,29 @@ dists_mean0_sd1 <- tibble(
483483
ggplot(dists_mean0_sd1, aes(x = x)) +
484484
geom_histogram(binwidth = 1) +
485485
facet_grid(group ~ ., scales = "free_y") +
486-
theme(# remove y axis
487-
axis.title.y = element_blank(),
488-
axis.text.y = element_blank(),
489-
axis.ticks.y = element_blank(),
490-
# strip facet labels
491-
strip.background = element_blank(),
492-
strip.text.y = element_blank()) +
486+
theme(
487+
# remove y axis
488+
axis.title.y = element_blank(),
489+
axis.text.y = element_blank(),
490+
axis.ticks.y = element_blank(),
491+
# strip facet labels
492+
strip.background = element_blank(),
493+
strip.text.y = element_blank()
494+
) +
493495
scale_x_continuous(breaks = seq(-3, 3, 1)) +
494496
labs(x = NULL) +
495-
geom_polygon(data = data.frame(x = c(-1, -1, 1, 1), y = c(0, 1000, 1000, 0)),
496-
aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3) +
497-
geom_polygon(data = data.frame(x = c(-2, -2, 2, 2), y = c(0, 1000, 1000, 0)),
498-
aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3) +
499-
geom_polygon(data = data.frame(x = c(-3, -3, 3, 3), y = c(0, 1000, 1000, 0)),
500-
aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3)
497+
geom_polygon(
498+
data = data.frame(x = c(-1, -1, 1, 1), y = c(0, 1000, 1000, 0)),
499+
aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3
500+
) +
501+
geom_polygon(
502+
data = data.frame(x = c(-2, -2, 2, 2), y = c(0, 1000, 1000, 0)),
503+
aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3
504+
) +
505+
geom_polygon(
506+
data = data.frame(x = c(-3, -3, 3, 3), y = c(0, 1000, 1000, 0)),
507+
aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3
508+
)
501509
```
502510

503511
::: {.guidedpractice}
@@ -545,7 +553,7 @@ p_dotplot <- ggplot(loan50, aes(x = interest_rate)) +
545553
limits = c(0, 30))
546554

547555
p_boxplot <- ggplot(loan50, aes(x = interest_rate)) +
548-
geom_boxplot(outlier.size = 2.5, outlier.color = COL["blue", "full"]) +
556+
geom_boxplot(outlier.size = 2.5) +
549557
theme(axis.title.y = element_blank(),
550558
axis.text.y = element_blank(),
551559
axis.ticks.y = element_blank()) +

14-foundations-mathematical.Rmd

+10-10
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ Y <- dnorm(X)
569569
plot(X, Y, type='l', axes=F, xlim=c(-3.4,16+3.4), ylim=c(0, 0.652))
570570
lines(X, rep(0,length(X)))
571571
these <- which(X <= 8)
572-
polygon(c(X[these[1]], X[these],X[rev(these)[1]]), c(0,Y[these],0), col=COL[1])
572+
polygon(c(X[these[1]], X[these],X[rev(these)[1]]), c(0,Y[these],0), col=IMSCOL["blue", "full"])
573573
lines(X, Y)
574574
#abline(h=0)
575575
lines(c(0,0), dnorm(0)*c(0.01,0.99), col=COL[6], lty=3)
@@ -578,7 +578,7 @@ text(0, 0.58, format(c(1, 0.0001), scientific=FALSE)[1], cex=2)
578578
lines(X+8, Y, type='l', xlim=c(-3.4,3.4))
579579
lines(X+8, rep(0,length(X)))
580580
these <- which(X <= 0.43)
581-
polygon(c(X[these[1]], X[these],X[rev(these)[1]])+8, c(0,Y[these],0), col=COL[1])
581+
polygon(c(X[these[1]], X[these],X[rev(these)[1]])+8, c(0,Y[these],0), col=IMSCOL["blue", "full"])
582582
lines(X+8, Y)
583583
lines(c(0,0), dnorm(0)*c(0.01,0.99), col=COL[6], lty=3)
584584
lines(8+c(3,8-3), c(0.23,0.23), lwd=3)
@@ -588,7 +588,7 @@ text(8, 0.58, format(0.6664, scientific=FALSE)[1], cex=2)
588588
lines(X+8+8, Y, type='l', xlim=c(-3.4,3.4))
589589
lines(X+8+8, rep(0,length(X)))
590590
these <- which(X > 0.43)
591-
polygon(c(X[these[1]], X[these],X[rev(these)[1]])+8+8, c(0,Y[these],0), col=COL[1])
591+
polygon(c(X[these[1]], X[these],X[rev(these)[1]])+8+8, c(0,Y[these],0), col=IMSCOL["blue", "full"])
592592
lines(X+8+8, Y)
593593
lines(c(0,0), dnorm(0)*c(0.01,0.99), col=COL[6], lty=3)
594594
text(12, 0.58, '=', cex=2)
@@ -791,26 +791,26 @@ Y <- dnorm(X)
791791
plot(X, Y, type = "l", axes = F, xlim = c(-3.4, 24 + 3.4), ylim = c(0, 0.6), xlab = NA, ylab = NA)
792792
lines(X, rep(0, length(X)))
793793
these <- which(X <= 8)
794-
polygon(c(X[these[1]], X[these], X[rev(these)[1]]), c(0, Y[these], 0), col = COL[1])
794+
polygon(c(X[these[1]], X[these], X[rev(these)[1]]), c(0, Y[these], 0), col = IMSCOL["blue", "full"])
795795
lines(X, Y)
796796
lines(c(3, 8 - 3), c(0.2, 0.2), lwd = 3)
797797
lines(X + 8, Y, type = "l")
798798
lines(X + 8, rep(0, length(X)))
799799
these <- which(X < -0.303)
800-
polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 8, c(0, Y[these], 0), col = COL[1])
800+
polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 8, c(0, Y[these], 0), col = IMSCOL["blue", "full"])
801801
lines(X + 8, Y)
802802
lines(8 + c(3, 8 - 3), c(0.2, 0.2), lwd = 3)
803803
lines(X + 16, Y, type = "l")
804804
lines(X + 16, rep(0, length(X)))
805805
these <- which(X > 1.212)
806-
polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 16, c(0, Y[these], 0), col = COL[1])
806+
polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 16, c(0, Y[these], 0), col = IMSCOL["blue", "full"])
807807
lines(X + 16, Y)
808808
lines(16 + c(3, 8 - 3), c(0.23, 0.23), lwd = 3)
809809
lines(16 + c(3, 8 - 3), c(0.17, 0.17), lwd = 3)
810810
lines(X + 24, Y, type = "l", xlim = c(-3.4, 3.4))
811811
lines(X + 24, rep(0, length(X)))
812812
these <- which(X > -0.303 & X < 1.212)
813-
polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 24, c(0, Y[these], 0), col = COL[1])
813+
polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 24, c(0, Y[these], 0), col = IMSCOL["blue", "full"])
814814
lines(X + 24, Y)
815815
text(0, 0.53, "1.0000")
816816
text(8, 0.53, "0.3821")
@@ -1160,18 +1160,18 @@ plot(xR, yR,
11601160
type = "n", xlab = "",
11611161
ylab = "", axes = FALSE
11621162
)
1163-
abline(v = p, lty = 2, col = COL[6])
1163+
abline(v = p, lty = 2, col = IMSCOL["gray", "full"])
11641164
axis(1,
11651165
at = p + c(-0.05, 0, 0.05),
11661166
c("0.25", "p = 0.30", "0.35"), cex.axis = 1.15
11671167
)
11681168
for (i in 1:num.sim) {
11691169
ci <- p.hat[i] + 2 * c(-1, 1) * SE[i]
11701170
if (abs(p.hat[i] - p) > 2 * SE[i]) {
1171-
col <- COL[4]
1171+
col <- IMSCOL["red", "full"]
11721172
points(p.hat[i], i, cex = 1.4, col = col)
11731173
} else {
1174-
col <- COL[1]
1174+
col <- IMSCOL["blue", "full"]
11751175
}
11761176
points(p.hat[i], i, pch = 20, cex = 1.2, col = col)
11771177
lines(ci, rep(i, 2), col = col)

16-inference-one-prop.Rmd

+6-6
Original file line numberDiff line numberDiff line change
@@ -340,26 +340,26 @@ If $X$ is a normally distributed random variable, how often will $X$ be within 2
340340
par(mar = c(3.3, 1, .5, 1), mgp = c(2.1, 0.6, 0))
341341
X <- rev(seq(-4, 4, 0.025))
342342
Y <- dt(X, 10) # makes better visual
343-
plot(X, Y, type = "l", xlab = "standard deviations from the mean", ylab = "", axes = FALSE, xlim = 3.3 * c(-1, 1), ylim = c(0, 0.59), col = COL[6])
343+
plot(X, Y, type = "l", xlab = "standard deviations from the mean", ylab = "", axes = FALSE, xlim = 3.3 * c(-1, 1), ylim = c(0, 0.59), col = IMSCOL["gray", "full"])
344344
axis(1, at = -3:3)
345345
abline(h = 0)
346346
yMax <- 0.41
347347
X <- seq(-4, 4, 0.025)
348348
Y <- dt(X, 10) # makes better visual
349-
lines(X, Y, col = COL[6])
349+
lines(X, Y, col = IMSCOL["gray", "full"])
350350
351351
these <- (X < 2.58 & X > -2.58)
352352
x <- c(-2.58, X[these], 2.58)
353353
y <- c(0, dt(X[these], 10), 0)
354-
polygon(x, y, col = COL[1, 3], border = "#00000000")
354+
polygon(x, y, col = IMSCOL["blue", "f2"], border = "#00000000")
355355
these <- (X < 1.96 & X > -1.96)
356356
x <- c(-1.96, X[these], 1.96)
357357
y <- c(0, dt(X[these], 10), 0)
358-
polygon(x, y, col = COL[1], border = "#00000000")
358+
polygon(x, y, col = IMSCOL["blue", "full"], border = "#00000000")
359359
360360
lines(1.96 * c(-1, 1), rep(yMax, 2), lwd = 2)
361-
lines(rep(-1.96, 2), c(0, yMax), lty = 2, col = COL[6])
362-
lines(rep(1.96, 2), c(0, yMax), lty = 2, col = COL[6])
361+
lines(rep(-1.96, 2), c(0, yMax), lty = 2, col = IMSCOL["gray", "full"])
362+
lines(rep(1.96, 2), c(0, yMax), lty = 2, col = IMSCOL["gray", "full"])
363363
text(0, yMax, "95%, extends -1.96 to 1.96", pos = 3)
364364
365365
yMax <- 0.53

17-inference-two-props.Rmd

+10-14
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ The process by which we randomize observations to two groups is summarized and v
5252

5353
Figure \@ref(fig:malaria-rand-dot-plot) shows a stacked plot of the differences found from 100 randomization simulations (i.e., repeated iterations as described in Figure \@ref(fig:fullrand)), where each dot represents a simulated difference between the infection rates (control rate minus treatment rate).
5454

55-
```{r malaria-rand-dot-plot, fig.cap="A stacked dot plot of differences from 100 simulations produced under the independence model $H_0,$ where in these simulations infections are unaffected by the vaccine. Two of the 100 simulations had a difference of at least 64.3%, the difference observed in the study.", warning=FALSE, fig.width=10}
55+
```{r malaria-rand-dot-plot, fig.cap="A stacked dot plot of differences from 100 simulations produced under the independence model $H_0,$ where in these simulations infections are unaffected by the vaccine. Two of the 100 simulations had a difference of at least 64.3%, the difference observed in the study."}
5656
5757
set.seed(47)
5858
@@ -98,13 +98,11 @@ plot(X, Y,
9898
xlab = "Difference in Infection Rates",
9999
axes = FALSE,
100100
ylim = c(0, max(Y)),
101-
col = COL[1],
101+
col = IMSCOL["blue", "full"],
102102
pch = 20)
103-
# at <- seq(-0.4, 0.4, 0.1)
104-
# labels <- c(-0.4, "", -0.2, "", 0, "", 0.2, "", 0.4)
105103
axis(1) #, at, labels)
106104
abline(h = 0)
107-
points(X[X > 0.64], Y[X > 0.64], lwd=3, col = COL[4], cex=0.4)
105+
points(X[X > 0.64], Y[X > 0.64], lwd=3, col = IMSCOL["red", "full"], cex=0.4)
108106
```
109107

110108
### Observed statistic vs null statistics
@@ -256,13 +254,12 @@ histPlot(X, Y, breaks = breaks,
256254
ylab = "",
257255
axes = FALSE,
258256
#ylim = c(0, max(Y)),
259-
col = COL[1],
257+
col = IMSCOL["blue", "full"],
260258
pch = 20)
261259
at <- seq(-0.3, 0.3, 0.1)
262260
labels <- c(-0.3, "", -0.1, 0, 0.1, "", 0.3)
263261
axis(1, at, labels)
264262
abline(h = 0)
265-
#points(X[X > 0.64], Y[X > 0.64], lwd=3, col = COL[4], cex=0.4)
266263
```
267264

268265
### Bootstrap percentile vs. SE confidence intervals
@@ -300,9 +297,9 @@ histPlot(result, breaks = breaks,
300297
axes = FALSE, col = rgb(1,1,1),
301298
xlab = "", ylab="")
302299
histPlot(bsprops_up, breaks = breaks,
303-
col = COL[1], add = TRUE)
300+
col = IMSCOL["blue", "full"], add = TRUE)
304301
histPlot(bsprops_low, breaks = breaks,
305-
col = COL[1], add = TRUE)
302+
col = IMSCOL["blue", "full"], add = TRUE)
306303
axis(1)
307304
#axis(2, at = seq(0, 100, 50), labels = format(seq(0, 50, 25) / nsim))
308305
lines(c(bsq[6], bsq[6]), c(0, 80), lty = 3, lwd = 3)
@@ -356,7 +353,6 @@ If we are making 95% intervals, then 5% of the intervals we create over our life
356353
What we know is that over our lifetimes as scientists, 95% of the intervals created and reported on will capture the parameter value of interest: thus the language "95% confident."
357354

358355
```{r ci25ints, fig.cap = "One hypothetical population, parameter value of: $p_1 - p_2 = 0.47.$ Twenty-five different studies all which led to a different point estimate, SE, and confidence interval. The study at hand is one of the horizontal lines (hopefully a blue line!).", warning = FALSE, fig.width = 10}
359-
data(run09)
360356
set.seed(52)
361357
m <- 103.4594
362358
s <- 19.31445
@@ -380,16 +376,16 @@ plot(xR, yR,
380376
xlab = '',
381377
ylab = '',
382378
axes = FALSE)
383-
abline(v = m, lty = 2, col = COL[5,2])
379+
abline(v = m, lty = 2, col = IMSCOL["black", "f1"])
384380
axis(1, at = m, expression('p'[1]*' - p'[2]*' = 0.47'))
385381
for(i in 1:k){
386382
ci <- means[i] + 2 * c(-1, 1) * SE[i]
387383
if(abs(means[i] - m) > 1.96 * SE[i]){
388-
col <- COL[4]
384+
col <- IMSCOL["red", "full"]
389385
points(means[i], i, cex = 1.4, col = col)
390386
lines(ci, rep(i, 2), col = col, lwd = 4)
391387
} else {
392-
col <- COL[1]
388+
col <- IMSCOL["blue", "full"]
393389
}
394390
points(means[i], i, pch = 20, cex = 1.2, col = col)
395391
lines(ci, rep(i, 2), col = col)
@@ -692,7 +688,7 @@ That is, the difference in breast cancer death rates is reasonably explained by
692688

693689
```{r echo=FALSE}
694690
normTail(L = -0.17, U = 0.17,
695-
col = COL[1],
691+
col = IMSCOL["blue", "full"],
696692
axes = FALSE,
697693
xlim = c(-3.2, 3.2))
698694
at <- c(-10, -2, 0, 2, 10)

0 commit comments

Comments
 (0)