OpenIntroStat
diff --git a/‎02-data-design.Rmd
+8-8 b/‎02-data-design.Rmd
+8-8
diff --git a/‎05-explore-numerical.Rmd
+27-19 b/‎05-explore-numerical.Rmd
+27-19
diff --git a/‎14-foundations-mathematical.Rmd
+10-10 b/‎14-foundations-mathematical.Rmd
+10-10
diff --git a/‎16-inference-one-prop.Rmd
+6-6 b/‎16-inference-one-prop.Rmd
+6-6
diff --git a/‎17-inference-two-props.Rmd
+10-14 b/‎17-inference-two-props.Rmd
+10-14
@@ -183,7 +183,7 @@ for (i in 1:nrow(s)) {
     sub[j] <- TRUE
   }
 }
-points(S, col = COL[1, 4 - 2 * sub], pch = 20)
+points(S, col = IMSCOL["blue", 4 - 2 * sub], pch = 20)
 text(0.5, 1, "all graduates", pos = 3, cex = 1.3)
 lines(
   (x - 0.5) * 2 * sqrt(0.07) + 0.55,
@@ -199,14 +199,14 @@ lines((x - 0.5) / 2 + 1.5,
 
 SS <- (S[N, ] - 0.5) / 2 + 0.5
 these <- c(2, 5, 10, 12, 20, 21, 22, 23, 1, 8)
-points(SS[these, 1] + 1, SS[these, 2], col = COL[4, 2], pch = 20, cex = 1.5)
+points(SS[these, 1] + 1, SS[these, 2], col = IMSCOL["red", "f1"], pch = 20, cex = 1.5)
 text(1.5, 0.75, "sample", pos = 3, cex = 1.3)
 
 for (i in these) {
   arrows(S[N[i], 1], S[N[i], 2],
     SS[i, 1] + 1 - 0.03, SS[i, 2],
     length = 0.08,
-    col = COL[5],
+    col = IMSCOL["black", "full"],
     lwd = 1.5
   )
 }
@@ -272,7 +272,7 @@ for (i in 1:nrow(s)) {
     sub[j] <- TRUE
   }
 }
-points(S, col = COL[1, 4 - 2 * sub], pch = 20)
+points(S, col = IMSCOL["blue", 4 - 2 * sub], pch = 20)
 text(0.5, 1, 'population of interest', pos = 3, cex = 1.3)
 lines((x - 0.5) * 2 * sqrt(0.115) + 0.35,
       (y - 0.5) * 2 * sqrt(0.115) + 0.55)
@@ -287,7 +287,7 @@ SS <- (S[N, ] - 0.5) / 2 + 0.5
 these <- c(2, 5, 6, 7, 15)
 points(SS[these, 1] + 1,
        SS[these, 2],
-       col = COL[4, 2],
+       col = IMSCOL["red", "f1"],
        pch = 20,
        cex = 1.5)
 text(1.5, 0.75, 'sample', pos=3, cex = 1.3)
@@ -298,7 +298,7 @@ for(i in these){
 	       SS[i, 1] + 1 - 0.03,
 	       SS[i, 2],
 	       length=0.08,
-	       col=COL[5],
+	       col=IMSCOL["black", "full"],
 	       lwd=1.5)
 }
 rect(0.145, 0.195, 0.775, 0.11,
@@ -381,9 +381,9 @@ arrows(0.67, 0.78, 0.8, 0.62,
        length = 0.08, lwd = 1.5)
 
 arrows(0.5, 0.5, 0.69, 0.5,
-       length = 0.08, col = COL[6,2])
+       length = 0.08, col = IMSCOL["gray", "f1"])
 text(0.595, 0.565, "?",
-     cex = 1.5, col = COL[4])
+     cex = 1.5, col = IMSCOL["red", "full"])
 par(par_og) # restore original par
 ```
 
 
@@ -43,7 +43,7 @@ Looking at Figure \@ref(fig:loan50-amount-income), we see that there are many bo
 ggplot(county, aes(x = poverty/100, y = median_hh_income)) +
   geom_point(alpha = 0.3, fill = IMSCOL["black", "full"], 
              shape = 21, size = 3) +
-  geom_smooth(linetype = "dashed", color = IMSCOL["red", "full"], se = FALSE) +
+  geom_smooth(linetype = "dashed", color = IMSCOL["red", "full"], se = FALSE) +
   labs(x = "Poverty rate",y = "Median household income") +
   scale_x_continuous(labels = percent_format(accuracy = 1)) +
   scale_y_continuous(labels = dollar_format(scale = 0.001, suffix = "K"))
@@ -101,7 +101,7 @@ ggplot(loan50, aes(x = interest_rate)) +
     data = data.frame(x = c(loan50_interest_rate_mean - 1, loan50_interest_rate_mean + 1, loan50_interest_rate_mean), 
                       y = c(-0.1, -0.1, 0)),
     aes(x = x, y = y),
-    fill = COL["red", "full"]
+    fill = IMSCOL["red", "full"]
   )
 ```
 
@@ -462,9 +462,9 @@ ggplot(loan50, aes(x = interest_rate)) +
   labs(x = "Interest rate", y = "Count") +
   geom_histogram(breaks = seq(5, 27.5, 2.5)) +
   scale_x_continuous(breaks = seq(-5, 25, 5), labels = label_percent(scale = 1, accuracy = 1)) +
-  geom_polygon(data = box_sd1, aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3) +
-  geom_polygon(data = box_sd2, aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3) +
-  geom_polygon(data = box_sd3, aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3)
+  geom_polygon(data = box_sd1, aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3) +
+  geom_polygon(data = box_sd2, aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3) +
+  geom_polygon(data = box_sd3, aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3)
 ```
 
 ```{r severalDiffDistWithSdOf1, fig.cap = "Three very different population distributions with the same mean (0) and standard deviation (1)."}
@@ -483,21 +483,29 @@ dists_mean0_sd1 <- tibble(
 ggplot(dists_mean0_sd1, aes(x = x)) +
   geom_histogram(binwidth = 1) +
   facet_grid(group ~ ., scales = "free_y") +
-  theme(# remove y axis
-        axis.title.y = element_blank(),
-        axis.text.y  = element_blank(),
-        axis.ticks.y = element_blank(),
-        # strip facet labels
-        strip.background = element_blank(),
-        strip.text.y = element_blank()) +
+  theme( 
+    # remove y axis
+    axis.title.y = element_blank(),
+    axis.text.y = element_blank(),
+    axis.ticks.y = element_blank(),
+    # strip facet labels
+    strip.background = element_blank(),
+    strip.text.y = element_blank()
+  ) +
   scale_x_continuous(breaks = seq(-3, 3, 1)) +
   labs(x = NULL) +
-  geom_polygon(data = data.frame(x = c(-1, -1, 1, 1), y = c(0, 1000, 1000, 0)), 
-               aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3) +
-  geom_polygon(data = data.frame(x = c(-2, -2, 2, 2), y = c(0, 1000, 1000, 0)), 
-               aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3) +
-  geom_polygon(data = data.frame(x = c(-3, -3, 3, 3), y = c(0, 1000, 1000, 0)), 
-               aes(x = x, y = y), fill = COL["gray", "full"], alpha = 0.3)
+  geom_polygon(
+    data = data.frame(x = c(-1, -1, 1, 1), y = c(0, 1000, 1000, 0)),
+    aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3
+  ) +
+  geom_polygon(
+    data = data.frame(x = c(-2, -2, 2, 2), y = c(0, 1000, 1000, 0)),
+    aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3
+  ) +
+  geom_polygon(
+    data = data.frame(x = c(-3, -3, 3, 3), y = c(0, 1000, 1000, 0)),
+    aes(x = x, y = y), fill = IMSCOL["gray", "full"], alpha = 0.3
+  )
 ```
 
 ::: {.guidedpractice}
@@ -545,7 +553,7 @@ p_dotplot <- ggplot(loan50, aes(x = interest_rate)) +
                      limits = c(0, 30))
 
 p_boxplot <- ggplot(loan50, aes(x = interest_rate)) + 
-  geom_boxplot(outlier.size = 2.5, outlier.color = COL["blue", "full"]) +
+  geom_boxplot(outlier.size = 2.5) +
   theme(axis.title.y = element_blank(),
         axis.text.y  = element_blank(),
         axis.ticks.y = element_blank()) +
 
@@ -569,7 +569,7 @@ Y <- dnorm(X)
 plot(X, Y, type='l', axes=F, xlim=c(-3.4,16+3.4), ylim=c(0, 0.652))
 lines(X, rep(0,length(X)))
 these <- which(X <= 8)
-polygon(c(X[these[1]], X[these],X[rev(these)[1]]), c(0,Y[these],0), col=COL[1])
+polygon(c(X[these[1]], X[these],X[rev(these)[1]]), c(0,Y[these],0), col=IMSCOL["blue", "full"])
 lines(X, Y)
 #abline(h=0)
 lines(c(0,0), dnorm(0)*c(0.01,0.99), col=COL[6], lty=3)
@@ -578,7 +578,7 @@ text(0, 0.58, format(c(1, 0.0001), scientific=FALSE)[1], cex=2)
 lines(X+8, Y, type='l', xlim=c(-3.4,3.4))
 lines(X+8, rep(0,length(X)))
 these <- which(X <= 0.43)
-polygon(c(X[these[1]], X[these],X[rev(these)[1]])+8, c(0,Y[these],0), col=COL[1])
+polygon(c(X[these[1]], X[these],X[rev(these)[1]])+8, c(0,Y[these],0), col=IMSCOL["blue", "full"])
 lines(X+8, Y)
 lines(c(0,0), dnorm(0)*c(0.01,0.99), col=COL[6], lty=3)
 lines(8+c(3,8-3), c(0.23,0.23), lwd=3)
@@ -588,7 +588,7 @@ text(8, 0.58, format(0.6664, scientific=FALSE)[1], cex=2)
 lines(X+8+8, Y, type='l', xlim=c(-3.4,3.4))
 lines(X+8+8, rep(0,length(X)))
 these <- which(X > 0.43)
-polygon(c(X[these[1]], X[these],X[rev(these)[1]])+8+8, c(0,Y[these],0), col=COL[1])
+polygon(c(X[these[1]], X[these],X[rev(these)[1]])+8+8, c(0,Y[these],0), col=IMSCOL["blue", "full"])
 lines(X+8+8, Y)
 lines(c(0,0), dnorm(0)*c(0.01,0.99), col=COL[6], lty=3)
 text(12, 0.58, '=', cex=2)
@@ -791,26 +791,26 @@ Y <- dnorm(X)
 plot(X, Y, type = "l", axes = F, xlim = c(-3.4, 24 + 3.4), ylim = c(0, 0.6), xlab = NA, ylab = NA)
 lines(X, rep(0, length(X)))
 these <- which(X <= 8)
-polygon(c(X[these[1]], X[these], X[rev(these)[1]]), c(0, Y[these], 0), col = COL[1])
+polygon(c(X[these[1]], X[these], X[rev(these)[1]]), c(0, Y[these], 0), col = IMSCOL["blue", "full"])
 lines(X, Y)
 lines(c(3, 8 - 3), c(0.2, 0.2), lwd = 3)
 lines(X + 8, Y, type = "l")
 lines(X + 8, rep(0, length(X)))
 these <- which(X < -0.303)
-polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 8, c(0, Y[these], 0), col = COL[1])
+polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 8, c(0, Y[these], 0), col = IMSCOL["blue", "full"])
 lines(X + 8, Y)
 lines(8 + c(3, 8 - 3), c(0.2, 0.2), lwd = 3)
 lines(X + 16, Y, type = "l")
 lines(X + 16, rep(0, length(X)))
 these <- which(X > 1.212)
-polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 16, c(0, Y[these], 0), col = COL[1])
+polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 16, c(0, Y[these], 0), col = IMSCOL["blue", "full"])
 lines(X + 16, Y)
 lines(16 + c(3, 8 - 3), c(0.23, 0.23), lwd = 3)
 lines(16 + c(3, 8 - 3), c(0.17, 0.17), lwd = 3)
 lines(X + 24, Y, type = "l", xlim = c(-3.4, 3.4))
 lines(X + 24, rep(0, length(X)))
 these <- which(X > -0.303 & X < 1.212)
-polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 24, c(0, Y[these], 0), col = COL[1])
+polygon(c(X[these[1]], X[these], X[rev(these)[1]]) + 24, c(0, Y[these], 0), col = IMSCOL["blue", "full"])
 lines(X + 24, Y)
 text(0, 0.53, "1.0000")
 text(8, 0.53, "0.3821")
@@ -1160,18 +1160,18 @@ plot(xR, yR,
   type = "n", xlab = "",
   ylab = "", axes = FALSE
 )
-abline(v = p, lty = 2, col = COL[6])
+abline(v = p, lty = 2, col = IMSCOL["gray", "full"])
 axis(1,
   at = p + c(-0.05, 0, 0.05),
   c("0.25", "p = 0.30", "0.35"), cex.axis = 1.15
 )
 for (i in 1:num.sim) {
   ci <- p.hat[i] + 2 * c(-1, 1) * SE[i]
   if (abs(p.hat[i] - p) > 2 * SE[i]) {
-    col <- COL[4]
+    col <- IMSCOL["red", "full"]
     points(p.hat[i], i, cex = 1.4, col = col)
   } else {
-    col <- COL[1]
+    col <- IMSCOL["blue", "full"]
   }
   points(p.hat[i], i, pch = 20, cex = 1.2, col = col)
   lines(ci, rep(i, 2), col = col)
 
@@ -340,26 +340,26 @@ If $X$ is a normally distributed random variable, how often will $X$ be within 2
 par(mar = c(3.3, 1, .5, 1), mgp = c(2.1, 0.6, 0))
 X <- rev(seq(-4, 4, 0.025))
 Y <- dt(X, 10) # makes better visual
-plot(X, Y, type = "l", xlab = "standard deviations from the mean", ylab = "", axes = FALSE, xlim = 3.3 * c(-1, 1), ylim = c(0, 0.59), col = COL[6])
+plot(X, Y, type = "l", xlab = "standard deviations from the mean", ylab = "", axes = FALSE, xlim = 3.3 * c(-1, 1), ylim = c(0, 0.59), col = IMSCOL["gray", "full"])
 axis(1, at = -3:3)
 abline(h = 0) 
 yMax <- 0.41
 X <- seq(-4, 4, 0.025)
 Y <- dt(X, 10) # makes better visual
-lines(X, Y, col = COL[6])
+lines(X, Y, col = IMSCOL["gray", "full"])
 
 these <- (X < 2.58 & X > -2.58)
 x <- c(-2.58, X[these], 2.58)
 y <- c(0, dt(X[these], 10), 0)
-polygon(x, y, col = COL[1, 3], border = "#00000000")
+polygon(x, y, col = IMSCOL["blue", "f2"], border = "#00000000")
 these <- (X < 1.96 & X > -1.96)
 x <- c(-1.96, X[these], 1.96)
 y <- c(0, dt(X[these], 10), 0)
-polygon(x, y, col = COL[1], border = "#00000000")
+polygon(x, y, col = IMSCOL["blue", "full"], border = "#00000000")
 
 lines(1.96 * c(-1, 1), rep(yMax, 2), lwd = 2)
-lines(rep(-1.96, 2), c(0, yMax), lty = 2, col = COL[6])
-lines(rep(1.96, 2), c(0, yMax), lty = 2, col = COL[6])
+lines(rep(-1.96, 2), c(0, yMax), lty = 2, col = IMSCOL["gray", "full"])
+lines(rep(1.96, 2), c(0, yMax), lty = 2, col = IMSCOL["gray", "full"])
 text(0, yMax, "95%, extends -1.96 to 1.96", pos = 3)
 
 yMax <- 0.53
 
@@ -52,7 +52,7 @@ The process by which we randomize observations to two groups is summarized and v
 
 Figure \@ref(fig:malaria-rand-dot-plot) shows a stacked plot of the differences found from 100 randomization simulations (i.e., repeated iterations as described in Figure \@ref(fig:fullrand)), where each dot represents a simulated difference between the infection rates (control rate minus treatment rate).
 
-```{r malaria-rand-dot-plot, fig.cap="A stacked dot plot of differences from 100 simulations produced under the independence model $H_0,$ where in these simulations infections are unaffected by the vaccine. Two of the 100 simulations had a difference of at least 64.3%, the difference observed in the study.", warning=FALSE, fig.width=10}
+```{r malaria-rand-dot-plot, fig.cap="A stacked dot plot of differences from 100 simulations produced under the independence model $H_0,$ where in these simulations infections are unaffected by the vaccine. Two of the 100 simulations had a difference of at least 64.3%, the difference observed in the study."}
 
 set.seed(47)
 
@@ -98,13 +98,11 @@ plot(X, Y,
      xlab = "Difference in Infection Rates",
      axes = FALSE,
      ylim = c(0, max(Y)),
-     col = COL[1],
+     col = IMSCOL["blue", "full"],
      pch = 20)
-# at <- seq(-0.4, 0.4, 0.1)
-# labels <- c(-0.4, "", -0.2, "", 0, "", 0.2, "", 0.4)
 axis(1) #, at, labels)
 abline(h = 0)
-points(X[X > 0.64], Y[X > 0.64], lwd=3, col = COL[4], cex=0.4)
+points(X[X > 0.64], Y[X > 0.64], lwd=3, col = IMSCOL["red", "full"], cex=0.4)
 ```
 
 ### Observed statistic vs null statistics
@@ -256,13 +254,12 @@ histPlot(X, Y, breaks = breaks,
      ylab = "",
      axes = FALSE,
      #ylim = c(0, max(Y)),
-     col = COL[1],
+     col = IMSCOL["blue", "full"],
      pch = 20)
  at <- seq(-0.3, 0.3, 0.1)
  labels <- c(-0.3, "", -0.1, 0, 0.1, "", 0.3)
 axis(1, at, labels)
 abline(h = 0)
-#points(X[X > 0.64], Y[X > 0.64], lwd=3, col = COL[4], cex=0.4)
 ```
 
 ### Bootstrap percentile vs. SE confidence intervals
@@ -300,9 +297,9 @@ histPlot(result, breaks = breaks,
          axes = FALSE, col = rgb(1,1,1),
      xlab = "", ylab="")
 histPlot(bsprops_up, breaks = breaks,
-         col = COL[1], add = TRUE)
+         col = IMSCOL["blue", "full"], add = TRUE)
 histPlot(bsprops_low, breaks = breaks, 
-         col = COL[1], add = TRUE)
+         col = IMSCOL["blue", "full"], add = TRUE)
 axis(1)
 #axis(2, at = seq(0, 100, 50), labels = format(seq(0, 50, 25) / nsim))
 lines(c(bsq[6], bsq[6]), c(0, 80), lty = 3, lwd = 3)
@@ -356,7 +353,6 @@ If we are making 95% intervals, then 5% of the intervals we create over our life
 What we know is that over our lifetimes as scientists, 95% of the intervals created and reported on will capture the parameter value of interest: thus the language "95% confident."
 
 ```{r ci25ints, fig.cap = "One hypothetical population, parameter value of: $p_1 - p_2 = 0.47.$  Twenty-five different studies all which led to a different point estimate, SE, and confidence interval.  The study at hand is one of the horizontal lines (hopefully a blue line!).", warning = FALSE, fig.width = 10}
-data(run09)
 set.seed(52)
 m <- 103.4594
 s <- 19.31445
@@ -380,16 +376,16 @@ plot(xR, yR,
      xlab = '',
      ylab = '',
      axes = FALSE)
-abline(v = m, lty = 2, col = COL[5,2])
+abline(v = m, lty = 2, col = IMSCOL["black", "f1"])
 axis(1, at = m, expression('p'[1]*' - p'[2]*' = 0.47'))
 for(i in 1:k){
   ci <- means[i] + 2 * c(-1, 1) * SE[i]
   if(abs(means[i] - m) > 1.96 * SE[i]){
-    col <- COL[4]
+    col <- IMSCOL["red", "full"]
     points(means[i], i, cex = 1.4, col = col)
     lines(ci, rep(i, 2), col = col, lwd = 4)
   } else {
-    col <- COL[1]
+    col <- IMSCOL["blue", "full"]
   }
   points(means[i], i, pch = 20, cex = 1.2, col = col)
   lines(ci, rep(i, 2), col = col)
@@ -692,7 +688,7 @@ That is, the difference in breast cancer death rates is reasonably explained by
 
 ```{r echo=FALSE}
 normTail(L = -0.17, U = 0.17,
-        col = COL[1],
+        col = IMSCOL["blue", "full"],
         axes = FALSE,
         xlim = c(-3.2, 3.2))
 at <- c(-10, -2, 0, 2, 10)