diff --git a/DESCRIPTION b/DESCRIPTION index ca21590c..41845a26 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: simtrial Type: Package Title: Clinical Trial Simulation -Version: 0.3.2.10 +Version: 0.3.2.13 Authors@R: c( person("Keaven", "Anderson", email = "keaven_anderson@merck.com", role = c("aut")), person("Yilong", "Zhang", email = "elong0527@gmail.com", role = c("aut")), diff --git a/R/maxcombo.R b/R/maxcombo.R index 18e3eb08..a8b93bc1 100644 --- a/R/maxcombo.R +++ b/R/maxcombo.R @@ -36,7 +36,7 @@ #' sim_pw_surv(n = 200) |> #' cut_data_by_event(150) |> #' maxcombo(rho = c(0, 0), gamma = c(0, 0.5)) -maxcombo <- function(data, rho, gamma){ +maxcombo <- function(data, rho, gamma) { stopifnot( is.numeric(rho), is.numeric(gamma), rho >= 0, gamma >= 0, @@ -50,7 +50,7 @@ maxcombo <- function(data, rho, gamma){ return_corr = TRUE ) - ans <- data.frame(p_value = pvalue_maxcombo(ans)) + ans <- data.frame(p_value = pvalue_maxcombo(ans)) return(ans) } diff --git a/R/rmst.R b/R/rmst.R index 8e4cfb86..5084c0c7 100644 --- a/R/rmst.R +++ b/R/rmst.R @@ -25,9 +25,25 @@ #' @param var_label_tte Column name of the TTE variable. #' @param var_label_event Column name of the event variable. #' @param var_label_group Column name of the grouping variable. +#' @param formula (default: `NULL`) A formula that indicates the TTE, event, and +#' group variables (in that exact order; see Details below). This is an +#' alternative to specifying the variables as strings. If a formula is +#' provided, the values passed to `var_label_tte`, `var_label_event`, and +#' `var_label_group` are ignored. #' @param reference A group label indicating the reference group. #' @param alpha Type I error. #' +#' @details +#' The argument `formula` is provided as a convenience to easily specify the TTE, +#' event, and grouping variables. Note however that only the order of the three +#' variables is actually used by the underlying function. Any functions applied +#' in the formula are ignored, and thus should only be used for documenting your +#' intent. For example, you can use the syntax from the survival package +#' `Surv(tte | event) ~ group` to highlight the relation between the TTE and +#' event variables, but the function `Surv()` is never actually executed. +#' Importantly, you shouldn't apply any transformation functions such as `log()` +#' since these will also be ignored. +#' #' @return The z statistics. #' #' @export @@ -42,14 +58,55 @@ #' tau = 10, #' reference = "0" #' ) +#' +#' # Formula interface +#' library("survival") +#' +#' rmst( +#' data = ex1_delayed_effect, +#' formula = Surv(month | evntd) ~ trt, +#' tau = 10, +#' reference = "0" +#' ) +#' +#' # alternative +#' rmst( +#' data = ex1_delayed_effect, +#' formula = ~ Surv(month, evntd, trt), +#' tau = 10, +#' reference = "0" +#' ) +#' +#' # This example doesn't make statistical sense, but demonstrates that only the +#' # order of the 3 variables actually matters +#' rmst( +#' data = ex1_delayed_effect, +#' formula = month ~ evntd + trt, +#' tau = 10, +#' reference = "0" +#' ) rmst <- function( data, tau = 10, var_label_tte = "tte", var_label_event = "event", var_label_group = "treatment", + formula = NULL, reference = "control", alpha = 0.05) { + stopifnot(is.data.frame(data)) + + if (!is.null(formula)) { + stopifnot(inherits(formula, "formula")) + variables <- colnames(stats::get_all_vars(formula = formula, data = data)) + if (length(variables) != 3) { + stop("The formula interface requires exactly 3 variables specified") + } + var_label_tte <- variables[1] + var_label_event <- variables[2] + var_label_group <- variables[3] + } + res <- rmst_two_arm( time_var = data[[var_label_tte]], event_var = data[[var_label_event]], diff --git a/R/sim_gs_n.R b/R/sim_gs_n.R index 34395994..8f0fddb1 100644 --- a/R/sim_gs_n.R +++ b/R/sim_gs_n.R @@ -285,7 +285,7 @@ sim_gs_n <- function( for (i_analysis in seq_len(n_analysis)) { # Get cut date - cut_date[i_analysis] <- cutting[[i_analysis]](data = simu_data) + cut_date[i_analysis] <- cutting[[i_analysis]](simu_data) # Cut the data simu_data_cut <- simu_data |> cut_data_by_date(cut_date[i_analysis]) diff --git a/R/wlr.R b/R/wlr.R index 672c8262..d855588c 100644 --- a/R/wlr.R +++ b/R/wlr.R @@ -40,23 +40,22 @@ #' cut_data_by_event(150) |> #' wlr(weight = early_zero(early_period = 4)) #' -wlr <- function(data, weight){ - +wlr <- function(data, weight) { if (inherits(weight, "fh")) { ans <- data |> counting_process(arm = "experimental") |> fh_weight(rho_gamma = data.frame(rho = weight$rho, gamma = weight$gamma)) - } else if (inherits(weight, "mb")) { ans <- data |> counting_process(arm = "experimental") |> mb_weight(delay = weight$delay, w_max = weight$w_max) setDT(ans) - ans <- ans[, - .( - s = sum(o_minus_e * mb_weight), - v = sum(var_o_minus_e * mb_weight^2) - ) + ans <- ans[ + , + .( + s = sum(o_minus_e * mb_weight), + v = sum(var_o_minus_e * mb_weight^2) + ) ][, .(z = s / sqrt(v))] setDF(ans) } else if (inherits(weight, "early_period")) { @@ -64,11 +63,12 @@ wlr <- function(data, weight){ counting_process(arm = "experimental") |> early_zero_weight(early_period = weight$early_period) setDT(ans) - ans <- ans[, - .( - s = sum(o_minus_e * weight), - v = sum(var_o_minus_e * weight^2) - ) + ans <- ans[ + , + .( + s = sum(o_minus_e * weight), + v = sum(var_o_minus_e * weight^2) + ) ][, .(z = s / sqrt(v))] setDF(ans) } diff --git a/R/wlr_weight.R b/R/wlr_weight.R index b4ae8d63..cfb0a542 100644 --- a/R/wlr_weight.R +++ b/R/wlr_weight.R @@ -25,7 +25,7 @@ #' @return A list of parameters of the Fleming-Harrington weighting function #' @examples #' fh(rho = 0, gamma = 0.5) -fh <- function(rho = 0, gamma = 0){ +fh <- function(rho = 0, gamma = 0) { structure(list(rho = rho, gamma = gamma), class = c("list", "fh", "wlr")) } @@ -42,7 +42,7 @@ fh <- function(rho = 0, gamma = 0){ #' #' @examples #' mb(delay = 6, w_max = 2) -mb <- function(delay = 4, w_max = Inf){ +mb <- function(delay = 4, w_max = Inf) { structure(list(delay = delay, w_max = w_max), class = c("list", "mb", "wlr")) } @@ -59,6 +59,6 @@ mb <- function(delay = 4, w_max = Inf){ #' #' @examples #' early_zero(6) -early_zero <- function(early_period){ +early_zero <- function(early_period) { structure(list(early_period = early_period), class = c("list", "early_period", "wlr")) } diff --git a/man/rmst.Rd b/man/rmst.Rd index b6822fdd..d6099705 100644 --- a/man/rmst.Rd +++ b/man/rmst.Rd @@ -10,6 +10,7 @@ rmst( var_label_tte = "tte", var_label_event = "event", var_label_group = "treatment", + formula = NULL, reference = "control", alpha = 0.05 ) @@ -27,6 +28,12 @@ event or censor.} \item{var_label_group}{Column name of the grouping variable.} +\item{formula}{(default: \code{NULL}) A formula that indicates the TTE, event, and +group variables (in that exact order; see Details below). This is an +alternative to specifying the variables as strings. If a formula is +provided, the values passed to \code{var_label_tte}, \code{var_label_event}, and +\code{var_label_group} are ignored.} + \item{reference}{A group label indicating the reference group.} \item{alpha}{Type I error.} @@ -37,6 +44,17 @@ The z statistics. \description{ RMST difference of 2 arms } +\details{ +The argument \code{formula} is provided as a convenience to easily specify the TTE, +event, and grouping variables. Note however that only the order of the three +variables is actually used by the underlying function. Any functions applied +in the formula are ignored, and thus should only be used for documenting your +intent. For example, you can use the syntax from the survival package +\code{Surv(tte | event) ~ group} to highlight the relation between the TTE and +event variables, but the function \code{Surv()} is never actually executed. +Importantly, you shouldn't apply any transformation functions such as \code{log()} +since these will also be ignored. +} \examples{ data(ex1_delayed_effect) rmst( @@ -47,4 +65,31 @@ rmst( tau = 10, reference = "0" ) + +# Formula interface +library("survival") + +rmst( + data = ex1_delayed_effect, + formula = Surv(month | evntd) ~ trt, + tau = 10, + reference = "0" +) + +# alternative +rmst( + data = ex1_delayed_effect, + formula = ~ Surv(month, evntd, trt), + tau = 10, + reference = "0" +) + +# This example doesn't make statistical sense, but demonstrates that only the +# order of the 3 variables actually matters +rmst( + data = ex1_delayed_effect, + formula = month ~ evntd + trt, + tau = 10, + reference = "0" +) } diff --git a/tests/testthat/helper-pmvnorm.R b/tests/testthat/helper-pmvnorm.R index 2b00eb63..f5cc23b9 100644 --- a/tests/testthat/helper-pmvnorm.R +++ b/tests/testthat/helper-pmvnorm.R @@ -137,7 +137,7 @@ ptvnorm <- function(h, r, ro) { hp2 <- (h2 * rr133 - h1 * f3 - h3 * f2) / fac / sqrt(rr133) TV <- w * exp((rr12 * h12 - h122) / rr122) / sqrt(rr122) * pnorm(hp1) * r12 + w * exp((rr13 * h13 - h132) / rr133) / sqrt(rr133) * pnorm(hp2) * - r13 + r13 TV <- sum(TV) rho <- matrix(c(1, r23, r23, 1), 2, 2) p2 <- mvtnorm::pmvnorm(-Inf, c(h2, h3), c(0, 0), rho) diff --git a/tests/testthat/helper-sim_gs_n.R b/tests/testthat/helper-sim_gs_n.R index 94e49a7b..49828a54 100644 --- a/tests/testthat/helper-sim_gs_n.R +++ b/tests/testthat/helper-sim_gs_n.R @@ -3,10 +3,12 @@ test_enroll_rate <- function() { # parameters for enrollment enroll_rampup_duration <- 4 # duration for enrollment ramp up - enroll_duration <- 16 # total enrollment duration + enroll_duration <- 16 # total enrollment duration enroll_rate <- gsDesign2::define_enroll_rate( - duration = c(enroll_rampup_duration, - enroll_duration - enroll_rampup_duration), + duration = c( + enroll_rampup_duration, + enroll_duration - enroll_rampup_duration + ), rate = c(10, 30) ) return(enroll_rate) @@ -14,13 +16,13 @@ test_enroll_rate <- function() { test_fail_rate <- function() { # parameters for treatment effect - delay_effect_duration <- 3 # delay treatment effect in months - median_ctrl <- 9 # survival median of the control arm - median_exp <- c(9, 14) # survival median of the experimental arm + delay_effect_duration <- 3 # delay treatment effect in months + median_ctrl <- 9 # survival median of the control arm + median_exp <- c(9, 14) # survival median of the experimental arm dropout_rate <- 0.001 fail_rate <- gsDesign2::define_fail_rate( duration = c(delay_effect_duration, 100), - fail_rate = log(2) / median_ctrl, + fail_rate = log(2) / median_ctrl, hr = median_ctrl / median_exp, dropout_rate = dropout_rate ) @@ -29,9 +31,9 @@ test_fail_rate <- function() { test_cutting <- function() { # other related parameters - alpha <- 0.025 # type I error - beta <- 0.1 # type II error - ratio <- 1 # randomization ratio (exp:ctrl) + alpha <- 0.025 # type I error + beta <- 0.1 # type II error + ratio <- 1 # randomization ratio (exp:ctrl) # Define cuttings of 2 IAs and 1 FA # IA1 # The 1st interim analysis will occur at the later of the following 3 conditions: diff --git a/tests/testthat/helper-simfix.R b/tests/testthat/helper-simfix.R index 1bbba3b9..5b9bf7f9 100644 --- a/tests/testthat/helper-simfix.R +++ b/tests/testthat/helper-simfix.R @@ -1,6 +1,6 @@ # Helper functions used by test-double_programming_simfix.R -test_simfix <- function () { +test_simfix <- function() { # Study design using gsDesign alpha <- 0.025 gamma <- c(5, 5, 47) diff --git a/tests/testthat/test-unvalidated-rmst.R b/tests/testthat/test-unvalidated-rmst.R new file mode 100644 index 00000000..7aad6887 --- /dev/null +++ b/tests/testthat/test-unvalidated-rmst.R @@ -0,0 +1,115 @@ +test_that("rmst() snapshot test", { + data("ex1_delayed_effect") + observed <- rmst( + data = ex1_delayed_effect, + var_label_tte = "month", + var_label_event = "evntd", + var_label_group = "trt", + tau = 10, + reference = "0" + ) + expected <- data.frame( + rmst_arm1 = 6.495175253205431, + rmst_arm0 = 5.630125973237457, + rmst_diff = 0.8650492799679741, + z = 2.2178796367487963 + ) + expect_equal(observed, expected) +}) + +test_that("formula method matches default method", { + data("ex1_delayed_effect") + + rmst_default <- rmst( + data = ex1_delayed_effect, + var_label_tte = "month", + var_label_event = "evntd", + var_label_group = "trt", + tau = 10, + reference = "0" + ) + + rmst_formula_1 <- rmst( + data = ex1_delayed_effect, + formula = month ~ evntd + trt, + tau = 10, + reference = "0" + ) + expect_equal(rmst_formula_1, rmst_default) + + rmst_formula_2 <- rmst( + data = ex1_delayed_effect, + formula = survival::Surv(month | evntd) ~ trt, + tau = 10, + reference = "0" + ) + expect_equal(rmst_formula_2, rmst_default) + + rmst_formula_3 <- rmst( + data = ex1_delayed_effect, + formula = ~ survival::Surv(month, evntd, trt), + tau = 10, + reference = "0" + ) + expect_equal(rmst_formula_3, rmst_default) +}) + +test_that("default and formula methods of rmst are pipeable", { + data("ex1_delayed_effect") + + rmst_default <- ex1_delayed_effect |> + rmst( + var_label_tte = "month", + var_label_event = "evntd", + var_label_group = "trt", + tau = 10, + reference = "0" + ) + + rmst_formula_1 <- ex1_delayed_effect |> + rmst( + formula = month ~ evntd + trt, + tau = 10, + reference = "0" + ) + + expect_equal(rmst_formula_1, rmst_default) +}) + +test_that("formula argument throws error for bad input data", { + data("ex1_delayed_effect") + + # formula with 2 variables + expect_error( + rmst_formula_1 <- rmst( + data = ex1_delayed_effect, + formula = month ~ evntd, + tau = 10, + reference = "0" + ), + "The formula interface requires exactly 3 variables specified" + ) + + # formula with 4 variables + expect_error( + rmst_formula_1 <- rmst( + data = ex1_delayed_effect, + formula = month ~ evntd + trt + id, + tau = 10, + reference = "0" + ), + "The formula interface requires exactly 3 variables specified" + ) + + # non-formula + expect_error( + rmst_formula_1 <- rmst( + data = ex1_delayed_effect, + formula = "month ~ evntd + trt + id", + tau = 10, + reference = "0" + ), + 'inherits(formula, "formula") is not TRUE', + fixed = TRUE + ) +}) diff --git a/tests/testthat/test-unvalidated-sim_gs_n.R b/tests/testthat/test-unvalidated-sim_gs_n.R index b6a3df23..80bc742a 100644 --- a/tests/testthat/test-unvalidated-sim_gs_n.R +++ b/tests/testthat/test-unvalidated-sim_gs_n.R @@ -154,14 +154,14 @@ test_that("Test 5: RMST", { test_that("Test 6: Milestone", { observed <- sim_gs_n( - n_sim = 3, - sample_size = 400, - enroll_rate = test_enroll_rate(), - fail_rate = test_fail_rate(), - test = milestone, - cutting = test_cutting(), - seed = 2024, - ms_time = 10 + n_sim = 3, + sample_size = 400, + enroll_rate = test_enroll_rate(), + fail_rate = test_fail_rate(), + test = milestone, + cutting = test_cutting(), + seed = 2024, + ms_time = 10 ) expected <- data.frame( method = rep("milestone", 9L),