This GitHub repository contains the code used for the simulation study of the cheap subsampling confidence interval.
To run the simulation study, one needs to use R with the targets package and run tar_make().
The code for the introduction is included in intro_cross_validation while
the code for the two simulation studies is included in
simulation_study_with_cross_validation and simulation_study_without_cross_validation.
In addition, one needs the rtmle package, which, at the time of writing, is not on CRAN. The necessary packages can be installed as follows:
install.packages(c("targets","data.table", "ggplot2", "tarchetypes", "crew", "crew.cluster","devtools", "dplyr", "tidyr", "tibble", "gt", "ggpubr"))
devtools::install_github("tagteam/rtmle")

An example run of the simulation study can be executed with the following R code:
# Example run: simulate one data set, fit the estimator with rtmle, and then
# call cheap_bootstrap() on the fitted object.

# Load the necessary packages ----
library(data.table)
library(rtmle)

# Working directory ----
# The source() calls below use paths relative to this folder; adjust the path
# to wherever the repository is cloned on your machine.
setwd(
  "~/phd/cheap_subsampling_simulation_study_repo/simulation_study_without_cross_validation/"
)

# Arguments ----
time_horizon <- 2
# Fraction of the prepared data passed (as a row count) to cheap_bootstrap().
eta <- 0.632
sample_size <- 1000

# Load functions from simulation study ----
source("functions/rtmle_functions.R")
source("functions/simulation_functions.R")

# Generate some data ----
simulated_data <- simulate_data(n = sample_size, time_horizon = time_horizon)

# RTMLE initialization ----
x <- rtmle_init(
  intervals = time_horizon,
  name_id = "pnr",
  name_outcome = "Y",
  name_competing = NULL,
  name_censoring = "C",
  censored_levels = c(1, 0),
  censored_label = 0
)

# Add data ----
x <- add_baseline_data(x, data = simulated_data$baseline_data)
x$data$outcome_data <- simulated_data$outcome
x$data$timevar_data <- simulated_data$timevarying_covariates
# Combine the time-varying covariates with the regimen data, matching on the
# subject id column "pnr".
x$data$timevar_data <- x$data$timevar_data[simulated_data$regimen, on = "pnr"]
x <- protocol(
  x,
  name = "Always_A",
  intervention = data.frame("A" = factor("1", levels = c("0", "1")))
)
x <- prepare_data(x)

# Specify the target parameter ----
x <- target(
  x,
  name = "Outcome_risk",
  strategy = "additive",
  estimator = "tmle",
  protocols = "Always_A"
)
x <- model_formula(x)

# Fit and run the cheap bootstrap ----
x <- run_rtmle(
  x,
  learner = "learn_glm",
  time_horizon = seq_len(time_horizon)
)
# M is derived from `eta` (defined above) instead of repeating the literal
# 0.632, so changing `eta` actually changes the value passed here.
# NOTE(review): eta * NROW(...) need not be a whole number for other sample
# sizes -- confirm how cheap_bootstrap() handles a non-integer M.
x <- cheap_bootstrap(
  x,
  B = 5,
  time_horizon = seq_len(time_horizon),
  M = eta * NROW(x$prepared_data)
)