Clinical Trials Statistical Methods

Overview

Comprehensive clinical trial design and analysis methods in R covering sample size calculation, randomization, interim analyses, multiplicity adjustment, and regulatory-compliant statistical methods.

Sample Size Calculation

Two-Group Comparisons

library(pwr)

# Two-sample t-test
pwr.t.test(
  d = 0.5,           # Cohen's d effect size
  sig.level = 0.05,
  power = 0.80,
  type = "two.sample",
  alternative = "two.sided"
)

# Proportions (chi-square)
pwr.2p.test(
  h = ES.h(p1 = 0.6, p2 = 0.4),  # Cohen's h
  sig.level = 0.05,
  power = 0.80
)

# Two proportions (unequal groups)
pwr.2p2n.test(
  h = ES.h(p1 = 0.6, p2 = 0.4),
  n1 = 100,
  sig.level = 0.05
)

Survival Endpoints

library(gsDesign)

# Log-rank test sample size
nSurv(
  lambda1 = log(2)/12,  # Control median = 12 months
  lambda2 = log(2)/18,  # Treatment median = 18 months (HR = 0.67)
  Ts = 24,              # Study duration
  Tr = 12,              # Accrual duration
  alpha = 0.025,        # One-sided

  beta = 0.20,          # 80% power
  ratio = 1             # 1:1 randomization
)

# Using rpact
library(rpact)
getSampleSizeSurvival(
  hazardRatio = 0.67,
  lambda1 = log(2)/12,
  accrualTime = 12,
  followUpTime = 12,
  alpha = 0.025,
  beta = 0.20,
  allocationRatioPlanned = 1
)

Non-Inferiority Trials

library(TrialSize)

# Non-inferiority for proportions
TwoSampleProportion.NIS(
  p = 0.80,           # Expected proportion in both groups
  delta = 0.10,       # Non-inferiority margin
  alpha = 0.025,      # One-sided
  power = 0.80
)

# Non-inferiority for means
TwoSampleMean.NIS(
  sigma = 10,         # SD
  delta = 5,          # Non-inferiority margin
  alpha = 0.025,
  power = 0.80
)

Randomization

Simple Randomization

# Base R simple randomization
set.seed(123)
n <- 100
treatment <- sample(c("A", "B"), n, replace = TRUE)

# blockrand package
library(blockrand)
randomization <- blockrand(
  n = 100,
  num.levels = 2,
  levels = c("Treatment", "Control"),
  id.prefix = "PAT",
  block.prefix = "BLK"
)

Stratified Block Randomization

library(blockrand)

# Generate lists for each stratum
strata <- expand.grid(
  sex = c("Male", "Female"),
  age_group = c("<65", ">=65")
)

rand_lists <- lapply(1:nrow(strata), function(i) {
  blockrand(
    n = 50,
    num.levels = 2,
    levels = c("Treatment", "Control"),
    block.sizes = c(2, 4, 6),  # Variable block sizes
    stratum = paste(strata[i, ], collapse = "_")
  )
})

full_list <- do.call(rbind, rand_lists)

Minimization

library(Minirand)

# Minimization randomization
minirand(
  covariates = data.frame(
    sex = c("M", "F", "M"),
    age = c("young", "old", "young"),
    center = c("A", "A", "B")
  ),
  treatment = c("A", "B"),
  ratio = c(1, 1),
  p = 0.85  # Probability of assigning to minimizing treatment
)

Group Sequential Designs

gsDesign Package

library(gsDesign)

# O'Brien-Fleming boundaries
gs_design <- gsDesign(
  k = 3,               # Number of analyses
  test.type = 2,       # Two-sided symmetric
  alpha = 0.025,       # One-sided alpha
  beta = 0.20,         # Type II error
  sfu = "OF",          # O'Brien-Fleming spending function
  timing = c(0.5, 0.75, 1)  # Information fractions
)

# Summary
gs_design

# Boundaries
gs_design$upper$bound  # Upper efficacy boundary
gs_design$lower$bound  # Lower futility boundary

# Plot
plot(gs_design)

Alpha Spending Functions

# Pocock
gs_pocock <- gsDesign(k = 3, sfu = "Pocock")

# Hwang-Shih-DeCani
gs_hsd <- gsDesign(k = 3, sfu = sfHSD, sfupar = -4)

# Power family (Kim-DeMets)
gs_power <- gsDesign(k = 3, sfu = sfPower, sfupar = 2)

# Custom spending
gs_custom <- gsDesign(
  k = 3,
  sfu = sfPoints,
  sfupar = c(0.01, 0.03, 0.025),  # Cumulative alpha at each look
  timing = c(0.5, 0.75, 1)
)

rpact Package

library(rpact)

# Design
design <- getDesignGroupSequential(
  kMax = 3,
  alpha = 0.025,
  beta = 0.20,
  sided = 1,
  typeOfDesign = "OF",  # O'Brien-Fleming
  informationRates = c(0.5, 0.75, 1)
)

# Sample size
sampleSize <- getSampleSizeMeans(
  design = design,
  alternative = 0.5,
  stDev = 1
)

# Interim analysis
getAnalysisResults(
  design,
  dataInput = getDataset(
    n = c(50, 75),
    means = c(0.3, 0.4),
    stDevs = c(1, 1)
  )
)

Multiplicity Adjustment

P-value Adjustments

# Bonferroni
p.adjust(p_values, method = "bonferroni")

# Holm (step-down)
p.adjust(p_values, method = "holm")

# Hochberg (step-up)
p.adjust(p_values, method = "hochberg")

# Benjamini-Hochberg (FDR)
p.adjust(p_values, method = "BH")

# Hommel
p.adjust(p_values, method = "hommel")

Graphical Approaches

library(gMCP)

# Define hypothesis graph
graph <- matrix2graph(
  # Transition matrix
  m = matrix(c(
    0, 0.5, 0.5, 0,
    0.5, 0, 0, 0.5,
    0.5, 0, 0, 0.5,
    0, 0.5, 0.5, 0
  ), nrow = 4, byrow = TRUE),
  # Initial weights
  w = c(0.5, 0.5, 0, 0)
)

# Set hypothesis names
nodeNames(graph) <- c("H1_OS", "H2_OS", "H1_PFS", "H2_PFS")

# Plot graph
plot(graph)

# Perform test
gMCP(
  graph = graph,
  pvalues = c(0.01, 0.03, 0.02, 0.04),
  alpha = 0.025
)

Gatekeeping Procedures

library(multcomp)

# Serial gatekeeping
# Primary must be significant before testing secondary
serial_gate <- function(p_primary, p_secondary, alpha = 0.05) {
  if (p_primary < alpha) {
    # Primary significant, test secondary at full alpha
    return(c(primary = p_primary < alpha, secondary = p_secondary < alpha))
  } else {
    return(c(primary = FALSE, secondary = FALSE))
  }
}

Missing Data

Mixed Models for Repeated Measures (MMRM)

library(mmrm)

# MMRM model
mmrm_fit <- mmrm(
  formula = change ~ treatment * visit + baseline + us(visit | subject),
  data = long_data,
  weights = NULL,
  reml = TRUE
)

# Least squares means
library(emmeans)
emmeans(mmrm_fit, ~ treatment | visit)

# Treatment comparison at each visit
emmeans(mmrm_fit, pairwise ~ treatment | visit)

Multiple Imputation

library(mice)

# Create imputations
imp <- mice(
  data = df,
  m = 20,           # Number of imputations
  method = "pmm",   # Predictive mean matching
  maxit = 10
)

# Analyze each imputed dataset
analyses <- with(imp, lm(outcome ~ treatment + covariates))

# Pool results (Rubin's rules)
pooled <- pool(analyses)
summary(pooled)

Tipping Point Analysis

# Sensitivity analysis for MNAR
library(rbmi)

# Define imputation method with delta adjustment
draws <- draws(
  data = data,
  data_ice = ice_data,
  method = method_bayes(),
  vars = vars
)

# Impute with different delta values
impute(draws, references = c("Control" = "Control", "Treatment" = "Control"))

Subgroup Analysis

Forest Plots for Subgroups

library(forestplot)

# Calculate treatment effects by subgroup
subgroup_effects <- df |>
  group_by(subgroup) |>
  summarise(
    n = n(),
    effect = mean(outcome[trt == 1]) - mean(outcome[trt == 0]),
    se = sqrt(var(outcome[trt == 1])/sum(trt == 1) +
              var(outcome[trt == 0])/sum(trt == 0)),
    lower = effect - 1.96 * se,
    upper = effect + 1.96 * se
  )

# Create forest plot
forestplot(
  labeltext = subgroup_effects$subgroup,
  mean = subgroup_effects$effect,
  lower = subgroup_effects$lower,
  upper = subgroup_effects$upper,
  zero = 0,
  xlab = "Treatment Effect (95% CI)"
)

Interaction Tests

# Test for treatment-by-subgroup interaction
interaction_model <- lm(outcome ~ treatment * subgroup, data = df)
anova(interaction_model)

# Quantitative interaction test
library(QI)
qi_test(outcome ~ treatment | subgroup, data = df)

Regulatory Considerations

ICH E9 Estimands Framework

# Define estimand components:
# 1. Population
# 2. Variable (endpoint)
# 3. Intercurrent events and strategies
# 4. Population-level summary

# Example: Treatment policy estimand with MMRM
mmrm_fit <- mmrm(
  change ~ treatment * visit + baseline + us(visit | subject),
  data = data_all_randomized  # Include all randomized (ITT)
)

CONSORT Diagram

library(consort)

# Create CONSORT diagram
consort_plot(
  data = trial_data,
  orders = c(
    Assessed = "Assessed for eligibility",
    Randomized = "Randomized",
    Arm_A = "Allocated to Arm A",
    Arm_B = "Allocated to Arm B",
    Lost_A = "Lost to follow-up (Arm A)",
    Lost_B = "Lost to follow-up (Arm B)",
    Analyzed_A = "Analyzed (Arm A)",
    Analyzed_B = "Analyzed (Arm B)"
  ),
  side_box = c("Excluded", "Discontinued_A", "Discontinued_B"),
  cex = 0.8
)

Key Packages Summary

Package	Purpose
pwr	Power analysis
gsDesign	Group sequential designs
rpact	Adaptive designs
blockrand	Randomization
gMCP	Graphical multiplicity
mmrm	MMRM analysis
mice	Multiple imputation
rbmi	Reference-based imputation
emmeans	Least squares means
consort	CONSORT diagrams