Skip to contents

To illustrate the models, simulated data resembling clinical trial data were used. The following code simulates recurrent event data for relapses in RRMS and SPMS patients. These data are then combined with simulated baseline characteristics for the RRMS and SPMS patients. The simulated data are saved as a CSV file for further use.

# Set seed for reproducibility: fixing the random number generator state here
# makes every random draw performed after this point identical on each run
set.seed(123)
# Simulate recurrent event data for `n` individuals.
# (Only tibble, purrr and dplyr functions are called here; no reda function
# is used.)
#
# Arguments:
#   n                   Number of individuals; ids run from 1 to n. Must be a
#                       single non-negative number.
#   max_event           Upper bound on the number of event times drawn per
#                       individual (at least one time is always drawn).
#   mean_follow_up_time Mean of the exponential distribution the event times
#                       are drawn from (rate = 1 / mean_follow_up_time).
#   max_follow_up_time  Drawn times at or beyond this value are discarded, and
#                       one final row at this time with status 0 is appended
#                       for every individual.
#   mstype, outcome     Labels copied unchanged into every row.
#
# Returns:
#   A tibble with columns id, status (1 for drawn event times, 0 for the
#   closing row at max_follow_up_time), time, mstype and outcome, sorted by
#   id and then time.
simulate_recurrent_event_data <- function(n, max_event, mean_follow_up_time, max_follow_up_time, mstype, outcome) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)

  # `1:n` would yield c(1, 0) for n = 0 and simulate two bogus individuals,
  # so return an empty result with the usual columns instead
  if (n == 0) {
    return(tibble(
      id = integer(),
      status = numeric(),
      time = numeric(),
      mstype = mstype[0],
      outcome = outcome[0]
    ))
  }

  simulate_individual_data <- function(id) {
    # Number of times to draw: a rounded normal draw centred on max_event / 2,
    # clamped to the range [1, max_event]
    n_draws <- max(min(round(rnorm(1, max_event / 2, max_event / 4)), max_event), 1)

    # Draw the event times (exponential distribution) and keep only those
    # that fall before the end of follow-up
    event_times <- rexp(n = n_draws, rate = 1 / mean_follow_up_time)
    event_times <- event_times[event_times < max_follow_up_time]

    # One row per retained event time (status 1) plus the closing row at
    # max_follow_up_time (status 0)
    tibble(
      id = id,
      status = c(rep(1, length(event_times)), 0),
      time = c(event_times, max_follow_up_time),
      mstype = mstype,
      outcome = outcome
    )
  }

  # Simulate each individual in turn and stack the results; the event times
  # are drawn unordered, so arrange() puts each individual's rows in time order
  map_dfr(seq_len(n), simulate_individual_data) %>%
    arrange(id, time)
}
# Generate the datasets for relapses in RRMS and SPMS patients.
# These parameters have been chosen so that differences between the groups
# can be found; they are for illustrative purposes only.
# The cohort sizes are named once so that the id offset applied below cannot
# drift away from the number of RRMS patients actually simulated.
n_rrms <- 791
n_spms <- 522
rrms_relapse_data <- simulate_recurrent_event_data(
  n = n_rrms,
  max_event = 10,
  mean_follow_up_time = 8,
  max_follow_up_time = 10,
  mstype = "RRMS",
  outcome = "RELAPSE"
)
spms_relapse_data <- simulate_recurrent_event_data(
  n = n_spms,
  max_event = 4,
  mean_follow_up_time = 5,
  max_follow_up_time = 10,
  mstype = "SPMS",
  outcome = "RELAPSE"
)
# Both simulations number their individuals from 1, so shift the SPMS ids past
# the RRMS ids to keep ids unique after binding
spms_relapse_data$id <- spms_relapse_data$id + n_rrms
# Bind datasets
df <- bind_rows(rrms_relapse_data, spms_relapse_data)
# Add baseline characteristics
# Function to generate baseline data for RRMS patients.
#
# Arguments:
#   n  Number of patients to generate (0 gives an empty tibble).
#
# Returns:
#   A tibble with one row per patient and columns:
#     id                    1..n
#     Age                   normal draw, mean 36.5, sd 9.1
#     Sex                   factor Male/Female, Female with probability 0.70
#     Race                  factor No white/White, White with probability 0.88
#     Time_since_diagnosis  log-normal draw, meanlog log(2), sdlog log(5/2)
#     EDSS_overall          log-normal draw, meanlog log(2), sdlog log(3.5/1.5)
generate_rrms_data <- function(n) {
  tibble(
    id = seq_len(n),
    Age = rnorm(n, mean = 36.5, sd = 9.1),
    # Explicit levels keep both labels valid even when a draw contains only
    # zeros or only ones (e.g. n = 1); without them factor() raises an error
    Sex = factor(
      rbinom(n, 1, prob = 0.70),
      levels = c(0, 1),
      labels = c("Male", "Female")
    ),
    Race = factor(
      rbinom(n, 1, prob = 0.88),
      levels = c(0, 1),
      labels = c("No white", "White")
    ),
    Time_since_diagnosis = rlnorm(n, meanlog = log(2), sdlog = log(5 / 2)),
    EDSS_overall = rlnorm(n, meanlog = log(2), sdlog = log(3.5 / 1.5))
  )
}

# Function to generate baseline data for SPMS patients.
#
# Arguments:
#   n  Number of patients to generate (0 gives an empty tibble).
#
# Returns:
#   A tibble with one row per patient and columns:
#     id                    1..n
#     Age                   normal draw, mean 49.4, sd 8.1
#     Sex                   factor Male/Female, Female with probability 0.63
#     Race                  factor No white/White, White with probability 0.96
#     Time_since_diagnosis  log-normal draw, meanlog log(14.5), sdlog log(22/7.8)
#     EDSS_overall          log-normal draw, meanlog log(6), sdlog log(6.5/4.5)
generate_spms_data <- function(n) {
  tibble(
    id = seq_len(n),
    Age = rnorm(n, mean = 49.4, sd = 8.1),
    # Explicit levels keep both labels valid even when a draw contains only
    # zeros or only ones (likely for Race at small n); without them factor()
    # raises an error
    Sex = factor(
      rbinom(n, 1, prob = 0.63),
      levels = c(0, 1),
      labels = c("Male", "Female")
    ),
    Race = factor(
      rbinom(n, 1, prob = 0.96),
      levels = c(0, 1),
      labels = c("No white", "White")
    ),
    # approximate log-normal
    Time_since_diagnosis = rlnorm(n, meanlog = log(14.5), sdlog = log(22 / 7.8)),
    # approximate log-normal
    EDSS_overall = rlnorm(n, meanlog = log(6), sdlog = log(6.5 / 4.5))
  )
}
# Generate the baseline datasets
rrms_data <- generate_rrms_data(791)
spms_data <- generate_spms_data(522)
# Both generators number patients from 1, so shift the SPMS ids by the number
# of RRMS patients; deriving the offset from rrms_data keeps it in step with
# the RRMS cohort size
spms_data$id <- spms_data$id + nrow(rrms_data)

# Merge with the previous part: each baseline row is matched by id to all of
# that patient's rows in `df`
df_revents <- bind_rows(rrms_data, spms_data) %>%
  inner_join(df, by = "id")
# Show the first rows; head() never indexes past the end of the data
kbl(head(df_revents, 10)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
id Age Sex Race Time_since_diagnosis EDSS_overall status time mstype outcome
1 41.32254 Male White 6.099722 0.3727128 1 0.2526189 RRMS RELAPSE
1 41.32254 Male White 6.099722 0.3727128 1 0.4496878 RRMS RELAPSE
1 41.32254 Male White 6.099722 0.3727128 1 2.5320097 RRMS RELAPSE
1 41.32254 Male White 6.099722 0.3727128 0 10.0000000 RRMS RELAPSE
2 31.44477 Female White 3.371547 6.0269468 1 0.2332276 RRMS RELAPSE
2 31.44477 Female White 3.371547 6.0269468 1 1.1621344 RRMS RELAPSE
2 31.44477 Female White 3.371547 6.0269468 1 1.5062723 RRMS RELAPSE
2 31.44477 Female White 3.371547 6.0269468 1 2.2481090 RRMS RELAPSE
2 31.44477 Female White 3.371547 6.0269468 1 2.8411302 RRMS RELAPSE
2 31.44477 Female White 3.371547 6.0269468 1 3.0169426 RRMS RELAPSE

Note that data in this raw layout are not directly suitable for fitting recurrent event models (see the database layout section). However, these raw data can be used for the first steps of data exploration, as shown in the following section on descriptive analyses.