Libraries

# Seed R's random number generator with a fixed value before any work is done.
set.seed(42)
library(tidyverse)
library(mlr3verse)
library(mlr3tuning)
library(mlr3tuningspaces)

Data

Read in data

# Load the raw data set from the project's data directory.
dat <- read.csv("./data/aml.all.df.csv")

Make the outcome a two-level factor (relapse: yes/no)

# Recode the outcome as a factor with "yes" as the first level and "no" as the
# second. Any value other than "yes"/"no" becomes NA.
dat[["rbin"]] <- factor(dat[["rbin"]], levels = c("yes", "no"))

Filter out any tests that are post-relapse

# Keep rows whose bdate is before dor, plus rows with no dor at all.
# which() drops rows where the condition evaluates to NA.
# NOTE(review): bdate/dor come straight from read.csv(); if they are stored as
# character strings this `<` is a lexicographic comparison and is only correct
# for ISO-8601 (YYYY-MM-DD) dates -- confirm the column format.
pre_relapse <- is.na(dat$dor) | dat$bdate < dat$dor
dat <- dat[which(pre_relapse), ]

Filter out relapses occurring at 720 days or later

# Keep rows with rbin == "no", plus any row whose rtime is below 720.
# which() drops rows where the condition evaluates to NA.
within_window <- dat$rbin == "no" | dat$rtime < 720
dat <- dat[which(within_window), ]

Filter out any missing tests

# Keep only rows in which every bmc_* and pbc_* test column is observed.
test_cols <- c(
  "bmc_cdw", "bmc_cd3", "bmc_cd15", "bmc_cd34",
  "pbc_cdw", "pbc_cd3", "pbc_cd15", "pbc_cd34"
)
dat <- dat[complete.cases(dat[, test_cols]), ]

Get \(p(relapse)\) for baseline model

# Baseline probability of relapse: the share of rows whose outcome is "yes".
#
# The previous `sum(as.numeric(dat$rbin) - 1) / nrow(dat)` relied on the
# integer codes of the factor. Because rbin has levels c("yes", "no"), "yes" is
# coded 1 and "no" is coded 2, so that expression counted the "no" rows and
# returned p(no relapse) instead of p(relapse). Comparing against the label
# directly does not depend on the level order.
#
# Rows with a missing outcome are left out of both the numerator and the
# denominator, so a single NA no longer turns the whole estimate into NA.
prbin <- mean(dat$rbin == "yes", na.rm = TRUE)
# Alternative column selection, currently disabled:
# dat2 <- dat %>%
#   select(rbin, txage, hla, tbi, abd, ci, mtx, mmf, agvhd, cgvhd,
#          bmc_cdw, bmc_cd3, bmc_cd15, bmc_cd34,
#          pbc_cdw, pbc_cd3, pbc_cd15, pbc_cd34, ID)

# Build the modelling table: the outcome (rbin), the chosen covariates, the
# bmc_*/pbc_* test columns, and ID.
dat2 <- select(
  dat,
  rbin, sex, txage,
  rstatprtx, ghgp, tbi,
  bmc_cdw, bmc_cd3, bmc_cd15, bmc_cd34,
  pbc_cdw, pbc_cd3, pbc_cd15, pbc_cd34,
  ID
)

# Normalise column types, then drop incomplete rows and unused factor levels.
dat2 <- dat2 %>%
  # Character columns become factors ...
  mutate(across(where(is.character), as.factor)) %>%
  # ... and integer columns become doubles (factors are not matched by
  # is.integer, so the columns converted above are left alone).
  mutate(across(where(is.integer), as.numeric)) %>%
  # mutate(abd = tolower(abd)) %>%
  # Remove every row that still contains an NA in any selected column.
  drop_na() %>%
  # Discard factor levels that no longer occur after the row filtering.
  droplevels()

Tuning

Set up task

# Classification task over the cleaned table, predicting rbin.
task_chim <- TaskClassif$new(
  id = "all",
  backend = dat2,
  target = "rbin"
)

Define patients for use in cross validation

# Disabled alternative: assign ID the "group" column role.
# task_chim$col_roles$group <- "ID"

# Take ID out of the feature role so it is not used as a predictor.
# NOTE(review): with the group role disabled, ID has no role in resampling. If
# a patient can contribute more than one row, rows from the same patient may
# land in both the training and test folds -- confirm whether
# task_chim$set_col_roles("ID", roles = "group") is what is intended here.
task_chim$set_col_roles("ID", remove_from = "feature")

Tune the random forest learner with its default search space

# Tune a ranger classifier on task_chim.
instance_rf <- tune(
  task = task_chim,
  # lts() attaches the tuning space provided by mlr3tuningspaces.
  learner = lts(lrn("classif.ranger")),
  # 5-fold cross-validation, scored with the classif.bacc measure.
  resampling = rsmp("cv", folds = 5),
  measure = msr("classif.bacc"),
  # Grid search, stopped after 100 evaluated configurations.
  method = "grid_search",
  term_evals = 100
)

# Print the best configuration found and its score.
instance_rf$result
##    mtry.ratio replace sample.fraction num.trees learner_param_vals  x_domain
## 1:          1    TRUE               1      1556          <list[5]> <list[4]>
##    classif.bacc
## 1:     0.754902

  1. Stanford Medicine, ↩︎

  2. University of Utah, ↩︎