Set random seed for reproducibility
# Seed R's random number generator before any modelling; the ranger() call
# further down is not given a seed argument of its own.
set.seed(1234)
library(tidyverse)
library(lubridate)
library(ggpubr)
library(ranger)
library(vivid)
Read in data
# Load the data set; the path is relative to the current working directory.
aml.df <- read.csv("./data/aml.all.df.csv")
Convert dates
# Parse the four date columns with lubridate's ymd() in a single pass.
aml.df[c("dot", "dor", "bdate", "pdate")] <- lapply(
  aml.df[c("dot", "dor", "bdate", "pdate")],
  ymd
)
Convert all character strings to factors
# Turn every character column into a factor. across() + where() is the
# current dplyr spelling of the superseded mutate_if().
aml.df <- aml.df %>%
  mutate(across(where(is.character), as.factor))
Make outcome a two-level factor (relapse "yes"/"no", with "yes" as the first level)
# Recode the outcome as a factor whose levels are "yes" then "no"; any other
# value becomes NA.
aml.df[["rbin"]] <- factor(aml.df[["rbin"]], levels = c("yes", "no"))
Filter out any tests that are post-relapse
# Keep a row when dor is missing, or when bdate falls strictly before dor.
# which() discards rows where the condition evaluates to NA.
aml.df <- aml.df[with(aml.df, which(is.na(dor) | bdate < dor)), ]
Filter out relapses occurring at 720 days or later (non-relapse records are always kept)
# Keep rows with rbin == "no", plus rows whose rtime is below 720.
# which() discards rows where the condition evaluates to NA.
aml.df <- aml.df[with(aml.df, which(rtime < 720 | rbin == "no")), ]
Filter out any missing tests
# Retain only the rows that have a value for every one of the eight
# bmc_* / pbc_* measurements.
aml.df <- aml.df[complete.cases(aml.df[c(
  "bmc_cdw", "bmc_cd3", "bmc_cd15", "bmc_cd34",
  "pbc_cdw", "pbc_cd3", "pbc_cd15", "pbc_cd34"
)]), ]
# NOTE(review): `<<-` assigns into an enclosing environment instead of the
# current one. If this line is evaluated in the global environment it just
# re-assigns aml.df to itself; if it is evaluated elsewhere (e.g. inside a
# function or a non-global knit environment) it copies the filtered data
# frame outward. Presumably the aim is to keep the filtered table before
# the column selection that follows -- confirm, and consider a plain `<-`
# to a distinct name.
aml.df <<- aml.df
# Keep the outcome (rbin), the candidate predictors and ID, in this order.
aml.df <- select(
  aml.df,
  all_of(c(
    "rbin", "sex", "txage",
    "rstatprtx", "ghgp", "tbi",
    "bmc_cdw", "bmc_cd3", "bmc_cd15", "bmc_cd34",
    "pbc_cdw", "pbc_cd3", "pbc_cd15", "pbc_cd34", "ID"
  ))
)
# Final clean-up before modelling:
#   * characters -> factors and integers -> doubles (across()/where() replace
#     the superseded mutate_if()),
#   * drop rows with an NA in any remaining column -- ID is still present at
#     this point, so a missing ID removes the row as well,
#   * drop factor levels that no longer occur in the data,
#   * remove ID so that it is not used as a predictor by `rbin ~ .`.
aml.df <- aml.df %>%
  mutate(across(where(is.character), as.factor)) %>%
  mutate(across(where(is.integer), as.numeric)) %>%
  drop_na() %>%
  droplevels() %>%
  select(-ID)
# Fit a random forest for rbin using every other column of aml.df as a
# predictor. probability = TRUE makes this a probability-estimation forest
# (its out-of-bag error is reported as a Brier score), and
# importance = 'impurity' stores impurity-based variable importance in the
# fit. The call is left exactly as written because the printed summary
# echoes it verbatim.
aml_rf <- ranger(rbin ~ ., aml.df,
importance = 'impurity',
probability = TRUE)
# Print the fitted model summary.
aml_rf
## Ranger result
##
## Call:
## ranger(rbin ~ ., aml.df, importance = "impurity", probability = TRUE)
##
## Type: Probability estimation
## Number of trees: 500
## Sample size: 102
## Number of independent variables: 13
## Mtry: 3
## Target node size: 10
## Variable importance mode: impurity
## Splitrule: gini
## OOB prediction error (Brier s.): 0.127328
# Compute the vivid importance/interaction result for the fitted forest.
# With importanceType = "impurity" the importance embedded in the ranger fit
# is used (vivi reports this in its message) and the interactions are then
# calculated on aml.df with rbin as the response.
aml_vivi <- vivi(fit = aml_rf,
data = aml.df,
response = "rbin",
importanceType = "impurity")
## Embedded impurity variable importance method used.
## Calculating interactions...
# Display the vivi result twice: first as a heatmap, then as a network plot.
viviHeatmap(mat = aml_vivi)
viviNetwork(mat = aml_vivi)
Stanford Medicine, dcshyr@stanford.edu↩︎
University of Utah, simon.brewer@geog.utah.edu↩︎