Skip to contents

Introduction

The purpose of this article is to compare the results for the Charlson comorbidity flags returned by medicalcoder::comorbidities() vs the SQL code provided by Johnson et al. (2018).

MIMIC SQL

SQL code for applying a Charlson comorbidity algorithm to the MIMIC-IV data is available from the MIT Laboratory for Computational Physiology (MIT-LCP) on GitHub. The code is written to run on Google BigQuery against the MIMIC-IV data. We make a few small modifications to the code so that we can evaluate the SQL locally, via RSQLite, on a local data set.

# Download the MIMIC-IV Charlson SQL from GitHub.  The URL is pinned to a
# specific commit so the query text cannot change underneath this comparison.
# The file is split on newlines into a character vector.
mimic_charlson_sql_url <-
  "https://raw.githubusercontent.com/MIT-LCP/mimic-code/278df75ec30991ff3a6f5ceb6d2221635a085e9f/mimic-iv/concepts/comorbidity/charlson.sql"
mimic_charson_query <-
  scan(mimic_charlson_sql_url, what = character(), sep = "\n")

# modify the query to work in SQLite: map the fully qualified BigQuery table
# names onto local table names and replace GREATEST with MAX.  Every pattern
# is matched literally (fixed = TRUE) and applied in the order listed.
sqlite_substitutions <- c(
  "physionet-data.mimiciv_hosp.admissions"    = "admissions",
  "physionet-data.mimiciv_hosp.diagnoses_icd" = "diagnoses",
  "physionet-data.mimiciv_derived.age"        = "ages",
  "GREATEST"                                  = "MAX"
)
for (bigquery_term in names(sqlite_substitutions)) {
  mimic_charson_query <-
    gsub(pattern = bigquery_term,
         replacement = sqlite_substitutions[[bigquery_term]],
         x = mimic_charson_query,
         fixed = TRUE)
}

# collapse the individual lines into a single query string
mimic_charson_query <- paste(mimic_charson_query, collapse = "\n")
library(medicalcoder)
library(data.table)
# Work on a data.table copy of `mdcr` so the by-reference edits below do not
# modify the original object.
mdcr_for_mimic <- data.table::copy(mdcr)
setDT(mdcr_for_mimic)

# rename the id / code / version columns
# NOTE(review): presumably these are the column names the MIMIC SQL expects
setnames(mdcr_for_mimic, "patid", "subject_id")
setnames(mdcr_for_mimic, "code",  "icd_code")
setnames(mdcr_for_mimic, "icdv",  "icd_version")

# create a hospital admission id (one admission per patient id in mdcr)
set(
  mdcr_for_mimic,
  j = "hadm_id",
  value = paste0(mdcr_for_mimic[["subject_id"]], "e1")
)

# number the records within each admission
mdcr_for_mimic[, seq_num := seq_len(.N), by = .(subject_id, hadm_id)]

# age is taken from the first two characters of the subject id
set(
  mdcr_for_mimic,
  j = "age",
  value = as.integer(
    substr(as.character(mdcr_for_mimic[["subject_id"]]), 1L, 2L)
  )
)
library(odbc)
library(DBI)
library(RSQLite)

# open a throwaway in-memory SQLite database
con <- dbConnect(RSQLite::SQLite(), dbname = ":memory:")

# add data to the database: diagnosis records plus one row per distinct
# admission key and one row per distinct admission/age pair
admission_keys <- unique(mdcr_for_mimic[, .(subject_id, hadm_id)])
admission_ages <- unique(mdcr_for_mimic[, .(hadm_id, age)])
dbWriteTable(con, "diagnoses",  mdcr_for_mimic[dx == 1L])
dbWriteTable(con, "admissions", admission_keys)
dbWriteTable(con, "ages",       admission_ages)

# get the charlson results via MIMIC-IV
mimic_charlson_results <- dbGetQuery(conn = con, statement = mimic_charson_query)

# close DB connection
dbDisconnect(con)

# convert the query result to a data.table by reference
setDT(mimic_charlson_results)
# Flag Charlson comorbidities with medicalcoder on the mdcr_for_mimic data,
# keyed by the same subject_id / hadm_id pair used for the MIMIC results.
# NOTE(review): the argument semantics are not visible in this article; confirm
# them against the medicalcoder::comorbidities() documentation.  In particular,
# poa = 1L and primarydx = 0L are constants applied to every record
# (presumably "present on admission" and "not a primary diagnosis").
medicalcoder_charlson_results <-
  comorbidities(
    data = mdcr_for_mimic,
    id.vars = c("subject_id", "hadm_id"),
    icd.codes = "icd_code",
    icdv.var = "icd_version",
    dx.var = "dx",
    age.var = "age",
    method = "charlson_quan2005",
    full.codes = FALSE,
    flag.method = "current",
    poa = 1L,
    primarydx = 0L
  )
# Full outer join of the two result sets on the admission keys.  Non-key
# columns present in both inputs get merge()'s .x (medicalcoder) and
# .y (MIMIC) suffixes.
delta <- merge(
  medicalcoder_charlson_results,
  mimic_charlson_results,
  by = c("subject_id", "hadm_id"),
  all = TRUE
)

# the number of distinct admissions and the row count of each result set
mdcr_for_mimic[, uniqueN(hadm_id)]
## [1] 38262
NROW(mimic_charlson_results)
## [1] 38262
NROW(medicalcoder_charlson_results)
## [1] 38262

The flags for conditions without multiple severity levels, as well as the age score and the overall Charlson comorbidity index, are identical between the two methods.

# Lookup table pairing each medicalcoder column name with the name of the
# corresponding MIMIC column in `delta`.
dcolumns <- data.table(
  medicalcoder = c(
    "aidshiv", "cebvd", "copd", "chf", "dem", "hp", "mi",
    "pud", "pvd", "rnd", "rhd", "age_score.x", "cci"
  ),
  mimic = c(
    "aids",
    "cerebrovascular_disease",
    "chronic_pulmonary_disease",
    "congestive_heart_failure",
    "dementia",
    "paraplegia",
    "myocardial_infarct",
    "peptic_ulcer_disease",
    "peripheral_vascular_disease",
    "renal_disease",
    "rheumatic_disease",
    "age_score.y",
    "charlson_comorbidity_index"
  )
)
# Compare each medicalcoder column against its MIMIC counterpart.  The
# comparison is built as an unevaluated call so the exact expression can be
# printed next to its result, and the run stops at the first mismatch.  Each
# verified pair is then removed so that only unchecked columns remain.
for (i in seq_len(nrow(dcolumns))) {
  x <- dcolumns[["medicalcoder"]][i]
  y <- dcolumns[["mimic"]][i]
  e <- base::substitute(identical(delta[[X]], delta[[Y]]), list(X = x, Y = y))
  print(e)
  r <- eval(e)
  print(r)
  stopifnot(r)
  # Drop both columns by reference.  This matches the `:= NULL` idiom used for
  # the remaining columns in this article and avoids base `[[<-`, which
  # copies the data.table instead of modifying it in place.
  delta[, c(x, y) := NULL]
}
## identical(delta[["aidshiv"]], delta[["aids"]])
## [1] TRUE
## identical(delta[["cebvd"]], delta[["cerebrovascular_disease"]])
## [1] TRUE
## identical(delta[["copd"]], delta[["chronic_pulmonary_disease"]])
## [1] TRUE
## identical(delta[["chf"]], delta[["congestive_heart_failure"]])
## [1] TRUE
## identical(delta[["dem"]], delta[["dementia"]])
## [1] TRUE
## identical(delta[["hp"]], delta[["paraplegia"]])
## [1] TRUE
## identical(delta[["mi"]], delta[["myocardial_infarct"]])
## [1] TRUE
## identical(delta[["pud"]], delta[["peptic_ulcer_disease"]])
## [1] TRUE
## identical(delta[["pvd"]], delta[["peripheral_vascular_disease"]])
## [1] TRUE
## identical(delta[["rnd"]], delta[["renal_disease"]])
## [1] TRUE
## identical(delta[["rhd"]], delta[["rheumatic_disease"]])
## [1] TRUE
## identical(delta[["age_score.x"]], delta[["age_score.y"]])
## [1] TRUE
## identical(delta[["cci"]], delta[["charlson_comorbidity_index"]])
## [1] TRUE

Three comorbidities have more than one level of severity: diabetes, cancer, and liver disease. medicalcoder::comorbidities() will set the less severe condition indicator to 0 when the more severe condition is flagged. Both methods only consider the more severe case in the index scoring.

Diabetes - medicalcoder::comorbidities() sets the flag for diabetes without complication (dm) to 0 when diabetes with complication (dmc) is present. MIMIC retains both flags.

# medicalcoder never reports both diabetes flags for the same admission ...
delta[dm == 1L & dmc == 1L, .N == 0L] # medicalcoder
## [1] TRUE
# ... whereas MIMIC has admissions carrying both flags at once.  The check
# looks for rows with both flags set (== 1L), in line with the cancer and
# liver disease checks.
delta[diabetes_without_cc == 1L & diabetes_with_cc == 1L, .N > 0L] # MIMIC
## [1] TRUE

# Starting from each MIMIC flag combination: the combination occurs and the
# medicalcoder flags take the expected values.
delta[diabetes_without_cc == 0L & diabetes_with_cc == 0L, .N > 0L & all(dm == 0L, dmc == 0L)]
## [1] TRUE
delta[diabetes_without_cc == 1L & diabetes_with_cc == 0L, .N > 0L & all(dm == 1L, dmc == 0L)]
## [1] TRUE
delta[diabetes_without_cc == 0L & diabetes_with_cc == 1L, .N > 0L & all(dmc == 1L)]
## [1] TRUE
delta[diabetes_without_cc == 1L & diabetes_with_cc == 1L, .N > 0L & all(dmc == 1L)]
## [1] TRUE

# The reverse direction, starting from each medicalcoder flag combination.
delta[dm == 0L & dmc == 0L, .N > 0L & all(diabetes_without_cc == 0L, diabetes_with_cc == 0L)]
## [1] TRUE
delta[dm == 1L & dmc == 0L, .N > 0L & all(diabetes_without_cc == 1L, diabetes_with_cc == 0L)]
## [1] TRUE
delta[dm == 0L & dmc == 1L, .N > 0L & all(diabetes_with_cc == 1L)]
## [1] TRUE
delta[dm == 1L & dmc == 1L, .N == 0L]
## [1] TRUE

# the diabetes columns have been checked; remove them by reference
delta[, c("dm", "dmc", "diabetes_with_cc", "diabetes_without_cc") := NULL]

Cancer - medicalcoder::comorbidities() sets malignant cancer (mal) to 0 when metastatic solid tumor (mst) is present. MIMIC retains both flags.

# medicalcoder never reports both cancer flags for one admission; MIMIC does.
delta[mal == 1L & mst == 1L, .N == 0L] # medicalcoder
## [1] TRUE
delta[malignant_cancer == 1L & metastatic_solid_tumor == 1L, .N > 0L] # MIMIC
## [1] TRUE

# Starting from each MIMIC flag combination: the combination occurs and the
# medicalcoder flags take the expected values.
delta[malignant_cancer == 0L & metastatic_solid_tumor == 0L, .N > 0L & all(mal == 0L, mst == 0L)]
## [1] TRUE
delta[malignant_cancer == 1L & metastatic_solid_tumor == 0L, .N > 0L & all(mal == 1L, mst == 0L)]
## [1] TRUE
delta[malignant_cancer == 0L & metastatic_solid_tumor == 1L, .N > 0L & all(mst == 1L)]
## [1] TRUE
delta[malignant_cancer == 1L & metastatic_solid_tumor == 1L, .N > 0L & all(mst == 1L)]
## [1] TRUE

# The reverse direction, starting from each medicalcoder flag combination.
delta[mal == 0L & mst == 0L, .N > 0L & all(malignant_cancer == 0L, metastatic_solid_tumor == 0L)]
## [1] TRUE
delta[mal == 1L & mst == 0L, .N > 0L & all(malignant_cancer == 1L, metastatic_solid_tumor == 0L)]
## [1] TRUE
delta[mal == 0L & mst == 1L, .N > 0L & all(metastatic_solid_tumor == 1L)]
## [1] TRUE
delta[mal == 1L & mst == 1L, .N == 0L]
## [1] TRUE

# the cancer columns have been checked; remove them by reference
delta[, c("mal", "mst", "malignant_cancer", "metastatic_solid_tumor") := NULL]

Liver disease - medicalcoder::comorbidities() sets the flag for mild liver disease (mld) to 0 when moderate/severe liver disease (msld) is flagged. MIMIC retains both flags.

# medicalcoder never reports both liver flags for one admission; MIMIC does.
delta[mld == 1L & msld == 1L, .N == 0L] # medicalcoder
## [1] TRUE
delta[mild_liver_disease == 1L & severe_liver_disease == 1L, .N > 0L] # MIMIC
## [1] TRUE

# Starting from each MIMIC flag combination: the combination occurs and the
# medicalcoder flags take the expected values.
delta[mild_liver_disease == 0L & severe_liver_disease == 0L, .N > 0L & all(mld == 0L, msld == 0L)]
## [1] TRUE
delta[mild_liver_disease == 1L & severe_liver_disease == 0L, .N > 0L & all(mld == 1L, msld == 0L)]
## [1] TRUE
delta[mild_liver_disease == 0L & severe_liver_disease == 1L, .N > 0L & all(msld == 1L)]
## [1] TRUE
delta[mild_liver_disease == 1L & severe_liver_disease == 1L, .N > 0L & all(msld == 1L)]
## [1] TRUE

# The reverse direction, starting from each medicalcoder flag combination.
delta[mld == 0L & msld == 0L, .N > 0L & all(mild_liver_disease == 0L, severe_liver_disease == 0L)]
## [1] TRUE
delta[mld == 1L & msld == 0L, .N > 0L & all(mild_liver_disease == 1L, severe_liver_disease == 0L)]
## [1] TRUE
delta[mld == 0L & msld == 1L, .N > 0L & all(severe_liver_disease == 1L)]
## [1] TRUE
delta[mld == 1L & msld == 1L, .N == 0L]
## [1] TRUE

# the liver disease columns have been checked; remove them by reference
delta[, c("mld", "msld", "mild_liver_disease", "severe_liver_disease") := NULL]

All that is left in the delta data.frame are the id.vars and the num_cmrb and cmrb_flag columns. These two columns are returned by medicalcoder::comorbidities() and report the number of comorbidities flagged and an indicator for any comorbidity, respectively.

# show the structure of what remains in delta after the compared columns
# have been removed
str(delta)
## Classes 'medicalcoder_comorbidities', 'data.table' and 'data.frame': 38262 obs. of  4 variables:
##  $ subject_id: int  10000 10002 10005 10006 10008 10010 10014 10015 10017 10018 ...
##  $ hadm_id   : chr  "10000e1" "10002e1" "10005e1" "10006e1" ...
##  $ num_cmrb  : int  1 0 1 0 0 0 0 1 0 0 ...
##  $ cmrb_flag : int  1 0 1 0 0 0 0 1 0 0 ...
##  - attr(*, ".internal.selfref")=<externalptr> 
##  - attr(*, "sorted")= chr [1:2] "subject_id" "hadm_id"
##  - attr(*, "index")= int(0)

References

Johnson, Alistair E W, David J Stone, Leo A Celi, and Tom J Pollard. 2018. “The MIMIC Code Repository: Enabling Reproducibility in Critical Care Research.” Journal of the American Medical Informatics Association 25 (1): 32–39.