medicalcoder vs mimic
Source:vignettes/articles/medicalcoder-vs-mimic.Rmd
medicalcoder-vs-mimic.RmdIntroduction
The purpose of this article is to compare the results for the
Charlson comorbidity flags returned by
medicalcoder::comorbidities() vs the SQL code provided by
Johnson et al. (2018).
MIMIC SQL
SQL code for applying a Charlson comorbidity algorithm to the MIMIC-IV data is available from the MIT Laboratory for Computational Physiology (MIT_LCP) on GitHub. The code is written to run on Google BigQuery against the MIMIC-IV data. We make a few small modifications so that the SQL can be evaluated locally, via RSQLite, on a local data set.
The SQL file used here is vendored from mimic-code
commit 278df75ec30991ff3a6f5ceb6d2221635a085e9f so this
article does not depend on network access during rendering.
# Read the vendored MIMIC-IV Charlson SQL that ships with medicalcoder.
# scan() with sep = "\n" returns the file as a character vector split on
# newlines.
mimic_charson_query <-
  scan(
    file = system.file(
      "sql", "mimic-iv-charlson-278df75.sql",
      package = "medicalcoder",
      mustWork = TRUE
    ),
    what = character(),
    sep = "\n"
  )

# modify the query to work in SQLite: point the BigQuery table names at the
# local tables and swap GREATEST for MAX.  Names are the literal text to find,
# values are the replacement text; the order matches the original edits.
sqlite_rewrites <- c(
  "physionet-data.mimiciv_hosp.admissions"    = "admissions",
  "physionet-data.mimiciv_hosp.diagnoses_icd" = "diagnoses",
  "physionet-data.mimiciv_derived.age"        = "ages",
  "GREATEST"                                  = "MAX"
)
for (bigquery_text in names(sqlite_rewrites)) {
  mimic_charson_query <-
    gsub(
      pattern     = bigquery_text,
      replacement = sqlite_rewrites[[bigquery_text]],
      x           = mimic_charson_query,
      fixed       = TRUE  # literal match; the table names contain "."
    )
}

# Collapse back into a single statement for dbGetQuery().
mimic_charson_query <- paste(mimic_charson_query, collapse = "\n")
library(medicalcoder)
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:base':
##
## %notin%
# Work on a copy so that the := updates below do not modify `mdcr` itself.
mdcr_for_mimic <- data.table::copy(mdcr)
setDT(mdcr_for_mimic)

# Rename the columns to the names used in the MIMIC-style tables
# (old name = new name).
mimic_column_names <- c(
  patid = "subject_id",
  code  = "icd_code",
  icdv  = "icd_version"
)
setnames(
  mdcr_for_mimic,
  old = names(mimic_column_names),
  new = unname(mimic_column_names)
)

# create a hospital admission id (one admission per patient id in mdcr)
mdcr_for_mimic[, hadm_id := paste0(subject_id, "e1")]

# Running record number within each admission.
mdcr_for_mimic[, seq_num := seq_len(.N), by = .(subject_id, hadm_id)]

# Age is taken from the first two characters of the subject id.
mdcr_for_mimic[, age := as.integer(substr(as.character(subject_id), 1L, 2L))]
library(odbc)
library(DBI)
library(RSQLite)
# In-memory SQLite database holding the three tables the patched query reads.
con <- dbConnect(drv = RSQLite::SQLite(), dbname = ":memory:")

# add data to the database
diagnosis_rows <- mdcr_for_mimic[dx == 1L]
admission_rows <- unique(mdcr_for_mimic[, c("subject_id", "hadm_id"), with = FALSE])
age_rows       <- unique(mdcr_for_mimic[, c("hadm_id", "age"), with = FALSE])

dbWriteTable(conn = con, name = "diagnoses",  value = diagnosis_rows)
dbWriteTable(conn = con, name = "admissions", value = admission_rows)
dbWriteTable(conn = con, name = "ages",       value = age_rows)

# get the charlson results via MIMIC-IV
mimic_charlson_results <- dbGetQuery(con, mimic_charson_query)

# close DB connection
dbDisconnect(conn = con)

# Convert the query result to a data.table by reference.
setDT(mimic_charlson_results)
# Charlson flags from medicalcoder, computed on the same mdcr_for_mimic data.
# NOTE(review): poa and primarydx are passed as constants (1L and 0L) rather
# than as column names -- confirm against the comorbidities() documentation
# that this matches the assumptions of the MIMIC SQL.
medicalcoder_charlson_results <-
  comorbidities(
    data        = mdcr_for_mimic,
    method      = "charlson_quan2005",
    id.vars     = c("subject_id", "hadm_id"),
    icd.codes   = "icd_code",
    icdv.var    = "icd_version",
    dx.var      = "dx",
    age.var     = "age",
    poa         = 1L,
    primarydx   = 0L,
    flag.method = "current",
    full.codes  = FALSE
  )

# Full outer join of the two result sets on the admission identifiers, so an
# admission missing from either side would still appear in delta.
delta <-
  merge(
    x     = medicalcoder_charlson_results,
    y     = mimic_charlson_results,
    by    = c("subject_id", "hadm_id"),
    all.x = TRUE,
    all.y = TRUE
  )
# Both result sets should have one row per admission in the input data.
mdcr_for_mimic[, uniqueN(hadm_id)]
## [1] 38262
NROW(mimic_charlson_results)
## [1] 38262
NROW(medicalcoder_charlson_results)
## [1] 38262
The flags for conditions with multiple severity levels, and the metastatic cancer flag, differ between the two methods.
# Pairs of columns to compare: the medicalcoder column name on the left and
# the corresponding MIMIC column name on the right.
dcolumns <- fread(text = "
medicalcoder | mimic
aidshiv | aids
mal | malignant_cancer
cebvd | cerebrovascular_disease
copd | chronic_pulmonary_disease
chf | congestive_heart_failure
dem | dementia
dmc | diabetes_with_cc
dm | diabetes_without_cc
hp | paraplegia
mld | mild_liver_disease
msld | severe_liver_disease
mi | myocardial_infarct
pud | peptic_ulcer_disease
pvd | peripheral_vascular_disease
rnd | renal_disease
rhd | rheumatic_disease
age_score.x | age_score.y
cci | charlson_comorbidity_index
")

# For each pair, print the comparison being made and then its result.
# Matching columns are deliberately kept in delta: the severity-level checks
# that follow still reference some of them (e.g. dmc and msld).
for (pair in seq_len(nrow(dcolumns))) {
  medicalcoder_col <- dcolumns[["medicalcoder"]][pair]
  mimic_col <- dcolumns[["mimic"]][pair]
  # Build the call with the column names filled in so the printed expression
  # shows exactly which columns are compared.
  comparison <- bquote(identical(delta[[.(medicalcoder_col)]], delta[[.(mimic_col)]]))
  print(comparison)
  print(eval(comparison))
}
## identical(delta[["aidshiv"]], delta[["aids"]])
## [1] TRUE
## identical(delta[["mal"]], delta[["malignant_cancer"]])
## [1] FALSE
## identical(delta[["cebvd"]], delta[["cerebrovascular_disease"]])
## [1] TRUE
## identical(delta[["copd"]], delta[["chronic_pulmonary_disease"]])
## [1] TRUE
## identical(delta[["chf"]], delta[["congestive_heart_failure"]])
## [1] TRUE
## identical(delta[["dem"]], delta[["dementia"]])
## [1] TRUE
## identical(delta[["dmc"]], delta[["diabetes_with_cc"]])
## [1] TRUE
## identical(delta[["dm"]], delta[["diabetes_without_cc"]])
## [1] FALSE
## identical(delta[["hp"]], delta[["paraplegia"]])
## [1] TRUE
## identical(delta[["mld"]], delta[["mild_liver_disease"]])
## [1] FALSE
## identical(delta[["msld"]], delta[["severe_liver_disease"]])
## [1] TRUE
## identical(delta[["mi"]], delta[["myocardial_infarct"]])
## [1] TRUE
## identical(delta[["pud"]], delta[["peptic_ulcer_disease"]])
## [1] TRUE
## identical(delta[["pvd"]], delta[["peripheral_vascular_disease"]])
## [1] TRUE
## identical(delta[["rnd"]], delta[["renal_disease"]])
## [1] TRUE
## identical(delta[["rhd"]], delta[["rheumatic_disease"]])
## [1] TRUE
## identical(delta[["age_score.x"]], delta[["age_score.y"]])
## [1] TRUE
## identical(delta[["cci"]], delta[["charlson_comorbidity_index"]])
## [1] FALSEThere are three comorbidities where there are different levels of
severity. medicalcoder::comorbidities() will set the less
severe condition indicator to 0 when the more severe condition is
flagged. Both methods only consider the more severe case in the index
scoring.
Diabetes - medicalcoder::comorbidities() sets the flag
for diabetes without complication to 0 when diabetes with complication
is present. MIMIC code retains the non-complex case.
# medicalcoder never flags both diabetes severity levels on one admission.
delta[dm == 1L & dmc == 1L, .N == 0L] # medicalcoder
## [1] TRUE
# MIMIC retains diabetes_without_cc when diabetes_with_cc is also flagged, so
# admissions with both flags set exist.  (The check must look at the
# both-flagged rows, matching the cancer and liver disease checks.)
delta[diabetes_without_cc == 1L & diabetes_with_cc == 1L, .N > 0L] # MIMIC
## [1] TRUE
# Condition on each MIMIC flag pattern and verify the medicalcoder flags.
delta[diabetes_without_cc == 0L & diabetes_with_cc == 0L, .N > 0L & all(dm == 0L) & all(dmc == 0L)]
## [1] TRUE
delta[diabetes_without_cc == 1L & diabetes_with_cc == 0L, .N > 0L & all(dm == 1L) & all(dmc == 0L)]
## [1] TRUE
delta[diabetes_without_cc == 0L & diabetes_with_cc == 1L, .N > 0L & all(dmc == 1L)]
## [1] TRUE
delta[diabetes_without_cc == 1L & diabetes_with_cc == 1L, .N > 0L & all(dmc == 1L)]
## [1] TRUE
# Condition on each medicalcoder flag pattern and verify the MIMIC flags.
delta[dm == 0L & dmc == 0L, .N > 0L & all(diabetes_without_cc == 0L) & all(diabetes_with_cc == 0L)]
## [1] TRUE
delta[dm == 1L & dmc == 0L, .N > 0L & all(diabetes_without_cc == 1L) & all(diabetes_with_cc == 0L)]
## [1] TRUE
delta[dm == 0L & dmc == 1L, .N > 0L & all(diabetes_with_cc == 1L)]
## [1] TRUE
delta[dm == 1L & dmc == 1L, .N == 0L]
## [1] TRUE
# The diabetes columns are reconciled; drop them from delta.
delta[, dm := NULL]
delta[, dmc := NULL]
delta[, diabetes_with_cc := NULL]
delta[, diabetes_without_cc := NULL]Cancer - medicalcoder::comorbidities() sets malignant
cancer (mal) to 0 when metastatic solid tumor (mst) is present. MIMIC
retains both flags.
# medicalcoder never flags mal and mst on the same admission.
delta[mal == 1L & mst == 1L, .N] == 0L # medicalcoder
## [1] TRUE
# MIMIC has admissions with both cancer flags set.
delta[malignant_cancer == 1L & metastatic_solid_tumor == 1L, .N] > 0L # MIMIC
## [1] TRUE
# Condition on each MIMIC flag pattern and verify the medicalcoder flags.
# The first check is FALSE: medicalcoder flags mst on some admissions where
# MIMIC flags neither cancer column (see the C7A.x discussion that follows).
delta[malignant_cancer == 0L & metastatic_solid_tumor == 0L, .N > 0L && all(mal == 0L, mst == 0L)]
## [1] FALSE
delta[malignant_cancer == 1L & metastatic_solid_tumor == 0L, .N > 0L && all(mal == 1L, mst == 0L)]
## [1] TRUE
delta[malignant_cancer == 0L & metastatic_solid_tumor == 1L, .N > 0L && all(mst == 1L)]
## [1] TRUE
delta[malignant_cancer == 1L & metastatic_solid_tumor == 1L, .N > 0L && all(mst == 1L)]
## [1] TRUE
# Condition on each medicalcoder flag pattern and verify the MIMIC flags.
delta[mal == 0L & mst == 0L, .N > 0L && all(malignant_cancer == 0L, metastatic_solid_tumor == 0L)]
## [1] TRUE
delta[mal == 1L & mst == 0L, .N > 0L && all(malignant_cancer == 1L, metastatic_solid_tumor == 0L)]
## [1] TRUE
# FALSE for the same reason: some admissions have mst == 1 in medicalcoder
# while metastatic_solid_tumor == 0 in MIMIC.
delta[mal == 0L & mst == 1L, .N > 0L && all(metastatic_solid_tumor == 1L)]
## [1] FALSE
delta[mal == 1L & mst == 1L, .N] == 0L
## [1] TRUE
Additionally, ICD-10 codes from CMS of the form C7A.x are not mapped by the MIMIC code to metastatic_solid_tumor, but medicalcoder does map these codes to that comorbidity.
# Admissions flagged as metastatic by medicalcoder but not by the MIMIC SQL.
mst_medicalcoder_only <-
  subset(
    delta,
    mst == 1 & metastatic_solid_tumor == 0,
    select = c("subject_id", "hadm_id")
  )

# All code records belonging to those admissions (inner join).
mst_medicalcoder_only_codes <-
  merge(
    x = mdcr_for_mimic,
    y = mst_medicalcoder_only,
    by = c("subject_id", "hadm_id"),
    all = FALSE
  )

# Show only the codes that start with "C7" followed by a letter.
subset(mst_medicalcoder_only_codes, grepl("^C7[A-Z]", icd_code))
## Key: <subject_id, hadm_id>
## subject_id hadm_id icd_version icd_code dx seq_num age
## <int> <char> <int> <char> <int> <int> <int>
## 1: 25628 25628e1 10 C7A098 1 10 25
## 2: 90045 90045e1 10 C7A8 1 1 90
## 3: 90045 90045e1 10 C7B8 1 2 90
## 4: 99058 99058e1 10 C7A8 1 2 99
# Look up the descriptions of the three codes listed above.
icd_code_table <- medicalcoder::get_icd_codes(with.descriptions = TRUE)
subset(icd_code_table, full_code %in% c("C7A.098", "C7A.8", "C7B.8"))
## icdv dx full_code code src known_start known_end assignable_start
## 148812 10 1 C7A.098 C7A098 cms 2014 2026 2014
## 148814 10 1 C7A.8 C7A8 cms 2014 2026 2014
## 148824 10 1 C7B.8 C7B8 cms 2014 2026 2014
## assignable_end desc desc_start
## 148812 2026 Malignant carcinoid tumors of other sites 2014
## 148814 2026 Other malignant neuroendocrine tumors 2014
## 148824 2026 Other secondary neuroendocrine tumors 2014
## desc_end
## 148812 2026
## 148814 2026
## 148824 2026
# The cancer columns are reconciled; drop these three from delta by reference.
delta[, c("mal", "mst", "malignant_cancer") := NULL]
delta[, metastatic_solid_tumor := NULL]Liver disease - medicalcoder::comorbidities() sets the
flag for mild liver disease (mld) to 0 when moderate/severe liver
disease (msld) is flagged. MIMIC retains both flags.
# medicalcoder never flags mld and msld on the same admission.
delta[mld == 1L & msld == 1L, .N] == 0L # medicalcoder
## [1] TRUE
# MIMIC has admissions with both liver disease flags set.
delta[mild_liver_disease == 1L & severe_liver_disease == 1L, .N] > 0L # MIMIC
## [1] TRUE
# Condition on each MIMIC flag pattern and verify the medicalcoder flags.
delta[mild_liver_disease == 0L & severe_liver_disease == 0L, .N > 0L && all(mld == 0L, msld == 0L)]
## [1] TRUE
delta[mild_liver_disease == 1L & severe_liver_disease == 0L, .N > 0L && all(mld == 1L, msld == 0L)]
## [1] TRUE
delta[mild_liver_disease == 0L & severe_liver_disease == 1L, .N > 0L && all(msld == 1L)]
## [1] TRUE
delta[mild_liver_disease == 1L & severe_liver_disease == 1L, .N > 0L && all(msld == 1L)]
## [1] TRUE
# Condition on each medicalcoder flag pattern and verify the MIMIC flags.
delta[mld == 0L & msld == 0L, .N > 0L && all(mild_liver_disease == 0L, severe_liver_disease == 0L)]
## [1] TRUE
delta[mld == 1L & msld == 0L, .N > 0L && all(mild_liver_disease == 1L, severe_liver_disease == 0L)]
## [1] TRUE
delta[mld == 0L & msld == 1L, .N > 0L && all(severe_liver_disease == 1L)]
## [1] TRUE
delta[mld == 1L & msld == 1L, .N] == 0L
## [1] TRUE
# The liver disease columns are reconciled; drop these three from delta.
delta[, c("mld", "msld", "mild_liver_disease") := NULL]
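delta[, severe_liver_disease := NULL]
After dropping the diabetes, cancer, and liver disease columns, what is left in the delta data.frame
are the id.vars, the flags and scores for the remaining conditions from both methods, and the num_cmrb and
cmrb_flag columns. The latter two are from
medicalcoder::comorbidities() and report the number of
comorbidities flagged and an indicator for any comorbidity.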
# Show the columns that remain in delta.
utils::str(delta)
## Classes 'medicalcoder_comorbidities', 'data.table' and 'data.frame': 38262 obs. of 30 variables:
## $ subject_id : int 10000 10002 10005 10006 10008 10010 10014 10015 10017 10018 ...
## $ hadm_id : chr "10000e1" "10002e1" "10005e1" "10006e1" ...
## $ aidshiv : int 0 0 0 0 0 0 0 0 0 0 ...
## $ cebvd : int 0 0 0 0 0 0 0 0 0 0 ...
## $ copd : int 1 0 0 0 0 0 0 0 0 0 ...
## $ chf : int 0 0 0 0 0 0 0 0 0 0 ...
## $ dem : int 0 0 0 0 0 0 0 0 0 0 ...
## $ hp : int 0 0 0 0 0 0 0 1 0 0 ...
## $ mi : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pud : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pvd : int 0 0 0 0 0 0 0 0 0 0 ...
## $ rnd : int 0 0 0 0 0 0 0 0 0 0 ...
## $ rhd : int 0 0 0 0 0 0 0 0 0 0 ...
## $ num_cmrb : int 1 0 1 0 0 0 0 1 0 0 ...
## $ cmrb_flag : int 1 0 1 0 0 0 0 1 0 0 ...
## $ cci : int 1 0 6 0 0 0 0 2 0 0 ...
## $ age_score.x : int 0 0 0 0 0 0 0 0 0 0 ...
## $ age_score.y : int 0 0 0 0 0 0 0 0 0 0 ...
## $ myocardial_infarct : int 0 0 0 0 0 0 0 0 0 0 ...
## $ congestive_heart_failure : int 0 0 0 0 0 0 0 0 0 0 ...
## $ peripheral_vascular_disease: int 0 0 0 0 0 0 0 0 0 0 ...
## $ cerebrovascular_disease : int 0 0 0 0 0 0 0 0 0 0 ...
## $ dementia : int 0 0 0 0 0 0 0 0 0 0 ...
## $ chronic_pulmonary_disease : int 1 0 0 0 0 0 0 0 0 0 ...
## $ rheumatic_disease : int 0 0 0 0 0 0 0 0 0 0 ...
## $ peptic_ulcer_disease : int 0 0 0 0 0 0 0 0 0 0 ...
## $ paraplegia : int 0 0 0 0 0 0 0 1 0 0 ...
## $ renal_disease : int 0 0 0 0 0 0 0 0 0 0 ...
## $ aids : int 0 0 0 0 0 0 0 0 0 0 ...
## $ charlson_comorbidity_index : int 1 0 6 0 0 0 0 2 0 0 ...
## - attr(*, ".internal.selfref")=<pointer: 0x564c726dfee0>
## - attr(*, "sorted")= chr [1:2] "subject_id" "hadm_id"
## - attr(*, "index")= int(0)