Skip to content
This repository has been archived by the owner on Aug 17, 2021. It is now read-only.

Commit

Permalink
Merge pull request #113 from MIT-LCP/fix_v1_concepts
Browse files Browse the repository at this point in the history
Fix creation of concepts
  • Loading branch information
alistairewj authored Mar 17, 2021
2 parents 35ae66d + d6c4585 commit 41b2ede
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 35 deletions.
15 changes: 13 additions & 2 deletions concepts/make_concepts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
export TARGET_DATASET=mimic_derived

# generate tables in subfolders
for d in demographics measurement medication treatment firstday score;
# order is important for a few tables here:
# * firstday should go last
# * sepsis depends on score (sofa.sql in particular)
# * organfailure depends on measurement
for d in comorbidity demographics measurement medication organfailure treatment score sepsis firstday;
do
for fn in `ls $d`;
do
Expand All @@ -15,6 +19,9 @@ do
# skip first_day_sofa as it depends on other firstday queries
if [[ "${tbl}" == "first_day_sofa" ]]; then
continue
# kdigo_stages needs to be run after creat/uo
elif [[ "${tbl}" == "kdigo_stages" ]]; then
continue
fi
echo "Generating ${TARGET_DATASET}.${tbl}"
bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.${tbl} < ${d}/${fn}
Expand All @@ -24,4 +31,8 @@ done

# generate first_day_sofa table last
echo "Generating ${TARGET_DATASET}.first_day_sofa"
bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.first_day_sofa < firstday/first_day_sofa.sql
bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.first_day_sofa < firstday/first_day_sofa.sql

# generate kdigo_stages table after the loop, as it needs to be run after creat/uo
echo "Generating ${TARGET_DATASET}.kdigo_stages"
bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.kdigo_stages < organfailure/kdigo_stages.sql
142 changes: 119 additions & 23 deletions concepts/measurement/blood_differential.sql
Original file line number Diff line number Diff line change
@@ -1,40 +1,136 @@
-- For reference, some common unit conversions:
-- 10^9/L == K/uL == 10^3/uL
WITH blood_diff AS
(
SELECT
MAX(subject_id) AS subject_id
, MAX(hadm_id) AS hadm_id
, MAX(charttime) AS charttime
, le.specimen_id
-- convert from itemid into a meaningful column
, MAX(CASE WHEN itemid = 52056 THEN valuenum ELSE NULL END) AS abs_basophils
, MAX(CASE WHEN itemid = 52060 THEN valuenum ELSE NULL END) AS abs_eosinophils
, MAX(CASE
WHEN itemid = 51133 THEN valuenum
-- convert #/uL to K/uL
WHEN itemid = 52733 THEN valuenum / 1000.0
ELSE NULL END) AS abs_lymphocytes
, MAX(CASE WHEN itemid = 52061 THEN valuenum ELSE NULL END) AS abs_monocytes
, MAX(CASE WHEN itemid = 52062 THEN valuenum ELSE NULL END) AS abs_neutrophils
, MAX(CASE WHEN itemid = 51143 THEN valuenum ELSE NULL END) AS atyps
-- create one set of columns for percentages, and one set of columns for counts
-- we harmonize all count units into K/uL == 10^9/L
-- counts have an "_abs" suffix, percentages do not

-- absolute counts
, MAX(CASE WHEN itemid in (51300, 51301, 51755) THEN valuenum ELSE NULL END) AS wbc
, MAX(CASE WHEN itemid = 52069 THEN valuenum ELSE NULL END) AS basophils_abs
-- 52073 in K/uL, 51199 in #/uL
, MAX(CASE WHEN itemid = 52073 THEN valuenum WHEN itemid = 51199 THEN valuenum / 1000.0 ELSE NULL END) AS eosinophils_abs
-- 51133 in K/uL, 52769 in #/uL
, MAX(CASE WHEN itemid = 51133 THEN valuenum WHEN itemid = 52769 THEN valuenum / 1000.0 ELSE NULL END) AS lymphocytes_abs
-- 52074 in K/uL, 51253 in #/uL
, MAX(CASE WHEN itemid = 52074 THEN valuenum WHEN itemid = 51253 THEN valuenum / 1000.0 ELSE NULL END) AS monocytes_abs
, MAX(CASE WHEN itemid = 52075 THEN valuenum ELSE NULL END) AS neutrophils_abs
-- convert from #/uL to K/uL
, MAX(CASE WHEN itemid = 51218 THEN valuenum / 1000.0 ELSE NULL END) AS granulocytes_abs

-- percentages (0-100 scale), equal to 100 * cell count / white blood cell count
, MAX(CASE WHEN itemid = 51146 THEN valuenum ELSE NULL END) AS basophils
, MAX(CASE WHEN itemid = 51200 THEN valuenum ELSE NULL END) AS eosinophils
, MAX(CASE WHEN itemid in (51244, 51245) THEN valuenum ELSE NULL END) AS lymphocytes
, MAX(CASE WHEN itemid = 51254 THEN valuenum ELSE NULL END) AS monocytes
, MAX(CASE WHEN itemid = 51256 THEN valuenum ELSE NULL END) AS neutrophils

-- other cell count percentages
, MAX(CASE WHEN itemid = 51143 THEN valuenum ELSE NULL END) AS atypical_lymphocytes
, MAX(CASE WHEN itemid = 51144 THEN valuenum ELSE NULL END) AS bands
, MAX(CASE WHEN itemid = 52122 THEN valuenum ELSE NULL END) AS imm_granulocytes
, MAX(CASE WHEN itemid = 51251 THEN valuenum ELSE NULL END) AS metas
, MAX(CASE WHEN itemid = 52135 THEN valuenum ELSE NULL END) AS immature_granulocytes
, MAX(CASE WHEN itemid = 51251 THEN valuenum ELSE NULL END) AS metamyelocytes
, MAX(CASE WHEN itemid = 51257 THEN valuenum ELSE NULL END) AS nrbc

-- utility flags which determine whether imputation is possible
, CASE
-- WBC is available
WHEN MAX(CASE WHEN itemid in (51300, 51301, 51755) THEN valuenum ELSE NULL END) > 0
-- and we have at least one percentage from the diff
-- sometimes the entire diff is 0%, which looks like bad data
AND SUM(CASE WHEN itemid IN (51146, 51200, 51244, 51245, 51254, 51256) THEN valuenum ELSE NULL END) > 0
THEN 1 ELSE 0 END AS impute_abs

FROM mimic_hosp.labevents le
WHERE le.itemid IN
(
52056, -- Absolute basophil count
52060, -- Absolute Eosinophil count
51133, -- Absolute Lymphocyte Count, K/uL
52733, -- Absolute Lymphocyte Count, #/uL
52061, -- Absolute Monocyte Count
52062, -- Absolute Neutrophil Count
51146, -- basophils
52069, -- Absolute basophil count
51199, -- Eosinophil Count
51200, -- Eosinophils
52073, -- Absolute Eosinophil count
51244, -- Lymphocytes
51245, -- Lymphocytes, Percent
51133, -- Absolute Lymphocyte Count
52769, -- Absolute Lymphocyte Count
51253, -- Monocyte Count
51254, -- Monocytes
52074, -- Absolute Monocyte Count
51256, -- Neutrophils
52075, -- Absolute Neutrophil Count
51143, -- Atypical lymphocytes
51144, -- Bands (%)
52122, -- Immature granulocytes (%)
51218, -- Granulocyte Count
52135, -- Immature granulocytes (%)
51251, -- Metamyelocytes
51257 -- Nucleated RBC
51257, -- Nucleated Red Cells

-- wbc totals measured in K/uL
51300, 51301, 51755
-- 52220 (wbcp) is percentage

-- below are point of care tests which are extremely infrequent and usually low quality
-- 51697, -- Neutrophils (mmol/L)

-- below itemid do not have data as of MIMIC-IV v1.0
-- 51536, -- Absolute Lymphocyte Count
-- 51537, -- Absolute Neutrophil
-- 51690, -- Lymphocytes
-- 52151, -- NRBC
)
AND valuenum IS NOT NULL
-- lab values cannot be 0 and cannot be negative
AND valuenum > 0
-- differential values cannot be negative
AND valuenum >= 0
GROUP BY le.specimen_id
)
-- Final output: one row per specimen_id with the WBC total, the absolute
-- counts (harmonized to K/uL), the differential percentages, and the other
-- cell-type percentages computed in the blood_diff CTE.
SELECT
    subject_id
    , hadm_id
    , charttime
    , specimen_id

    , wbc

    -- Impute an absolute count when it was not measured directly but both the
    -- percentage and the WBC total are usable (impute_abs = 1).
    -- The differential values are percentages (0-100), so they must be divided
    -- by 100 before scaling by wbc; otherwise the imputed count is 100x too
    -- large relative to the directly measured K/uL counts.
    , ROUND(CASE
        WHEN basophils_abs IS NULL AND basophils IS NOT NULL AND impute_abs = 1
            THEN basophils * wbc / 100.0
        ELSE basophils_abs
      END, 4) AS basophils_abs
    , ROUND(CASE
        WHEN eosinophils_abs IS NULL AND eosinophils IS NOT NULL AND impute_abs = 1
            THEN eosinophils * wbc / 100.0
        ELSE eosinophils_abs
      END, 4) AS eosinophils_abs
    , ROUND(CASE
        WHEN lymphocytes_abs IS NULL AND lymphocytes IS NOT NULL AND impute_abs = 1
            THEN lymphocytes * wbc / 100.0
        ELSE lymphocytes_abs
      END, 4) AS lymphocytes_abs
    , ROUND(CASE
        WHEN monocytes_abs IS NULL AND monocytes IS NOT NULL AND impute_abs = 1
            THEN monocytes * wbc / 100.0
        ELSE monocytes_abs
      END, 4) AS monocytes_abs
    , ROUND(CASE
        WHEN neutrophils_abs IS NULL AND neutrophils IS NOT NULL AND impute_abs = 1
            THEN neutrophils * wbc / 100.0
        ELSE neutrophils_abs
      END, 4) AS neutrophils_abs

    -- percentages of the white blood cell count, passed through unchanged
    , basophils
    , eosinophils
    , lymphocytes
    , monocytes
    , neutrophils

    -- other cell-type percentages; these are not imputed
    -- impute bands/blasts?
    , atypical_lymphocytes
    , bands
    , immature_granulocytes
    , metamyelocytes
    , nrbc
FROM blood_diff
;
2 changes: 1 addition & 1 deletion concepts/organfailure/kdigo_stages.sql
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ with cr_stg AS
, CASE
WHEN uo.uo_rt_6hr IS NULL THEN NULL
-- require patient to be in ICU for at least 6 hours to stage UO
WHEN uo.charttime <= ie.intime + interval '6' hour THEN 0
WHEN uo.charttime <= DATETIME_ADD(ie.intime, INTERVAL '6' HOUR) THEN 0
-- require the UO rate to be calculated over half the period
-- i.e. for uo rate over 24 hours, require documentation at least 12 hr apart
WHEN uo.uo_tm_24hr >= 11 AND uo.uo_rt_24hr < 0.3 THEN 3
Expand Down
15 changes: 6 additions & 9 deletions website/content/en/docs/tutorials/video.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,15 @@ linktitle: "Video Tutorial"
date: 2020-08-26
weight: 2
description: >
Video tutorial from ACM-CHIL 2020
Video tutorial giving an overview of MIMIC-IV data
---

### Analyzing critical care data, from speculation to publication, starring MIMIC-IV (ACM-CHIL 2020)
### Analyzing critical care data, from speculation to publication, starring MIMIC-IV

Part-1: Overview
This tutorial was presented at ACM-CHIL 2020. There were two parts to the tutorial:

[video](https://www.chilconference.org/tutorial_d.html)
1. An overview of all tables in MIMIC-IV (1 hour): [View on SlidesLive](https://slideslive.com/embed/presentation/38931965).
2. Reproducing a study in MIMIC-IV (30 minutes): [View on SlidesLive](https://slideslive.com/embed/presentation/38932058).

Part-2: Doing a study with MIMIC-IV

[video](https://www.chilconference.org/tutorial_f.html)

[code](https://github.com/alistairewj/mimic-iv-aline-study)
The code for the latter tutorial is available on the [mimic-iv-aline-study](https://github.com/alistairewj/mimic-iv-aline-study) GitHub page.

0 comments on commit 41b2ede

Please sign in to comment.