Fix RAPID temperature units (#201)

* Upload one-off script
* Update RAPID parser & output file
globaldothealth · Nov 15, 2023 · 44e9262 · abhidg · Nov 16, 2023 · pipliggins
1 parent 3061496
commit 44e9262
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 7 deletions.
diff --git a/isaric/parsers/isaric-rapid.toml b/isaric/parsers/isaric-rapid.toml
@@ -877,7 +877,7 @@
       { ccm_a_fio2_lborres = { ">" = 0.21 } },
       { ccm_a_fio2_lborres = { "<=" = 1 } },
     ], apply = { function = "isNotNull" } },
-    { field = "ccm_a_fio2b_lborres", if = { daily_fio2b_lborres = { ">" = 21 } }, apply = { function = "isNotNull" } },
+    { field = "ccm_a_fio2b_lborres", if = { ccm_a_fio2b_lborres = { ">" = 21 } }, apply = { function = "isNotNull" } },
     { field = "ccm_a_fio2c_lborres", apply = { function = "isNotNull" } },
   ]
 
@@ -1812,13 +1812,13 @@
   name = "temperature_celsius"
   phase = "admission"
   date = { ref = "admissionDateHierarchy" }
-  value = { field = "temp_vsorres" }        # there is no source unit field, but a mix of celsius and farenheit in the data.
+  value = { field = "temp_vsorres_new" }
 
 [[observation]]
   name = "temperature_celsius"
   phase = "study"
   date = { field = "daily_date" }
-  value = { field = "daily_temp_vsorres" } # there is no source unit field
+  value = { field = "daily_temp_vsorres_new" }
   context = ['Most abnormal reading']
 
 [[observation]]

diff --git a/isaric/parsers/isaric-rapid/fix-temperature-units.py b/isaric/parsers/isaric-rapid/fix-temperature-units.py
@@ -0,0 +1,20 @@
+# Converts temperatures recorded in Fahrenheit to Celsius: values above 50 exceed the maximum human internal temperature in Celsius, so they are treated as Fahrenheit.
+
+import pandas as pd
+
+def convert_temperature_units(value):
+    if value <= 50:
+        return value
+    elif value > 50:
+        return (value - 32) * 5/9
+
+# import data
+df = pd.read_csv("ISARIC RAPID/ISARICCOVID19RAPIDFo_DATA_2022-07-06_0932.csv")
+
+# create new columns with the converted data
+df['temp_vsorres_new'] = df.apply(lambda x: convert_temperature_units(x.temp_vsorres), axis=1)
+df['daily_temp_vsorres_new'] = df.apply(lambda x: convert_temperature_units(x.daily_temp_vsorres), axis=1)
+
+# save the new file
+df2 = df.convert_dtypes()
+df2.to_csv("ISARIC RAPID/ISARICCOVID19RAPIDFo_DATA_2022-07-06_0932_temperaturefix.csv", index=False)
diff --git a/output/ISARIC RAPID/adtl-output.md b/output/ISARIC RAPID/adtl-output.md
@@ -1,10 +1,10 @@
->adtl isaric-rapid.toml ISARICCOVID19RAPIDFo_DATA_2022-07-06_0932.csv --include-defs isaric-rapid.json
+>adtl isaric-rapid.toml ISARICCOVID19RAPIDFo_DATA_2022-07-06_0932_temperaturefix.csv --include-defs isaric-rapid.json
 
 |table          |valid  |total  |percentage_valid|
 |---------------|-------|-------|----------------|
 |subject        |5546   |8061   |68.800397% |
 |visit          |4911   |8061   |60.922962% |
-|observation    |419391 |436586 |96.061486% |
+|observation    |416311 |430313 |96.746089% |
 
 ## subject
 
@@ -19,5 +19,5 @@
 
 ## observation
 
-* 14373: data must contain ['phase', 'date', 'name'] properties
-* 2822: data must be valid exactly by one definition (0 matches found)
+* 13853: data must contain ['phase', 'date', 'name'] properties
+* 149: data must be valid exactly by one definition (0 matches found)