
Commit

24h readings
francesco-vaselli committed Jan 28, 2024
1 parent 9429717 commit 216e7c7
Showing 5 changed files with 492 additions and 144 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -2,7 +2,7 @@
data
logs
src/models/baseline_figures
.npy
*.npy

# Byte-compiled / optimized / DLL files
__pycache__/
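The .gitignore change swaps the literal pattern `.npy` for the glob `*.npy`, so every generated NumPy dump (e.g. the `dataset_24h.npy` and `dataset_full_no_smooth.npy` files written by the script below) is ignored, not just a file literally named `.npy`. A minimal sketch of the difference using Python's `fnmatch`, which only approximates gitignore matching; the filenames are illustrative:

```python
from fnmatch import fnmatch

files = ["dataset_24h.npy", "dataset_full_no_smooth.npy", ".npy"]

# old pattern: only a file literally named ".npy" would match
print([f for f in files if fnmatch(f, ".npy")])   # ['.npy']

# new pattern: every generated NumPy dump matches
print([f for f in files if fnmatch(f, "*.npy")])  # all three names
```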
284 changes: 145 additions & 139 deletions configs/data_config.yaml
@@ -1,143 +1,149 @@
# config for the full dataset builder
data_dir: "/home/fvaselli/Documents/PHD/TSA/TSA/data/data_oh"
data_dir: "/home/fvaselli/Documents/TSA/data/data_oh"
data_type: "OH_24h"
# patients ids
ids: ['72492570',
'86323903',
'05274556',
'95851255',
'69739062',
'73398408',
'80625186',
'27526291',
'86286077',
'27819368',
'94200862',
'84109428',
'15634563',
'37948668',
'76817975',
'67208817',
'49551394',
'37998755',
'57176789',
'54280576',
'19626656',
'60844515',
'62401782',
'33470634',
'70811987',
'71397255',
'20396154',
'02199852',
'56958831',
'14092221',
'70454270',
'68267781',
'85199788',
'68026248',
'13783771',
'99712241',
'97417885',
'20649783',
'86025410',
'48509634',
'96805916',
'93839818',
'67359234',
'48540630',
'79526193',
'89727223',
'40997757',
'66836068',
'41131654',
'32997134',
'71618088',
'91161972',
'37875431',
'00221634',
'28176124',
'66773091',
'17161370',
'63725802',
'74077367',
'28608066',
'65143138',
'69587086',
'13029224',
'98340749',
'51968000',
'35719805',
'33962890',
'18991425',
'98974339',
'95614431',
'90398368',
'25692073',
'38110191',
'85653830',
'69965708',
'24448124',
'33831564',
'67167655',
'43589707',
'80501215',
'93606058',
'39986716',
'77104076',
'40237051',
'21946407',
'35533061',
'41663654',
'45120081',
'47323535',
'78420229',
'73521474',
'23428091',
'84589080',
'77411181',
'28756888',
'81099003',
'46253612',
'22961398',
'99848889',
'20216809',
'89710417',
'66019205',
'99296581',
'49796612',
'56568290',
'00897741',
'13484299',
'04762925',
'64024750',
'42052178',
'26856617',
'88252802',
'81680176',
'07886752',
'89032650',
'97872409',
'28768536',
'01352464',
'62345070',
'40634871',
'02033176',
'12689381',
'61179686',
'16975609',
'00309157',
'15558575',
'84984656',
'71236754',
'50311906',
'80373992',
'32407882',
'66937570',
'87770486',
'88004055',
'96254963',
'47750728',]
test_ids: ['99908129']
# '86323903',
# '05274556',
# '95851255',
# '69739062',
# '73398408',
# '80625186',
# '27526291',
# '86286077',
# '27819368',
# '94200862',
# '84109428',
# '15634563',
# '37948668',
# '76817975',
# '67208817',
# '49551394',
# '37998755',
# '57176789',
# '54280576',
# '19626656',
# '60844515',
# '62401782',
# '33470634',
# '70811987',
# '71397255',
# '20396154',
# '02199852',
# '56958831',
# '14092221',
# '70454270',
# '68267781',
# '85199788',
# '68026248',
# '13783771',
# '99712241',
# '97417885',
# '20649783',
# '86025410',
# '48509634',
# '96805916',
# '93839818',
# '67359234',
# '48540630',
# '79526193',
# '89727223',
# '40997757',
# '66836068',
# '41131654',
# '32997134',
# '71618088',
# '91161972',
# '37875431',
# '00221634',
# '28176124',
# '66773091',
# '17161370',
# '63725802',
# '74077367',
# '28608066',
# '65143138',
# '69587086',
# '13029224',
# '98340749',
# '51968000',
# '35719805',
# '33962890',
# '18991425',
# '98974339',
# '95614431',
# '90398368',
# '25692073',
# '38110191',
# '85653830',
# '69965708',
# '24448124',
# '33831564',
# '67167655',
# '43589707',
# '80501215',
# '93606058',
# '39986716',
# '77104076',
# '40237051',
# '21946407',
# '35533061',
# '41663654',
# '45120081',
# '47323535',
# '78420229',
# '73521474',
# '23428091',
# '84589080',
# '77411181',
# '28756888',
# '81099003',
# '46253612',
# '22961398',
# '99848889',
# '20216809',
# '89710417',
# '66019205',
# '99296581',
# '49796612',
# '56568290',
# '00897741',
# '13484299',
# '04762925',
# '64024750',
# '42052178',
# '26856617',
# '88252802',
# '81680176',
# '07886752',
# '89032650',
# '97872409',
# '28768536',
# '01352464',
# '62345070',
# '40634871',
# '02033176',
# '12689381',
# '61179686',
# '16975609',
# '00309157',
# '15558575',
# '84984656',
# '71236754',
# '50311906',
# '80373992',
# '32407882',
# '66937570',
# '87770486',
# '88004055',
# '96254963',
# '47750728',
# '99908129',
# '63047517',
# '80796147',
'64406000',]
test_ids: []
# '99908129']

# '99908129',]
# '63047517',
@@ -156,8 +162,8 @@ smooth: False
# target_weight
target_weight: 1
# standardize
standardize: False
standardize_by_ref: True
standardize: True
standardize_by_ref: False
standardize_params:
mean: 127.836
std: 60.410
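The config now enables global standardization (`standardize: True`) and disables `standardize_by_ref`, with fixed `standardize_params` of mean 127.836 and std 60.410 (plausibly CGM readings in mg/dL; `standardize_by_ref` presumably standardizes against per-patient reference statistics instead, and that path is switched off here). A minimal sketch of how such fixed parameters would typically be applied, assuming a simple z-score; the helper names and where exactly this happens in the pipeline are assumptions, not the repository's code:

```python
import numpy as np

# Values taken from standardize_params in data_config.yaml
MEAN_MGDL = 127.836
STD_MGDL = 60.410

def standardize(cgm, mean=MEAN_MGDL, std=STD_MGDL):
    """Z-score a CGM trace with the fixed population statistics."""
    return (np.asarray(cgm, dtype=float) - mean) / std

def destandardize(z, mean=MEAN_MGDL, std=STD_MGDL):
    """Map standardized values back to the original glucose scale."""
    return np.asarray(z, dtype=float) * std + mean

# round-trip check
trace = np.array([90.0, 127.836, 250.0])
assert np.allclose(destandardize(standardize(trace)), trace)
```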
21 changes: 17 additions & 4 deletions src/data_processing/build_dataset.py
Expand Up @@ -10,6 +10,7 @@

def build_dataset(
data_dir,
data_type,
ids,
test_ids,
sampling_horizon,
@@ -53,7 +54,7 @@ def build_dataset(
print(files_ids)
for f, pid in zip(files, files_ids):
reader = DataReader(
"OH", f, 5
data_type, f, 5
)
# a patient may have multiple json files
# so we check if the patient is already in the dict
@@ -64,8 +65,18 @@

print(f"Patient {pid} has {len(train_data[pid])} entries.")

# if data_type == "OH_24h", save the dict and return
if data_type == "OH_24h":
print("Saving dataset_24h.npy")
# print the shape of the dict
for k in train_data:
print(k, np.array(train_data[k]).shape)
train_data = np.array(train_data)
np.save("dataset_24h.npy", train_data)
print("Saved dataset_24h.npy")
return
# a dumb dataset instance with first file of data_dir
train_dataset = CGMSDataSeg("OH", files[0], 5)
train_dataset = CGMSDataSeg(data_type, files[0], 5)
print(len(train_dataset.data)) # Check length before
train_pids = set(ids) - set(test_ids)
local_train_data = []
@@ -112,6 +123,7 @@ def main(data_config):
config = yaml.load(f, Loader=yaml.FullLoader)

data_dir = config["data_dir"]
data_type = config["data_type"]
ids = config["ids"]
test_ids = config["test_ids"]
sampling_horizon = config["sampling_horizon"]
@@ -129,6 +141,7 @@

data, targets = build_dataset(
data_dir,
data_type,
ids,
test_ids,
sampling_horizon,
@@ -147,10 +160,10 @@

# save data and targets as numpy arrays, in same file
dataset = np.concatenate((data, targets), axis=1)
np.save("dataset_99908129_smooth_up.npy", dataset)
np.save("dataset_full_no_smooth.npy", dataset)
# dataset = tf.data.Dataset.from_tensor_slices((data, targets))
# save
# dataset.save("data/dataset")

if __name__ == "__main__":
main('/home/fvaselli/Documents/PHD/TSA/TSA/configs/data_config.yaml')
main('/home/fvaselli/Documents/TSA/configs/data_config.yaml')
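The new `OH_24h` branch in `build_dataset` wraps the per-patient dictionary in a 0-d object array and writes it to `dataset_24h.npy`, while the regular path still concatenates windows and targets along `axis=1` and now saves the result as `dataset_full_no_smooth.npy`. A minimal sketch of how these two artifacts could be read back: the `allow_pickle=True` / `.item()` step is required for the dict case, and the column split for the second file assumes the first `sampling_horizon` columns are the input window, which is an assumption based on the concatenation order above:

```python
import numpy as np

# 1) OH_24h output: np.save stored a 0-d object array wrapping {patient_id: readings},
#    so loading needs allow_pickle=True and .item() to recover the dict.
train_data = np.load("dataset_24h.npy", allow_pickle=True).item()
for pid, readings in train_data.items():
    print(pid, np.asarray(readings).shape)

# 2) Regular output: data and targets were concatenated along axis=1,
#    so the array can be split back by column; the split point is assumed here.
sampling_horizon = 7  # hypothetical value; read the real one from data_config.yaml
dataset = np.load("dataset_full_no_smooth.npy")
data, targets = dataset[:, :sampling_horizon], dataset[:, sampling_horizon:]
print(data.shape, targets.shape)
```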