
Commit

24h readings
francesco-vaselli committed Jan 28, 2024
1 parent 9429717 commit 216e7c7
Showing 5 changed files with 492 additions and 144 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -2,7 +2,7 @@
data
logs
src/models/baseline_figures
.npy
*.npy

# Byte-compiled / optimized / DLL files
__pycache__/
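The .gitignore change swaps the literal pattern `.npy` for the glob `*.npy`, so every generated NumPy dump (e.g. the `dataset_24h.npy` and `dataset_full_no_smooth.npy` files written by the script below) is ignored, not just a file literally named `.npy`. A minimal sketch of the difference using Python's `fnmatch`, which only approximates gitignore matching; the filenames are illustrative:

```python
from fnmatch import fnmatch

files = ["dataset_24h.npy", "dataset_full_no_smooth.npy", ".npy"]

# old pattern: only a file literally named ".npy" would match
print([f for f in files if fnmatch(f, ".npy")])   # ['.npy']

# new pattern: every generated NumPy dump matches
print([f for f in files if fnmatch(f, "*.npy")])  # all three names
```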
284 changes: 145 additions & 139 deletions configs/data_config.yaml
@@ -1,143 +1,149 @@
# config for the full dataset builder
data_dir: "/home/fvaselli/Documents/PHD/TSA/TSA/data/data_oh"
data_dir: "/home/fvaselli/Documents/TSA/data/data_oh"
data_type: "OH_24h"
# patients ids
ids: ['72492570',
'86323903',
'05274556',
'95851255',
'69739062',
'73398408',
'80625186',
'27526291',
'86286077',
'27819368',
'94200862',
'84109428',
'15634563',
'37948668',
'76817975',
'67208817',
'49551394',
'37998755',
'57176789',
'54280576',
'19626656',
'60844515',
'62401782',
'33470634',
'70811987',
'71397255',
'20396154',
'02199852',
'56958831',
'14092221',
'70454270',
'68267781',
'85199788',
'68026248',
'13783771',
'99712241',
'97417885',
'20649783',
'86025410',
'48509634',
'96805916',
'93839818',
'67359234',
'48540630',
'79526193',
'89727223',
'40997757',
'66836068',
'41131654',
'32997134',
'71618088',
'91161972',
'37875431',
'00221634',
'28176124',
'66773091',
'17161370',
'63725802',
'74077367',
'28608066',
'65143138',
'69587086',
'13029224',
'98340749',
'51968000',
'35719805',
'33962890',
'18991425',
'98974339',
'95614431',
'90398368',
'25692073',
'38110191',
'85653830',
'69965708',
'24448124',
'33831564',
'67167655',
'43589707',
'80501215',
'93606058',
'39986716',
'77104076',
'40237051',
'21946407',
'35533061',
'41663654',
'45120081',
'47323535',
'78420229',
'73521474',
'23428091',
'84589080',
'77411181',
'28756888',
'81099003',
'46253612',
'22961398',
'99848889',
'20216809',
'89710417',
'66019205',
'99296581',
'49796612',
'56568290',
'00897741',
'13484299',
'04762925',
'64024750',
'42052178',
'26856617',
'88252802',
'81680176',
'07886752',
'89032650',
'97872409',
'28768536',
'01352464',
'62345070',
'40634871',
'02033176',
'12689381',
'61179686',
'16975609',
'00309157',
'15558575',
'84984656',
'71236754',
'50311906',
'80373992',
'32407882',
'66937570',
'87770486',
'88004055',
'96254963',
'47750728',]
test_ids: ['99908129']
# '86323903',
# '05274556',
# '95851255',
# '69739062',
# '73398408',
# '80625186',
# '27526291',
# '86286077',
# '27819368',
# '94200862',
# '84109428',
# '15634563',
# '37948668',
# '76817975',
# '67208817',
# '49551394',
# '37998755',
# '57176789',
# '54280576',
# '19626656',
# '60844515',
# '62401782',
# '33470634',
# '70811987',
# '71397255',
# '20396154',
# '02199852',
# '56958831',
# '14092221',
# '70454270',
# '68267781',
# '85199788',
# '68026248',
# '13783771',
# '99712241',
# '97417885',
# '20649783',
# '86025410',
# '48509634',
# '96805916',
# '93839818',
# '67359234',
# '48540630',
# '79526193',
# '89727223',
# '40997757',
# '66836068',
# '41131654',
# '32997134',
# '71618088',
# '91161972',
# '37875431',
# '00221634',
# '28176124',
# '66773091',
# '17161370',
# '63725802',
# '74077367',
# '28608066',
# '65143138',
# '69587086',
# '13029224',
# '98340749',
# '51968000',
# '35719805',
# '33962890',
# '18991425',
# '98974339',
# '95614431',
# '90398368',
# '25692073',
# '38110191',
# '85653830',
# '69965708',
# '24448124',
# '33831564',
# '67167655',
# '43589707',
# '80501215',
# '93606058',
# '39986716',
# '77104076',
# '40237051',
# '21946407',
# '35533061',
# '41663654',
# '45120081',
# '47323535',
# '78420229',
# '73521474',
# '23428091',
# '84589080',
# '77411181',
# '28756888',
# '81099003',
# '46253612',
# '22961398',
# '99848889',
# '20216809',
# '89710417',
# '66019205',
# '99296581',
# '49796612',
# '56568290',
# '00897741',
# '13484299',
# '04762925',
# '64024750',
# '42052178',
# '26856617',
# '88252802',
# '81680176',
# '07886752',
# '89032650',
# '97872409',
# '28768536',
# '01352464',
# '62345070',
# '40634871',
# '02033176',
# '12689381',
# '61179686',
# '16975609',
# '00309157',
# '15558575',
# '84984656',
# '71236754',
# '50311906',
# '80373992',
# '32407882',
# '66937570',
# '87770486',
# '88004055',
# '96254963',
# '47750728',
# '99908129',
# '63047517',
# '80796147',
'64406000',]
test_ids: []
# '99908129']

# '99908129',]
# '63047517',
@@ -156,8 +162,8 @@ smooth: False
# target_weight
target_weight: 1
# standardize
standardize: False
standardize_by_ref: True
standardize: True
standardize_by_ref: False
standardize_params:
mean: 127.836
std: 60.410
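The config now enables global standardization (`standardize: True`) and disables `standardize_by_ref`, with fixed `standardize_params` of mean 127.836 and std 60.410 (plausibly CGM readings in mg/dL; `standardize_by_ref` presumably standardizes against per-patient reference statistics instead, and that path is switched off here). A minimal sketch of how such fixed parameters would typically be applied, assuming a simple z-score; the helper names and where exactly this happens in the pipeline are assumptions, not the repository's code:

```python
import numpy as np

# Values taken from standardize_params in data_config.yaml
MEAN_MGDL = 127.836
STD_MGDL = 60.410

def standardize(cgm, mean=MEAN_MGDL, std=STD_MGDL):
    """Z-score a CGM trace with the fixed population statistics."""
    return (np.asarray(cgm, dtype=float) - mean) / std

def destandardize(z, mean=MEAN_MGDL, std=STD_MGDL):
    """Map standardized values back to the original glucose scale."""
    return np.asarray(z, dtype=float) * std + mean

# round-trip check
trace = np.array([90.0, 127.836, 250.0])
assert np.allclose(destandardize(standardize(trace)), trace)
```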
21 changes: 17 additions & 4 deletions src/data_processing/build_dataset.py
Expand Up @@ -10,6 +10,7 @@

def build_dataset(
data_dir,
data_type,
ids,
test_ids,
sampling_horizon,
@@ -53,7 +54,7 @@ def build_dataset(
print(files_ids)
for f, pid in zip(files, files_ids):
reader = DataReader(
"OH", f, 5
data_type, f, 5
)
# a patient may have multiple json files
# so we check if the patient is already in the dict
@@ -64,8 +65,18 @@

print(f"Patient {pid} has {len(train_data[pid])} entries.")

# if data_type == "OH_24h", save the dict and return
if data_type == "OH_24h":
print("Saving dataset_24h.npy")
# print the shape of the dict
for k in train_data:
print(k, np.array(train_data[k]).shape)
train_data = np.array(train_data)
np.save("dataset_24h.npy", train_data)
print("Saved dataset_24h.npy")
return
# a dumb dataset instance with first file of data_dir
train_dataset = CGMSDataSeg("OH", files[0], 5)
train_dataset = CGMSDataSeg(data_type, files[0], 5)
print(len(train_dataset.data)) # Check length before
train_pids = set(ids) - set(test_ids)
local_train_data = []
@@ -112,6 +123,7 @@ def main(data_config):
config = yaml.load(f, Loader=yaml.FullLoader)

data_dir = config["data_dir"]
data_type = config["data_type"]
ids = config["ids"]
test_ids = config["test_ids"]
sampling_horizon = config["sampling_horizon"]
@@ -129,6 +141,7 @@

data, targets = build_dataset(
data_dir,
data_type,
ids,
test_ids,
sampling_horizon,
@@ -147,10 +160,10 @@

# save data and targets as numpy arrays, in same file
dataset = np.concatenate((data, targets), axis=1)
np.save("dataset_99908129_smooth_up.npy", dataset)
np.save("dataset_full_no_smooth.npy", dataset)
# dataset = tf.data.Dataset.from_tensor_slices((data, targets))
# save
# dataset.save("data/dataset")

if __name__ == "__main__":
main('/home/fvaselli/Documents/PHD/TSA/TSA/configs/data_config.yaml')
main('/home/fvaselli/Documents/TSA/configs/data_config.yaml')
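The new `OH_24h` branch in `build_dataset` wraps the per-patient dictionary in a 0-d object array and writes it to `dataset_24h.npy`, while the regular path still concatenates windows and targets along `axis=1` and now saves the result as `dataset_full_no_smooth.npy`. A minimal sketch of how these two artifacts could be read back: the `allow_pickle=True` / `.item()` step is required for the dict case, and the column split for the second file assumes the first `sampling_horizon` columns are the input window, which is an assumption based on the concatenation order above:

```python
import numpy as np

# 1) OH_24h output: np.save stored a 0-d object array wrapping {patient_id: readings},
#    so loading needs allow_pickle=True and .item() to recover the dict.
train_data = np.load("dataset_24h.npy", allow_pickle=True).item()
for pid, readings in train_data.items():
    print(pid, np.asarray(readings).shape)

# 2) Regular output: data and targets were concatenated along axis=1,
#    so the array can be split back by column; the split point is assumed here.
sampling_horizon = 7  # hypothetical value; read the real one from data_config.yaml
dataset = np.load("dataset_full_no_smooth.npy")
data, targets = dataset[:, :sampling_horizon], dataset[:, sampling_horizon:]
print(data.shape, targets.shape)
```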