From 0e12677d75b3a1cafd4a37eaf608a45bada85b99 Mon Sep 17 00:00:00 2001
From: Francesco Vaselli <francesco.vaselli@protonmail.com>
Date: Thu, 23 May 2024 10:07:58 +0200
Subject: [PATCH] ohio data and attention optim

---
 configs/ohio_data_config.yaml             |  8 ++++----
 src/data_processing/build_ohio_dataset.py |  8 ++++----
 src/models/param_scan.py                  |  2 +-
 src/utils.py                              | 16 ++++++++--------
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/configs/ohio_data_config.yaml b/configs/ohio_data_config.yaml
index dec6af6..1637f05 100644
--- a/configs/ohio_data_config.yaml
+++ b/configs/ohio_data_config.yaml
@@ -1,5 +1,5 @@
 # config for the full dataset builder 
-data_dir: "/home/fvaselli/Documents/PHD/TSA/TSA/data/data_ohio"
+data_dir: "/home/fvaselli/Documents/PHD/TSA/TSA/data/test"
 # patients ids
 ids: ['540', '552', '544', '567', '584', '596']
 test_ids: []
@@ -12,15 +12,15 @@ scale: 1
 # outtype
 outtype: "History"
 # smooth
-smooth: True
+smooth: False
 # target_weight
 target_weight: 1
 # standardize
 standardize: False
 standardize_by_ref: True
 standardize_params:
-  mean: 127.836 # 144.982
-  std: 60.410 #57.941
+  mean: 144.96
+  std: 58.062 #57.941
 # Computed Mean: 144.98199462890625, Computed Std: 58.11943817138672
 # dataset smooth Computed Mean: 144.98204040527344, Computed Std: 57.940860748291016
 # cutpoint (negative= take all the data)
diff --git a/src/data_processing/build_ohio_dataset.py b/src/data_processing/build_ohio_dataset.py
index ac57b52..03884f0 100644
--- a/src/data_processing/build_ohio_dataset.py
+++ b/src/data_processing/build_ohio_dataset.py
@@ -30,9 +30,9 @@ def build_dataset(
     files = []
     files_ids = []
     for pid in ids:
-        files += [f"/home/fvaselli/Documents/TSA/data/data_ohio/{pid}-ws-testing.xml"]
+        files += [f"/home/fvaselli/Documents/PHD/TSA/TSA/data/test/{pid}-ws-testing.xml"]
         reader = DataReader(
-            "ohio", f"/home/fvaselli/Documents/TSA/data/data_ohio/{pid}-ws-testing.xml", 5
+            "ohio", f"/home/fvaselli/Documents/PHD/TSA/TSA/data/test/{pid}-ws-testing.xml", 5
         )
         train_data[pid] = reader.read()
         
@@ -121,10 +121,10 @@ def main(data_config):
 
     # save data and targets as numpy arrays, in same file
     dataset = np.concatenate((data, targets), axis=1)
-    np.save("/home/fvaselli/Documents/TSA/data/data_ohio/dataset_ohio_smooth_stdbyupsampled.npy", dataset)
+    np.save("/home/fvaselli/Documents/PHD/TSA/TSA/data/test/dataset_ohio_stdby.npy", dataset)
     # dataset = tf.data.Dataset.from_tensor_slices((data, targets))
     # save
     # dataset.save("data/dataset")
 
 if __name__ == "__main__":
-    main('/home/fvaselli/Documents/TSA/configs/ohio_data_config.yaml')
\ No newline at end of file
+    main('/home/fvaselli/Documents/PHD/TSA/TSA/configs/ohio_data_config.yaml')
\ No newline at end of file
diff --git a/src/models/param_scan.py b/src/models/param_scan.py
index 7baf9f4..6e8f940 100644
--- a/src/models/param_scan.py
+++ b/src/models/param_scan.py
@@ -359,7 +359,7 @@ def main():
         config = yaml.load(f, Loader=yaml.FullLoader)
 
     # models = ["cnn", "rnn", "transformer"]
-    targets = ["regression", "classification", "multi_classification"]
+    targets = ["multi_classification"]
 
     parser = argparse.ArgumentParser()
     parser.add_argument(
diff --git a/src/utils.py b/src/utils.py
index aaf9c0d..25e0279 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -197,10 +197,10 @@ def check_classification(
         pred *= 100
         true *= 100
 
-    # NOTE now it is 0 for hypo and 1 for hyper
-    # diffrent from exam project!!
-    pred_label = (pred[:, ind] > threshold).astype(int)  # Assuming feature_dim = 1 
-    true_label = (true[:, ind] > threshold).astype(int)  # Adjust index if different
+    # NOTE: labels are now 1 for hypo (value below threshold) and 0 for hyper,
+    # so that the convention is consistent with the other models.
+    pred_label = (pred[:, ind] < threshold).astype(int)  # Assuming feature_dim = 1 
+    true_label = (true[:, ind] < threshold).astype(int)  # Adjust index if different
 
     fpr, tpr, _ = roc_curve(true_label, pred_label)
     roc_auc = auc(fpr, tpr)
@@ -240,7 +240,7 @@ def on_epoch_end(self, epoch, logs=None):
             specificity = tn / (tn + fp)
             precision = tp / (tp + fp)
             npv = tn / (tn + fn)
-            f1 = 2 * (precision * sensitivity) / (precision + sensitivity)
+            f1 = 2 * (precision * sensitivity) / (precision + sensitivity) # in this way the f1 is relative to the hyper class 
             tf.summary.scalar("Accuracy", accuracy, step=epoch)
             tf.summary.scalar("Sensitivity", sensitivity, step=epoch)
             tf.summary.scalar("Specificity", specificity, step=epoch)
@@ -394,10 +394,10 @@ def plot_to_image(self, figure):
 
 def check_classification1(true, pred, threshold=0.5):
     # Assuming true and pred have shape [batch_size, 1]
-    # 0 for hypo and 1 for hyper
-    pred_label = (pred >= threshold).astype(int)
+    # Labels: 1 for hypo (value < threshold), 0 for hyper (value >= threshold)
+    pred_label = (pred < threshold).astype(int)
     # 
-    true_label = (true >= threshold).astype(int)
+    true_label = (true < threshold).astype(int)
     print("true_label shape:", true_label.shape)
     print("pred_label shape:", pred_label.shape)
     print("example pred vs true:", pred_label[0], true_label[0])