fix correlation values

neuromodulation · Nov 4, 2024 · 1c7cb9e
1 parent 62c366b
commit 1c7cb9e
Show file tree

Hide file tree

Showing 5 changed files with 30 additions and 21 deletions.
diff --git a/figure_25_correlated_pkg_all_scores.py b/figure_25_correlated_pkg_all_scores.py
@@ -96,29 +96,29 @@
 plt.savefig(os.path.join(PATH_FIGURES, "pkg_bradykinesia_correlation_sum_subj.pdf"))  
 plt.show(block=True)
 
-plt.figure(figsize=(8, 20))
-for idx_plt_col, col_plt in enumerate(["UE", "LE", "postural", "kinetic", "updrs_tremor", "tremor_constancy"]):
+plt.figure(figsize=(6.5, 3.2))
+for idx_plt_col, col_plt in enumerate(["updrs_tremor"]):  # ["UE", "LE", "postural", "kinetic", "updrs_tremor", "tremor_constancy"]
     for idx_pkg, col_pkg in enumerate(["pkg_tremor_mean", "pkg_tremor_max", "pkg_tremor_75"]):
-        plt.subplot(6, 3, 1+idx_pkg + idx_plt_col*3)
+        plt.subplot(1, 3, 1+idx_pkg + idx_plt_col*3)
         idx_not_none = ~df_out[col_pkg].isnull()
         data_plt = df_out[idx_not_none].groupby(["sub"])[[col_plt, col_pkg]].mean().reset_index()
-        sb.regplot(data=data_plt, x=col_pkg, y=col_plt)
+        sb.regplot(data=data_plt, x=col_pkg, y=col_plt, scatter_kws={'s':14*1.7})
         rho, p = stats.spearmanr(data_plt[col_pkg], data_plt[col_plt])
-        _, p = nm_stats.permutationTestSpearmansRho(
-            data_plt[col_pkg], data_plt[col_plt], False, None, 5000
-        )
+        #_, p = nm_stats.permutationTestSpearmansRho(
+        #    data_plt[col_pkg], data_plt[col_plt], False, None, 5000
+        #)
         plt.title(f"rho={rho:.2f}, p={p:.2f}")
 plt.suptitle("Tremor PKG - UPDRS correlations")
 plt.tight_layout()
 plt.savefig(os.path.join(PATH_FIGURES, "pkg_tremor_correlation_mean_sub.pdf"))
 plt.show(block=True)
 
-plt.figure(figsize=(8, 5))
+plt.figure(figsize=(6.5, 3.4))
 for idx_plt_col, col_plt in enumerate(["UPDRS IV", ]):
     for idx_pkg, col_pkg in enumerate(["pkg_dk_mean", "pkg_dk_max", "pkg_dk_75"]):
         plt.subplot(1, 3, 1+idx_pkg + idx_plt_col*3)
         # remove inf values
-        #idx_not_none = ~df_out[col_pkg].isnull()
+        idx_not_none = ~df_out[col_pkg].isnull()
         idx_not_inf = np.isfinite(df_out[col_pkg])
         data_plt = df_out[idx_not_none].groupby(["sub"])[[col_plt, col_pkg]].mean().reset_index()
         sb.regplot(data=data_plt[idx_not_inf], x=col_pkg, y=col_plt)

diff --git a/figure_28_boxplot_regions.py b/figure_28_boxplot_regions.py
@@ -32,5 +32,6 @@
 plt.ylabel("Balanced accuracy")
 plt.title("Region-wise performances")
 plt.tight_layout()
+plt.savefig(os.path.join(PATH_FIGURES, "region_wise_performances.pdf"))
 plt.show(block=True)
 
diff --git a/figure_31_how_much_data_is_needed.py b/figure_31_how_much_data_is_needed.py
@@ -64,10 +64,11 @@
         plt.text(durations[i] / 60, np.array(sub_per).mean(axis=0)[i], f"{np.round(np.array(sub_per).mean(axis=0)[i], 2)}", ha="center", va="bottom")
 
     plt.xscale('log')
-    plt.title(f"LOHO PKG BK CV different training duration")
+    plt.title(f"LOHO PKG {label} CV different training duration")
     plt.tight_layout()
-    plt.savefig(os.path.join(PATH_FIGURE, f"LOHO_different_training_duration_sub_{label}.pdf"))
-    #plt.show(block=True)
+    #plt.savefig(os.path.join(PATH_FIGURE, f"LOHO_different_training_duration_sub_{label}.pdf"))
+    plt.show(block=True)
+    print("")
 
 plt.figure(figsize=(10, 5))
 sns.boxplot(data=df, x="dur", y="per", palette="viridis", showfliers=False, showmeans=True)

diff --git a/figure_33_joint_plot.py b/figure_33_joint_plot.py
@@ -111,7 +111,7 @@ def get_dur_per_relation(label):
 
     df = pd.concat(df_, axis=0)
     # clip the balanced accuracy to 0.5 and 1
-    if label != "bk":
+    if label != "pkg_bk":
         df["per"] = np.clip(df["per"], 0.5, 1)
 
     return df
@@ -184,7 +184,10 @@ def plot_per_train_time_relation(df, label):
             l_features.append(df)
 
         df_features = pd.concat(l_features, axis=0)
-        df_all_features = get_all_ch_performances(False, label_name, "corr_coeff")
+        if label_name == "pkg_bk":
+            df_all_features = get_all_ch_performances(False, label_name, "corr_coeff")
+        else:
+            df_all_features = get_all_ch_performances(True, label_name, "ba")
         df_all_features["feature_mod"] = "all"
         df_features_comb = pd.concat([df_features, df_all_features], axis=0)
 
@@ -194,7 +197,7 @@ def plot_per_train_time_relation(df, label):
         else:
             class_ = "True"
         l_norms = []
-        for norm_window in [5, 10, 20, 30, 60, 120, 180, 300, 480, 720, 960, 1200, 1440]:
+        for norm_window in [0, 5, 10, 20, 30, 60, 120, 180, 300, 480, 720, 960, 1200, 1440]:
             OUT_FILE = f"d_out_patient_across_{label_name}_class_{class_}_{norm_window}.pkl"
             PATH_READ = os.path.join(PATH_PER, OUT_FILE)
 
@@ -206,21 +209,25 @@ def plot_per_train_time_relation(df, label):
 
         df_per_dur_rel = get_dur_per_relation(label_name)
 
+        if label_name == "pkg_bk":
+            y_label = "Correlation coefficient"
+        else:
+            y_label = "Balanced accuracy"
         plt.subplot(3, 4, 4*idx_+1)
-        plot_boxplot(df_norm, "norm_window", "Correlation coefficient")
+        plot_boxplot(df_norm, "norm_window", y_label)
 
         plt.subplot(3, 4, 4*idx_+2)
-        plot_boxplot(df_features_comb, "feature_mod", "Correlation coefficient",
+        plot_boxplot(df_features_comb, "feature_mod", y_label,
                     order_=df_features_comb.groupby("feature_mod")["per"].mean().sort_values(ascending=True).index)
 
         plt.subplot(3, 4, 4*idx_+3)
-        plot_boxplot(df_models, "model", "Correlation coefficient",
+        plot_boxplot(df_models, "model", y_label,
                     order_=df_models.groupby("model")["per"].mean().sort_values(ascending=True).index)
 
         plt.subplot(3, 4, 4*idx_+4)
         plot_per_train_time_relation(df_per_dur_rel, label_name)
 
-    plt.savefig(os.path.join(PATH_FIGURES, "figure_33_joint_plot.pdf"))
+    #plt.savefig(os.path.join(PATH_FIGURES, "figure_33_joint_plot.pdf"))
     plt.show(block=True)
 
     print("df")
diff --git a/run_decoding_ucsf_across_patients_diff_norm_windows.py b/run_decoding_ucsf_across_patients_diff_norm_windows.py
@@ -30,10 +30,10 @@
                 continue
 
             if norm_window == 0:
-                PATH_OUT = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_std_10s_window_length"
+                PATH_OUT = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_std"
                 df_all = pd.read_csv(os.path.join(PATH_OUT, "all_merged_preprocessed.csv"), index_col=0)
             else:
-                PATH_OUT = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_std_10s_window_length"
+                PATH_OUT = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_normalized"
                 df_all = pd.read_csv(os.path.join(PATH_OUT, str(norm_window), "all_merged_normed.csv"), index_col=0)
             #df_all = df_all.drop(columns=["Unnamed: 0"])
             subs = df_all["sub"].unique()