Nm database (#350)
* first version of database integration

* debug db commit

* debug paths, adjust timing for lsl

* check permission of github workflow

* fixing permission

* fixing gh workflow permission

* set permissions for file and dir

* checking for paths before setting permissions

* fixing permissions

* another try fixing gh workflow permissions

* adding time_idx to db path to resolve conflicts

* refactor the database

* possibility to save feature_df as csv

* auto delete old example dbs

* debug db handling

* conditional create table in run method, delete permission adjustments for db

* debug times

* changed paths for dbs and db creation

* clean up nm_database

* resolve nm_steam file name

* added typehints, added save_interval param to run

* add last commit to db

* Toni review

* Fix db bug

* - Replace spaces with underscores for freq band
  - Replace hyphens with underscores for re-referenced channel names

* Change "folder_name" parameter for "prefix" downstream of nm.Stream

* Generate unique filenames for new database files

* Format and fix Ruff warnings

* change default params

* add simple test for nm_db write

* add catch to create test folder

* simplify nm_database create out_dir

---------

Co-authored-by: Toni M. Brotons <[email protected]>
Co-authored-by: timonmerk <[email protected]>
3 people authored Aug 1, 2024
1 parent 01bcf70 commit 88d15aa
Showing 37 changed files with 558 additions and 243 deletions.
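
The user-facing change running through the example scripts below: features now go to the new database output, and writing the old CSV files is opt-in via save_csv. A minimal sketch of the updated call, assuming a configured nm.Stream (`stream`), a data array, and the PATH_OUT/RUN_NAME placeholders used in the examples:

    # Sketch based on the updated example scripts in this commit;
    # save_csv=True additionally writes <RUN_NAME>_FEATURES.csv
    # alongside the database output.
    features = stream.run(
        data,
        out_path_root=PATH_OUT,
        folder_name=RUN_NAME,
        save_csv=True,
    )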
1 change: 1 addition & 0 deletions .gitignore
@@ -48,6 +48,7 @@ coverage.xml
 *.py,cover
 .hypothesis/
 .pytest_cache/
+/test_data

 # Translations
 *.mo
2 changes: 1 addition & 1 deletion examples/plot_0_first_demo.py
@@ -146,7 +146,7 @@ def generate_random_walk(NUM_CHANNELS, TIME_DATA_SAMPLES):
     line_noise=50,
 )

-features = stream.run(data)
+features = stream.run(data, save_csv=True)

 # %%
 # Feature Analysis
13 changes: 6 additions & 7 deletions examples/plot_1_example_BIDS.py
@@ -49,9 +49,7 @@
     line_noise,
     coord_list,
     coord_names,
-) = nm_IO.read_BIDS_data(
-    PATH_RUN=PATH_RUN
-)
+) = nm_IO.read_BIDS_data(PATH_RUN=PATH_RUN)

 nm_channels = nm_define_nmchannels.set_channels(
     ch_names=raw.ch_names,
@@ -94,9 +92,9 @@
 settings.features.sharpwave_analysis = True
 settings.features.coherence = True

-settings.coherence.channels = [("LFP_RIGHT_0", "ECOG_RIGHT_0")]
+settings.coherence.channels = [("LFP_RIGHT_0", "ECOG_RIGHT_0")]

-settings.coherence.frequency_bands = ["high beta", "low gamma"]
+settings.coherence.frequency_bands = ["high_beta", "low_gamma"]
 settings.sharpwave_analysis_settings.estimator["mean"] = []
 settings.sharpwave_analysis_settings.sharpwave_features.enable_all()
 for sw_feature in settings.sharpwave_analysis_settings.sharpwave_features.list_all():
@@ -118,6 +116,7 @@
     data=data,
     out_path_root=PATH_OUT,
     folder_name=RUN_NAME,
+    save_csv=True,
 )

 # %%
@@ -168,9 +167,9 @@
 # %%
 nm_plots.plot_corr_matrix(
     feature=feature_reader.feature_arr.filter(regex="ECOG_RIGHT_0"),
-    ch_name="ECOG_RIGHT_0-avgref",
+    ch_name="ECOG_RIGHT_0_avgref",
     feature_names=list(
-        feature_reader.feature_arr.filter(regex="ECOG_RIGHT_0-avgref").columns
+        feature_reader.feature_arr.filter(regex="ECOG_RIGHT_0_avgref").columns
     ),
     feature_file=feature_reader.feature_file,
     show_plot=True,
6 changes: 3 additions & 3 deletions examples/plot_3_example_sharpwave_analysis.py
@@ -310,16 +310,16 @@
     verbose=True,
 )

-df_features = stream.run(data=data[:, :30000])
+df_features = stream.run(data=data[:, :30000], save_csv=True)

 # %%
 # We can then plot two exemplary features, prominence and interval, and see that the movement amplitude can be clustered with those two features alone:

 plt.figure(figsize=(5, 3), dpi=300)
 print(df_features.columns)
 plt.scatter(
-    df_features["ECOG_RIGHT_0-avgref_Sharpwave_Max_prominence_range_5_80"],
-    df_features["ECOG_RIGHT_5-avgref_Sharpwave_Mean_interval_range_5_80"],
+    df_features["ECOG_RIGHT_0_avgref_Sharpwave_Max_prominence_range_5_80"],
+    df_features["ECOG_RIGHT_5_avgref_Sharpwave_Mean_interval_range_5_80"],
     c=df_features.MOV_RIGHT,
     alpha=0.8,
     s=30,
1 change: 1 addition & 0 deletions examples/plot_4_example_gridPointProjection.py
@@ -87,6 +87,7 @@
     data=data[:, :int(sfreq*5)],
     out_path_root=PATH_OUT,
     folder_name=RUN_NAME,
+    save_csv=True,
 )

 # %%
62 changes: 43 additions & 19 deletions py_neuromodulation/nm_IO.py
@@ -9,7 +9,6 @@
 from py_neuromodulation import logger, PYNM_DIR

 if TYPE_CHECKING:
-    from py_neuromodulation.nm_settings import NMSettings
     from mne_bids import BIDSPath
     from mne import io as mne_io

@@ -78,11 +77,12 @@ def read_BIDS_data(
         coord_names,
     )

+
 def read_mne_data(
     PATH_RUN: "_PathLike | BIDSPath",
     line_noise: int = 50,
 ):
-    """Read data in the mne.io.read_raw supported format.
+    """Read data in the mne.io.read_raw supported format.

     Parameters
     ----------
@@ -118,9 +118,10 @@ def read_mne_data(
         logger.info(
             f"Line noise is not available in the data, using value of {line_noise} Hz."
         )
+
     return raw_arr.get_data(), sfreq, ch_names, ch_types, bads


 def get_coord_list(
     raw: "mne_io.BaseRaw",
 ) -> tuple[list, list] | tuple[None, None]:
@@ -228,6 +229,7 @@ def read_plot_modules(
         z_stn,
     )

+
 def write_csv(df, path_out):
     """
     Function to save Pandas dataframes to disk as CSV using
@@ -239,40 +241,46 @@
     csv.write_csv(Table.from_pandas(df), path_out)

+
 def save_nm_channels(
     nmchannels: pd.DataFrame,
-    path_out: _PathLike,
-    folder_name: str = "",
+    out_dir: _PathLike,
+    prefix: str = "",
 ) -> None:
-    if folder_name:
-        path_out = PurePath(path_out, folder_name, folder_name + "_nm_channels.csv")
+    filename = f"{prefix}_nm_channels.csv" if prefix else "nm_channels.csv"
+    path_out = PurePath(out_dir, filename)
     write_csv(nmchannels, path_out)
     logger.info(f"nm_channels.csv saved to {path_out}")


 def save_features(
     df_features: pd.DataFrame,
-    path_out: _PathLike,
-    folder_name: str = "",
+    out_dir: _PathLike,
+    prefix: str = "",
 ) -> None:
-    if folder_name:
-        path_out = PurePath(path_out, folder_name, folder_name + "_FEATURES.csv")
-    write_csv(df_features, path_out)
-    logger.info(f"FEATURES.csv saved to {str(path_out)}")
+    filename = f"{prefix}_FEATURES.csv" if prefix else "_FEATURES.csv"
+    out_dir = PurePath(out_dir, filename)
+    write_csv(df_features, out_dir)
+    logger.info(f"FEATURES.csv saved to {str(out_dir)}")


-def save_sidecar(sidecar: dict, path_out: _PathLike, folder_name: str = "") -> None:
-    save_general_dict(sidecar, path_out, "_SIDECAR.json", folder_name)
+def save_sidecar(
+    sidecar: dict,
+    out_dir: _PathLike,
+    prefix: str = "",
+) -> None:
+    save_general_dict(sidecar, out_dir, prefix, "_SIDECAR.json")


 def save_general_dict(
     dict_: dict,
-    path_out: _PathLike,
+    out_dir: _PathLike,
+    prefix: str = "",
     str_add: str = "",
-    folder_name: str = "",
 ) -> None:
-    if folder_name:
-        path_out = PurePath(path_out, folder_name, folder_name + str_add)
+    # We should change this to a proper experiment name
+
+    path_out = PurePath(out_dir, f"{prefix}{str_add}")

     with open(path_out, "w") as f:
         json.dump(
@@ -387,3 +395,19 @@ def _todict(matobj) -> dict:
         else:
             dict[strg] = elem
     return dict
+
+
+def generate_unique_filename(path: _PathLike):
+    path = Path(path)
+
+    dir = path.parent
+    filename = path.stem
+    extension = path.suffix
+
+    counter = 1
+    while True:
+        new_filename = f"{filename}_{counter}{extension}"
+        new_file_path = dir / new_filename
+        if not new_file_path.exists():
+            return Path(new_file_path)
+        counter += 1
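
The new generate_unique_filename helper above avoids clobbering existing database files by appending an incrementing counter to the file stem. A quick usage sketch (the out/features.db path is hypothetical):

    from pathlib import Path
    from py_neuromodulation.nm_IO import generate_unique_filename

    out = Path("out") / "features.db"
    if out.exists():
        # probes out/features_1.db, out/features_2.db, ... and returns
        # the first path that does not yet exist
        out = generate_unique_filename(out)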
3 changes: 1 addition & 2 deletions py_neuromodulation/nm_analysis.py
@@ -392,7 +392,6 @@ def plot_all_features(

     @staticmethod
     def get_performace_sub_strip(performance_sub: dict, plt_grid: bool = False):
-
         ecog_strip_performance = []
         ecog_coords_strip = []
         cortex_grid = []
@@ -967,8 +966,8 @@ def set_score(
         nm_IO.save_general_dict(
             dict_=performance_dict,
             path_out=PATH_OUT,
+            prefix=folder_name,
             str_add=str_add,
-            folder_name=folder_name,
         )
         return performance_dict
17 changes: 11 additions & 6 deletions py_neuromodulation/nm_bispectra.py
@@ -41,6 +41,10 @@ def test_range(cls, filter_range):
         ), f"second frequency range value needs to be higher than first one, got {filter_range}"
         return filter_range

+    @field_validator("frequency_bands")
+    def fbands_spaces_to_underscores(cls, frequency_bands):
+        return [f.replace(" ", "_") for f in frequency_bands]
+

 FEATURE_DICT: dict[str, Callable] = {
     "mean": np.nanmean,
@@ -83,7 +87,7 @@ def __init__(
         self.max_freq = max(
             self.settings.f1s.frequency_high_hz, self.settings.f2s.frequency_high_hz
         )
-
+        # self.freqs: np.ndarray = np.array([])  # In case we pre-computed this

     def calc_feature(self, data: np.ndarray) -> dict:
@@ -115,16 +119,17 @@ def calc_feature(self, data: np.ndarray) -> dict:
             sampling_freq=self.sfreq,
             verbose=False,
         )
+
         waveshape.compute(
             f1s=tuple(self.settings.f1s),  # type: ignore
             f2s=tuple(self.settings.f2s),  # type: ignore
         )

         feature_results = {}
         for ch_idx, ch_name in enumerate(self.ch_names):
-
-            bispectrum = waveshape._bicoherence[ch_idx]  # Same as waveshape.results._data, skips a copy
+            bispectrum = waveshape._bicoherence[
+                ch_idx
+            ]  # Same as waveshape.results._data, skips a copy

             for component in self.settings.components.get_enabled():
                 spectrum_ch = COMPONENT_DICT[component](bispectrum)
@@ -146,4 +151,4 @@
                     f"{ch_name}_Bispectrum_{component}_{bispectrum_feature}_whole_fband_range"
                 ] = FEATURE_DICT[bispectrum_feature](spectrum_ch)

-        return feature_results
+        return feature_results
25 changes: 15 additions & 10 deletions py_neuromodulation/nm_bursts.py
@@ -1,12 +1,13 @@
 import numpy as np

+if np.__version__ >= "2.0.0":
+    from numpy.lib._function_base_impl import _quantile as np_quantile  # type:ignore
+else:
+    from numpy.lib.function_base import _quantile as np_quantile  # type:ignore
 from collections.abc import Sequence
 from itertools import product

-from pydantic import Field
+from pydantic import Field, field_validator
 from py_neuromodulation.nm_types import BoolSelector, NMBaseModel
 from py_neuromodulation.nm_features import NMFeature

@@ -20,7 +21,6 @@

 def get_label_pos(burst_labels, valid_labels):
-
     max_label = np.max(burst_labels, axis=2).flatten()
     min_label = np.min(
         burst_labels, axis=2, initial=LARGE_NUM, where=burst_labels != 0
@@ -48,9 +48,13 @@ class BurstFeatures(BoolSelector):
 class BurstSettings(NMBaseModel):
     threshold: float = Field(default=75, ge=0, le=100)
     time_duration_s: float = Field(default=30, ge=0)
-    frequency_bands: list[str] = ["low beta", "high beta", "low gamma"]
+    frequency_bands: list[str] = ["low_beta", "high_beta", "low_gamma"]
     burst_features: BurstFeatures = BurstFeatures()

+    @field_validator("frequency_bands")
+    def fbands_spaces_to_underscores(cls, frequency_bands):
+        return [f.replace(" ", "_") for f in frequency_bands]
+

 class Burst(NMFeature):
     def __init__(
@@ -182,12 +186,11 @@ def calc_feature(self, data: np.ndarray) -> dict:
         ]

         # Find (channel, band) coordinates for each valid label and get an array that maps each valid label to its channel/band
-        # Channel band coordinate is flattened to a 1D array of length (n_channels x n_fbands)
+        # Channel band coordinate is flattened to a 1D array of length (n_channels x n_fbands)
         label_positions = get_label_pos(burst_labels, valid_labels)

         # Now we're ready to calculate features
-

         if "duration" in self.used_features or "burst_rate_per_s" in self.used_features:
             # Handle division by zero using np.divide. Where num_bursts is 0, the result is 0
             self.burst_duration_mean = (
@@ -202,7 +205,9 @@

         if "duration" in self.used_features:
             # First get burst length for each valid burst
-            burst_lengths = label_sum(bursts, burst_labels, index=valid_labels) / self.sfreq
+            burst_lengths = (
+                label_sum(bursts, burst_labels, index=valid_labels) / self.sfreq
+            )

             # Now the max needs to be calculated per channel/band
             # For that, loop over channels/bands, get the corresponding burst lengths, and get the max
@@ -221,7 +226,7 @@
         if "amplitude" in self.used_features:
             # Max amplitude is just the max of the filtered data where there is a burst
             self.burst_amplitude_max = (filtered_data * bursts).max(axis=2)
-
+
             # The mean is actually a mean of means, so we need the mean for each individual burst
             label_means = label_mean(filtered_data, burst_labels, index=valid_labels)
             # Now, loop over channels/bands, get the corresponding burst means, and calculate the mean of means
@@ -266,9 +271,9 @@ def store_duration(
     def store_amplitude(
         self, feature_results: dict, ch_i: int, ch: str, fb_i: int, fb: str
     ):
-        feature_results[f"{ch}_bursts_{fb}_amplitude_mean"] = (
-            self.burst_amplitude_mean[ch_i, fb_i]
-        )
+        feature_results[f"{ch}_bursts_{fb}_amplitude_mean"] = self.burst_amplitude_mean[
+            ch_i, fb_i
+        ]
         feature_results[f"{ch}_bursts_{fb}_amplitude_max"] = self.burst_amplitude_max[
             ch_i, fb_i
         ]
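
The field_validator added to BurstSettings above (and its twin in nm_bispectra.py) normalizes band names during model validation, so older settings that still use spaces keep loading. A small sketch of the effect, assuming the pydantic model can be constructed standalone:

    from py_neuromodulation.nm_bursts import BurstSettings

    # "low beta" is rewritten to "low_beta" on validation, matching the
    # new underscore-based feature column names
    settings = BurstSettings(frequency_bands=["low beta", "low_gamma"])
    print(settings.frequency_bands)  # ['low_beta', 'low_gamma']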
(diffs for the remaining changed files not shown)
