Skip to content

Commit

Permalink
Merge pull request #14 from catalystneuro/add_convert_all_sessions
Browse files Browse the repository at this point in the history
Add convert all sessions script
  • Loading branch information
weiglszonja authored Jul 11, 2024
2 parents 7a67ae8 + d811ce2 commit 77c9573
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 11 deletions.
15 changes: 9 additions & 6 deletions src/howe_lab_to_nwb/vu2024/extractors/cxdimagingextractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,15 @@ def __init__(

with aicsimageio.readers.bioformats_reader.BioFile(file_path) as reader:
self._video = reader.to_dask()
if frame_indices is not None:
assert len(frame_indices) > 0, "frame_indices must be a non-empty list."
assert (
len(frame_indices) <= self._num_frames
), "frame_indices must be less than or equal to the number of frames."
self._video = self._video[frame_indices, ...]

if frame_indices is not None:
assert len(frame_indices) > 0, "frame_indices must be a non-empty list."
assert (
len(frame_indices) <= self._num_frames
), "frame_indices must be less than or equal to the number of frames."
self._video = self._video[frame_indices, ...]
self._times = self._times[frame_indices]
self._num_frames = len(frame_indices)

super().__init__(file_path=file_path, channel_name=channel_name, plane_name=plane_name)

Expand Down
202 changes: 202 additions & 0 deletions src/howe_lab_to_nwb/vu2024/vu2024_convert_all_sessions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
import re
from pathlib import Path

import pandas as pd
from neuroconv.utils import FilePathType, FolderPathType
from nwbinspector import inspect_all
from nwbinspector.inspector_tools import save_report, format_messages
from tqdm import tqdm

from howe_lab_to_nwb.vu2024.vu2024_convert_dual_wavelength_session import (
dual_wavelength_session_to_nwb,
single_wavelength_session_to_nwb,
)


def convert_all_sessions(
data_table_path: FilePathType,
folder_path: FolderPathType,
nwbfile_folder_path: FolderPathType,
stub_test: bool = False,
overwrite: bool = False,
):
"""
Convert all sessions from the Vu 2024 dataset to NWB format.
Parameters
----------
data_table_path : FilePathType
The path to the XLSX file containing info for all the sessions.
folder_path : FolderPathType
The root folder path to search for the filenames in the data table.
nwbfile_folder_path : FolderPathType
The folder path to save the NWB files.
stub_test : bool, optional
Whether to run the conversion as a stub test.
When set to True, write only a subset of the data for each session.
When set to False, write the entire data for each session.
overwrite : bool, optional
Whether to overwrite existing NWB files, default is False.
"""

data_table = pd.read_excel(data_table_path)
folder_path = Path(folder_path)

columns_to_group = ["Mouse", "Date (YYYYMMDD)"]
total_file_paths = data_table.groupby(columns_to_group).count().shape[0]
progress_bar = tqdm(
data_table.groupby(columns_to_group),
desc=f"Converting {total_file_paths} sessions to NWB ...",
position=0,
total=total_file_paths,
dynamic_ncols=True,
)

for (subject_id, date), table in progress_bar:
experiment_type = "single-wavelength" if len(table) == 1 else "dual-wavelength"
subject_id = str(subject_id).replace("-", "")

subject_folder_path = folder_path / subject_id

behavior_file_name = table["Raw behavior file"].values[0]
ttl_file_paths = list(subject_folder_path.rglob(behavior_file_name))
if not len(ttl_file_paths):
raise FileNotFoundError(f"Behavior file not found: {behavior_file_name}")

session_folder_path = ttl_file_paths[0].parent

imaging_file_paths = list(session_folder_path.glob("*.cxd"))
if not len(imaging_file_paths):
raise FileNotFoundError(f"No .cxd files found in {session_folder_path}")

ttl_stream_names = []
behavior_file_paths = []
for processed_behavior_file_name in table["Processed behavior file"]:
pattern = re.compile(r"(ttlIn1|ttlIn2)")
# Search for the pattern in the provided string
match = pattern.search(processed_behavior_file_name)
ttl_stream_name = match.group(0) if match else None
if ttl_stream_name is None:
raise ValueError(f"TTL stream name (ttlIn1, ttlIn2) not found in {processed_behavior_file_name}")
ttl_stream_names.append(ttl_stream_name)

behavior_file_paths.append(session_folder_path / processed_behavior_file_name)

fiber_photometry_file_paths = list(session_folder_path.glob("*_ROIs*.mat"))

ttl_file_path = imaging_file_paths[0].parent / table["Raw behavior file"].values[0]

ttl_stream_names = []
behavior_file_paths = []
for processed_behavior_file_name in table["Processed behavior file"]:
pattern = re.compile(r"(ttlIn1|ttlIn2)")
match = pattern.search(processed_behavior_file_name)
ttl_stream_name = match.group(0) if match else None
if ttl_stream_name is None:
raise ValueError(f"TTL stream name (ttlIn1, ttlIn2) not found in {processed_behavior_file_name}")
ttl_stream_names.append(ttl_stream_name)

behavior_file_paths.append(imaging_file_paths[0].parent / processed_behavior_file_name)

if len(imaging_file_paths) < len(table):
# In this case we have dual wavelength imaging data in the same file
# We will use the frame indices in dual_wavelength_session_to_nwb to determine which excitation wavelength the frame belongs to
imaging_file_paths = imaging_file_paths * len(table)
motion_corrected_file_paths = list(session_folder_path.glob("*.tif"))
if not len(motion_corrected_file_paths):
motion_corrected_file_paths = [None] * len(imaging_file_paths)
if len(motion_corrected_file_paths) != len(imaging_file_paths):
raise ValueError("The number of motion corrected imaging files must match the number of raw imaging files.")

fiber_locations_file_path = imaging_file_paths[0].parent.parent / f"{subject_id}_fiber_locations.xlsx"
if not fiber_locations_file_path.exists():
raise FileNotFoundError(f"Fiber locations file not found: {fiber_locations_file_path}")

excitation_wavelengths_in_nm = table["LED excitation wavelength (nm)"].values
indicators = []
for aav_string in table["Relevant injected sensor"].values:
pattern = re.compile(r"(dLight1\.3b|GCaMP7f|Ach3\.0|jRGECO1a)")
# Search for the pattern in the provided string
match = pattern.search(aav_string)
# If a match is found, return the matched string; otherwise, return None
indicator = match.group(0) if match else None
if indicator is None:
raise ValueError(f"Indicator (dLight1.3b, GCaMP7f, Ach3.0, jRGECO1a) not found in {aav_string}")
indicators.append(indicator)

nwbfile_path = nwbfile_folder_path / f"{subject_id}-{date}.nwb"
if nwbfile_path.exists() and not overwrite:
progress_bar.update(1)
continue

progress_bar.set_description(f"Converting subject '{subject_id}' session '{date}' session to NWB ...")
if experiment_type == "dual-wavelength":
dual_wavelength_session_to_nwb(
raw_imaging_file_paths=imaging_file_paths,
motion_corrected_imaging_file_paths=motion_corrected_file_paths,
fiber_photometry_file_paths=fiber_photometry_file_paths,
fiber_locations_file_path=fiber_locations_file_path,
excitation_wavelengths_in_nm=excitation_wavelengths_in_nm,
indicators=indicators,
ttl_file_path=ttl_file_path,
ttl_stream_names=ttl_stream_names,
behavior_file_paths=behavior_file_paths,
nwbfile_path=nwbfile_path,
stub_test=stub_test,
)
elif experiment_type == "single-wavelength":
single_wavelength_session_to_nwb(
raw_imaging_file_path=imaging_file_paths[0],
motion_corrected_imaging_file_path=motion_corrected_file_paths[0],
raw_fiber_photometry_file_path=fiber_photometry_file_paths[0],
fiber_locations_file_path=fiber_locations_file_path,
excitation_wavelength_in_nm=excitation_wavelengths_in_nm[0],
indicator=indicators[0],
ttl_file_path=ttl_file_path,
ttl_stream_name=ttl_stream_names[0],
behavior_file_path=behavior_file_paths[0],
nwbfile_path=nwbfile_path,
stub_test=stub_test,
)
progress_bar.update(1)

report_path = nwbfile_folder_path / "inspector_result.txt"
if not report_path.exists():
results = list(inspect_all(path=nwbfile_folder_path))
save_report(
report_file_path=report_path,
formatted_messages=format_messages(
results,
levels=["importance", "file_path"],
),
)


if __name__ == "__main__":
# Parameters for conversion

# The path to the XLSX file containing info for all the sessions
data_table_excel_file_path = Path("/Volumes/t7-ssd/Howe/data table.xlsx")

# The root folder path to search for the filenames in the data table
folder_path = Path("/Volumes/t7-ssd/Howe")

# The folder path to save the NWB files
nwbfile_folder_path = Path("/Volumes/t7-ssd/Howe/nwbfiles")

# Whether to overwrite existing NWB files, default is False
overwrite = False

# Whether to run the conversion as a stub test
# When set to True, write only a subset of the data for each session
# When set to False, write the entire data for each session
stub_test = False

convert_all_sessions(
data_table_path=data_table_excel_file_path,
folder_path=folder_path,
nwbfile_folder_path=nwbfile_folder_path,
stub_test=stub_test,
overwrite=overwrite,
)
28 changes: 23 additions & 5 deletions src/howe_lab_to_nwb/vu2024/vu2024nwbconverter.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""Primary NWBConverter class for this dataset."""
from typing import Optional

from neuroconv import NWBConverter
from neuroconv.datainterfaces import TiffImagingInterface, VideoInterface
from neuroconv.tools.signal_processing import get_rising_frames_from_ttl
from neuroconv.utils import DeepDict
from pymatreader import read_mat
from pynwb import NWBFile

from howe_lab_to_nwb.vu2024.interfaces import (
CxdImagingInterface,
Expand All @@ -17,6 +19,8 @@
class Vu2024NWBConverter(NWBConverter):
"""Primary conversion class for the Vu 2024 fiber photometry dataset."""

aligned = False

data_interface_classes = dict(
Imaging=CxdImagingInterface,
ProcessedImaging=TiffImagingInterface,
Expand Down Expand Up @@ -49,11 +53,19 @@ def get_metadata(self) -> DeepDict:
return metadata

def temporally_align_data_interfaces(self):
if self.aligned:
return
imaging = self.data_interface_objects["Imaging"]
fiber_photometry = self.data_interface_objects["FiberPhotometry"]
# Use the timestamps from the fiber photometry interface for the imaging data
# The timestamps from the fiber photometry data is from the TTL signals
fiber_photometry_timestamps = fiber_photometry.get_timestamps()

if imaging.source_data["frame_indices"] is not None:
# we must fix the timestamps for fiber photometry
frame_indices = imaging.source_data["frame_indices"]
fiber_photometry_timestamps = fiber_photometry_timestamps[frame_indices]
fiber_photometry.set_aligned_timestamps(aligned_timestamps=fiber_photometry_timestamps)
imaging.set_aligned_timestamps(aligned_timestamps=fiber_photometry_timestamps)

video_interfaces = [self.data_interface_objects[key] for key in self.data_interface_objects if "Video" in key]
Expand All @@ -66,10 +78,16 @@ def temporally_align_data_interfaces(self):
else:
raise ValueError(f"Could not determine TTL stream for video file {video_file_path}.")

video_timestamps = video_interface.get_timestamps()
ttl_file_path = fiber_photometry.source_data["ttl_file_path"]
ttl_data = read_mat(filename=ttl_file_path)
first_ttl_frame = get_rising_frames_from_ttl(trace=ttl_data[ttl_stream_name])[0]
video_interface.set_aligned_segment_starting_times(
aligned_segment_starting_times=[video_timestamps[0][first_ttl_frame]]
)
rising_frames = get_rising_frames_from_ttl(trace=ttl_data[ttl_stream_name])
video_timestamps = ttl_data["timestamp"][rising_frames]
if len(video_timestamps) > len(fiber_photometry_timestamps):
video_timestamps = video_timestamps[: len(fiber_photometry_timestamps)]
video_interface.set_aligned_timestamps(aligned_timestamps=[video_timestamps])

self.aligned = True

def add_to_nwbfile(self, nwbfile: NWBFile, metadata, conversion_options: Optional[dict] = None) -> None:
self.temporally_align_data_interfaces()
return super().add_to_nwbfile(nwbfile=nwbfile, metadata=metadata, conversion_options=conversion_options)

0 comments on commit 77c9573

Please sign in to comment.