Commit 64b2097: Add is_longitudinal attribute to the CuBIDS class

tientong98 committed Jan 17, 2025
1 parent 04b5862 commit 64b2097

Showing 5 changed files with 119 additions and 43 deletions.
88 changes: 71 additions & 17 deletions cubids/cubids.py
@@ -84,6 +84,8 @@ class CuBIDS(object):
         A data dictionary for TSV outputs.
     use_datalad : :obj:`bool`
         If True, use datalad to track changes to the BIDS dataset.
+    is_longitudinal : :obj:`bool`
+        If True, includes "ses" in filepath. Default is False.
     """
 
     def __init__(
@@ -93,6 +95,7 @@ def __init__(
         acq_group_level="subject",
         grouping_config=None,
         force_unlock=False,
+        is_longitudinal=False,
     ):
         self.path = os.path.abspath(data_root)
         self._layout = None
@@ -110,10 +113,11 @@ def __init__(
         self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir()
         self.data_dict = {}  # data dictionary for TSV outputs
         self.use_datalad = use_datalad  # True if flag set, False if flag unset
+        self.is_longitudinal = is_longitudinal  # True if flag set, False if flag unset
         if self.use_datalad:
             self.init_datalad()
 
-        if self.acq_group_level == "session":
+        if self.is_longitudinal and self.acq_group_level == "session":
             NON_KEY_ENTITIES.remove("session")
 
     @property
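
For orientation, a minimal usage sketch of the new flag (not part of the commit; it assumes CuBIDS is importable from the package root, and the dataset path is hypothetical):

    from cubids import CuBIDS  # assumed import path

    # Hypothetical longitudinal BIDS dataset with sub-*/ses-* directories.
    bids = CuBIDS(
        data_root="/data/dset",
        use_datalad=False,
        acq_group_level="session",
        is_longitudinal=True,  # opt in to "ses" handling in file paths
    )
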
@@ -452,7 +456,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T
         # remove renames file that gets created under the hood
         subprocess.run(["rm", "-rf", "renames"])
 
-    def change_filename(self, filepath, entities):
+    def change_filename(self, filepath, entities, is_longitudinal=False):
         """Apply changes to a filename based on the renamed entity sets.
 
         This function takes into account the new entity set names
@@ -464,6 +468,8 @@ def change_filename(self, filepath, entities):
             Path prefix to a file in the affected entity set change.
         entities : :obj:`dict`
             A pybids dictionary of entities parsed from the new entity set name.
+        is_longitudinal : :obj:`bool`, optional
+            If True, includes "ses" in filepath. Default is False.
 
         Notes
         -----
@@ -473,6 +479,7 @@ def change_filename(self, filepath, entities):
             filepath=filepath,
             entities=entities,
             out_dir=str(self.path),
+            is_longitudinal=self.is_longitudinal
         )
 
         exts = Path(filepath).suffixes
@@ -481,7 +488,8 @@ def change_filename(self, filepath, entities):
         suffix = entities["suffix"]
 
         sub = get_entity_value(filepath, "sub")
-        ses = get_entity_value(filepath, "ses")
+        if self.is_longitudinal:
+            ses = get_entity_value(filepath, "ses")
 
         # Add the scan path + new path to the lists of old, new filenames
         self.old_filenames.append(filepath)
@@ -577,7 +585,10 @@ def change_filename(self, filepath, entities):
             self.new_filenames.append(new_labeling)
 
         # RENAME INTENDED FORS!
-        ses_path = self.path + "/" + sub + "/" + ses
+        if self.is_longitudinal:
+            ses_path = self.path + "/" + sub + "/" + ses
+        elif not self.is_longitudinal:
+            ses_path = self.path + "/" + sub
         files_with_if = []
         files_with_if += Path(ses_path).rglob("fmap/*.json")
         files_with_if += Path(ses_path).rglob("perf/*_m0scan.json")
@@ -595,7 +606,7 @@ def change_filename(self, filepath, entities):
                 # Coerce IntendedFor to a list.
                 data["IntendedFor"] = listify(data["IntendedFor"])
                 for item in data["IntendedFor"]:
-                    if item == _get_participant_relative_path(filepath):
+                    if item == _get_participant_relative_path(filepath):
                         # remove old filename
                         data["IntendedFor"].remove(item)
                         # add new filename
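
In isolation, the IntendedFor rewrite above behaves like the following sketch (paths are illustrative; the sketch iterates over a copy, since removing from a list while iterating it can skip elements, which the committed code does not guard against):

    data = {"IntendedFor": ["ses-01/func/sub-01_ses-01_bold.nii.gz"]}
    old_rel = "ses-01/func/sub-01_ses-01_bold.nii.gz"
    new_rel = "ses-01/func/sub-01_ses-01_acq-VAR_bold.nii.gz"

    for item in list(data["IntendedFor"]):  # iterate over a copy
        if item == old_rel:
            data["IntendedFor"].remove(item)  # remove old filename
            data["IntendedFor"].append(new_rel)  # add new filename

    print(data["IntendedFor"])  # ['ses-01/func/sub-01_ses-01_acq-VAR_bold.nii.gz']
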
@@ -1363,6 +1374,7 @@ def get_layout(self):
         return self.layout
 
 
+# XXX: Remove _validate_json?
 def _validate_json():
     """Validate a JSON file's contents.
@@ -1402,8 +1414,29 @@ def _get_participant_relative_path(scan):
     This is what will appear in the IntendedFor field of any association.
     Examples:
     >>> _get_participant_relative_path(
     ...     "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz",
     ... )
     'ses-01/func/sub-01_ses-01_bold.nii.gz'
 
+    >>> _get_participant_relative_path(
+    ...     "/path/to/dset/sub-01/func/sub-01_bold.nii.gz",
+    ... )
+    'func/sub-01_bold.nii.gz'
+
+    >>> _get_participant_relative_path(
+    ...     "/path/to/dset/ses-01/func/ses-01_bold.nii.gz",
+    ... )
+    Traceback (most recent call last):
+    ValueError: Could not find subject in ...
     """
-    return "/".join(Path(scan).parts[-3:])
+    parts = Path(scan).parts
+    # Find the first part that starts with "sub-"
+    for i, part in enumerate(parts):
+        if part.startswith("sub-"):
+            return "/".join(parts[i+1:])
+    raise ValueError(f"Could not find subject in {scan}")
 
 
 def _get_bidsuri(filename, dataset_root):
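
The rewrite of _get_participant_relative_path fixes the old one-liner, which blindly kept the last three path components and therefore returned a subject-prefixed (wrong) path for cross-sectional datasets. A stdlib-only illustration of old versus new behavior:

    from pathlib import Path

    scan = "/path/to/dset/sub-01/func/sub-01_bold.nii.gz"  # cross-sectional layout

    # Old behavior: last three components wrongly keep the subject directory.
    print("/".join(Path(scan).parts[-3:]))  # sub-01/func/sub-01_bold.nii.gz

    # New behavior: everything after the first "sub-*" component.
    parts = Path(scan).parts
    sub_idx = next(i for i, p in enumerate(parts) if p.startswith("sub-"))
    print("/".join(parts[sub_idx + 1:]))  # func/sub-01_bold.nii.gz
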
@@ -1734,7 +1767,7 @@ def get_entity_value(path, key):
     return part
 
 
-def build_path(filepath, entities, out_dir):
+def build_path(filepath, entities, out_dir, is_longitudinal=False):
     """Build a new path for a file based on its BIDS entities.
 
     Parameters
@@ -1746,6 +1779,8 @@ def build_path(filepath, entities, out_dir):
         This should include all of the entities in the filename *except* for subject and session.
     out_dir : str
         The output directory for the new file.
+    is_longitudinal : bool, optional
+        If True, add "ses" to file path. Default is False.
 
     Returns
     -------
@@ -1758,6 +1793,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz",
     ...     {"acquisition": "VAR", "suffix": "T2w"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz'
@@ -1766,6 +1802,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
     ...     {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1775,6 +1812,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz",
     ...     {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz'
@@ -1784,6 +1822,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
     ...     {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1792,6 +1831,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
     ...     {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1801,6 +1841,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
     ...     {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1809,6 +1850,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
     ...     {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1817,6 +1859,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
     ...     {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1825,19 +1868,19 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz",
     ...     {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"},
     ...     "/output",
+    ...     True,
     ... )
     WARNING: DATATYPE CHANGE DETECTED
     '/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz'
-    It expects a longitudinal structure, so providing a cross-sectional filename won't work.
-    XXX: This is a bug.
+    It also works for cross-sectional filenames.
 
     >>> build_path(
     ...     "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz",
-    ...     {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
+    ...     {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     False,
     ... )
-    Traceback (most recent call last):
-    ValueError: Could not extract subject or session from ...
+    '/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz'
     """
     exts = Path(filepath).suffixes
     old_ext = "".join(exts)
@@ -1853,9 +1896,13 @@ def build_path(filepath, entities, out_dir):
             entity_file_keys.append(key)
 
     sub = get_entity_value(filepath, "sub")
-    ses = get_entity_value(filepath, "ses")
-    if sub is None or ses is None:
-        raise ValueError(f"Could not extract subject or session from {filepath}")
+    if sub is None:
+        raise ValueError(f"Could not extract subject from {filepath}")
+
+    if is_longitudinal:
+        ses = get_entity_value(filepath, "ses")
+        if ses is None:
+            raise ValueError(f"Could not extract session from {filepath}")
 
     # Add leading zeros to run entity if it's an integer.
     # If it's a string, respect the value provided.
@@ -1874,7 +1921,10 @@ def build_path(filepath, entities, out_dir):
         .replace("reconstruction", "rec")
     )
     if len(filename) > 0:
-        filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
+        if is_longitudinal:
+            filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
+        elif not is_longitudinal:
+            filename = f"{sub}_{filename}_{suffix}{old_ext}"
     else:
         raise ValueError(f"Could not construct new filename for {filepath}")

@@ -1894,5 +1944,9 @@ def build_path(filepath, entities, out_dir):
         dtype_new = dtype_orig
 
     # Construct the new filename
-    new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
+    if is_longitudinal:
+        new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
+    elif not is_longitudinal:
+        new_path = str(Path(out_dir) / sub / dtype_new / filename)
 
     return new_path
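
The doctests above also exercise build_path's run-padding rule (integer runs inherit the zero-padding of the original filename; string runs are used verbatim). Below is a standalone sketch of that rule, reconstructed from the doctest behavior rather than copied from the source; the helper name pad_run is hypothetical:

    import re

    def pad_run(original_name, run):
        """Pad integer runs to the width used in the original filename."""
        if isinstance(run, str):
            return run  # respect the value provided
        match = re.search(r"run-(\d+)", original_name)
        width = len(match.group(1)) if match else 1
        return str(run).zfill(width)

    print(pad_run("sub-01_task-rest_run-00001_bold.nii.gz", 2))  # 00002
    print(pad_run("sub-01_task-rest_run-1_bold.nii.gz", 2))      # 2
    print(pad_run("sub-01_task-rest_run-01_bold.nii.gz", "2"))   # 2
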
36 changes: 24 additions & 12 deletions cubids/metadata_merge.py
@@ -243,13 +243,13 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False):
     return 0
 
 
-def get_acq_dictionary():
+def get_acq_dictionary(is_longitudinal=False):
     """Create a BIDS data dictionary from dataframe columns.
 
     Parameters
     ----------
-    df : :obj:`pandas.DataFrame`
-        Pre export TSV that will be converted to a json dictionary.
+    is_longitudinal : :obj:`bool`, optional
+        If True, add "session" to acq_dict. Default is False.
 
     Returns
     -------
@@ -258,15 +258,16 @@ def get_acq_dictionary():
     """
     acq_dict = {}
     acq_dict["subject"] = {"Description": "Participant ID"}
-    acq_dict["session"] = {"Description": "Session ID"}
+    if is_longitudinal:
+        acq_dict["session"] = {"Description": "Session ID"}
     docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions"
     desc = "Acquisition Group. See Read the Docs for more information"
    acq_dict["AcqGroup"] = {"Description": desc + docs}
 
     return acq_dict
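
For reference, the two dictionary shapes get_acq_dictionary now produces (reconstructed from the code above; the AcqGroup description is abbreviated here):

    # is_longitudinal=False (default)
    {
        "subject": {"Description": "Participant ID"},
        "AcqGroup": {"Description": "Acquisition Group. See Read the Docs ..."},
    }

    # is_longitudinal=True
    {
        "subject": {"Description": "Participant ID"},
        "session": {"Description": "Session ID"},
        "AcqGroup": {"Description": "Acquisition Group. See Read the Docs ..."},
    }
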


-def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
+def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_longitudinal=False):
     """Find unique sets of Key/Param groups across subjects.
 
     This writes out the following files:
@@ -284,6 +285,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
         Prefix for output files.
     acq_group_level : {"subject", "session"}
         Level at which to group acquisitions.
+    is_longitudinal : :obj:`bool`, optional
+        If True, add "session" to acq_dict. Default is False.
     """
     from bids import config
     from bids.layout import parse_file_entities
@@ -298,9 +301,12 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
         file_entities = parse_file_entities(row.FilePath)
 
         if acq_group_level == "subject":
-            acq_id = (file_entities.get("subject"), file_entities.get("session"))
+            if is_longitudinal:
+                acq_id = (file_entities.get("subject"), file_entities.get("session"))
+            elif not is_longitudinal:
+                acq_id = (file_entities.get("subject"))
             acq_groups[acq_id].append((row.EntitySet, row.ParamGroup))
-        else:
+        elif is_longitudinal and acq_group_level == "session":
             acq_id = (file_entities.get("subject"), None)
             acq_groups[acq_id].append(
                 (row.EntitySet, row.ParamGroup, file_entities.get("session"))
@@ -326,17 +332,23 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
     for groupnum, content_id_row in enumerate(descending_order, start=1):
         content_id = content_ids[content_id_row]
         acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id)
-        for subject, session in contents_to_subjects[content_id]:
-            grouped_sub_sess.append(
-                {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
-            )
+        if is_longitudinal:
+            for subject, session in contents_to_subjects[content_id]:
+                grouped_sub_sess.append(
+                    {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
+                )
+        elif not is_longitudinal:
+            for subject in contents_to_subjects[content_id]:
+                grouped_sub_sess.append(
+                    {"subject": "sub-" + subject, "AcqGroup": groupnum}
+                )
 
     # Write the mapping of subject/session to
     acq_group_df = pd.DataFrame(grouped_sub_sess)
     acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False)
 
     # Create data dictionary for acq group tsv
-    acq_dict = get_acq_dictionary()
+    acq_dict = get_acq_dictionary(is_longitudinal)
     with open(output_prefix + "_AcqGrouping.json", "w") as outfile:
         json.dump(acq_dict, outfile, indent=4)
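
A sketch of the _AcqGrouping.tsv rows each mode writes (values are illustrative, not from a real dataset):

    import pandas as pd

    # Longitudinal: one row per subject/session pair.
    long_rows = [{"subject": "sub-01", "session": "1", "AcqGroup": 1}]
    # Cross-sectional: the session column is dropped entirely.
    cross_rows = [{"subject": "sub-01", "AcqGroup": 1}]

    pd.DataFrame(cross_rows).to_csv("example_AcqGrouping.tsv", sep="\t", index=False)
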
