Commit 64b2097: Add is_longitudinal attribute to the CuBIDS class

tientong98 committed Jan 17, 2025
1 parent 04b5862 commit 64b2097

Showing 5 changed files with 119 additions and 43 deletions.
88 changes: 71 additions & 17 deletions cubids/cubids.py
@@ -84,6 +84,8 @@ class CuBIDS(object):
         A data dictionary for TSV outputs.
     use_datalad : :obj:`bool`
         If True, use datalad to track changes to the BIDS dataset.
+    is_longitudinal : :obj:`bool`
+        If True, includes "ses" in filepath. Default is False.
     """
 
     def __init__(
@@ -93,6 +95,7 @@ def __init__(
         acq_group_level="subject",
         grouping_config=None,
         force_unlock=False,
+        is_longitudinal=False,
     ):
         self.path = os.path.abspath(data_root)
         self._layout = None
@@ -110,10 +113,11 @@ def __init__(
         self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir()
         self.data_dict = {}  # data dictionary for TSV outputs
         self.use_datalad = use_datalad  # True if flag set, False if flag unset
+        self.is_longitudinal = is_longitudinal  # True if flag set, False if flag unset
         if self.use_datalad:
             self.init_datalad()
 
-        if self.acq_group_level == "session":
+        if self.is_longitudinal and self.acq_group_level == "session":
             NON_KEY_ENTITIES.remove("session")
 
     @property
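
For orientation, a minimal usage sketch of the new flag (not part of the commit; it assumes CuBIDS is importable from the package root, and the dataset path is hypothetical):

    from cubids import CuBIDS  # assumed import path

    # Hypothetical longitudinal BIDS dataset with sub-*/ses-* directories.
    bids = CuBIDS(
        data_root="/data/dset",
        use_datalad=False,
        acq_group_level="session",
        is_longitudinal=True,  # opt in to "ses" handling in file paths
    )
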
@@ -452,7 +456,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T
         # remove renames file that gets created under the hood
         subprocess.run(["rm", "-rf", "renames"])
 
-    def change_filename(self, filepath, entities):
+    def change_filename(self, filepath, entities, is_longitudinal=False):
         """Apply changes to a filename based on the renamed entity sets.
 
         This function takes into account the new entity set names
@@ -464,6 +468,8 @@ def change_filename(self, filepath, entities):
             Path prefix to a file in the affected entity set change.
         entities : :obj:`dict`
             A pybids dictionary of entities parsed from the new entity set name.
+        is_longitudinal : :obj:`bool`, optional
+            If True, includes "ses" in filepath. Default is False.
 
         Notes
         -----
@@ -473,6 +479,7 @@ def change_filename(self, filepath, entities):
             filepath=filepath,
             entities=entities,
             out_dir=str(self.path),
+            is_longitudinal=self.is_longitudinal
         )
 
         exts = Path(filepath).suffixes
@@ -481,7 +488,8 @@ def change_filename(self, filepath, entities):
         suffix = entities["suffix"]
 
         sub = get_entity_value(filepath, "sub")
-        ses = get_entity_value(filepath, "ses")
+        if self.is_longitudinal:
+            ses = get_entity_value(filepath, "ses")
 
         # Add the scan path + new path to the lists of old, new filenames
         self.old_filenames.append(filepath)
@@ -577,7 +585,10 @@ def change_filename(self, filepath, entities):
             self.new_filenames.append(new_labeling)
 
         # RENAME INTENDED FORS!
-        ses_path = self.path + "/" + sub + "/" + ses
+        if self.is_longitudinal:
+            ses_path = self.path + "/" + sub + "/" + ses
+        elif not self.is_longitudinal:
+            ses_path = self.path + "/" + sub
         files_with_if = []
         files_with_if += Path(ses_path).rglob("fmap/*.json")
         files_with_if += Path(ses_path).rglob("perf/*_m0scan.json")
@@ -595,7 +606,7 @@ def change_filename(self, filepath, entities):
                 # Coerce IntendedFor to a list.
                 data["IntendedFor"] = listify(data["IntendedFor"])
                 for item in data["IntendedFor"]:
-                    if item == _get_participant_relative_path(filepath):
+                    if item == _get_participant_relative_path(filepath):
                         # remove old filename
                         data["IntendedFor"].remove(item)
                         # add new filename
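
In isolation, the IntendedFor rewrite above behaves like the following sketch (paths are illustrative; the sketch iterates over a copy, since removing from a list while iterating it can skip elements, which the committed code does not guard against):

    data = {"IntendedFor": ["ses-01/func/sub-01_ses-01_bold.nii.gz"]}
    old_rel = "ses-01/func/sub-01_ses-01_bold.nii.gz"
    new_rel = "ses-01/func/sub-01_ses-01_acq-VAR_bold.nii.gz"

    for item in list(data["IntendedFor"]):  # iterate over a copy
        if item == old_rel:
            data["IntendedFor"].remove(item)  # remove old filename
            data["IntendedFor"].append(new_rel)  # add new filename

    print(data["IntendedFor"])  # ['ses-01/func/sub-01_ses-01_acq-VAR_bold.nii.gz']
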
@@ -1363,6 +1374,7 @@ def get_layout(self):
         return self.layout
 
 
+# XXX: Remove _validate_json?
 def _validate_json():
     """Validate a JSON file's contents.
@@ -1402,8 +1414,29 @@ def _get_participant_relative_path(scan):
     This is what will appear in the IntendedFor field of any association.
     Examples:
     >>> _get_participant_relative_path(
     ...     "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz",
     ... )
     'ses-01/func/sub-01_ses-01_bold.nii.gz'
 
+    >>> _get_participant_relative_path(
+    ...     "/path/to/dset/sub-01/func/sub-01_bold.nii.gz",
+    ... )
+    'func/sub-01_bold.nii.gz'
+
+    >>> _get_participant_relative_path(
+    ...     "/path/to/dset/ses-01/func/ses-01_bold.nii.gz",
+    ... )
+    Traceback (most recent call last):
+    ValueError: Could not find subject in ...
     """
-    return "/".join(Path(scan).parts[-3:])
+    parts = Path(scan).parts
+    # Find the first part that starts with "sub-"
+    for i, part in enumerate(parts):
+        if part.startswith("sub-"):
+            return "/".join(parts[i+1:])
+    raise ValueError(f"Could not find subject in {scan}")
 
 
 def _get_bidsuri(filename, dataset_root):
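
The rewrite of _get_participant_relative_path fixes the old one-liner, which blindly kept the last three path components and therefore returned a subject-prefixed (wrong) path for cross-sectional datasets. A stdlib-only illustration of old versus new behavior:

    from pathlib import Path

    scan = "/path/to/dset/sub-01/func/sub-01_bold.nii.gz"  # cross-sectional layout

    # Old behavior: last three components wrongly keep the subject directory.
    print("/".join(Path(scan).parts[-3:]))  # sub-01/func/sub-01_bold.nii.gz

    # New behavior: everything after the first "sub-*" component.
    parts = Path(scan).parts
    sub_idx = next(i for i, p in enumerate(parts) if p.startswith("sub-"))
    print("/".join(parts[sub_idx + 1:]))  # func/sub-01_bold.nii.gz
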
@@ -1734,7 +1767,7 @@ def get_entity_value(path, key):
     return part
 
 
-def build_path(filepath, entities, out_dir):
+def build_path(filepath, entities, out_dir, is_longitudinal=False):
     """Build a new path for a file based on its BIDS entities.
 
     Parameters
@@ -1746,6 +1779,8 @@ def build_path(filepath, entities, out_dir):
         This should include all of the entities in the filename *except* for subject and session.
     out_dir : str
         The output directory for the new file.
+    is_longitudinal : bool, optional
+        If True, add "ses" to file path. Default is False.
 
     Returns
     -------
@@ -1758,6 +1793,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz",
     ...     {"acquisition": "VAR", "suffix": "T2w"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz'
@@ -1766,6 +1802,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
     ...     {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1775,6 +1812,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz",
     ...     {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz'
@@ -1784,6 +1822,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
     ...     {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1792,6 +1831,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
     ...     {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1801,6 +1841,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
     ...     {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1809,6 +1850,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
     ...     {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1817,6 +1859,7 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
     ...     {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
     ...     "/output",
+    ...     True,
     ... )
     '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1825,19 +1868,19 @@ def build_path(filepath, entities, out_dir):
     ...     "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz",
     ...     {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"},
     ...     "/output",
+    ...     True,
     ... )
     WARNING: DATATYPE CHANGE DETECTED
     '/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz'
-    It expects a longitudinal structure, so providing a cross-sectional filename won't work.
-    XXX: This is a bug.
+    It also works for cross-sectional filenames.
 
     >>> build_path(
     ...     "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz",
-    ...     {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
+    ...     {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
     ...     "/output",
+    ...     False,
     ... )
-    Traceback (most recent call last):
-    ValueError: Could not extract subject or session from ...
+    '/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz'
     """
     exts = Path(filepath).suffixes
     old_ext = "".join(exts)
@@ -1853,9 +1896,13 @@ def build_path(filepath, entities, out_dir):
             entity_file_keys.append(key)
 
     sub = get_entity_value(filepath, "sub")
-    ses = get_entity_value(filepath, "ses")
-    if sub is None or ses is None:
-        raise ValueError(f"Could not extract subject or session from {filepath}")
+    if sub is None:
+        raise ValueError(f"Could not extract subject from {filepath}")
+
+    if is_longitudinal:
+        ses = get_entity_value(filepath, "ses")
+        if ses is None:
+            raise ValueError(f"Could not extract session from {filepath}")
 
     # Add leading zeros to run entity if it's an integer.
     # If it's a string, respect the value provided.
@@ -1874,7 +1921,10 @@ def build_path(filepath, entities, out_dir):
         .replace("reconstruction", "rec")
     )
     if len(filename) > 0:
-        filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
+        if is_longitudinal:
+            filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
+        elif not is_longitudinal:
+            filename = f"{sub}_{filename}_{suffix}{old_ext}"
     else:
         raise ValueError(f"Could not construct new filename for {filepath}")

@@ -1894,5 +1944,9 @@ def build_path(filepath, entities, out_dir):
         dtype_new = dtype_orig
 
     # Construct the new filename
-    new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
+    if is_longitudinal:
+        new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
+    elif not is_longitudinal:
+        new_path = str(Path(out_dir) / sub / dtype_new / filename)
 
     return new_path
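
The doctests above also exercise build_path's run-padding rule (integer runs inherit the zero-padding of the original filename; string runs are used verbatim). Below is a standalone sketch of that rule, reconstructed from the doctest behavior rather than copied from the source; the helper name pad_run is hypothetical:

    import re

    def pad_run(original_name, run):
        """Pad integer runs to the width used in the original filename."""
        if isinstance(run, str):
            return run  # respect the value provided
        match = re.search(r"run-(\d+)", original_name)
        width = len(match.group(1)) if match else 1
        return str(run).zfill(width)

    print(pad_run("sub-01_task-rest_run-00001_bold.nii.gz", 2))  # 00002
    print(pad_run("sub-01_task-rest_run-1_bold.nii.gz", 2))      # 2
    print(pad_run("sub-01_task-rest_run-01_bold.nii.gz", "2"))   # 2
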
36 changes: 24 additions & 12 deletions cubids/metadata_merge.py
@@ -243,13 +243,13 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False):
     return 0
 
 
-def get_acq_dictionary():
+def get_acq_dictionary(is_longitudinal=False):
     """Create a BIDS data dictionary from dataframe columns.
 
     Parameters
     ----------
-    df : :obj:`pandas.DataFrame`
-        Pre export TSV that will be converted to a json dictionary.
+    is_longitudinal : :obj:`bool`, optional
+        If True, add "session" to acq_dict. Default is False.
 
     Returns
     -------
@@ -258,15 +258,16 @@ def get_acq_dictionary():
     """
     acq_dict = {}
     acq_dict["subject"] = {"Description": "Participant ID"}
-    acq_dict["session"] = {"Description": "Session ID"}
+    if is_longitudinal:
+        acq_dict["session"] = {"Description": "Session ID"}
     docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions"
     desc = "Acquisition Group. See Read the Docs for more information"
    acq_dict["AcqGroup"] = {"Description": desc + docs}
 
     return acq_dict
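
For reference, the two dictionary shapes get_acq_dictionary now produces (reconstructed from the code above; the AcqGroup description is abbreviated here):

    # is_longitudinal=False (default)
    {
        "subject": {"Description": "Participant ID"},
        "AcqGroup": {"Description": "Acquisition Group. See Read the Docs ..."},
    }

    # is_longitudinal=True
    {
        "subject": {"Description": "Participant ID"},
        "session": {"Description": "Session ID"},
        "AcqGroup": {"Description": "Acquisition Group. See Read the Docs ..."},
    }
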


-def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
+def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_longitudinal=False):
     """Find unique sets of Key/Param groups across subjects.
 
     This writes out the following files:
@@ -284,6 +285,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
         Prefix for output files.
     acq_group_level : {"subject", "session"}
         Level at which to group acquisitions.
+    is_longitudinal : :obj:`bool`, optional
+        If True, add "session" to acq_dict. Default is False.
     """
     from bids import config
     from bids.layout import parse_file_entities
@@ -298,9 +301,12 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
         file_entities = parse_file_entities(row.FilePath)
 
         if acq_group_level == "subject":
-            acq_id = (file_entities.get("subject"), file_entities.get("session"))
+            if is_longitudinal:
+                acq_id = (file_entities.get("subject"), file_entities.get("session"))
+            elif not is_longitudinal:
+                acq_id = (file_entities.get("subject"))
             acq_groups[acq_id].append((row.EntitySet, row.ParamGroup))
-        else:
+        elif is_longitudinal and acq_group_level == "session":
             acq_id = (file_entities.get("subject"), None)
             acq_groups[acq_id].append(
                 (row.EntitySet, row.ParamGroup, file_entities.get("session"))
@@ -326,17 +332,23 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
     for groupnum, content_id_row in enumerate(descending_order, start=1):
         content_id = content_ids[content_id_row]
         acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id)
-        for subject, session in contents_to_subjects[content_id]:
-            grouped_sub_sess.append(
-                {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
-            )
+        if is_longitudinal:
+            for subject, session in contents_to_subjects[content_id]:
+                grouped_sub_sess.append(
+                    {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
+                )
+        elif not is_longitudinal:
+            for subject in contents_to_subjects[content_id]:
+                grouped_sub_sess.append(
+                    {"subject": "sub-" + subject, "AcqGroup": groupnum}
+                )
 
     # Write the mapping of subject/session to
     acq_group_df = pd.DataFrame(grouped_sub_sess)
     acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False)
 
     # Create data dictionary for acq group tsv
-    acq_dict = get_acq_dictionary()
+    acq_dict = get_acq_dictionary(is_longitudinal)
     with open(output_prefix + "_AcqGrouping.json", "w") as outfile:
         json.dump(acq_dict, outfile, indent=4)
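
A sketch of the _AcqGrouping.tsv rows each mode writes (values are illustrative, not from a real dataset):

    import pandas as pd

    # Longitudinal: one row per subject/session pair.
    long_rows = [{"subject": "sub-01", "session": "1", "AcqGroup": 1}]
    # Cross-sectional: the session column is dropped entirely.
    cross_rows = [{"subject": "sub-01", "AcqGroup": 1}]

    pd.DataFrame(cross_rows).to_csv("example_AcqGrouping.tsv", sep="\t", index=False)
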
