From d7d08dd8ee56806fa63ebeca7e508721746e6387 Mon Sep 17 00:00:00 2001 From: Berk Gercek Date: Mon, 20 Jan 2025 22:33:42 +0100 Subject: [PATCH] [MRG] Update empty-room matching to look for and prioritize same-session recordings with task "noise" (closes #1354) (#1364) * Changed empty-room matching logic to prioritize task-noise files * Simple test for noise task * Tests for the split noise file case and fallback to sub-emptyroom * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix line length and clean up after test * Work around the weird multiple-match behavior of path.match() * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Better handling of "run" set in the base path for ER search * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove an extraneous print statement * Update mne_bids/tests/test_path.py Co-authored-by: Stefan Appelhoff * Updated changelog, added self to author list per new contrib guide * Update CITATION.cff to remove name from original pub * Update doc/authors.rst to fix doc build Co-authored-by: Daniel McCloy * Added test logic for new code * Line length * Cover an unrelated error to bring up % * correct author order --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Stefan Appelhoff Co-authored-by: Daniel McCloy --- CITATION.cff | 4 +++ doc/authors.rst | 1 + doc/whats_new.rst | 2 ++ mne_bids/path.py | 64 ++++++++++++++++++++++++++++++++----- mne_bids/tests/test_path.py | 42 ++++++++++++++++++++++++ 5 files changed, 105 insertions(+), 8 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 54752f43e..790e6d7dc 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -202,6 +202,10 @@ authors: family-names: O'Reilly affiliation: 'Computer Science and Engineering, University of South Carolina' orcid: 'https://orcid.org/0000-0002-3149-4934' + - given-names: Berk + family-names: Gerçek + affiliation: 'University of Geneva, Department of Fundamental Neuroscience' + orcid: 'https://orcid.org/0000-0003-1063-6769' - given-names: Alexandre family-names: Gramfort affiliation: 'Université Paris-Saclay, Inria, CEA, Palaiseau, France' diff --git a/doc/authors.rst b/doc/authors.rst index 031924101..cc061b1ea 100644 --- a/doc/authors.rst +++ b/doc/authors.rst @@ -6,6 +6,7 @@ .. _Anand Saini: https://github.com/anandsaini024 .. _Ariel Rokem: https://github.com/arokem .. _Austin Hurst: https://github.com/a-hurst +.. _Berk Gerçek: https://github.com/berkgercek .. _Bruno Hebling Vieira: https://github.com/bhvieira .. _Chris Holdgraf: https://bids.berkeley.edu/people/chris-holdgraf .. _Clemens Brunner: https://github.com/cbrnr diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 17039eeb0..fcf110636 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -18,6 +18,7 @@ Version 0.17 (unreleased) The following authors contributed for the first time. Thank you so much! 🤩 * `Christian O'Reilly`_ +* `Berk Gerçek`_ The following authors had contributed before. Thank you for sticking around! 🤘 @@ -33,6 +34,7 @@ Detailed list of changes - :func:`mne_bids.write_raw_bids()` can now handle mne `Raw` objects with `eyegaze` and `pupil` channels, by `Christian O'Reilly`_ (:gh:`1344`) - :func:`mne_bids.get_entity_vals()` has a new parameter ``ignore_suffixes`` to easily ignore sidecar files, by `Daniel McCloy`_ (:gh:`1362`) +- Empty-room matching now preferentially finds recordings in the subject directory tagged as `task-noise` before looking in the `sub-emptyroom` directories. This adds support for a part of the BIDS specification for ER recordings, by `Berk Gerçek`_ (:gh:`1364`) 🧐 API and behavior changes diff --git a/mne_bids/path.py b/mne_bids/path.py index d0471a0a8..a824ba50a 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -56,12 +56,62 @@ def _find_empty_room_candidates(bids_path): datatype = "meg" # We're only concerned about MEG data here bids_fname = bids_path.update(suffix=datatype).fpath _, ext = _parse_ext(bids_fname) + # Create a path for the empty-room directory to be used for matching. emptyroom_dir = BIDSPath(root=bids_root, subject="emptyroom").directory - if not emptyroom_dir.exists(): + # Find matching "task-noise" files in the same directory as the recording. + noisetask_path = bids_path.update( + split=None, run=None, task="noise", datatype=datatype, suffix=datatype + ) + + allowed_extensions = list(reader.keys()) + # `.pdf` is just a "virtual" extension for BTi data (which is stored inside + # a dedicated directory that doesn't have an extension) + del allowed_extensions[allowed_extensions.index(".pdf")] + + # Get possible noise task files in the same directory as the recording. + noisetask_tmp = [ + candidate + for candidate in noisetask_path.match() + if candidate.extension in allowed_extensions + ] + # For some reason a single file can produce multiple hits in the match function. + # Remove dups + noisetask_fns = [] + for i in range(len(noisetask_tmp)): + fn = noisetask_tmp.pop() + if len(noisetask_tmp) == 0 or not any( + fn.fpath == f.fpath for f in noisetask_fns + ): + noisetask_fns.append(fn) + + # If we have more than one noise task file, we need to disambiguate. + # It might be that it's a + # split recording. + if len(noisetask_fns) > 1 and any(path.split is not None for path in noisetask_fns): + noisetask_path.update(split="01") + noisetask_fns = [ + candidate + for candidate in noisetask_path.match() + if candidate.extension in allowed_extensions + ] + + # If it wasn't a split recording, warn the user that something is wonky, + # then resort to looking for sub-emptyroom recordings and date-matching. + if len(noisetask_fns) > 1: + msg = ( + "Found more than one matching noise task file." + " Falling back to looking for sub-emptyroom recordings and date-matching." + ) + warn(msg) + noisetask_fns = [] + elif len(noisetask_fns) == 1: + return noisetask_fns[0] + + if not emptyroom_dir.exists() and not noisetask_fns: return list() - # Find the empty-room recording sessions. + # Check the 'emptyroom' subject for empty-room recording sessions. emptyroom_session_dirs = [ x for x in emptyroom_dir.iterdir() @@ -71,12 +121,6 @@ def _find_empty_room_candidates(bids_path): emptyroom_session_dirs = [emptyroom_dir] # Now try to discover all recordings inside the session directories. - - allowed_extensions = list(reader.keys()) - # `.pdf` is just a "virtual" extension for BTi data (which is stored inside - # a dedicated directory that doesn't have an extension) - del allowed_extensions[allowed_extensions.index(".pdf")] - candidate_er_fnames = [] for session_dir in emptyroom_session_dirs: dir_contents = glob.glob( @@ -105,6 +149,10 @@ def _find_matched_empty_room(bids_path): from mne_bids import read_raw_bids # avoid circular import. candidates = _find_empty_room_candidates(bids_path) + # If a single candidate is returned, then there's a same-session noise + # task recording that takes priority. + if not isinstance(candidates, list): + return candidates # Walk through recordings, trying to extract the recording date: # First, from the filename; and if that fails, from `info['meas_date']`. diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py index 0eecdf49a..e617bf5fb 100644 --- a/mne_bids/tests/test_path.py +++ b/mne_bids/tests/test_path.py @@ -1146,9 +1146,13 @@ def test_find_empty_room(return_bids_test_dir, tmp_path): task="audiovisual", run="01", root=bids_root, + datatype="meg", suffix="meg", ) write_raw_bids(raw, bids_path, overwrite=True, verbose=False) + noroot = bids_path.copy().update(root=None) + with pytest.raises(ValueError, match="The root of the"): + noroot.find_empty_room() # No empty-room data present. er_basename = bids_path.find_empty_room() @@ -1173,6 +1177,44 @@ def test_find_empty_room(return_bids_test_dir, tmp_path): recovered_er_bids_path = bids_path.find_empty_room() assert er_bids_path == recovered_er_bids_path + # Test that when there is a noise task file in the subject directory it will take + # precedence over the emptyroom directory file + er_noise_task_path = bids_path.copy().update( + run=None, + task="noise", + ) + + write_raw_bids(er_raw, er_noise_task_path, overwrite=True, verbose=False) + recovered_er_bids_path = bids_path.find_empty_room() + assert er_noise_task_path == recovered_er_bids_path + er_noise_task_path.fpath.unlink() + + # When a split empty room file is present, the first split should be returned as + # the matching empty room file + split_er_bids_path = er_noise_task_path.copy().update(split="01", extension=".fif") + split_er_bids_path.fpath.touch() + split_er_bids_path2 = split_er_bids_path.copy().update( + split="02", extension=".fif" + ) # not used + split_er_bids_path2.fpath.touch() + recovered_er_bids_path = bids_path.find_empty_room() + assert split_er_bids_path == recovered_er_bids_path + split_er_bids_path.fpath.unlink() + split_er_bids_path2.fpath.unlink() + write_raw_bids(er_raw, er_noise_task_path, overwrite=True, verbose=False) + + # Check that when there are multiple matches that cannot be resolved via assigning + # split=01 that the sub-emptyroom is the fallback + dup_noise_task_path = er_noise_task_path.copy() + dup_noise_task_path.update(run="100", split=None) + write_raw_bids(er_raw, dup_noise_task_path, overwrite=True, verbose=False) + write_raw_bids(er_raw, er_bids_path, overwrite=True, verbose=False) + with pytest.warns(RuntimeWarning): + recovered_er_bids_path = bids_path.find_empty_room() + assert er_bids_path == recovered_er_bids_path + er_noise_task_path.fpath.unlink() + dup_noise_task_path.fpath.unlink() + # assert that we get best emptyroom if there are multiple available sh.rmtree(op.join(bids_root, "sub-emptyroom")) dates = ["20021204", "20021201", "20021001"]