From fe376e55bfd4443d4918d4aee199ba4c24ca2aef Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 3 Sep 2024 14:34:19 -0600 Subject: [PATCH] Add `condition_labels` as an argument (#18) * add condition labels * Update src/pynwb/ndx_binned_spikes/__init__.py * Update spec/ndx-binned-spikes.extensions.yaml * remove automatic creation of labels in the mock * typo on the spec generation --------- Co-authored-by: Ben Dichter --- README.md | 7 +++- spec/ndx-binned-spikes.extensions.yaml | 18 +++++++-- src/pynwb/ndx_binned_spikes/__init__.py | 13 ++++++- src/pynwb/ndx_binned_spikes/testing/mock.py | 39 ++++++++++++------- src/pynwb/tests/test_binned_aligned_spikes.py | 10 ++++- src/spec/create_extension_spec.py | 17 +++++++- 6 files changed, 82 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 43777ed..ba465db 100644 --- a/README.md +++ b/README.md @@ -192,6 +192,7 @@ binned_aligned_spikes = BinnedAlignedSpikes( data=data, # Shape (number_of_units, number_of_events, number_of_bins) timestamps=timestamps, # Shape (number_of_events,) condition_indices=condition_indices, # Shape (number_of_events,) + condition_labels=condition_labels, # Shape (number_of_conditions,) or np.unique(condition_indices).size ) ``` @@ -199,6 +200,8 @@ Note that `number_of_events` here represents the total number of repetitions for The `condition_indices` is an indicator vector that should be constructed so that `data[:, condition_indices == condition_index, :]` corresponds to the binned spike counts for the condition with the specified condition_index. You can retrieve the same data using the convenience method `binned_aligned_spikes.get_data_for_condition(condition_index)`. +The `condition_labels` argument is optional and can be used to store the labels of the conditions. 
This is meant to help users understand the nature of the conditions. + It's important to note that the timestamps must be in ascending order and must correspond positionally to the condition indices and the second dimension of the data. If they are not, a ValueError will be raised. To help organize the data correctly, you can use the convenience method `BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices)`, which ensures the data is properly sorted. Here’s how it can be used: ```python @@ -209,7 +212,8 @@ binned_aligned_spikes = BinnedAlignedSpikes( milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin, data=sorted_data, event_timestamps=sorted_event_timestamps, - condition_indices=sorted_condition_indices, + condition_indices=sorted_condition_indices, + condition_labels=condition_labels ) ``` @@ -278,6 +282,7 @@ milliseconds_from_event_to_first_bin = -50.0 data = np.concatenate([data_for_first_stimuli, data_for_second_stimuli], axis=1) event_timestamps = np.concatenate([timestamps_first_stimuli, timestamps_second_stimuli]) condition_indices = np.concatenate([np.zeros(2), np.ones(3)]) +condition_labels = ["a", "b"] sorted_data, sorted_event_timestamps, sorted_condition_indices = BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices) diff --git a/spec/ndx-binned-spikes.extensions.yaml b/spec/ndx-binned-spikes.extensions.yaml index 863cf43..3486762 100644 --- a/spec/ndx-binned-spikes.extensions.yaml +++ b/spec/ndx-binned-spikes.extensions.yaml @@ -2,7 +2,7 @@ groups: - neurodata_type_def: BinnedAlignedSpikes neurodata_type_inc: NWBDataInterface default_name: BinnedAlignedSpikes - doc: A data interface for binned spike data aligned to an event (e.g. a stimuli + doc: A data interface for binned spike data aligned to an event (e.g. a stimulus or the beginning of a trial). 
attributes: - name: name @@ -11,7 +11,8 @@ groups: doc: The name of this container - name: description dtype: text - value: Spikes data binned and aligned to the timestamps of one or multiple conditions. + value: Spikes data binned and aligned to the event timestamps of one or multiple + conditions. doc: A description of what the data represents - name: bin_width_in_milliseconds dtype: float64 @@ -25,7 +26,7 @@ groups: required: false datasets: - name: data - dtype: numeric + dtype: uint64 dims: - num_units - number_of_events @@ -54,6 +55,17 @@ groups: type, trial number, category, etc.).This is only used when the data is aligned to multiple conditions quantity: '?' + - name: condition_labels + dtype: text + dims: + - number_of_conditions + shape: + - null + doc: The labels of the conditions that the data is aligned to. The size of this + array should match the number of conditions. This is only used when the data + is aligned to multiple conditions. First condition is index 0, second is index + 1, etc. + quantity: '?' - name: units_region neurodata_type_inc: DynamicTableRegion doc: A reference to the Units table region that contains the units of the data. diff --git a/src/pynwb/ndx_binned_spikes/__init__.py b/src/pynwb/ndx_binned_spikes/__init__.py index 687b9c4..53dd29c 100644 --- a/src/pynwb/ndx_binned_spikes/__init__.py +++ b/src/pynwb/ndx_binned_spikes/__init__.py @@ -38,7 +38,7 @@ class BinnedAlignedSpikes(NWBDataInterface): ) DEFAULT_NAME = "BinnedAlignedSpikes" - DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the timestamps of one or multiple conditions." + DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the event timestamps of one or multiple conditions." @docval( { @@ -97,6 +97,17 @@ class BinnedAlignedSpikes(NWBDataInterface): "shape": (None,), "default": None, }, + { + "name":"condition_labels", + "type": "array_data", + "doc": ( + "The labels of the conditions that the data is aligned to. 
The size of this array should match " + "the number of conditions. This is only used when the data is aligned to multiple conditions. " + "First condition is index 0, second is index 1, etc." + ), + "shape": (None,), + "default": None, + }, { "name": "units_region", "type": DynamicTableRegion, diff --git a/src/pynwb/ndx_binned_spikes/testing/mock.py b/src/pynwb/ndx_binned_spikes/testing/mock.py index 9fa28e8..939b928 100644 --- a/src/pynwb/ndx_binned_spikes/testing/mock.py +++ b/src/pynwb/ndx_binned_spikes/testing/mock.py @@ -6,6 +6,7 @@ from pynwb.misc import Units from hdmf.common import DynamicTableRegion + # TODO: Remove once pynwb 2.7.0 is released and use the mock class there def mock_Units( num_units: int = 10, @@ -47,11 +48,12 @@ def mock_BinnedAlignedSpikes( event_timestamps: Optional[np.ndarray] = None, data: Optional[np.ndarray] = None, condition_indices: Optional[np.ndarray] = None, + condition_labels: Optional[np.ndarray] = None, units_region: Optional[DynamicTableRegion] = None, sort_data: bool = True, ) -> BinnedAlignedSpikes: """ - Generate a mock BinnedAlignedSpikes object with specified parameters or from given data. + Generate a mock BinnedAlignedSpikes object with specified parameters or from given data. Parameters ---------- @@ -77,11 +79,16 @@ def mock_BinnedAlignedSpikes( An array of event_timestamps for each event. If not provided, it will be automatically generated. It should have size `number_of_events`. condition_indices : np.ndarray, optional - An array of indices characterizing each condition. If not provided, it will be automatically generated. + An array of indices characterizing each condition. If not provided, it will be automatically generated + from the number of conditions and number of events. It should have size `number_of_events`. + If provided, the `number_of_conditions` parameter will be ignored and the number of conditions will be + inferred from the unique values in `condition_indices`. 
+ condition_labels: np.ndarray, optional + An array of labels for each condition. It should have size `number_of_conditions`. units_region: DynamicTableRegion, optional A reference to the Units table region that contains the units of the data. sort_data: bool, optional - If True, the data will be sorted by timestamps. + If True, the data will be sorted by timestamps. Returns ------- BinnedAlignedSpikes @@ -107,14 +114,13 @@ def mock_BinnedAlignedSpikes( if event_timestamps.shape[0] != number_of_events: raise ValueError("The shape of `event_timestamps` does not match `number_of_events`.") - + if condition_indices is None and number_of_conditions > 0: - - - assert number_of_conditions < number_of_events, ( - "The number of conditions should be less than the number of events." - ) - + + assert ( + number_of_conditions < number_of_events + ), "The number of conditions should be less than the number of events." + condition_indices = np.zeros(number_of_events, dtype=int) all_indices = np.arange(number_of_conditions, dtype=int) @@ -126,12 +132,16 @@ def mock_BinnedAlignedSpikes( size=number_of_events - number_of_conditions, replace=True, ) + if condition_indices is not None: - assert ( - condition_indices.shape[0] == number_of_events - ), "The shape of `condition_indices` does not match `number_of_events`." 
- condition_indices = np.array(condition_indices, dtype=int) + number_of_conditions = np.unique(condition_indices).size + + if condition_labels is not None: + condition_labels = np.asarray(condition_labels, dtype="U") + + if condition_labels.size != number_of_conditions: + raise ValueError("The number of condition labels should match the number of conditions.") # Sort the data by timestamps if sort_data: @@ -146,6 +156,7 @@ def mock_BinnedAlignedSpikes( data=data, event_timestamps=event_timestamps, condition_indices=condition_indices, + condition_labels=condition_labels, units_region=units_region, ) return binned_aligned_spikes diff --git a/src/pynwb/tests/test_binned_aligned_spikes.py b/src/pynwb/tests/test_binned_aligned_spikes.py index 3162a18..2582987 100644 --- a/src/pynwb/tests/test_binned_aligned_spikes.py +++ b/src/pynwb/tests/test_binned_aligned_spikes.py @@ -167,6 +167,8 @@ def setUp(self): self.event_timestamps = np.concatenate([self.timestamps_first_condition, self.timestamps_second_condition]) self.sorted_indices = np.argsort(self.event_timestamps) + + self.condition_labels = ["first", "second"] def test_constructor(self): """Test that the constructor for BinnedAlignedSpikes sets values as expected.""" @@ -193,6 +195,7 @@ def test_constructor(self): data=data, event_timestamps=event_timestamps, condition_indices=condition_indices, + condition_labels=self.condition_labels, ) np.testing.assert_array_equal(aggregated_binnned_align_spikes.data, self.data[:, self.sorted_indices, :]) @@ -202,6 +205,11 @@ def test_constructor(self): np.testing.assert_array_equal( aggregated_binnned_align_spikes.event_timestamps, self.event_timestamps[self.sorted_indices] ) + + np.testing.assert_array_equal( + aggregated_binnned_align_spikes.condition_labels, self.condition_labels + ) + self.assertEqual(aggregated_binnned_align_spikes.bin_width_in_milliseconds, self.bin_width_in_milliseconds) self.assertEqual( 
aggregated_binnned_align_spikes.milliseconds_from_event_to_first_bin, @@ -259,7 +267,7 @@ def test_roundtrip_acquisition(self): """ # Testing here - self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=0) + self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=3, condition_labels=["a", "b", "c"]) self.nwbfile.add_acquisition(self.binned_aligned_spikes) diff --git a/src/spec/create_extension_spec.py b/src/spec/create_extension_spec.py index 25fb936..cfd38c2 100644 --- a/src/spec/create_extension_spec.py +++ b/src/spec/create_extension_spec.py @@ -29,7 +29,7 @@ def main(): "The binned data. It should be an array whose first dimension is the number of units, the second dimension " "is the number of events, and the third dimension is the number of bins." ), - dtype="numeric", # TODO should this be a uint64? + dtype="uint64", shape=[None, None, None], dims=["num_units", "number_of_events", "number_of_bins"], ) @@ -63,12 +63,25 @@ def main(): quantity="?", ) + condition_labels = NWBDatasetSpec( + name="condition_labels", + doc=( + "The labels of the conditions that the data is aligned to. The size of this array should match " + "the number of conditions. This is only used when the data is aligned to multiple conditions. " + "First condition is index 0, second is index 1, etc." + ), + dtype="text", + shape=[None], + dims=["number_of_conditions"], + quantity="?", + ) + binned_aligned_spikes = NWBGroupSpec( neurodata_type_def="BinnedAlignedSpikes", neurodata_type_inc="NWBDataInterface", default_name="BinnedAlignedSpikes", doc="A data interface for binned spike data aligned to an event (e.g. a stimulus or the beginning of a trial).", - datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, units_region], + datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, condition_labels, units_region], attributes=[ NWBAttributeSpec( name="name",