From fe376e55bfd4443d4918d4aee199ba4c24ca2aef Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 3 Sep 2024 14:34:19 -0600
Subject: [PATCH] Add `condition_labels` as an argument (#18)

* add condition labels

* Update src/pynwb/ndx_binned_spikes/__init__.py

* Update spec/ndx-binned-spikes.extensions.yaml

* remove automatic creation of labels in the mock

* typo on the spec generation

---------

Co-authored-by: Ben Dichter <ben.dichter@gmail.com>
---
 README.md                                     |  7 +++-
 spec/ndx-binned-spikes.extensions.yaml        | 18 +++++++--
 src/pynwb/ndx_binned_spikes/__init__.py       | 13 ++++++-
 src/pynwb/ndx_binned_spikes/testing/mock.py   | 39 ++++++++++++-------
 src/pynwb/tests/test_binned_aligned_spikes.py | 10 ++++-
 src/spec/create_extension_spec.py             | 17 +++++++-
 6 files changed, 82 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 43777ed..ba465db 100644
--- a/README.md
+++ b/README.md
@@ -192,6 +192,7 @@ binned_aligned_spikes = BinnedAlignedSpikes(
     data=data,  # Shape (number_of_units, number_of_events, number_of_bins)
     timestamps=timestamps,  # Shape (number_of_events,)
     condition_indices=condition_indices,  # Shape (number_of_events,)
+    condition_labels=condition_labels,  # Shape (number_of_conditions,), i.e. one label per unique value in condition_indices
 )
 ```
 
@@ -199,6 +200,8 @@ Note that `number_of_events` here represents the total number of repetitions for
 
 The `condition_indices` is an indicator vector that should be constructed so that `data[:, condition_indices == condition_index, :]` corresponds to the binned spike counts for the condition with the specified condition_index. You can retrieve the same data using the convenience method `binned_aligned_spikes.get_data_for_condition(condition_index)`.
 
+The `condition_labels` argument is optional and can be used to store a label for each condition. The labels are meant to help readers understand the nature of the conditions.
+
 It's important to note that the timestamps must be in ascending order and must correspond positionally to the condition indices and the second dimension of the data. If they are not, a ValueError will be raised. To help organize the data correctly, you can use the convenience method `BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices)`, which ensures the data is properly sorted. Here’s how it can be used:
 
 ```python
@@ -209,7 +212,8 @@ binned_aligned_spikes = BinnedAlignedSpikes(
     milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin,
     data=sorted_data,   
     event_timestamps=sorted_event_timestamps,  
-    condition_indices=sorted_condition_indices,  
+    condition_indices=sorted_condition_indices,
+    condition_labels=condition_labels
 )
 ```
 
@@ -278,6 +282,7 @@ milliseconds_from_event_to_first_bin = -50.0
 data = np.concatenate([data_for_first_stimuli, data_for_second_stimuli], axis=1)
 event_timestamps = np.concatenate([timestamps_first_stimuli, timestamps_second_stimuli])
 condition_indices = np.concatenate([np.zeros(2), np.ones(3)])
+condition_labels = ["a", "b"]
 
 sorted_data, sorted_event_timestamps, sorted_condition_indices = BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices)
 
diff --git a/spec/ndx-binned-spikes.extensions.yaml b/spec/ndx-binned-spikes.extensions.yaml
index 863cf43..3486762 100644
--- a/spec/ndx-binned-spikes.extensions.yaml
+++ b/spec/ndx-binned-spikes.extensions.yaml
@@ -2,7 +2,7 @@ groups:
 - neurodata_type_def: BinnedAlignedSpikes
   neurodata_type_inc: NWBDataInterface
   default_name: BinnedAlignedSpikes
-  doc: A data interface for binned spike data aligned to an event (e.g. a stimuli
+  doc: A data interface for binned spike data aligned to an event (e.g. a stimulus
     or the beginning of a trial).
   attributes:
   - name: name
@@ -11,7 +11,8 @@ groups:
     doc: The name of this container
   - name: description
     dtype: text
-    value: Spikes data binned and aligned to the timestamps of one or multiple conditions.
+    value: Spikes data binned and aligned to the event timestamps of one or multiple
+      conditions.
     doc: A description of what the data represents
   - name: bin_width_in_milliseconds
     dtype: float64
@@ -25,7 +26,7 @@ groups:
     required: false
   datasets:
   - name: data
-    dtype: numeric
+    dtype: uint64
     dims:
     - num_units
     - number_of_events
@@ -54,6 +55,17 @@ groups:
       type, trial number, category, etc.).This is only used when the data is aligned
       to multiple conditions
     quantity: '?'
+  - name: condition_labels
+    dtype: text
+    dims:
+    - number_of_conditions
+    shape:
+    - null
+    doc: The labels of the conditions that the data is aligned to. The size of this
+      array should match the number of conditions. This is only used when the data
+      is aligned to multiple conditions. First condition is index 0, second is index
+      1, etc.
+    quantity: '?'
   - name: units_region
     neurodata_type_inc: DynamicTableRegion
     doc: A reference to the Units table region that contains the units of the data.
diff --git a/src/pynwb/ndx_binned_spikes/__init__.py b/src/pynwb/ndx_binned_spikes/__init__.py
index 687b9c4..53dd29c 100644
--- a/src/pynwb/ndx_binned_spikes/__init__.py
+++ b/src/pynwb/ndx_binned_spikes/__init__.py
@@ -38,7 +38,7 @@ class BinnedAlignedSpikes(NWBDataInterface):
     )
 
     DEFAULT_NAME = "BinnedAlignedSpikes"
-    DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the timestamps of one or multiple conditions."
+    DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the event timestamps of one or multiple conditions."
 
     @docval(
         {
@@ -97,6 +97,17 @@ class BinnedAlignedSpikes(NWBDataInterface):
             "shape": (None,),
             "default": None,
         },
+        {
+            "name":"condition_labels",
+            "type": "array_data",
+            "doc": (
+                "The labels of the conditions that the data is aligned to. The size of this array should match "
+                "the number of conditions. This is only used when the data is aligned to multiple conditions. "
+                "First condition is index 0, second is index 1, etc."
+            ),
+            "shape": (None,),
+            "default": None,
+        },
         {
             "name": "units_region",
             "type": DynamicTableRegion,
diff --git a/src/pynwb/ndx_binned_spikes/testing/mock.py b/src/pynwb/ndx_binned_spikes/testing/mock.py
index 9fa28e8..939b928 100644
--- a/src/pynwb/ndx_binned_spikes/testing/mock.py
+++ b/src/pynwb/ndx_binned_spikes/testing/mock.py
@@ -6,6 +6,7 @@
 from pynwb.misc import Units
 from hdmf.common import DynamicTableRegion
 
+
 # TODO: Remove once pynwb 2.7.0 is released and use the mock class there
 def mock_Units(
     num_units: int = 10,
@@ -47,11 +48,12 @@ def mock_BinnedAlignedSpikes(
     event_timestamps: Optional[np.ndarray] = None,
     data: Optional[np.ndarray] = None,
     condition_indices: Optional[np.ndarray] = None,
+    condition_labels: Optional[np.ndarray] = None,
     units_region: Optional[DynamicTableRegion] = None,
     sort_data: bool = True,
 ) -> BinnedAlignedSpikes:
     """
-    Generate a mock BinnedAlignedSpikes object with specified parameters or from given data. 
+    Generate a mock BinnedAlignedSpikes object with specified parameters or from given data.
 
     Parameters
     ----------
@@ -77,11 +79,16 @@ def mock_BinnedAlignedSpikes(
         An array of event_timestamps for each event. If not provided, it will be automatically generated.
         It should have size `number_of_events`.
     condition_indices : np.ndarray, optional
-        An array of indices characterizing each condition. If not provided, it will be automatically generated.
+        An array of indices characterizing each condition. If not provided, it will be automatically generated
+        from the number of conditions and number of events. It should have size `number_of_events`.
+        If provided, the `number_of_conditions` parameter will be ignored and the number of conditions will be
+        inferred from the unique values in `condition_indices`.
+    condition_labels: np.ndarray, optional
+        An array of labels for each condition. It should have size `number_of_conditions`.
     units_region: DynamicTableRegion, optional
         A reference to the Units table region that contains the units of the data.
     sort_data: bool, optional
-        If True, the data will be sorted by timestamps. 
+        If True, the data will be sorted by timestamps.
     Returns
     -------
     BinnedAlignedSpikes
@@ -107,14 +114,13 @@ def mock_BinnedAlignedSpikes(
 
     if event_timestamps.shape[0] != number_of_events:
         raise ValueError("The shape of `event_timestamps` does not match `number_of_events`.")
-    
+
     if condition_indices is None and number_of_conditions > 0:
-        
-        
-        assert number_of_conditions < number_of_events, (
-            "The number of conditions should be less than the number of events."
-        )
-        
+
+        assert (
+            number_of_conditions < number_of_events
+        ), "The number of conditions should be less than the number of events."
+
         condition_indices = np.zeros(number_of_events, dtype=int)
         all_indices = np.arange(number_of_conditions, dtype=int)
 
@@ -126,12 +132,16 @@ def mock_BinnedAlignedSpikes(
             size=number_of_events - number_of_conditions,
             replace=True,
         )
+        
 
     if condition_indices is not None:
-        assert (
-            condition_indices.shape[0] == number_of_events
-        ), "The shape of `condition_indices` does not match `number_of_events`."
-        condition_indices = np.array(condition_indices, dtype=int)
+        number_of_conditions = np.unique(condition_indices).size
+        
+        if condition_labels is not None:
+            condition_labels = np.asarray(condition_labels, dtype="U")
+            
+            if condition_labels.size != number_of_conditions:
+                raise ValueError("The number of condition labels should match the number of conditions.")
 
     # Sort the data by timestamps
     if sort_data:
@@ -146,6 +156,7 @@ def mock_BinnedAlignedSpikes(
         data=data,
         event_timestamps=event_timestamps,
         condition_indices=condition_indices,
+        condition_labels=condition_labels,
         units_region=units_region,
     )
     return binned_aligned_spikes
diff --git a/src/pynwb/tests/test_binned_aligned_spikes.py b/src/pynwb/tests/test_binned_aligned_spikes.py
index 3162a18..2582987 100644
--- a/src/pynwb/tests/test_binned_aligned_spikes.py
+++ b/src/pynwb/tests/test_binned_aligned_spikes.py
@@ -167,6 +167,8 @@ def setUp(self):
         self.event_timestamps = np.concatenate([self.timestamps_first_condition, self.timestamps_second_condition])
 
         self.sorted_indices = np.argsort(self.event_timestamps)
+        
+        self.condition_labels = ["first", "second"]
 
     def test_constructor(self):
         """Test that the constructor for BinnedAlignedSpikes sets values as expected."""
@@ -193,6 +195,7 @@ def test_constructor(self):
             data=data,
             event_timestamps=event_timestamps,
             condition_indices=condition_indices,
+            condition_labels=self.condition_labels,
         )
 
         np.testing.assert_array_equal(aggregated_binnned_align_spikes.data, self.data[:, self.sorted_indices, :])
@@ -202,6 +205,11 @@ def test_constructor(self):
         np.testing.assert_array_equal(
             aggregated_binnned_align_spikes.event_timestamps, self.event_timestamps[self.sorted_indices]
         )
+        
+        np.testing.assert_array_equal(
+            aggregated_binnned_align_spikes.condition_labels, self.condition_labels
+        )
+        
         self.assertEqual(aggregated_binnned_align_spikes.bin_width_in_milliseconds, self.bin_width_in_milliseconds)
         self.assertEqual(
             aggregated_binnned_align_spikes.milliseconds_from_event_to_first_bin,
@@ -259,7 +267,7 @@ def test_roundtrip_acquisition(self):
         """
 
         # Testing here
-        self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=0)
+        self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=3, condition_labels=["a", "b", "c"])
 
         self.nwbfile.add_acquisition(self.binned_aligned_spikes)
 
diff --git a/src/spec/create_extension_spec.py b/src/spec/create_extension_spec.py
index 25fb936..cfd38c2 100644
--- a/src/spec/create_extension_spec.py
+++ b/src/spec/create_extension_spec.py
@@ -29,7 +29,7 @@ def main():
             "The binned data. It should be an array whose first dimension is the number of units, the second dimension "
             "is the number of events, and the third dimension is the number of bins."
             ),
-        dtype="numeric",  # TODO should this be a uint64?
+        dtype="uint64",  
         shape=[None, None, None],
         dims=["num_units", "number_of_events", "number_of_bins"],
     )
@@ -63,12 +63,25 @@ def main():
         quantity="?",
     )
     
+    condition_labels = NWBDatasetSpec(
+        name="condition_labels",
+        doc=(
+            "The labels of the conditions that the data is aligned to. The size of this array should match "
+            "the number of conditions. This is only used when the data is aligned to multiple conditions. "
+            "First condition is index 0, second is index 1, etc."
+        ),
+        dtype="text",
+        shape=[None],
+        dims=["number_of_conditions"],
+        quantity="?",
+    )
+    
     binned_aligned_spikes = NWBGroupSpec(
         neurodata_type_def="BinnedAlignedSpikes",
         neurodata_type_inc="NWBDataInterface",
         default_name="BinnedAlignedSpikes",
         doc="A data interface for binned spike data aligned to an event (e.g. a stimulus or the beginning of a trial).",
-        datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, units_region],
+        datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, condition_labels, units_region],
         attributes=[
             NWBAttributeSpec(
                 name="name",