From b8fc7fe86eb2cf04b0e78e7965da32ad7d19aa77 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 3 Oct 2023 17:41:09 +0100 Subject: [PATCH 01/26] wip: initial commit --- src/coffea/nanoevents/factory.py | 66 +++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 38e06d601..c43b182ad 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -77,23 +77,48 @@ def __init__( self.metadata = metadata self.version = version - def extract_form_keys_base_columns(self, form_keys): - base_columns = [] - for form_key in form_keys: - base_columns.extend( + def keys_for_buffer_keys(self, buffer_keys): + base_columns = set() + for buffer_key in buffer_keys: + form_key, attribute = self.parse_buffer_key(buffer_key) + base_columns.update( [ acolumn for acolumn in urllib.parse.unquote(form_key).split(",") if not acolumn.startswith("!") ] ) - return list(set(base_columns)) + return base_columns + + def parse_buffer_key(self, buffer_key): + prefix, attribute, form_key = buffer_key.rsplit("/", maxsplit=2) + if attribute == "offsets": + return (form_key[: -len("%2C%21offsets")], attribute) + else: + return (form_key, attribute) + + @property + def buffer_key(self): + return partial(self._key_formatter, "") def _key_formatter(self, prefix, form_key, form, attribute): if attribute == "offsets": form_key += "%2C%21offsets" return prefix + f"/{attribute}/{form_key}" + # TODO: deprecate + def extract_form_keys_base_columns(self, form_keys): + base_columns = [] + for form_key in form_keys: + base_columns.extend( + [ + acolumn + for acolumn in urllib.parse.unquote(form_key).split(",") + if not acolumn.startswith("!") + ] + ) + return list(set(base_columns)) + class _map_schema_uproot(_map_schema_base): def __init__( @@ -125,7 +150,36 @@ def __call__(self, form): }, "form_key": None, } - return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form) + return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), self + + def create_column_mapping(self, tree, keys, start, stop, interp_options): + from functools import partial + + from coffea.nanoevents.util import tuple_to_key + + partition_key = ( + str(tree.file.uuid), + tree.object_path, + f"{start}-{stop}", + ) + uuidpfn = {partition_key[0]: tree.file.file_path} + mapping = UprootSourceMapping( + TrivialUprootOpener(uuidpfn, interp_options), + start, + stop, + cache={}, + access_log=None, + use_ak_forth=True, + ) + mapping.preload_column_source(partition_key[0], partition_key[1], tree) + buffer_key = partial(self._key_formatter, tuple_to_key(partition_key)) + + class TranslateBufferKeys: + def __getitem__(this, key): + form_key, attribute = self.parse_buffer_key(key) + return mapping[buffer_key(form_key=form_key, attribute=attribute, form=None)] + + return TranslateBufferKeys() def create_column_mapping_and_key(self, tree, start, stop, interp_options): from functools import partial From 1b4bd50af71c7e06166a7d7d285e341e606c3ee4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 16:43:11 +0000 Subject: [PATCH 02/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/factory.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index c43b182ad..cc0eff0ad 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -150,7 +150,10 @@ def __call__(self, form): }, "form_key": None, } - return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), self + return ( + awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), + self, + ) def create_column_mapping(self, tree, keys, start, stop, interp_options): from functools import partial @@ -177,7 +180,9 @@ def create_column_mapping(self, tree, keys, start, stop, interp_options): class TranslateBufferKeys: def __getitem__(this, key): form_key, attribute = self.parse_buffer_key(key) - return mapping[buffer_key(form_key=form_key, attribute=attribute, form=None)] + return mapping[ + buffer_key(form_key=form_key, attribute=attribute, form=None) + ] return TranslateBufferKeys() From daa8529cfb7ea5027d0ae8606615c575f0119519 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 4 Oct 2023 10:18:45 +0100 Subject: [PATCH 03/26] fix: rename function --- src/coffea/nanoevents/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index c43b182ad..559504c57 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -152,7 +152,7 @@ def __call__(self, form): } return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), self - def create_column_mapping(self, tree, keys, start, stop, interp_options): + def load_buffers(self, tree, keys, start, stop, interp_options): from functools import partial from coffea.nanoevents.util import tuple_to_key From 66c8710c3ea1630a519a2de6eabd3af6327329d3 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 4 Oct 2023 11:17:43 +0100 Subject: [PATCH 04/26] fix: use report_necessary_buffers --- src/coffea/processor/executor.py | 6 +++--- tests/test_jetmet_tools.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coffea/processor/executor.py b/src/coffea/processor/executor.py index 618b1c741..9698fa296 100644 --- a/src/coffea/processor/executor.py +++ b/src/coffea/processor/executor.py @@ -1718,7 +1718,7 @@ def _work_function( import dask_awkward to_compute = processor_instance.process(events) - materialized = dask_awkward.necessary_columns(to_compute) + # materialized = dask_awkward.report_necessary_buffers(to_compute) out = dask.compute(to_compute, scheduler="single-threaded")[0] except Exception as e: raise Exception(f"Failed processing file: {item!r}") from e @@ -1734,11 +1734,11 @@ def _work_function( metrics = {} if isinstance(file, uproot.ReadOnlyDirectory): metrics["bytesread"] = file.file.source.num_requested_bytes + # metrics["data_and_shape_buffers"] = set(materialized) + # metrics["shape_only_buffers"] = set(materialized) if schema is not None and issubclass(schema, schemas.BaseSchema): - metrics["columns"] = set(materialized) metrics["entries"] = len(events) else: - metrics["columns"] = set(materialized) metrics["entries"] = events.size metrics["processtime"] = toc - tic return {"out": out, "metrics": metrics, "processed": {item}} diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py index a7ef91385..b1375afa2 100644 --- a/tests/test_jetmet_tools.py +++ b/tests/test_jetmet_tools.py @@ -837,9 +837,9 @@ def test_corrected_jets_factory(optimization_enabled): **{name: evaluator[name] for name in jec_stack_names[5:6]} ) - print(dak.necessary_columns(jets.eta)) + print(dak.report_necessary_buffers(jets.eta)) print( - dak.necessary_columns( + dak.report_necessary_buffers( resosf.getScaleFactor( JetEta=jets.eta, ) From 2353a2306aae1ab57e72bb181b37d6e6f03f5e7d Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 4 Oct 2023 12:19:57 +0100 Subject: [PATCH 05/26] fix: properly parse form keys --- src/coffea/nanoevents/factory.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 9b25a6c6a..24b31feed 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -81,11 +81,16 @@ def keys_for_buffer_keys(self, buffer_keys): base_columns = set() for buffer_key in buffer_keys: form_key, attribute = self.parse_buffer_key(buffer_key) + operands = urllib.parse.unquote(form_key).split(",") + + it_operands = iter(operands) + next(it_operands) + base_columns.update( [ - acolumn - for acolumn in urllib.parse.unquote(form_key).split(",") - if not acolumn.startswith("!") + name + for name, maybe_transform in zip(operands, it_operands) + if maybe_transform == "!load" ] ) return base_columns From bd07d03fada9a61b1de1db726d560a917525aa44 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Oct 2023 14:05:25 +0100 Subject: [PATCH 06/26] hack: convert Content to array --- src/coffea/nanoevents/mapping/base.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/coffea/nanoevents/mapping/base.py b/src/coffea/nanoevents/mapping/base.py index c6a5e8e2e..f20b3bb2c 100644 --- a/src/coffea/nanoevents/mapping/base.py +++ b/src/coffea/nanoevents/mapping/base.py @@ -111,14 +111,18 @@ def __getitem__(self, key): if len(stack) != 1: raise RuntimeError(f"Syntax error in form key {nodes}") out = stack.pop() - try: - out = numpy.array(out) - except ValueError: - if self._debug: - print(out) - raise RuntimeError( - f"Left with non-bare array after evaluating form key {nodes}" - ) + import awkward + if isinstance(out, awkward.contents.Content): + out = awkward.to_numpy(out) + else: + try: + out = numpy.array(out) + except ValueError: + if self._debug: + print(out) + raise RuntimeError( + f"Left with non-bare array after evaluating form key {nodes}" + ) return out @abstractmethod From a6848a0824d94be45793f5ab58505bca60754fd2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Oct 2023 13:10:21 +0000 Subject: [PATCH 07/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/mapping/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coffea/nanoevents/mapping/base.py b/src/coffea/nanoevents/mapping/base.py index f20b3bb2c..3d87b410c 100644 --- a/src/coffea/nanoevents/mapping/base.py +++ b/src/coffea/nanoevents/mapping/base.py @@ -112,6 +112,7 @@ def __getitem__(self, key): raise RuntimeError(f"Syntax error in form key {nodes}") out = stack.pop() import awkward + if isinstance(out, awkward.contents.Content): out = awkward.to_numpy(out) else: From 9c90205c576b10d3334e3dc482c6fb8c6d374fa3 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Oct 2023 22:14:05 +0100 Subject: [PATCH 08/26] fix: ensure layout nodes converted to arrays --- src/coffea/nanoevents/transforms.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/coffea/nanoevents/transforms.py b/src/coffea/nanoevents/transforms.py index e969310b2..2985f9709 100644 --- a/src/coffea/nanoevents/transforms.py +++ b/src/coffea/nanoevents/transforms.py @@ -13,6 +13,15 @@ def to_layout(array): return array.layout +def ensure_array(arraylike): + if isinstance(arraylike, (awkward.contents.Content, awkward.Array)): + return awkward.to_numpy(arraylike) + elif isinstance(arraylike, awkward.index.Index): + return arraylike.data + else: + return numpy.asarray(arraylike) + + def data(stack): """Extract content from array (currently a noop, can probably take place of !content) @@ -96,7 +105,7 @@ def counts2offsets(stack): Signature: counts,!counts2offsets Outputs an array with length one larger than input """ - counts = numpy.array(stack.pop()) + counts = ensure_array(stack.pop()) offsets = numpy.empty(len(counts) + 1, dtype=numpy.int64) offsets[0] = 0 numpy.cumsum(counts, out=offsets[1:]) @@ -123,11 +132,11 @@ def local2global(stack): Signature: index,target_offsets,!local2global Outputs a content array with same shape as index content """ - target_offsets = numpy.asarray(stack.pop()) + target_offsets = ensure_array(stack.pop()) index = stack.pop() index = index.mask[index >= 0] + target_offsets[:-1] index = index.mask[index < target_offsets[1:]] - out = numpy.array(awkward.flatten(awkward.fill_none(index, -1), axis=None)) + out = ensure_array(awkward.flatten(awkward.fill_none(index, -1), axis=None)) if out.dtype != numpy.int64: raise RuntimeError stack.append(out) From 04b5a1a235a14ed80500054b59e7b921aefc335e Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 7 Oct 2023 09:13:07 -0500 Subject: [PATCH 09/26] adjust coffea pins to latest releases and pre-releases --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f5b168c1e..177cd9926 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,11 +37,11 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "awkward>=2.4.2", - "uproot>=5.0.10", + "awkward>=2.4.5", + "uproot>=5.1.0rc1", "dask[array]>=2023.4.0", - "dask-awkward>=2023.7.1,!=2023.8.0", - "dask-histogram>=2023.6.0", + "dask-awkward>=2023.10a1,!=2023.8.0", + "dask-histogram>=2023.7a0", "correctionlib>=2.0.0", "pyarrow>=6.0.0", "fsspec", From f19c11b17e8bdbcc6024f7456f515b96d9fc085c Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 7 Oct 2023 09:36:27 -0500 Subject: [PATCH 10/26] use pytorch-only triton image --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe8453b8d..234ac2e21 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -98,7 +98,7 @@ jobs: - name: Start triton server with example model if: matrix.os == 'ubuntu-latest' run: | - docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-py3 tritonserver --model-repository=/models + docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-pyt-python-py3 tritonserver --model-repository=/models - name: Test with pytest run: | From 7051d2e40a8655b7d9aa86359a014d50bc9dd1a1 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 7 Oct 2023 09:40:30 -0500 Subject: [PATCH 11/26] streamline version requirements Co-authored-by: Angus Hollands --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 177cd9926..36fada2d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "awkward>=2.4.5", "uproot>=5.1.0rc1", "dask[array]>=2023.4.0", - "dask-awkward>=2023.10a1,!=2023.8.0", + "dask-awkward>=2023.10a1", "dask-histogram>=2023.7a0", "correctionlib>=2.0.0", "pyarrow>=6.0.0", From 33d2e681301c1c37b257dbbfe6d50ef7cf56c47a Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Sun, 8 Oct 2023 12:19:48 +0100 Subject: [PATCH 12/26] fix: don't import protocol --- src/coffea/nanoevents/factory.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 40d1bda53..8361eaaa2 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -11,7 +11,6 @@ import dask_awkward import fsspec import uproot -from dask_awkward import ImplementsFormTransformation from coffea.nanoevents.mapping import ( CachedMapping, @@ -68,7 +67,7 @@ def _key_formatter(prefix, form_key, form, attribute): return prefix + f"/{attribute}/{form_key}" -class _map_schema_base(ImplementsFormTransformation): +class _map_schema_base: # ImplementsFormMapping, ImplementsFormMappingInfo def __init__( self, schemaclass=BaseSchema, metadata=None, behavior=None, version=None ): From 9d94cb0b8d50e89d6e79ff3a4be3ba88602dc0e3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Oct 2023 11:20:09 +0000 Subject: [PATCH 13/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 8361eaaa2..9b2557ac6 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -67,7 +67,7 @@ def _key_formatter(prefix, form_key, form, attribute): return prefix + f"/{attribute}/{form_key}" -class _map_schema_base: # ImplementsFormMapping, ImplementsFormMappingInfo +class _map_schema_base: # ImplementsFormMapping, ImplementsFormMappingInfo def __init__( self, schemaclass=BaseSchema, metadata=None, behavior=None, version=None ): From 0d9c913c1be461d669b8313208f5ee52764a72a5 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Wed, 11 Oct 2023 02:17:32 -0500 Subject: [PATCH 14/26] remove deprecated interface definition --- src/coffea/nanoevents/factory.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 9b2557ac6..d82b434e4 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -110,19 +110,6 @@ def _key_formatter(self, prefix, form_key, form, attribute): form_key += "%2C%21offsets" return prefix + f"/{attribute}/{form_key}" - # TODO: deprecate - def extract_form_keys_base_columns(self, form_keys): - base_columns = [] - for form_key in form_keys: - base_columns.extend( - [ - acolumn - for acolumn in urllib.parse.unquote(form_key).split(",") - if not acolumn.startswith("!") - ] - ) - return list(set(base_columns)) - class _map_schema_uproot(_map_schema_base): def __init__( From bb4df59dc10f8aabcb979b51e18dba661aa5df9c Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 11 Oct 2023 08:28:51 +0100 Subject: [PATCH 15/26] Update tests/test_jetmet_tools.py --- tests/test_jetmet_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py index b1375afa2..aace9b1bf 100644 --- a/tests/test_jetmet_tools.py +++ b/tests/test_jetmet_tools.py @@ -837,7 +837,7 @@ def test_corrected_jets_factory(optimization_enabled): **{name: evaluator[name] for name in jec_stack_names[5:6]} ) - print(dak.report_necessary_buffers(jets.eta)) + print(dak.report_necessary_columns(jets.eta)) print( dak.report_necessary_buffers( resosf.getScaleFactor( From e0694ad6c112b5a15a2aa2b976daced0d3a4da44 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 11 Oct 2023 08:29:04 +0100 Subject: [PATCH 16/26] Update tests/test_jetmet_tools.py --- tests/test_jetmet_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py index aace9b1bf..8be3a97f3 100644 --- a/tests/test_jetmet_tools.py +++ b/tests/test_jetmet_tools.py @@ -839,7 +839,7 @@ def test_corrected_jets_factory(optimization_enabled): print(dak.report_necessary_columns(jets.eta)) print( - dak.report_necessary_buffers( + dak.report_necessary_columns( resosf.getScaleFactor( JetEta=jets.eta, ) From e7384f995853733faaef19514632399a109e5064 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Wed, 11 Oct 2023 03:00:13 -0500 Subject: [PATCH 17/26] remove further remnants of old remapping interface --- src/coffea/nanoevents/factory.py | 48 -------------------------------- 1 file changed, 48 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index d82b434e4..ee9e75d0b 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -177,29 +177,6 @@ def __getitem__(this, key): return TranslateBufferKeys() - def create_column_mapping_and_key(self, tree, start, stop, interp_options): - from functools import partial - - from coffea.nanoevents.util import tuple_to_key - - partition_key = ( - str(tree.file.uuid), - tree.object_path, - f"{start}-{stop}", - ) - uuidpfn = {partition_key[0]: tree.file.file_path} - mapping = UprootSourceMapping( - TrivialUprootOpener(uuidpfn, interp_options), - start, - stop, - cache={}, - access_log=None, - use_ak_forth=True, - ) - mapping.preload_column_source(partition_key[0], partition_key[1], tree) - - return mapping, partial(self._key_formatter, tuple_to_key(partition_key)) - class _map_schema_parquet(_map_schema_base): def __init__( @@ -224,31 +201,6 @@ def __call__(self, form): return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form) - def create_column_mapping_and_key(self, columns, start, stop, interp_options): - from functools import partial - - from coffea.nanoevents.util import tuple_to_key - - uuid = "NO_UUID" - obj_path = "NO_OBJECT_PATH" - - partition_key = ( - str(uuid), - obj_path, - f"{start}-{stop}", - ) - uuidpfn = {uuid: columns} - mapping = PreloadedSourceMapping( - PreloadedOpener(uuidpfn), - start, - stop, - cache={}, - access_log=None, - ) - mapping.preload_column_source(partition_key[0], partition_key[1], columns) - - return mapping, partial(self._key_formatter, tuple_to_key(partition_key)) - class NanoEventsFactory: """A factory class to build NanoEvents objects""" From 92efdb20012675be136f18f17ed3cc0ed3e92044 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 11 Oct 2023 13:48:16 +0100 Subject: [PATCH 18/26] refactor: make key translation obvious --- src/coffea/nanoevents/factory.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index ee9e75d0b..9bed55ed2 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -111,6 +111,15 @@ def _key_formatter(self, prefix, form_key, form, attribute): return prefix + f"/{attribute}/{form_key}" +class _TranslatedMapping: + def __init__(self, func, mapping): + self._func = func + self._mapping = mapping + + def __getitem__(self, index): + return self._mapping[self._func(index)] + + class _map_schema_uproot(_map_schema_base): def __init__( self, schemaclass=BaseSchema, metadata=None, behavior=None, version=None @@ -168,14 +177,15 @@ def load_buffers(self, tree, keys, start, stop, interp_options): mapping.preload_column_source(partition_key[0], partition_key[1], tree) buffer_key = partial(self._key_formatter, tuple_to_key(partition_key)) - class TranslateBufferKeys: - def __getitem__(this, key): - form_key, attribute = self.parse_buffer_key(key) - return mapping[ - buffer_key(form_key=form_key, attribute=attribute, form=None) - ] + # The buffer-keys that dask-awkward knows about will not include the + # partition key. Therefore, we must translate the keys here. + def translate_key(index): + form_key, attribute = self.parse_buffer_key(index) + return mapping[ + buffer_key(form_key=form_key, attribute=attribute, form=None) + ] - return TranslateBufferKeys() + return _TranslatedMapping(translate_key, mapping) class _map_schema_parquet(_map_schema_base): From 8ae3cd5660bbf1acb6c2e7dd30683a1eda84bfbb Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Thu, 12 Oct 2023 02:35:06 -0500 Subject: [PATCH 19/26] fix typo from refactor --- src/coffea/nanoevents/factory.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 9bed55ed2..123f6a131 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -181,9 +181,7 @@ def load_buffers(self, tree, keys, start, stop, interp_options): # partition key. Therefore, we must translate the keys here. def translate_key(index): form_key, attribute = self.parse_buffer_key(index) - return mapping[ - buffer_key(form_key=form_key, attribute=attribute, form=None) - ] + return buffer_key(form_key=form_key, attribute=attribute, form=None) return _TranslatedMapping(translate_key, mapping) From 45a006004349ce5800e3c7221623004f2b4bf4f3 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 14 Oct 2023 10:22:42 -0500 Subject: [PATCH 20/26] update pins (note uncapped numpy and numba skooch) --- pyproject.toml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 36fada2d0..a12f82a74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,17 +37,17 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "awkward>=2.4.5", - "uproot>=5.1.0rc1", + "awkward>=2.4.6", + "uproot>=5.1.1", "dask[array]>=2023.4.0", - "dask-awkward>=2023.10a1", - "dask-histogram>=2023.7a0", - "correctionlib>=2.0.0", + "dask-awkward>=2023.10.0", + "dask-histogram>=2023.10.0", + "correctionlib>=2.3.3", "pyarrow>=6.0.0", "fsspec", "matplotlib>=3", - "numba>=0.57.0", - "numpy>=1.22.0,<1.25", # < 1.25 for numba 0.57 series + "numba>=0.58.0", + "numpy>=1.22.0", "scipy>=1.1.0", "tqdm>=4.27.0", "lz4", From 14d2cc2ed36fe07a2cacdef791574d6fffcfb65f Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 14 Oct 2023 11:02:51 -0500 Subject: [PATCH 21/26] try to convince pip to upgrade numpy upon installing coffea --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 234ac2e21..16aed3abc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,7 +69,7 @@ jobs: python -m pip install xgboost python -m pip install tritonclient[grpc,http] # install checked out coffea - python -m pip install -q -e '.[dev,parsl,dask,spark]' + python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade python -m pip list java -version - name: Install dependencies (MacOS) @@ -80,7 +80,7 @@ jobs: python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu python -m pip install xgboost # install checked out coffea - python -m pip install -q -e '.[dev,dask,spark]' + python -m pip install -q -e '.[dev,dask,spark]' --upgrade python -m pip list java -version - name: Install dependencies (Windows) @@ -91,7 +91,7 @@ jobs: python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu python -m pip install xgboost # install checked out coffea - python -m pip install -q -e '.[dev,dask]' + python -m pip install -q -e '.[dev,dask]' --upgrade python -m pip list java -version From ab3599e5ef408788068da6b2d8dd82cb86b57ce7 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 14 Oct 2023 11:11:49 -0500 Subject: [PATCH 22/26] be more insistent --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 16aed3abc..ea61615a7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,7 +69,7 @@ jobs: python -m pip install xgboost python -m pip install tritonclient[grpc,http] # install checked out coffea - python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade + python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade --upgrade-strategy eager python -m pip list java -version - name: Install dependencies (MacOS) @@ -80,7 +80,7 @@ jobs: python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu python -m pip install xgboost # install checked out coffea - python -m pip install -q -e '.[dev,dask,spark]' --upgrade + python -m pip install -q -e '.[dev,dask,spark]' --upgrade --upgrade-strategy eager python -m pip list java -version - name: Install dependencies (Windows) @@ -91,7 +91,7 @@ jobs: python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu python -m pip install xgboost # install checked out coffea - python -m pip install -q -e '.[dev,dask]' --upgrade + python -m pip install -q -e '.[dev,dask]' --upgrade --upgrade-strategy eager python -m pip list java -version From faff41ec1bdbd33ac7ca2d1d8ddda3ac11a3f427 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 14 Oct 2023 11:36:56 -0500 Subject: [PATCH 23/26] numba 0.58 pins numpy from above < 1.26 nb: safer to repin in coffea for users because of numba's sliding window, very easy to get a mismatch --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a12f82a74..689e03ff0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ "fsspec", "matplotlib>=3", "numba>=0.58.0", - "numpy>=1.22.0", + "numpy>=1.22.0,<1.26", # < 1.26 for numba 0.58 series "scipy>=1.1.0", "tqdm>=4.27.0", "lz4", From 864f7094e92fe11792dba736f093f8f3115b3ac0 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Tue, 17 Oct 2023 10:22:52 -0500 Subject: [PATCH 24/26] clean up usage of quoted ",!offsets" --- src/coffea/nanoevents/factory.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 123f6a131..e97f556f0 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -29,8 +29,9 @@ PHYSLITESchema, TreeMakerSchema, ) -from coffea.nanoevents.util import key_to_tuple, tuple_to_key +from coffea.nanoevents.util import quote, unquote, key_to_tuple, tuple_to_key +_offsets_label = quote(",!offsets") def _remove_not_interpretable(branch): if isinstance( @@ -63,7 +64,7 @@ def _remove_not_interpretable(branch): def _key_formatter(prefix, form_key, form, attribute): if attribute == "offsets": - form_key += "%2C%21offsets" + form_key += _offsets_label return prefix + f"/{attribute}/{form_key}" @@ -80,7 +81,7 @@ def keys_for_buffer_keys(self, buffer_keys): base_columns = set() for buffer_key in buffer_keys: form_key, attribute = self.parse_buffer_key(buffer_key) - operands = urllib.parse.unquote(form_key).split(",") + operands = unquote(form_key).split(",") it_operands = iter(operands) next(it_operands) @@ -97,7 +98,7 @@ def keys_for_buffer_keys(self, buffer_keys): def parse_buffer_key(self, buffer_key): prefix, attribute, form_key = buffer_key.rsplit("/", maxsplit=2) if attribute == "offsets": - return (form_key[: -len("%2C%21offsets")], attribute) + return (form_key[: -len(_offsets_label)], attribute) else: return (form_key, attribute) @@ -107,7 +108,7 @@ def buffer_key(self): def _key_formatter(self, prefix, form_key, form, attribute): if attribute == "offsets": - form_key += "%2C%21offsets" + form_key += _offsets_label return prefix + f"/{attribute}/{form_key}" From 9b96f7b8d3d1613e0ed22e111cf5c0221f03a024 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Oct 2023 15:23:12 +0000 Subject: [PATCH 25/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/factory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index e97f556f0..66f3cb482 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -29,10 +29,11 @@ PHYSLITESchema, TreeMakerSchema, ) -from coffea.nanoevents.util import quote, unquote, key_to_tuple, tuple_to_key +from coffea.nanoevents.util import key_to_tuple, quote, tuple_to_key, unquote _offsets_label = quote(",!offsets") + def _remove_not_interpretable(branch): if isinstance( branch.interpretation, uproot.interpretation.identify.uproot.AsGrouped From 0a525d0340aa0afc240cd313cd0067578c970556 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Tue, 17 Oct 2023 10:24:32 -0500 Subject: [PATCH 26/26] flake8 lint --- src/coffea/nanoevents/factory.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 66f3cb482..b6656282f 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -1,6 +1,5 @@ import io import pathlib -import urllib.parse import warnings import weakref from functools import partial