From 31fbc00667ce7952d3292f9bcf661c326f1fbe0e Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 23 May 2024 14:30:45 -0400 Subject: [PATCH 1/8] Changes for dask-awkward one-pass optimize --- src/uproot/_dask.py | 83 +++++---------------------------------------- 1 file changed, 8 insertions(+), 75 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index c1c0100be..a1701ca25 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -889,7 +889,7 @@ def keys_for_buffer_keys(self, buffer_keys: frozenset[str]) -> frozenset[str]: keys: set[str] = set() for buffer_key in buffer_keys: # Identify form key - form_key, attribute = buffer_key.rsplit("-", maxsplit=1) + form_key, attribute = buffer_key.replace("@.", ".").rsplit("-", maxsplit=1) # Identify key from form_key keys.add(self._form_key_to_key[form_key]) return frozenset(keys) @@ -954,6 +954,7 @@ def __call__(self, form: Form) -> tuple[Form, TrivialFormMappingInfo]: class UprootReadMixin: base_form: Form expected_form: Form + behavior = {} form_mapping_info: ImplementsFormMappingInfo common_keys: frozenset[str] interp_options: dict[str, Any] @@ -1026,83 +1027,15 @@ def read_tree( assert tree.source # we must be reading something here return out, tree.source.performance_counters - def mock(self) -> AwkArray: - awkward = uproot.extras.awkward() - return awkward.typetracer.typetracer_from_form( - self.expected_form, - highlevel=True, - behavior=self.form_mapping_info.behavior, - ) - - def mock_empty(self, backend) -> AwkArray: - awkward = uproot.extras.awkward() - return awkward.to_backend( - self.expected_form.length_zero_array(highlevel=False), - backend=backend, - highlevel=True, - behavior=self.form_mapping_info.behavior, - ) - - def prepare_for_projection(self) -> tuple[AwkArray, TypeTracerReport, dict]: - awkward = uproot.extras.awkward() - dask_awkward = uproot.extras.dask_awkward() - - # A form mapping will (may) remap the base form into a new form - # The remapped form can be queried for structural information - - # Build typetracer and associated report object - meta, report = awkward.typetracer.typetracer_with_report( - self.expected_form, - highlevel=True, - behavior=self.form_mapping_info.behavior, - buffer_key=self.form_mapping_info.buffer_key, - ) - - return ( - meta, - report, - { - "trace": dask_awkward.lib.utils.trace_form_structure( - self.expected_form, - buffer_key=self.form_mapping_info.buffer_key, - ), - "form_info": self.form_mapping_info, - }, - ) - - def project(self: T, *, report: TypeTracerReport, state: dict) -> T: - keys = self.necessary_columns(report=report, state=state) - return self.project_keys(keys) - - def necessary_columns( - self, *, report: TypeTracerReport, state: dict - ) -> frozenset[str]: - ## Read from stash - # Form hierarchy information - form_key_to_parent_form_key: dict = state["trace"][ - "form_key_to_parent_form_key" - ] - # Buffer hierarchy information - form_key_to_buffer_keys: dict = state["trace"]["form_key_to_buffer_keys"] - # Restructured form information - form_info = state["form_info"] - - # Require the data of metadata buffers above shape-only requests - dask_awkward = uproot.extras.dask_awkward() - data_buffers = { - *report.data_touched, - *dask_awkward.lib.utils.buffer_keys_required_to_compute_shapes( - form_info.parse_buffer_key, - report.shape_touched, - form_key_to_parent_form_key, - form_key_to_buffer_keys, - ), - } + @property + def form(self): + return self.expected_form - # Determine which TTree keys need to be read - return form_info.keys_for_buffer_keys(data_buffers) & frozenset( + def project(self, columns) -> T: + keys = self.form_mapping_info.keys_for_buffer_keys(columns) & frozenset( self.common_keys ) + return self.project_keys(keys) def project_keys(self: T, keys: frozenset[str]) -> T: raise NotImplementedError From 47ccdaf83e69c22a4eadb865621ebef1cfc590c9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 23 May 2024 18:31:51 +0000 Subject: [PATCH 2/8] style: pre-commit fixes --- src/uproot/_dask.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index a1701ca25..aa15c2234 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -23,7 +23,6 @@ from uproot.behaviors.TBranch import HasBranches, TBranch, _regularize_step_size if TYPE_CHECKING: - from awkward._nplikes.typetracer import TypeTracerReport from awkward.forms import Form from awkward.highlevel import Array as AwkArray @@ -889,7 +888,9 @@ def keys_for_buffer_keys(self, buffer_keys: frozenset[str]) -> frozenset[str]: keys: set[str] = set() for buffer_key in buffer_keys: # Identify form key - form_key, attribute = buffer_key.replace("@.", ".").rsplit("-", maxsplit=1) + form_key, attribute = buffer_key.replace("@.", ".").rsplit( + "-", maxsplit=1 + ) # Identify key from form_key keys.add(self._form_key_to_key[form_key]) return frozenset(keys) From cd6690b051e747c11ebabebf2d1140afacf0178b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 10 Jun 2024 10:08:31 -0400 Subject: [PATCH 3/8] fix behaviour --- src/uproot/_dask.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index a1701ca25..bcafe50f7 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -882,7 +882,7 @@ def impl(form, column_path): buffer_key: Final[str] = "{form_key}-{attribute}" def parse_buffer_key(self, buffer_key: str) -> tuple[str, str]: - form_key, attribute = buffer_key.rsplit("-", maxsplit=1) + form_key, *attribute = buffer_key.rsplit("-", maxsplit=1) return form_key, attribute def keys_for_buffer_keys(self, buffer_keys: frozenset[str]) -> frozenset[str]: @@ -954,12 +954,15 @@ def __call__(self, form: Form) -> tuple[Form, TrivialFormMappingInfo]: class UprootReadMixin: base_form: Form expected_form: Form - behavior = {} form_mapping_info: ImplementsFormMappingInfo common_keys: frozenset[str] interp_options: dict[str, Any] allow_read_errors_with_report: bool | tuple[type[BaseException], ...] + @property + def behavior(self): + return self.form_mapping_info.behavior + @property def allowed_exceptions(self): if isinstance(self.allow_read_errors_with_report, tuple): From d3d3c4b48582fb49b9ebcfe02c57a2e93090f883 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 2 Aug 2024 15:53:09 -0400 Subject: [PATCH 4/8] simplistic nano pattern for columns --- src/uproot/_dask.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index 62320de92..4d1130164 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -1036,9 +1036,11 @@ def form(self): return self.expected_form def project(self, columns) -> T: - keys = self.form_mapping_info.keys_for_buffer_keys(columns) & frozenset( - self.common_keys - ) + from dask_awkward.lib.utils import _buf_to_col + keys = [_buf_to_col(c).replace(".", "_") for c in columns] + ["nJet", "nMuon", "nElectron", "nPhoton", "nTau"] + if not isinstance(self.form_mapping_info, TrivialFormMappingInfo): + roots = {_.split("_", 1)[0] for _ in keys if "_" in _} + keys.extend([f"n{_}" for _ in roots]) return self.project_keys(keys) def project_keys(self: T, keys: frozenset[str]) -> T: From 51cbbac1d000ab896ed83942e585e36240819845 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 Aug 2024 19:54:21 +0000 Subject: [PATCH 5/8] style: pre-commit fixes --- src/uproot/_dask.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index 4d1130164..a35f27191 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -1037,7 +1037,14 @@ def form(self): def project(self, columns) -> T: from dask_awkward.lib.utils import _buf_to_col - keys = [_buf_to_col(c).replace(".", "_") for c in columns] + ["nJet", "nMuon", "nElectron", "nPhoton", "nTau"] + + keys = [_buf_to_col(c).replace(".", "_") for c in columns] + [ + "nJet", + "nMuon", + "nElectron", + "nPhoton", + "nTau", + ] if not isinstance(self.form_mapping_info, TrivialFormMappingInfo): roots = {_.split("_", 1)[0] for _ in keys if "_" in _} keys.extend([f"n{_}" for _ in roots]) From 3146d9456d513bee0f35923df0f687ccd98fe8d9 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 2 Aug 2024 16:20:43 -0400 Subject: [PATCH 6/8] remove unnecessary --- src/uproot/_dask.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index a35f27191..96af92efe 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -1037,14 +1037,6 @@ def form(self): def project(self, columns) -> T: from dask_awkward.lib.utils import _buf_to_col - - keys = [_buf_to_col(c).replace(".", "_") for c in columns] + [ - "nJet", - "nMuon", - "nElectron", - "nPhoton", - "nTau", - ] if not isinstance(self.form_mapping_info, TrivialFormMappingInfo): roots = {_.split("_", 1)[0] for _ in keys if "_" in _} keys.extend([f"n{_}" for _ in roots]) From 4c011568d61967885d6cfdbb1e5a1fcf2647d6f8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 Aug 2024 20:22:17 +0000 Subject: [PATCH 7/8] style: pre-commit fixes --- src/uproot/_dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index 96af92efe..ecffe0f5d 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -1036,7 +1036,7 @@ def form(self): return self.expected_form def project(self, columns) -> T: - from dask_awkward.lib.utils import _buf_to_col + if not isinstance(self.form_mapping_info, TrivialFormMappingInfo): roots = {_.split("_", 1)[0] for _ in keys if "_" in _} keys.extend([f"n{_}" for _ in roots]) From 51c45c59a9c362bf9396ff4f385e38634e4fe1fb Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 2 Aug 2024 16:22:25 -0400 Subject: [PATCH 8/8] removed too much --- src/uproot/_dask.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index 96af92efe..ea4390a32 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -1037,6 +1037,8 @@ def form(self): def project(self, columns) -> T: from dask_awkward.lib.utils import _buf_to_col + + keys = [_buf_to_col(c).replace(".", "_") for c in columns] if not isinstance(self.form_mapping_info, TrivialFormMappingInfo): roots = {_.split("_", 1)[0] for _ in keys if "_" in _} keys.extend([f"n{_}" for _ in roots])