Skip to content

Commit

Permalink
Refactor the initialization of dask.DataFrame (#853)
Browse files Browse the repository at this point in the history
* MNT: refactor the initialization of dask.DataFrame

* MNT: add changelog entry

* ENH: pass columns to dask.dataframe.from_map

* Pass 'columns' to _get_partition, not to from_map

---------

Co-authored-by: Dan Allan <[email protected]>
  • Loading branch information
genematx and danielballan authored Jan 21, 2025
1 parent c5a01ee commit e42e722
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Write the date in place of the "Unreleased" in the case a new version is release

- Addressed DeprecationWarnings from Python and dependencies
- Update AccessPolicy Docs to match new filter arguments
- Refactored initialization of dask DataFrame

## v0.1.0-b13 (2024-01-09)

Expand Down
21 changes: 9 additions & 12 deletions tiled/client/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import functools
from urllib.parse import parse_qs, urlparse

import dask
import dask.dataframe.core
import dask.dataframe
import httpx

from ..serialization.table import deserialize_arrow, serialize_arrow
Expand Down Expand Up @@ -162,23 +163,19 @@ def read(self, columns=None):
structure = self.structure()
# Build a client-side dask dataframe whose partitions pull from a
# server-side dask array.
name = f"remote-dask-dataframe-{self.item['links']['self']}"
dask_tasks = {
(name, partition): (self._get_partition, partition, columns)
for partition in range(structure.npartitions)
}
label = f"remote-dask-dataframe-{self.item['links']['self']}"
meta = structure.meta

if columns is not None:
meta = meta[columns]
ddf = dask.dataframe.core.DataFrame(
dask_tasks,
name=name,

ddf = dask.dataframe.from_map(
functools.partial(self._get_partition, columns=columns),
range(structure.npartitions),
meta=meta,
label=label,
divisions=(None,) * (1 + structure.npartitions),
)
if columns is not None:
ddf = ddf[columns]

return ddf

# We implement *some* of the Mapping interface here but intentionally not
Expand Down

0 comments on commit e42e722

Please sign in to comment.