diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 6587f668b4..84ed0a86d6 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -264,6 +264,8 @@ class Dataset(Element, metaclass=PipelineMeta): _vdim_reductions = {} _kdim_reductions = {} + interface: Interface + def __new__(cls, data=None, kdims=None, vdims=None, **kwargs): """ Allows casting a DynamicMap to an Element class like hv.Curve, by applying the diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index c1c7a1999c..a9b2c91b2a 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -3,6 +3,8 @@ axis or map dimension. Also supplies the Dimensioned abstract baseclass for classes that accept Dimension values. """ +from __future__ import annotations + import builtins import datetime as dt import re @@ -922,7 +924,7 @@ def dimensions(self, selection='all', label=False): if label else dim for dim in dims] - def get_dimension(self, dimension, default=None, strict=False): + def get_dimension(self, dimension, default=None, strict=False) -> Dimension | None: """Get a Dimension object by name or index. Args: diff --git a/holoviews/element/util.py b/holoviews/element/util.py index 166aeb5f3f..130be0c0c4 100644 --- a/holoviews/element/util.py +++ b/holoviews/element/util.py @@ -1,4 +1,7 @@ +from __future__ import annotations + import itertools +from typing import TYPE_CHECKING import numpy as np import pandas as pd @@ -19,6 +22,11 @@ sort_topologically, ) +if TYPE_CHECKING: + from typing import TypeVar + + Array = TypeVar("Array", np.ndarray, pd.api.extensions.ExtensionArray) + def split_path(path): """ @@ -126,7 +134,7 @@ class categorical_aggregate2d(Operation): The grid interface types to use when constructing the gridded Dataset.""") @classmethod - def _get_coords(cls, obj): + def _get_coords(cls, obj: Dataset): """ Get the coordinates of the 2D aggregate, maintaining the correct sorting order. @@ -134,10 +142,11 @@ def _get_coords(cls, obj): xdim, ydim = obj.dimensions(label=True)[:2] xcoords = obj.dimension_values(xdim, False) ycoords = obj.dimension_values(ydim, False) + if xcoords.dtype.kind not in 'SUO': - xcoords = np.sort(xcoords) + xcoords = sort_arr(xcoords) if ycoords.dtype.kind not in 'SUO': - return xcoords, np.sort(ycoords) + return xcoords, sort_arr(ycoords) # Determine global orderings of y-values using topological sort grouped = obj.groupby(xdim, container_type=dict, @@ -149,19 +158,18 @@ def _get_coords(cls, obj): if len(vals) == 1: orderings[vals[0]] = [vals[0]] else: - for i in range(len(vals)-1): - p1, p2 = vals[i:i+2] + for p1, p2 in zip(vals[:-1], vals[1:]): orderings[p1] = [p2] if sort: if vals.dtype.kind in ('i', 'f'): sort = (np.diff(vals)>=0).all() else: - sort = np.array_equal(np.sort(vals), vals) + sort = np.array_equal(sort_arr(vals), vals) if sort or one_to_one(orderings, ycoords): - ycoords = np.sort(ycoords) + ycoords = sort_arr(ycoords) elif not is_cyclic(orderings): coords = list(itertools.chain(*sort_topologically(orderings))) - ycoords = coords if len(coords) == len(ycoords) else np.sort(ycoords) + ycoords = coords if len(coords) == len(ycoords) else sort_arr(ycoords) return np.asarray(xcoords), np.asarray(ycoords) def _aggregate_dataset(self, obj): @@ -332,3 +340,9 @@ def connect_edges(graph): end = end_ds.array(end_ds.kdims[:2]) paths.append(np.array([start[0], end[0]])) return paths + + +def sort_arr(arr: Array) -> Array: + if isinstance(arr, pd.api.extensions.ExtensionArray): + return arr[arr.argsort()] + return np.sort(arr) diff --git a/holoviews/tests/plotting/bokeh/test_barplot.py b/holoviews/tests/plotting/bokeh/test_barplot.py index de52079880..0b3c68aec1 100644 --- a/holoviews/tests/plotting/bokeh/test_barplot.py +++ b/holoviews/tests/plotting/bokeh/test_barplot.py @@ -351,6 +351,27 @@ def test_bars_not_continuous_data_list_custom_width(self): plot = bokeh_renderer.get_plot(bars) assert plot.handles["glyph"].width == 1 + def test_bars_categorical_order(self): + cells_dtype = pd.CategoricalDtype( + pd.array(["~1M", "~10M", "~100M"], dtype="string"), + ordered=True, + ) + df = pd.DataFrame(dict( + cells=cells_dtype.categories.astype(cells_dtype), + time=pd.array([2.99, 18.5, 835.2]), + function=pd.array(["read", "read", "read"]), + )) + + bars = Bars(df, ["function", "cells"], ["time"]) + plot = bokeh_renderer.get_plot(bars) + x_factors = plot.handles["x_range"].factors + + np.testing.assert_equal(x_factors, [ + ("read", "~1M"), + ("read", "~10M"), + ("read", "~100M"), + ]) + def test_bars_group(self): samples = 100