From 3956b73a7792f41e4410349f2c40b9a9a80decd2 Mon Sep 17 00:00:00 2001 From: keewis Date: Thu, 12 Aug 2021 16:41:33 +0200 Subject: [PATCH] conditionally disable bottleneck (#5560) --- doc/whats-new.rst | 6 ++++-- xarray/core/dataset.py | 6 ++++++ xarray/core/missing.py | 14 +++++++++++++- xarray/core/nputils.py | 3 +++ xarray/core/options.py | 6 ++++++ xarray/core/rolling.py | 5 +++-- xarray/core/variable.py | 8 +++++++- xarray/tests/test_dataset.py | 6 ++++++ xarray/tests/test_missing.py | 32 ++++++++++++++++++++++++++++++++ xarray/tests/test_variable.py | 23 +++++++++++++++++++++++ 10 files changed, 103 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3dad685aaf7..9ac9639b8c1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,10 @@ v0.19.1 (unreleased) New Features ~~~~~~~~~~~~ +- Add an option to disable the use of ``bottleneck`` (:pull:`5560`). + By `Justus Magin `_. +- Added ``**kwargs`` argument to :py:meth:`open_rasterio` to access overviews (:issue:`3269`). + By `Pushkar Kopparla `_. Breaking changes @@ -104,8 +108,6 @@ New Features - Allow removal of the coordinate attribute ``coordinates`` on variables by setting ``.attrs['coordinates']= None`` (:issue:`5510`). By `Elle Smith `_. -- Added ``**kwargs`` argument to :py:meth:`open_rasterio` to access overviews (:issue:`3269`). - By `Pushkar Kopparla `_. - Added :py:meth:`DataArray.to_numpy`, :py:meth:`DataArray.as_numpy`, and :py:meth:`Dataset.as_numpy`. (:pull:`5568`). By `Tom Nicholas `_. - Units in plot labels are now automatically inferred from wrapped :py:meth:`pint.Quantity` arrays. (:pull:`5561`). diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 533ecadbae5..4bfc1ccbdf1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6228,6 +6228,12 @@ def rank(self, dim, pct=False, keep_attrs=None): ranked : Dataset Variables that do not depend on `dim` are dropped. 
""" + if not OPTIONS["use_bottleneck"]: + raise RuntimeError( + "rank requires bottleneck to be enabled." + " Call `xr.set_options(use_bottleneck=True)` to enable it." + ) + if dim not in self.dims: raise ValueError(f"Dataset does not contain the dimension: {dim}") diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 6b5742104e4..36983a227b9 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -12,7 +12,7 @@ from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import datetime_to_numeric, push, timedelta_to_numeric -from .options import _get_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_version, is_duck_dask_array from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -405,6 +405,12 @@ def _bfill(arr, n=None, axis=-1): def ffill(arr, dim=None, limit=None): """forward fill missing values""" + if not OPTIONS["use_bottleneck"]: + raise RuntimeError( + "ffill requires bottleneck to be enabled." + " Call `xr.set_options(use_bottleneck=True)` to enable it." + ) + axis = arr.get_axis_num(dim) # work around for bottleneck 178 @@ -422,6 +428,12 @@ def ffill(arr, dim=None, limit=None): def bfill(arr, dim=None, limit=None): """backfill missing values""" + if not OPTIONS["use_bottleneck"]: + raise RuntimeError( + "bfill requires bottleneck to be enabled." + " Call `xr.set_options(use_bottleneck=True)` to enable it." 
+ ) + axis = arr.get_axis_num(dim) # work around for bottleneck 178 diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 3aaed08575a..3e0f550dd30 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -4,6 +4,8 @@ import pandas as pd from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined] +from .options import OPTIONS + try: import bottleneck as bn @@ -138,6 +140,7 @@ def f(values, axis=None, **kwargs): if ( _USE_BOTTLENECK + and OPTIONS["use_bottleneck"] and isinstance(values, np.ndarray) and bn_func is not None and not isinstance(axis, tuple) diff --git a/xarray/core/options.py b/xarray/core/options.py index 7104e12c29f..71358916243 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -14,6 +14,7 @@ FILE_CACHE_MAXSIZE = "file_cache_maxsize" KEEP_ATTRS = "keep_attrs" WARN_FOR_UNCLOSED_FILES = "warn_for_unclosed_files" +USE_BOTTLENECK = "use_bottleneck" OPTIONS = { @@ -31,6 +32,7 @@ FILE_CACHE_MAXSIZE: 128, KEEP_ATTRS: "default", WARN_FOR_UNCLOSED_FILES: False, + USE_BOTTLENECK: True, } _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) @@ -54,6 +56,7 @@ def _positive_integer(value): FILE_CACHE_MAXSIZE: _positive_integer, KEEP_ATTRS: lambda choice: choice in [True, False, "default"], WARN_FOR_UNCLOSED_FILES: lambda value: isinstance(value, bool), + USE_BOTTLENECK: lambda choice: choice in [True, False], } @@ -122,6 +125,9 @@ class set_options: attrs, ``False`` to always discard them, or ``'default'`` to use original logic that attrs should only be kept in unambiguous circumstances. Default: ``'default'``. + - ``use_bottleneck``: allow using bottleneck. Either ``True`` to accelerate + operations using bottleneck if it is installed or ``False`` to never use it. + Default: ``True`` - ``display_style``: display style to use in jupyter for xarray objects. Default: ``'html'``. Other options are ``'text'``. 
- ``display_expand_attrs``: whether to expand the attributes section for diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 04052510f5d..0cac9f2b129 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -7,7 +7,7 @@ from . import dtypes, duck_array_ops, utils from .arithmetic import CoarsenArithmetic -from .options import _get_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import is_duck_dask_array from .utils import either_dict_or_kwargs @@ -517,7 +517,8 @@ def _numpy_or_bottleneck_reduce( del kwargs["dim"] if ( - bottleneck_move_func is not None + OPTIONS["use_bottleneck"] + and bottleneck_move_func is not None and not is_duck_dask_array(self.obj.data) and len(self.dim) == 1 ): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index bd89fe97494..6b971389de7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -33,7 +33,7 @@ VectorizedIndexer, as_indexable, ) -from .options import _get_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import ( DuckArrayModule, cupy_array_type, @@ -2052,6 +2052,12 @@ def rank(self, dim, pct=False): -------- Dataset.rank, DataArray.rank """ + if not OPTIONS["use_bottleneck"]: + raise RuntimeError( + "rank requires bottleneck to be enabled." + " Call `xr.set_options(use_bottleneck=True)` to enable it." 
+ ) + import bottleneck as bn data = self.data diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 8e39bbdd83e..9b8b7c748f1 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4972,6 +4972,12 @@ def test_rank(self): with pytest.raises(ValueError, match=r"does not contain"): x.rank("invalid_dim") + def test_rank_use_bottleneck(self): + ds = Dataset({"a": ("x", [0, np.nan, 2]), "b": ("y", [4, 6, 3, 4])}) + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + ds.rank("x") + def test_count(self): ds = Dataset({"x": ("a", [np.nan, 1]), "y": 0, "z": np.nan}) expected = Dataset({"x": 1, "y": 1, "z": 0}) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index e2dfac04222..1ebcd9ac6f7 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -392,6 +392,38 @@ def test_ffill(): assert_equal(actual, expected) +def test_ffill_use_bottleneck(): + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + da.ffill("x") + + +@requires_dask +def test_ffill_use_bottleneck_dask(): + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") + da = da.chunk({"x": 1}) + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + da.ffill("x") + + +def test_bfill_use_bottleneck(): + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + da.bfill("x") + + +@requires_dask +def test_bfill_use_bottleneck_dask(): + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") + da = da.chunk({"x": 1}) + with xr.set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + da.bfill("x") + + @requires_bottleneck @requires_dask @pytest.mark.parametrize("method", ["ffill", "bfill"]) diff --git 
a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 487c9b34336..7f3ba9123d9 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1673,6 +1673,23 @@ def test_reduce(self): with pytest.raises(ValueError, match=r"cannot supply both"): v.mean(dim="x", axis=0) + @requires_bottleneck + def test_reduce_use_bottleneck(self, monkeypatch): + def raise_if_called(*args, **kwargs): + raise RuntimeError("should not have been called") + + import bottleneck as bn + + monkeypatch.setattr(bn, "nanmin", raise_if_called) + + v = Variable("x", [0.0, np.nan, 1.0]) + with pytest.raises(RuntimeError, match="should not have been called"): + with set_options(use_bottleneck=True): + v.min() + + with set_options(use_bottleneck=False): + v.min() + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( @@ -1720,6 +1737,12 @@ def test_rank_dask_raises(self): with pytest.raises(TypeError, match=r"arrays stored as dask"): v.rank("x") + def test_rank_use_bottleneck(self): + v = Variable(["x"], [3.0, 1.0, np.nan, 2.0, 4.0]) + with set_options(use_bottleneck=False): + with pytest.raises(RuntimeError): + v.rank("x") + @requires_bottleneck def test_rank(self): import bottleneck as bn