Skip to content

Commit

Permalink
Fix seed for random test data. (#9844)
Browse files Browse the repository at this point in the history
* Fix seed for random test data.

Also switch to using default_rng instead of RandomState.

* Fixes

* one more fix.

* more fixes

* last one?

* one more
  • Loading branch information
dcherian authored Dec 3, 2024
1 parent 05f24f7 commit 7445012
Show file tree
Hide file tree
Showing 16 changed files with 63 additions and 59 deletions.
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ def requires_sparse():


def randn(shape, frac_nan=None, chunks=None, seed=0):
rng = np.random.RandomState(seed)
rng = np.random.default_rng(seed)
if chunks is None:
x = rng.standard_normal(shape)
else:
import dask.array as da

rng = da.random.RandomState(seed)
rng = da.random.default_rng(seed)
x = rng.standard_normal(shape, chunks=chunks)

if frac_nan is not None:
Expand All @@ -47,7 +47,7 @@ def randn(shape, frac_nan=None, chunks=None, seed=0):


def randint(low, high=None, size=None, frac_minus=None, seed=0):
rng = np.random.RandomState(seed)
rng = np.random.default_rng(seed)
x = rng.integers(low, high, size)
if frac_minus is not None:
inds = rng.choice(range(x.size), int(x.size * frac_minus))
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/reindexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

class Reindex:
def setup(self):
data = np.random.RandomState(0).randn(ntime, nx, ny)
data = np.random.default_rng(0).random((ntime, nx, ny))
self.ds = xr.Dataset(
{"temperature": (("time", "x", "y"), data)},
coords={"time": np.arange(ntime), "x": np.arange(nx), "y": np.arange(ny)},
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/unstacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class Unstacking:
def setup(self):
data = np.random.RandomState(0).randn(250, 500)
data = np.random.default_rng(0).random((250, 500))
self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...])
self.da_missing = self.da_full[:-1]
self.df_missing = self.da_missing.to_pandas()
Expand Down
3 changes: 2 additions & 1 deletion doc/user-guide/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ numpy) over all array values:
.. ipython:: python
arr = xr.DataArray(
np.random.RandomState(0).randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]
np.random.default_rng(0).random((2, 3)),
[("x", ["a", "b"]), ("y", [10, 20, 30])],
)
arr - 3
abs(arr)
Expand Down
2 changes: 1 addition & 1 deletion doc/user-guide/dask.rst
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ work as a streaming operation, when run on arrays loaded from disk:
.. ipython::
:verbatim:

In [56]: rs = np.random.RandomState(0)
In [56]: rs = np.random.default_rng(0)

In [57]: array1 = xr.DataArray(rs.standard_normal((1000, 100000)), dims=["place", "time"]) # 800MB

Expand Down
2 changes: 1 addition & 1 deletion doc/user-guide/pandas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ Let's take a look:

.. ipython:: python
data = np.random.RandomState(0).rand(2, 3, 4)
data = np.random.default_rng(0).random((2, 3, 4))
items = list("ab")
major_axis = list("mno")
minor_axis = pd.date_range(start="2000", periods=4, name="date")
Expand Down
8 changes: 4 additions & 4 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,12 +298,12 @@ def assert_allclose(a, b, check_default_indexes=True, **kwargs):


def create_test_data(
seed: int | None = None,
seed: int = 12345,
add_attrs: bool = True,
dim_sizes: tuple[int, int, int] = _DEFAULT_TEST_DIM_SIZES,
use_extension_array: bool = False,
) -> Dataset:
rs = np.random.RandomState(seed)
rs = np.random.default_rng(seed)
_vars = {
"var1": ["dim1", "dim2"],
"var2": ["dim1", "dim2"],
Expand All @@ -329,15 +329,15 @@ def create_test_data(
"dim1",
pd.Categorical(
rs.choice(
list(string.ascii_lowercase[: rs.randint(1, 5)]),
list(string.ascii_lowercase[: rs.integers(1, 5)]),
size=dim_sizes[0],
)
),
)
if dim_sizes == _DEFAULT_TEST_DIM_SIZES:
numbers_values = np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64")
else:
numbers_values = rs.randint(0, 3, _dims["dim3"], dtype="int64")
numbers_values = rs.integers(0, 3, _dims["dim3"], dtype="int64")
obj.coords["numbers"] = ("dim3", numbers_values)
obj.encoding = {"foo": "bar"}
assert_writeable(obj)
Expand Down
9 changes: 6 additions & 3 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2868,8 +2868,11 @@ def test_append_with_new_variable(self) -> None:

# check append mode for new variable
with self.create_zarr_target() as store_target:
xr.concat([ds, ds_to_append], dim="time").to_zarr(
store_target, mode="w", **self.version_kwargs
combined = xr.concat([ds, ds_to_append], dim="time")
combined.to_zarr(store_target, mode="w", **self.version_kwargs)
assert_identical(
combined,
xr.open_dataset(store_target, engine="zarr", **self.version_kwargs),
)
ds_with_new_var.to_zarr(store_target, mode="a", **self.version_kwargs)
combined = xr.concat([ds, ds_to_append], dim="time")
Expand Down Expand Up @@ -6494,7 +6497,7 @@ def test_zarr_safe_chunk_region(tmp_path):
arr.isel(a=slice(5, -1)).chunk(a=5).to_zarr(store, region="auto", mode="r+")

# Test if the code is detecting the last chunk correctly
data = np.random.RandomState(0).randn(2920, 25, 53)
data = np.random.default_rng(0).random((2920, 25, 53))
ds = xr.Dataset({"temperature": (("time", "lat", "lon"), data)})
chunks = {"time": 1000, "lat": 25, "lon": 53}
ds.chunk(chunks).to_zarr(store, compute=False, mode="w")
Expand Down
6 changes: 3 additions & 3 deletions xarray/tests/test_computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1293,9 +1293,9 @@ def covariance(x, y):
(x - x.mean(axis=-1, keepdims=True)) * (y - y.mean(axis=-1, keepdims=True))
).mean(axis=-1)

rs = np.random.RandomState(42)
array1 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
array2 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
rs = np.random.default_rng(42)
array1 = da.from_array(rs.random((4, 4)), chunks=(2, 4))
array2 = da.from_array(rs.random((4, 4)), chunks=(2, 4))
data_array_1 = xr.DataArray(array1, dims=("x", "z"))
data_array_2 = xr.DataArray(array2, dims=("y", "z"))

Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@


def test_raise_if_dask_computes():
data = da.from_array(np.random.RandomState(0).randn(4, 6), chunks=(2, 2))
data = da.from_array(np.random.default_rng(0).random((4, 6)), chunks=(2, 2))
with pytest.raises(RuntimeError, match=r"Too many computes"):
with raise_if_dask_computes():
data.compute()
Expand Down Expand Up @@ -77,7 +77,7 @@ def assertLazyAndAllClose(self, expected, actual):

@pytest.fixture(autouse=True)
def setUp(self):
self.values = np.random.RandomState(0).randn(4, 6)
self.values = np.random.default_rng(0).random((4, 6))
self.data = da.from_array(self.values, chunks=(2, 2))

self.eager_var = Variable(("x", "y"), self.values)
Expand Down
8 changes: 4 additions & 4 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2284,7 +2284,7 @@ class NdArraySubclass(np.ndarray):
assert isinstance(converted_subok.data, NdArraySubclass)

def test_is_null(self) -> None:
x = np.random.RandomState(42).randn(5, 6)
x = np.random.default_rng(42).random((5, 6))
x[x < 0] = np.nan
original = DataArray(x, [-np.arange(5), np.arange(6)], ["x", "y"])
expected = DataArray(pd.isnull(x), [-np.arange(5), np.arange(6)], ["x", "y"])
Expand Down Expand Up @@ -3528,7 +3528,7 @@ def test_from_multiindex_series_sparse(self) -> None:

idx = pd.MultiIndex.from_product([np.arange(3), np.arange(5)], names=["a", "b"])
series: pd.Series = pd.Series(
np.random.RandomState(0).random(len(idx)), index=idx
np.random.default_rng(0).random(len(idx)), index=idx
).sample(n=5, random_state=3)

dense = DataArray.from_series(series, sparse=False)
Expand Down Expand Up @@ -3703,8 +3703,8 @@ def test_to_dict_with_numpy_attrs(self) -> None:
assert expected_attrs == actual["attrs"]

def test_to_masked_array(self) -> None:
rs = np.random.RandomState(44)
x = rs.random_sample(size=(10, 20))
rs = np.random.default_rng(44)
x = rs.random(size=(10, 20))
x_masked = np.ma.masked_where(x < 0.5, x)
da = DataArray(x_masked)

Expand Down
56 changes: 28 additions & 28 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@


def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
rs = np.random.RandomState(seed)
rs = np.random.default_rng(seed)

lat = [2, 1, 0]
lon = [0, 1, 2]
Expand All @@ -126,7 +126,7 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
ds = xr.Dataset(
data_vars={
"da": xr.DataArray(
rs.rand(3, 3, nt1),
rs.random((3, 3, nt1)),
coords=[lat, lon, time1],
dims=["lat", "lon", "time"],
),
Expand All @@ -141,7 +141,7 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
ds_to_append = xr.Dataset(
data_vars={
"da": xr.DataArray(
rs.rand(3, 3, nt2),
rs.random((3, 3, nt2)),
coords=[lat, lon, time2],
dims=["lat", "lon", "time"],
),
Expand All @@ -156,7 +156,7 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
ds_with_new_var = xr.Dataset(
data_vars={
"new_var": xr.DataArray(
rs.rand(3, 3, nt1 + nt2),
rs.random((3, 3, nt1 + nt2)),
coords=[lat, lon, time1.append(time2)],
dims=["lat", "lon", "time"],
)
Expand Down Expand Up @@ -293,14 +293,14 @@ def test_repr(self) -> None:
numbers (dim3) int64 80B 0 1 2 0 0 1 1 2 2 3
Dimensions without coordinates: dim1
Data variables:
var1 (dim1, dim2) float64 576B -1.086 0.9973 0.283 ... 0.4684 -0.8312
var2 (dim1, dim2) float64 576B 1.162 -1.097 -2.123 ... 1.267 0.3328
var3 (dim3, dim1) float64 640B 0.5565 -0.2121 0.4563 ... -0.2452 -0.3616
var1 (dim1, dim2) float64 576B -0.9891 -0.3678 1.288 ... -0.2116 0.364
var2 (dim1, dim2) float64 576B 0.953 1.52 1.704 ... 0.1347 -0.6423
var3 (dim3, dim1) float64 640B 0.4107 0.9941 0.1665 ... 0.716 1.555
Attributes:
foo: bar""".format(data["dim3"].dtype)
)
actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
print(actual)

assert expected == actual

with set_options(display_width=100):
Expand Down Expand Up @@ -7161,13 +7161,13 @@ def test_raise_no_warning_assert_close(ds) -> None:
@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize("edge_order", [1, 2])
def test_differentiate(dask, edge_order) -> None:
rs = np.random.RandomState(42)
rs = np.random.default_rng(42)
coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

da = xr.DataArray(
rs.randn(8, 6),
rs.random((8, 6)),
dims=["x", "y"],
coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.randn(8, 6))},
coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.random((8, 6)))},
)
if dask and has_dask:
da = da.chunk({"x": 4})
Expand Down Expand Up @@ -7210,7 +7210,7 @@ def test_differentiate(dask, edge_order) -> None:
@pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
@pytest.mark.parametrize("dask", [True, False])
def test_differentiate_datetime(dask) -> None:
rs = np.random.RandomState(42)
rs = np.random.default_rng(42)
coord = np.array(
[
"2004-07-13",
Expand All @@ -7226,9 +7226,9 @@ def test_differentiate_datetime(dask) -> None:
)

da = xr.DataArray(
rs.randn(8, 6),
rs.random((8, 6)),
dims=["x", "y"],
coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.randn(8, 6))},
coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.random((8, 6)))},
)
if dask and has_dask:
da = da.chunk({"x": 4})
Expand Down Expand Up @@ -7260,12 +7260,12 @@ def test_differentiate_datetime(dask) -> None:
@requires_cftime
@pytest.mark.parametrize("dask", [True, False])
def test_differentiate_cftime(dask) -> None:
rs = np.random.RandomState(42)
rs = np.random.default_rng(42)
coord = xr.cftime_range("2000", periods=8, freq="2ME")

da = xr.DataArray(
rs.randn(8, 6),
coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.randn(8, 6))},
rs.random((8, 6)),
coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.random((8, 6)))},
dims=["time", "y"],
)

Expand All @@ -7289,17 +7289,17 @@ def test_differentiate_cftime(dask) -> None:

@pytest.mark.parametrize("dask", [True, False])
def test_integrate(dask) -> None:
rs = np.random.RandomState(42)
rs = np.random.default_rng(42)
coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

da = xr.DataArray(
rs.randn(8, 6),
rs.random((8, 6)),
dims=["x", "y"],
coords={
"x": coord,
"x2": (("x",), rs.randn(8)),
"x2": (("x",), rs.random(8)),
"z": 3,
"x2d": (("x", "y"), rs.randn(8, 6)),
"x2d": (("x", "y"), rs.random((8, 6))),
},
)
if dask and has_dask:
Expand Down Expand Up @@ -7343,17 +7343,17 @@ def test_integrate(dask) -> None:
@requires_scipy
@pytest.mark.parametrize("dask", [True, False])
def test_cumulative_integrate(dask) -> None:
rs = np.random.RandomState(43)
rs = np.random.default_rng(43)
coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

da = xr.DataArray(
rs.randn(8, 6),
rs.random((8, 6)),
dims=["x", "y"],
coords={
"x": coord,
"x2": (("x",), rs.randn(8)),
"x2": (("x",), rs.random(8)),
"z": 3,
"x2d": (("x", "y"), rs.randn(8, 6)),
"x2d": (("x", "y"), rs.random((8, 6))),
},
)
if dask and has_dask:
Expand Down Expand Up @@ -7406,7 +7406,7 @@ def test_cumulative_integrate(dask) -> None:
@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize("which_datetime", ["np", "cftime"])
def test_trapezoid_datetime(dask, which_datetime) -> None:
rs = np.random.RandomState(42)
rs = np.random.default_rng(42)
coord: ArrayLike
if which_datetime == "np":
coord = np.array(
Expand All @@ -7428,8 +7428,8 @@ def test_trapezoid_datetime(dask, which_datetime) -> None:
coord = xr.cftime_range("2000", periods=8, freq="2D")

da = xr.DataArray(
rs.randn(8, 6),
coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.randn(8, 6))},
rs.random((8, 6)),
coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.random((8, 6)))},
dims=["time", "y"],
)

Expand Down
8 changes: 4 additions & 4 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,16 +341,16 @@ def test_types(self, val1, val2, val3, null):

def construct_dataarray(dim_num, dtype, contains_nan, dask):
# dimnum <= 3
rng = np.random.RandomState(0)
rng = np.random.default_rng(0)
shapes = [16, 8, 4][:dim_num]
dims = ("x", "y", "z")[:dim_num]

if np.issubdtype(dtype, np.floating):
array = rng.randn(*shapes).astype(dtype)
array = rng.random(shapes).astype(dtype)
elif np.issubdtype(dtype, np.integer):
array = rng.randint(0, 10, size=shapes).astype(dtype)
array = rng.integers(0, 10, size=shapes).astype(dtype)
elif np.issubdtype(dtype, np.bool_):
array = rng.randint(0, 1, size=shapes).astype(dtype)
array = rng.integers(0, 1, size=shapes).astype(dtype)
elif dtype is str:
array = rng.choice(["a", "b", "c", "d"], size=shapes)
else:
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_formatting_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

@pytest.fixture
def dataarray() -> xr.DataArray:
return xr.DataArray(np.random.RandomState(0).randn(4, 6))
return xr.DataArray(np.random.default_rng(0).random((4, 6)))


@pytest.fixture
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2879,7 +2879,7 @@ def test_multiple_groupers(use_flox: bool, shuffle: bool) -> None:
)

b = xr.DataArray(
np.random.RandomState(0).randn(2, 3, 4),
np.random.default_rng(0).random((2, 3, 4)),
coords={"xy": (("x", "y"), [["a", "b", "c"], ["b", "c", "c"]], {"foo": "bar"})},
dims=["x", "y", "z"],
)
Expand Down
Loading

0 comments on commit 7445012

Please sign in to comment.