Merge branch 'main' into adjust-parquet-xfails
phofl authored Jan 29, 2024
2 parents f7d5e2f + 3aa1f4e commit da04e43
Showing 37 changed files with 1,119 additions and 892 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-report.yaml
@@ -42,7 +42,7 @@ jobs:
       - name: mamba list
         run: mamba list
 
-      - uses: actions/cache@v3
+      - uses: actions/cache@v4
         id: cache
         with:
           # Suffix is depending on the backend / OS. Let's be agnostic here
4 changes: 2 additions & 2 deletions continuous_integration/environment-3.10.yaml
@@ -21,9 +21,9 @@ dependencies:
   - pytest-rerunfailures
   - pytest-timeout
   - pytest-xdist
-  - moto
+  - moto<5
   # Optional dependencies
-  - mimesis
+  - mimesis<13.1.0 # https://github.com/dask/dask/issues/10858
   - numpy=1.23
   - pandas=1.5
   - flask
4 changes: 2 additions & 2 deletions continuous_integration/environment-3.11.yaml
@@ -21,9 +21,9 @@ dependencies:
   - pytest-rerunfailures
   - pytest-timeout
   - pytest-xdist
-  - moto
+  - moto<5
   # Optional dependencies
-  - mimesis
+  - mimesis<13.1.0 # https://github.com/dask/dask/issues/10858
   - numpy
   - pandas
   - flask
4 changes: 2 additions & 2 deletions continuous_integration/environment-3.12.yaml
@@ -21,9 +21,9 @@ dependencies:
   - pytest-rerunfailures
   - pytest-timeout
   - pytest-xdist
-  - moto
+  - moto<5
   # Optional dependencies
-  - mimesis
+  - mimesis<13.1.0 # https://github.com/dask/dask/issues/10858
   - numpy
   - pandas
   - flask
2 changes: 1 addition & 1 deletion continuous_integration/environment-3.9.yaml
@@ -21,7 +21,7 @@ dependencies:
   - pytest-rerunfailures
   - pytest-timeout
   - pytest-xdist
-  - moto
+  - moto<5
   # Optional dependencies
   - mimesis<12
   - numpy=1.22
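Reviewer context for the moto<5 pin: moto 5 collapsed its per-service decorators (mock_s3, mock_sts, ...) into a single mock_aws entry point, which breaks suites written against the old API; presumably the cap stays until the tests migrate. A minimal sketch of the incompatibility, not code from this repository:

from moto import mock_s3  # exists on moto<5; moto>=5 only ships the unified mock_aws

@mock_s3
def test_bucket_roundtrip():
    ...  # S3 calls in here hit moto's in-memory mock

# moto>=5 equivalent:
# from moto import mock_aws
# @mock_aws
# def test_bucket_roundtrip(): ...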
4 changes: 2 additions & 2 deletions dask/array/fft.py
@@ -15,7 +15,7 @@
 from dask.array.core import asarray
 from dask.array.core import concatenate as _concatenate
 from dask.array.creation import arange as _arange
-from dask.array.numpy_compat import _numpy_200
+from dask.array.numpy_compat import NUMPY_GE_200
 from dask.utils import derived_from, skip_doctest
 
 chunk_error = (
@@ -167,7 +167,7 @@ def func(a, s=None, axes=None):
     if s is None:
         axes = tuple(range(a.ndim))
     else:
-        if _numpy_200:
+        if NUMPY_GE_200:
             # Match deprecation in numpy
             warnings.warn(
                 "DeprecationWarning: `axes` should not be `None` "
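Context for the NUMPY_GE_200 branch above: NumPy 2.0 deprecates passing s to the n-dimensional FFTs while leaving axes as None, and dask.array.fft raises the same warning. A quick illustration, assuming NumPy >= 2.0:

import numpy as np

a = np.ones((8, 8))
# DeprecationWarning on NumPy >= 2.0: `axes` should not be `None`
# if `s` is not `None` (the exact message matched by the tests below).
np.fft.fftn(a, s=(4, 4))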
20 changes: 9 additions & 11 deletions dask/array/numpy_compat.py
@@ -8,16 +8,18 @@
 from dask.utils import derived_from
 
 _np_version = parse_version(np.__version__)
-_numpy_122 = _np_version >= parse_version("1.22.0")
-_numpy_123 = _np_version >= parse_version("1.23.0")
-_numpy_124 = _np_version >= parse_version("1.24.0")
-_numpy_125 = _np_version.release >= (1, 25, 0)
-_numpy_200 = _np_version.release >= (2, 0, 0)
+NUMPY_GE_122 = _np_version.release >= (1, 22)
+NUMPY_GE_123 = _np_version.release >= (1, 23)
+NUMPY_GE_124 = _np_version.release >= (1, 24)
+NUMPY_GE_125 = _np_version.release >= (1, 25)
+NUMPY_GE_200 = _np_version.release >= (2, 0)
 
 
-if _numpy_200:
+if NUMPY_GE_200:
+    from numpy.exceptions import AxisError, ComplexWarning  # noqa: F401
     from numpy.lib.array_utils import normalize_axis_index, normalize_axis_tuple
 else:
+    from numpy import AxisError, ComplexWarning  # noqa: F401
     from numpy.core.numeric import normalize_axis_index  # type: ignore[attr-defined]
     from numpy.core.numeric import normalize_axis_tuple  # type: ignore[attr-defined]
@@ -183,11 +185,7 @@ def rollaxis(a, axis, start=0):
 
 # kwarg is renamed in numpy 1.22.0
 def percentile(a, q, method="linear"):
-    if _numpy_122:
+    if NUMPY_GE_122:
         return np.percentile(a, q, method=method)
     else:
         return np.percentile(a, q, interpolation=method)
-
-
-ComplexWarning = np.exceptions.ComplexWarning if _numpy_200 else np.ComplexWarning
-AxisError = np.exceptions.AxisError if _numpy_200 else np.AxisError
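Note on the switch from parse_version(...) comparisons to _np_version.release tuples: the release tuple drops pre-release tags, so a dev or rc build of a version is treated as already providing that version's API, which plain Version ordering gets wrong. A small illustration, assuming the packaging library:

from packaging.version import parse as parse_version

v = parse_version("2.0.0.dev123")
print(v >= parse_version("2.0.0"))  # False: dev builds sort before the release
print(v.release >= (2, 0))          # True: .release is just the tuple (2, 0, 0)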
4 changes: 2 additions & 2 deletions dask/array/percentile.py
@@ -9,7 +9,7 @@
 from tlz import merge
 
 from dask.array.core import Array
-from dask.array.numpy_compat import _numpy_122
+from dask.array.numpy_compat import NUMPY_GE_122
 from dask.array.numpy_compat import percentile as np_percentile
 from dask.base import tokenize
 from dask.highlevelgraph import HighLevelGraph
@@ -126,7 +126,7 @@ def percentile(a, q, method="linear", internal_method="default", **kwargs):
         internal_method = method
 
     if "interpolation" in kwargs:
-        if _numpy_122:
+        if NUMPY_GE_122:
             warnings.warn(
                 "In Dask 2022.1.0, the `interpolation=` argument to percentile was renamed to "
                 "`method= ` ",
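The rename the warning refers to is NumPy's own: since 1.22, np.percentile accepts method= and deprecates the old interpolation= keyword, which is why the compat shim in numpy_compat.py dispatches on NUMPY_GE_122. For reference:

import numpy as np

a = np.arange(10)
np.percentile(a, 50, method="linear")           # NumPy >= 1.22 spelling
# np.percentile(a, 50, interpolation="linear")  # pre-1.22 spelling, deprecated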
4 changes: 2 additions & 2 deletions dask/array/routines.py
@@ -29,7 +29,7 @@
 )
 from dask.array.creation import arange, diag, empty, indices, tri
 from dask.array.einsumfuncs import einsum  # noqa
-from dask.array.numpy_compat import _numpy_200
+from dask.array.numpy_compat import NUMPY_GE_200
 from dask.array.reductions import reduction
 from dask.array.ufunc import multiply, sqrt
 from dask.array.utils import (
@@ -1801,7 +1801,7 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
         # mapping of the original values.
         matches = (ar[:, None] == out["values"][None, :]).astype(np.intp)
         inverse = (matches * out["inverse"]).sum(axis=1)
-        if _numpy_200:
+        if NUMPY_GE_200:
             inverse = inverse.reshape(orig_shape)
         result.append(inverse)
     if return_counts:
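The reshape branch tracks a NumPy 2.0 behavior change: np.unique(..., return_inverse=True) began returning inverse indices shaped like the input instead of flattened, so dask reshapes only on NumPy >= 2.0 to match. A sketch of the difference, assuming a 2-d input:

import numpy as np

a = np.array([[1, 2], [2, 3]])
_, inverse = np.unique(a, return_inverse=True)
# NumPy >= 2.0: inverse.shape == (2, 2), following the input's shape
# NumPy <  2.0: inverse.shape == (4,), always flat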
11 changes: 7 additions & 4 deletions dask/array/slicing.py
@@ -559,9 +559,14 @@ def slicing_plan(chunks, index):
 
     if not is_arraylike(index):
         index = np.asanyarray(index)
-    cum_chunks = cached_cumsum(chunks)
 
-    cum_chunks = asarray_safe(cum_chunks, like=index)
+    cum_chunks_tup = cached_cumsum(chunks)
+    cum_chunks = asarray_safe(cum_chunks_tup, like=index)
+    if cum_chunks.dtype.kind != "f":  # Don't cast NaN chunks to int
+        # This is important when index.dtype=uint64 (or uint32 on 32-bit hosts) to
+        # prevent accidental automatic casting during `index - cum_chunks` below
+        cum_chunks = cum_chunks.astype(index.dtype)
 
+    # this dispactches to the array library
     chunk_locations = np.searchsorted(cum_chunks, index, side="right")
 
@@ -634,8 +639,6 @@ def take(outname, inname, chunks, index, itemsize, axis=0):
             PerformanceWarning,
             stacklevel=6,
         )
-    if not is_arraylike(index):
-        index = np.asarray(index)
 
     # Check for chunks from the plan that would violate the user's
     # configured chunk size.
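The new casting guard matters because NumPy defines no common integer type for uint64 and int64, so mixing them in arithmetic silently promotes to float64, which is lossy for indices above 2**53. A standalone illustration:

import numpy as np

index = np.array([7], dtype=np.uint64)
cum_chunks = np.array([5], dtype=np.int64)
print((index - cum_chunks).dtype)  # float64: no common integer type exists
# Casting cum_chunks to the index dtype first keeps the arithmetic integral:
print((index - cum_chunks.astype(index.dtype)).dtype)  # uint64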
22 changes: 8 additions & 14 deletions dask/array/tests/test_array_core.py
@@ -51,7 +51,7 @@
     stack,
     store,
 )
-from dask.array.numpy_compat import _numpy_200
+from dask.array.numpy_compat import NUMPY_GE_200
 from dask.array.reshape import _not_implemented_message
 from dask.array.tests.test_dispatch import EncapsulateNDArray
 from dask.array.utils import assert_eq, same_keys
@@ -888,7 +888,7 @@ def test_elemwise_on_scalars():
     dy = from_array(ny, chunks=(5,))
     dz = dx.sum() * dy
 
-    if _numpy_200:
+    if NUMPY_GE_200:
         assert_eq(dz, nz)
     else:
         # Dask 0-d arrays do not behave like numpy scalars for type promotion
@@ -3112,25 +3112,19 @@ def test_slice_with_floats():
         d[[1, 1.5]]
 
 
-def test_slice_with_integer_types():
+@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.uint32, np.uint64])
+def test_slice_with_integer_types(dtype):
     x = np.arange(10)
     dx = da.from_array(x, chunks=5)
-    inds = np.array([0, 3, 6], dtype="u8")
+    inds = np.array([0, 3, 6], dtype=dtype)
     assert_eq(dx[inds], x[inds])
-    assert_eq(dx[inds.astype("u4")], x[inds.astype("u4")])
-
-    inds = np.array([0, 3, 6], dtype=np.int64)
-    assert_eq(dx[inds], x[inds])
-    assert_eq(dx[inds.astype("u4")], x[inds.astype("u4")])
 
 
-def test_index_with_integer_types():
+@pytest.mark.parametrize("cls", [int, np.int32, np.int64, np.uint32, np.uint64])
+def test_index_with_integer_types(cls):
     x = np.arange(10)
     dx = da.from_array(x, chunks=5)
-    inds = int(3)
-    assert_eq(dx[inds], x[inds])
-
-    inds = np.int64(3)
+    inds = cls(3)
     assert_eq(dx[inds], x[inds])
4 changes: 2 additions & 2 deletions dask/array/tests/test_fft.py
@@ -10,7 +10,7 @@
 import dask.array.fft
 from dask.array.core import normalize_chunks
 from dask.array.fft import fft_wrap
-from dask.array.numpy_compat import _numpy_200
+from dask.array.numpy_compat import NUMPY_GE_200
 from dask.array.utils import assert_eq, same_keys
 
 all_1d_funcnames = ["fft", "ifft", "rfft", "irfft", "hfft", "ihfft"]
@@ -60,7 +60,7 @@ def test_fft2n_shapes(funcname):
         da_fft(darr3, (12, 11), axes=(1, 0)), np_fft(nparr, (12, 11), axes=(1, 0))
     )
 
-    if _numpy_200 and funcname.endswith("fftn"):
+    if NUMPY_GE_200 and funcname.endswith("fftn"):
         ctx = pytest.warns(
             DeprecationWarning,
             match="`axes` should not be `None` if `s` is not `None`",
4 changes: 2 additions & 2 deletions dask/array/tests/test_masked.py
@@ -9,7 +9,7 @@
 import pytest
 
 import dask.array as da
-from dask.array.numpy_compat import ComplexWarning, _numpy_123
+from dask.array.numpy_compat import NUMPY_GE_123, ComplexWarning
 from dask.array.utils import assert_eq
 from dask.base import tokenize
 from dask.utils import typename
@@ -387,7 +387,7 @@ def test_average_weights_with_masked_array(keepdims):
 
     da_avg = da.ma.average(d_a, weights=d_weights, axis=1, keepdims=keepdims)
 
-    if _numpy_123:
+    if NUMPY_GE_123:
         assert_eq(da_avg, np.ma.average(a, weights=weights, axis=1, keepdims=keepdims))
     elif not keepdims:
         assert_eq(da_avg, np.ma.average(a, weights=weights, axis=1))
4 changes: 2 additions & 2 deletions dask/array/tests/test_reductions.py
@@ -13,7 +13,7 @@
 
 import dask.array as da
 import dask.config as config
-from dask.array.numpy_compat import ComplexWarning, _numpy_122
+from dask.array.numpy_compat import NUMPY_GE_122, ComplexWarning
 from dask.array.utils import assert_eq, same_keys
 from dask.core import get_deps
 
@@ -261,7 +261,7 @@ def test_arg_reductions(dfunc, func):
     assert_eq(dfunc(a, 0), func(x, 0))
     assert_eq(dfunc(a, 1), func(x, 1))
     assert_eq(dfunc(a, 2), func(x, 2))
-    if _numpy_122:
+    if NUMPY_GE_122:
         assert_eq(dfunc(a, keepdims=True), func(x, keepdims=True))
 
     pytest.raises(ValueError, lambda: dfunc(a, 3))
8 changes: 4 additions & 4 deletions dask/array/tests/test_routines.py
@@ -14,7 +14,7 @@
 np = pytest.importorskip("numpy")
 
 import dask.array as da
-from dask.array.numpy_compat import AxisError, _numpy_123, _numpy_200
+from dask.array.numpy_compat import NUMPY_GE_123, NUMPY_GE_200, AxisError
 from dask.array.utils import assert_eq, same_keys
 
@@ -2351,7 +2351,7 @@ def test_result_type():
     # Effect of scalars depends on their value
     assert da.result_type(1, b) == np.int16
     assert da.result_type(1.0, a) == np.float32
-    if _numpy_200:
+    if NUMPY_GE_200:
         assert da.result_type(np.int64(1), b) == np.int64
         assert da.result_type(np.ones((), np.int64), b) == np.int64
     assert da.result_type(1e200, a) == np.float32
@@ -2594,7 +2594,7 @@ def test_average_keepdims(a):
 
     da_avg = da.average(d_a, keepdims=True)
 
-    if _numpy_123:
+    if NUMPY_GE_123:
         np_avg = np.average(a, keepdims=True)
         assert_eq(np_avg, da_avg)
 
@@ -2609,7 +2609,7 @@ def test_average_weights(keepdims):
 
     da_avg = da.average(d_a, weights=d_weights, axis=1, keepdims=keepdims)
 
-    if _numpy_123:
+    if NUMPY_GE_123:
         assert_eq(da_avg, np.average(a, weights=weights, axis=1, keepdims=keepdims))
     elif not keepdims:
         assert_eq(da_avg, np.average(a, weights=weights, axis=1))
1 change: 0 additions & 1 deletion dask/bytes/tests/test_s3.py
@@ -137,7 +137,6 @@ def s3_context(bucket=test_bucket_name, files=files):
 
 
 @pytest.fixture()
-@pytest.mark.slow
 def s3_with_yellow_tripdata(s3):
     """
     Fixture with sample yellowtrip CSVs loaded into S3.
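A plausible reading of this deletion (the commit does not say): pytest ignores marks applied to fixtures, and newer pytest versions warn about the pattern, so the mark was inert. A sketch of the anti-pattern being removed:

import pytest

@pytest.fixture()
# @pytest.mark.slow   # would have no effect here; marks on fixtures are ignored
def my_fixture():
    return 42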
2 changes: 1 addition & 1 deletion dask/dataframe/__init__.py
@@ -10,7 +10,7 @@ def _dask_expr_enabled() -> bool:
         import dask_expr  # noqa: F401
     except ImportError:
         raise ValueError("Must install dask-expr to activate query planning.")
-    return use_dask_expr
+    return use_dask_expr if use_dask_expr is not None else False
 
 
 if _dask_expr_enabled():
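The one-liner makes the configuration tri-state explicit: an unset flag arrives as None and now resolves to False, instead of None leaking out of a function annotated -> bool. A minimal standalone sketch of the pattern (names simplified):

def _enabled(setting):
    # None means "not configured": treat it as disabled rather than returning
    # None, which is falsy but violates the bool return annotation.
    return setting if setting is not None else False

assert _enabled(None) is False
assert _enabled(True) is True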
8 changes: 3 additions & 5 deletions dask/dataframe/_testing.py
@@ -6,15 +6,13 @@
 PKG = os.path.dirname(os.path.dirname(__file__))
 
 
-def test_dataframe(query_planning="True") -> None:
-    import os
-
+def test_dataframe(query_planning: bool = True) -> None:
     import pytest
 
     cmd = PKG + "/dataframe"
     print(f"running: pytest {cmd}")
-    os.environ["DASK_DATAFRAME__QUERY_PLANNING"] = query_planning
-    sys.exit(pytest.main(["-n 4"] + [cmd]))
+    os.environ["DASK_DATAFRAME__QUERY_PLANNING"] = str(query_planning)
+    sys.exit(pytest.main(["-n 4", cmd]))
 
 
 __all__ = ["test_dataframe"]
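Two fixes in one here: the parameter becomes a real bool (the old default was the string "True"), and str() converts it for the environment variable, since os.environ only accepts text. For instance:

import os

os.environ["DASK_DATAFRAME__QUERY_PLANNING"] = str(False)
print(os.environ["DASK_DATAFRAME__QUERY_PLANNING"])  # "False"
# os.environ[...] = False would raise TypeError: str expected, not bool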
8 changes: 4 additions & 4 deletions dask/dataframe/backends.py
@@ -11,7 +11,7 @@
 from dask.array.dispatch import percentile_lookup
 from dask.array.percentile import _percentile
 from dask.backends import CreationDispatch, DaskBackendEntrypoint
-from dask.dataframe._compat import is_any_real_numeric_dtype
+from dask.dataframe._compat import PANDAS_GE_220, is_any_real_numeric_dtype
 from dask.dataframe.core import DataFrame, Index, Scalar, Series, _Frame
 from dask.dataframe.dispatch import (
     categorical_dtype_dispatch,
@@ -623,11 +623,11 @@ def concat_pandas(
         if uniform
         else any(isinstance(df, pd.DataFrame) for df in dfs2)
     ):
-        if uniform:
+        if uniform or PANDAS_GE_220:
             dfs3 = dfs2
             cat_mask = dfs2[0].dtypes == "category"
         else:
-            # When concatenating mixed dataframes and series on axis 1, Pandas
+            # When concatenating mixed dataframes and series on axis 1, Pandas <2.2
             # converts series to dataframes with a single column named 0, then
             # concatenates.
             dfs3 = [
@@ -647,7 +647,7 @@
             **kwargs,
         ).any()
 
-    if cat_mask.any():
+    if isinstance(cat_mask, pd.Series) and cat_mask.any():
         not_cat = cat_mask[~cat_mask].index
         # this should be aligned, so no need to filter warning
         out = pd.concat(
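For readers new to the idiom guarded in the last hunk: cat_mask is normally a boolean Series (one entry per column) produced by comparing dtypes, and the added isinstance check defends against code paths where that comparison collapses to a scalar. A standalone sketch of the normal case:

import pandas as pd

df = pd.DataFrame({"x": pd.Categorical(["a", "b"]), "y": [1, 2]})
cat_mask = df.dtypes == "category"      # boolean Series indexed by column name
print(cat_mask.any())                   # True: at least one categorical column
print(list(cat_mask[~cat_mask].index))  # ['y'], the non-categorical columns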
11 changes: 9 additions & 2 deletions dask/dataframe/io/tests/test_csv.py
@@ -1219,8 +1219,15 @@ def test_parse_dates_multi_column():
     """
     )
 
-    warn = FutureWarning if PANDAS_GE_220 else None
-    with pytest.warns(warn, match="nested"):
+    if PANDAS_GE_220:
+        with pytest.warns(FutureWarning, match="nested"):
+            with filetext(pdmc_text) as fn:
+                ddf = dd.read_csv(fn, parse_dates=[["date", "time"]])
+                df = pd.read_csv(fn, parse_dates=[["date", "time"]])
+
+                assert (df.columns == ddf.columns).all()
+                assert len(df) == len(ddf)
+    else:
         with filetext(pdmc_text) as fn:
             ddf = dd.read_csv(fn, parse_dates=[["date", "time"]])
             df = pd.read_csv(fn, parse_dates=[["date", "time"]])
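The unrolled if/else also retires an implicit pytest.warns(None): the old code passed warn=None on pandas < 2.2, a form pytest 7 deprecated and later versions removed, so "maybe warns" checks are now spelled as explicit branches. A compact sketch of the replacement pattern:

import warnings
import pytest

expect_warning = True
if expect_warning:
    with pytest.warns(FutureWarning, match="nested"):
        warnings.warn("nested columns", FutureWarning)
else:
    warnings.warn("nested columns", FutureWarning)  # no assertion on warnings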