Skip to content

Commit

Permalink
add test
Browse files Browse the repository at this point in the history
  • Loading branch information
rjzamora committed Jan 9, 2025
1 parent be29afd commit d677414
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
16 changes: 10 additions & 6 deletions dask/dataframe/dask_expr/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,16 @@ def is_scalar(x):
return not isinstance(x, Expr)


def _columns_equal(left_columns, right_columns):
    """Return whether two column specifications are equal.

    Either argument may be a numpy array or a numpy scalar, so the ``==``
    operator cannot always be applied to the raw arguments. Both sides are
    therefore passed through ``_convert_to_list`` before being compared.

    A scalar specification never equals a non-scalar one.
    """
    if is_scalar(left_columns) != is_scalar(right_columns):
        # One side is scalar and the other is not: cannot be equal.
        return False
    left_as_list = _convert_to_list(left_columns)
    right_as_list = _convert_to_list(right_columns)
    return left_as_list == right_as_list


def _tokenize_deterministic(*args, **kwargs) -> str:
    """Tokenize the given arguments, being strict about determinism.

    All positional and keyword arguments are forwarded to ``tokenize``
    unchanged, with ``ensure_deterministic=True`` always supplied.
    """
    token = tokenize(*args, ensure_deterministic=True, **kwargs)
    return token
Expand Down Expand Up @@ -226,9 +236,3 @@ def _is_any_real_numeric_dtype(arr_or_dtype):
def get_specified_shuffle(shuffle_method):
    """Resolve the shuffle method to use.

    A truthy ``shuffle_method`` is returned as-is. Otherwise the
    ``"dataframe.shuffle.method"`` config value is returned, which is
    ``None`` when unset so that evaluation is deferred until optimize.
    """
    if shuffle_method:
        return shuffle_method
    return config.get("dataframe.shuffle.method", None)


def _columns_equal(left, right):
    """Return whether ``left`` and ``right`` describe the same columns.

    The two sides must agree on being scalar (per ``is_scalar``); if they
    do, their ``_convert_to_list`` forms are compared for equality.
    """
    same_kind = is_scalar(left) == is_scalar(right)
    if not same_kind:
        return False
    return _convert_to_list(left) == _convert_to_list(right)
14 changes: 14 additions & 0 deletions dask/dataframe/dask_expr/io/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,20 @@ def test_from_dask_array_scalar_columns(columns):
assert_eq(df, pdf)


def test_from_dask_array_projection():
    """Column projection on a frame built from a dask array matches pandas."""
    values = np.random.default_rng().random((100, 10))
    expected = pd.DataFrame(values)
    actual = from_dask_array(da.from_array(values, chunks=(50, 10)))

    # Single label taken from ``.columns`` -- possibly np.int64(0)
    dd.assert_eq(expected[expected.columns[0]], actual[actual.columns[0]])

    # Slice of ``.columns`` -- possibly Index([0, 1], dtype='int64')
    dd.assert_eq(expected[expected.columns[0:2]], actual[actual.columns[0:2]])

    # Plain Python list of labels
    dd.assert_eq(
        expected[list(expected.columns[0:2])],
        actual[list(actual.columns[0:2])],
    )


def test_from_dict():
data = {"a": [1, 2, 3, 4], "B": [10, 11, 12, 13]}
result = from_dict(data, npartitions=2)
Expand Down

0 comments on commit d677414

Please sign in to comment.