
TST (string dtype): un-xfail string tests specific to object dtype (pandas-dev#59433)

Co-authored-by: Joris Van den Bossche <[email protected]>
(cherry picked from commit dbeeb1f)
jbrockmendel authored and jorisvandenbossche committed Nov 4, 2024
1 parent ce56f2e commit e5f6d1d
Showing 3 changed files with 23 additions and 17 deletions.
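
All three files follow the same pattern: tests that are really exercising object dtype now build their data with an explicit object dtype instead of relying on inference, so the `TODO(infer_string)` xfails can be dropped. As a rough illustration of that pattern (a standalone sketch, not part of the diff, assuming only pandas and NumPy):

import numpy as np
import pandas as pd

# Passing dtype=object pins the column to object dtype even when the
# future default string-dtype inference is enabled.
df = pd.DataFrame({"a": ["a", "b", "c"]}, dtype=object)
assert df["a"].dtype == object

# The interpolate/fillna tests use an equivalent post-construction cast.
df2 = pd.DataFrame({"a": ["a", np.nan, "c"], "b": 1})
df2["a"] = df2["a"].astype(object)
assert df2["a"].dtype == object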
11 changes: 4 additions & 7 deletions pandas/tests/copy_view/test_interp_fillna.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     NA,
     ArrowDtype,
@@ -137,10 +135,9 @@ def test_interp_fill_functions_inplace(
         assert np.shares_memory(arr, get_array(df, "a")) is (dtype == "float64")
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_interpolate_cleaned_fill_method(using_copy_on_write):
-    # Check that "method is set to None" case works correctly
+def test_interpolate_cannot_with_object_dtype(using_copy_on_write):
     df = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
+    df["a"] = df["a"].astype(object)
     df_orig = df.copy()
 
     msg = "DataFrame.interpolate with object dtype"
@@ -159,9 +156,9 @@ def test_interpolate_cleaned_fill_method(using_copy_on_write):
     tm.assert_frame_equal(df, df_orig)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_interpolate_object_convert_no_op(using_copy_on_write):
+def test_interpolate_object_convert_no_op(using_copy_on_write, using_infer_string):
     df = DataFrame({"a": ["a", "b", "c"], "b": 1})
+    df["a"] = df["a"].astype(object)
     arr_a = get_array(df, "a")
     msg = "DataFrame.interpolate with method=pad is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
3 changes: 1 addition & 2 deletions pandas/tests/copy_view/test_replace.py
@@ -356,10 +356,9 @@ def test_replace_empty_list(using_copy_on_write):
         assert not df2._mgr._has_no_reference(0)
 
 
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
 @pytest.mark.parametrize("value", ["d", None])
 def test_replace_object_list_inplace(using_copy_on_write, value):
-    df = DataFrame({"a": ["a", "b", "c"]})
+    df = DataFrame({"a": ["a", "b", "c"]}, dtype=object)
     arr = get_array(df, "a")
     df.replace(["c"], value, inplace=True)
     if using_copy_on_write or value is None:
26 changes: 18 additions & 8 deletions pandas/tests/test_algos.py
@@ -1704,20 +1704,25 @@ def test_unique_complex_numbers(self, array, expected):
 
 
 class TestHashTable:
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize(
         "htable, data",
         [
-            (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]),
-            (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]),
+            (
+                ht.PyObjectHashTable,
+                np.array([f"foo_{i}" for i in range(1000)], dtype=object),
+            ),
+            (
+                ht.StringHashTable,
+                np.array([f"foo_{i}" for i in range(1000)], dtype=object),
+            ),
             (ht.Float64HashTable, np.arange(1000, dtype=np.float64)),
             (ht.Int64HashTable, np.arange(1000, dtype=np.int64)),
             (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)),
         ],
     )
     def test_hashtable_unique(self, htable, data, writable):
         # output of maker has guaranteed unique elements
-        s = Series(data)
+        s = Series(data, dtype=data.dtype)
         if htable == ht.Float64HashTable:
             # add NaN for float column
             s.loc[500] = np.nan
@@ -1744,20 +1749,25 @@ def test_hashtable_unique(self, htable, data, writable):
         reconstr = result_unique[result_inverse]
         tm.assert_numpy_array_equal(reconstr, s_duplicated.values)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize(
         "htable, data",
        [
-            (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]),
-            (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]),
+            (
+                ht.PyObjectHashTable,
+                np.array([f"foo_{i}" for i in range(1000)], dtype=object),
+            ),
+            (
+                ht.StringHashTable,
+                np.array([f"foo_{i}" for i in range(1000)], dtype=object),
+            ),
             (ht.Float64HashTable, np.arange(1000, dtype=np.float64)),
             (ht.Int64HashTable, np.arange(1000, dtype=np.int64)),
             (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)),
         ],
     )
     def test_hashtable_factorize(self, htable, writable, data):
         # output of maker has guaranteed unique elements
-        s = Series(data)
+        s = Series(data, dtype=data.dtype)
         if htable == ht.Float64HashTable:
             # add NaN for float column
             s.loc[500] = np.nan
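
For the hashtable tests, the parametrized string data is now an object-dtype ndarray and the Series is built with that same dtype, so PyObjectHashTable and StringHashTable keep receiving object-dtype values regardless of the default inference. A hedged standalone sketch of what that guarantees, using the same private pandas._libs.hashtable module as the test itself (array size reduced and only PyObjectHashTable shown, for brevity):

import numpy as np
import pandas as pd
from pandas._libs import hashtable as ht

# Build the input the same way the updated parametrization does.
data = np.array([f"foo_{i}" for i in range(100)], dtype=object)
s = pd.Series(data, dtype=data.dtype)
assert s.dtype == np.dtype(object)

# The object-dtype values feed the hash tables exercised by the test.
uniques = ht.PyObjectHashTable().unique(s.values)
assert len(uniques) == len(data)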
