Skip to content

Commit

Permalink
fix more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Nov 6, 2023
1 parent e55b985 commit 5a5a1d5
Show file tree
Hide file tree
Showing 22 changed files with 135 additions and 57 deletions.
7 changes: 5 additions & 2 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@

import numpy as np

from pandas._config import using_copy_on_write
from pandas._config import (
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import lib
from pandas._libs.tslibs import OutOfBoundsDatetime
Expand Down Expand Up @@ -966,7 +969,7 @@ def is_in_axis(key) -> bool:
def is_in_obj(gpr) -> bool:
if not hasattr(gpr, "name"):
return False
if using_copy_on_write():
if using_copy_on_write() or warn_copy_on_write():
# For the CoW case, we check the references to determine if the
# series is part of the object
try:
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1453,7 +1453,7 @@ def test_apply_dtype(col):
tm.assert_series_equal(result, expected)


def test_apply_mutating(using_array_manager, using_copy_on_write):
def test_apply_mutating(using_array_manager, using_copy_on_write, warn_copy_on_write):
# GH#35462 case where applied func pins a new BlockManager to a row
df = DataFrame({"a": range(100), "b": range(100, 200)})
df_orig = df.copy()
Expand All @@ -1467,7 +1467,8 @@ def func(row):
expected = df.copy()
expected["a"] += 1

result = df.apply(func, axis=1)
with tm.assert_cow_warning(warn_copy_on_write):
result = df.apply(func, axis=1)

tm.assert_frame_equal(result, expected)
if using_copy_on_write or using_array_manager:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def test_setitem_frame_2d_values(self, data):
df.iloc[:] = df
tm.assert_frame_equal(df, orig)

df.iloc[:-1] = df.iloc[:-1]
df.iloc[:-1] = df.iloc[:-1].copy()
tm.assert_frame_equal(df, orig)

df.iloc[:] = df.values
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/generic/test_duplicate_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@ def test_preserve_getitem(self):
assert df.loc[[0]].flags.allows_duplicate_labels is False
assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False

def test_ndframe_getitem_caching_issue(self, request, using_copy_on_write):
if not using_copy_on_write:
def test_ndframe_getitem_caching_issue(
self, request, using_copy_on_write, warn_copy_on_write
):
if not (using_copy_on_write or warn_copy_on_write):
request.applymarker(pytest.mark.xfail(reason="Unclear behavior."))
# NDFrame.__getitem__ will cache the first df['A']. May need to
# invalidate that cache? Update the cached entries?
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ def test_repr():
assert result == expected


def test_groupby_std_datetimelike():
# TODO(CoW-warn) this should NOT warn
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_groupby_std_datetimelike(warn_copy_on_write):
# GH#48481
tdi = pd.timedelta_range("1 Day", periods=10000)
ser = Series(tdi)
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/indexes/period/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@


class TestPeriodIndex:
def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write):
def test_getitem_periodindex_duplicates_string_slice(
self, using_copy_on_write, warn_copy_on_write
):
# monotonic
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="Y-JUN")
ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx)
Expand All @@ -21,7 +23,8 @@ def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write):
result = ts["2007"]
expected = ts[1:3]
tm.assert_series_equal(result, expected)
result[:] = 1
with tm.assert_cow_warning(warn_copy_on_write):
result[:] = 1
if using_copy_on_write:
tm.assert_series_equal(ts, original)
else:
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/indexing/multiindex/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write):


@td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view
def test_cache_updating(using_copy_on_write):
def test_cache_updating(using_copy_on_write, warn_copy_on_write):
# 5216
# make sure that we don't try to set a dead cache
a = np.random.default_rng(2).random((10, 3))
Expand All @@ -59,7 +59,8 @@ def test_cache_updating(using_copy_on_write):
df.loc[0]["z"].iloc[0] = 1.0
assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
else:
df.loc[0]["z"].iloc[0] = 1.0
with tm.assert_cow_warning(warn_copy_on_write):
df.loc[0]["z"].iloc[0] = 1.0
result = df.loc[(0, 0), "z"]
assert result == 1

Expand Down
11 changes: 8 additions & 3 deletions pandas/tests/indexing/multiindex/test_partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,10 @@ def test_getitem_partial_column_select(self):
# exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
@td.skip_array_manager_invalid_test
def test_partial_set(
self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write
self,
multiindex_year_month_day_dataframe_random_data,
using_copy_on_write,
warn_copy_on_write,
):
# GH #397
ymd = multiindex_year_month_day_dataframe_random_data
Expand All @@ -137,7 +140,8 @@ def test_partial_set(
df["A"].loc[2000, 4] = 1
df.loc[(2000, 4), "A"] = 1
else:
df["A"].loc[2000, 4] = 1
with tm.assert_cow_warning(warn_copy_on_write):
df["A"].loc[2000, 4] = 1
exp.iloc[65:85, 0] = 1
tm.assert_frame_equal(df, exp)

Expand All @@ -151,7 +155,8 @@ def test_partial_set(
df["A"].iloc[14] = 5
assert df["A"].iloc[14] == exp["A"].iloc[14]
else:
df["A"].iloc[14] = 5
with tm.assert_cow_warning(warn_copy_on_write):
df["A"].iloc[14] = 5
assert df["A"].iloc[14] == 5

@pytest.mark.parametrize("dtype", [int, float])
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/indexing/multiindex/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ def test_loc_getitem_tuple_plus_columns(
expected = df.loc[2000, 1, 6][["A", "B", "C"]]
tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_loc_getitem_setitem_slice_integers(self, frame_or_series):
index = MultiIndex(
levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
Expand Down Expand Up @@ -421,7 +422,7 @@ def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
tm.assert_series_equal(reindexed["foo", "two"], s > s.median())

def test_set_column_scalar_with_loc(
self, multiindex_dataframe_random_data, using_copy_on_write
self, multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
frame = multiindex_dataframe_random_data
subset = frame.index[[1, 4, 5]]
Expand All @@ -431,7 +432,8 @@ def test_set_column_scalar_with_loc(

frame_original = frame.copy()
col = frame["B"]
col[subset] = 97
with tm.assert_cow_warning(warn_copy_on_write):
col[subset] = 97
if using_copy_on_write:
# chained setitem doesn't work with CoW
tm.assert_frame_equal(frame, frame_original)
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/indexing/multiindex/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,7 @@ def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_dat
expected = s.reindex(s.index[5:])
tm.assert_series_equal(result, expected)

s = ymd["A"].copy()
exp = ymd["A"].copy()
s[5:] = 0
exp.iloc[5:] = 0
Expand Down
49 changes: 36 additions & 13 deletions pandas/tests/indexing/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def random_text(nobs=100):


class TestCaching:
def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write):
def test_slice_consolidate_invalidate_item_cache(
self, using_copy_on_write, warn_copy_on_write
):
# this is chained assignment, but will 'work'
with option_context("chained_assignment", None):
# #3970
Expand All @@ -49,7 +51,9 @@ def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write):
with tm.raises_chained_assignment_error():
df["bb"].iloc[0] = 0.17
else:
df["bb"].iloc[0] = 0.17
# TODO(CoW-warn) custom warning message
with tm.assert_cow_warning(warn_copy_on_write):
df["bb"].iloc[0] = 0.17
df._clear_item_cache()
if not using_copy_on_write:
tm.assert_almost_equal(df["bb"][0], 0.17)
Expand All @@ -74,7 +78,9 @@ def test_setitem_cache_updating(self, do_ref):
assert df.loc[0, "c"] == 0.0
assert df.loc[7, "c"] == 1.0

def test_setitem_cache_updating_slices(self, using_copy_on_write):
def test_setitem_cache_updating_slices(
self, using_copy_on_write, warn_copy_on_write
):
# GH 7084
# not updating cache on series setting with slices
expected = DataFrame(
Expand Down Expand Up @@ -102,7 +108,8 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write):
with tm.raises_chained_assignment_error():
out[row["C"]][six:eix] = v
else:
out[row["C"]][six:eix] = v
with tm.assert_cow_warning(warn_copy_on_write):
out[row["C"]][six:eix] = v

if not using_copy_on_write:
tm.assert_frame_equal(out, expected)
Expand All @@ -113,17 +120,21 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write):

out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
for ix, row in df.iterrows():
out.loc[six:eix, row["C"]] += row["D"]
# TODO(CoW-warn) should not warn
with tm.assert_produces_warning(FutureWarning):
out.loc[six:eix, row["C"]] += row["D"]

tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])

def test_altering_series_clears_parent_cache(self, using_copy_on_write):
def test_altering_series_clears_parent_cache(
self, using_copy_on_write, warn_copy_on_write
):
# GH #33675
df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
ser = df["A"]

if using_copy_on_write:
if using_copy_on_write or warn_copy_on_write:
assert "A" not in df._item_cache
else:
assert "A" in df._item_cache
Expand All @@ -138,7 +149,7 @@ def test_altering_series_clears_parent_cache(self, using_copy_on_write):


class TestChaining:
def test_setitem_chained_setfault(self, using_copy_on_write):
def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write):
# GH6026
data = ["right", "left", "left", "left", "right", "left", "timeout"]
mdata = ["right", "left", "left", "left", "right", "left", "none"]
Expand All @@ -150,6 +161,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write):
df.response[mask] = "none"
tm.assert_frame_equal(df, DataFrame({"response": data}))
else:
# TODO(CoW-warn) should warn
# with tm.assert_cow_warning(warn_copy_on_write):
df.response[mask] = "none"
tm.assert_frame_equal(df, DataFrame({"response": mdata}))

Expand All @@ -161,6 +174,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write):
df.response[mask] = "none"
tm.assert_frame_equal(df, DataFrame({"response": data}))
else:
# TODO(CoW-warn) should warn
# with tm.assert_cow_warning(warn_copy_on_write):
df.response[mask] = "none"
tm.assert_frame_equal(df, DataFrame({"response": mdata}))

Expand All @@ -172,6 +187,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write):
df.response[mask] = "none"
tm.assert_frame_equal(df, df_original)
else:
# TODO(CoW-warn) should warn
# with tm.assert_cow_warning(warn_copy_on_write):
df.response[mask] = "none"
tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data}))

Expand All @@ -183,7 +200,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write):
df["A"].iloc[0] = np.nan
expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]})
else:
df["A"].iloc[0] = np.nan
with tm.assert_cow_warning(warn_copy_on_write):
df["A"].iloc[0] = np.nan
expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
result = df.head()
tm.assert_frame_equal(result, expected)
Expand All @@ -193,7 +211,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write):
with tm.raises_chained_assignment_error():
df.A.iloc[0] = np.nan
else:
df.A.iloc[0] = np.nan
with tm.assert_cow_warning(warn_copy_on_write):
df.A.iloc[0] = np.nan
result = df.head()
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -636,7 +655,9 @@ def test_cache_updating2(self, using_copy_on_write):
expected = Series([0, 0, 0, 2, 0], name="f")
tm.assert_series_equal(df.f, expected)

def test_iloc_setitem_chained_assignment(self, using_copy_on_write):
def test_iloc_setitem_chained_assignment(
self, using_copy_on_write, warn_copy_on_write
):
# GH#3970
with option_context("chained_assignment", None):
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
Expand All @@ -648,7 +669,8 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write):
with tm.raises_chained_assignment_error():
df["bb"].iloc[0] = 0.13
else:
df["bb"].iloc[0] = 0.13
with tm.assert_cow_warning(warn_copy_on_write):
df["bb"].iloc[0] = 0.13

# GH#3970 this lookup used to break the chained setting to 0.15
df.iloc[ck]
Expand All @@ -657,7 +679,8 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write):
with tm.raises_chained_assignment_error():
df["bb"].iloc[0] = 0.15
else:
df["bb"].iloc[0] = 0.15
with tm.assert_cow_warning(warn_copy_on_write):
df["bb"].iloc[0] = 0.15

if not using_copy_on_write:
assert df["bb"].iloc[0] == 0.15
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/indexing/test_iat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Series,
period_range,
)
import pandas._testing as tm


def test_iat(float_frame):
Expand All @@ -30,17 +31,22 @@ def test_iat_getitem_series_with_period_index():
assert expected == result


def test_iat_setitem_item_cache_cleared(indexer_ial, using_copy_on_write):
def test_iat_setitem_item_cache_cleared(
indexer_ial, using_copy_on_write, warn_copy_on_write
):
# GH#45684
data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)}
df = DataFrame(data).copy()
ser = df["y"]

# previously this iat setting would split the block and fail to clear
# the item_cache.
indexer_ial(df)[7, 0] = 9999
with tm.assert_cow_warning(warn_copy_on_write and indexer_ial is tm.iloc):
indexer_ial(df)[7, 0] = 9999

indexer_ial(df)[7, 1] = 1234
# TODO(CoW-warn) should also warn for iat?
with tm.assert_cow_warning(warn_copy_on_write and indexer_ial is tm.iloc):
indexer_ial(df)[7, 1] = 1234

assert df.iat[7, 1] == 1234
if not using_copy_on_write:
Expand Down
Loading

0 comments on commit 5a5a1d5

Please sign in to comment.