From c597428bead974d82013e56ed0a9a5ffc7c46109 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 4 Oct 2024 17:23:59 +0530 Subject: [PATCH 1/7] remove action=ignore for .apply() on cat dtype --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/apply.py | 14 ++++---------- pandas/tests/apply/test_frame_apply.py | 2 +- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a5b4560a47bc4..25f90973b3a43 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -490,6 +490,7 @@ Other Removals - Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`) - Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`) - Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`) +- Removed support for ``action="ignore"`` for :class:`SeriesApply` in :meth:`SeriesApply.apply_standard`, categorical NA value now returns ``False`` (:issue:`59938`) .. --------------------------------------------------------------------------- .. _whatsnew_300.performance: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7d50b466f5126..9c543ec5e6911 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -38,10 +38,7 @@ is_numeric_dtype, is_sequence, ) -from pandas.core.dtypes.dtypes import ( - CategoricalDtype, - ExtensionDtype, -) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCNDFrame, @@ -1466,13 +1463,10 @@ def curried(x): else: curried = func - # row-wise access - # apply doesn't have a `na_action` keyword and for backward compat reasons - # we need to give `na_action="ignore"` for categorical data. 
- # TODO: remove the `na_action="ignore"` when that default has been changed in + # remove the `na_action="ignore"` as default has been changed in # Categorical (GH51645). - action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None - mapped = obj._map_values(mapper=curried, na_action=action) + # Reference for below fix (GH ) + mapped = obj._map_values(mapper=curried) if len(mapped) and isinstance(mapped[0], ABCSeries): # GH#43986 Need to do list(mapped) in order to get treated as nested diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index dee0efcd8fd15..6e42aa17257b7 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -741,7 +741,7 @@ def test_apply_category_equalness(val): result = df.a.apply(lambda x: x == val) expected = Series( - [np.nan if pd.isnull(x) else x == val for x in df_values], name="a" + [False if pd.isnull(x) else x == val for x in df_values], name="a" ) tm.assert_series_equal(result, expected) From f7672f7aa11416cecad31327a6e0dee2e919733d Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 4 Oct 2024 17:29:43 +0530 Subject: [PATCH 2/7] add PR reference in comments --- pandas/core/apply.py | 4 ++-- pandas/tests/apply/test_frame_apply.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9c543ec5e6911..2c07944fc1844 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1464,8 +1464,8 @@ def curried(x): curried = func # remove the `na_action="ignore"` as default has been changed in - # Categorical (GH51645). - # Reference for below fix (GH ) + # Categorical (GH 51645). 
+ # Reference for below fix (GH 59966) mapped = obj._map_values(mapper=curried) if len(mapped) and isinstance(mapped[0], ABCSeries): diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 6e42aa17257b7..b4d3d49da9ba8 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -742,7 +742,7 @@ def test_apply_category_equalness(val): result = df.a.apply(lambda x: x == val) expected = Series( [False if pd.isnull(x) else x == val for x in df_values], name="a" - ) + ) # False since behavior of NaN for categorical dtype has been changed (GH 59966) tm.assert_series_equal(result, expected) From a2bb2857a98b647d6bcf623c58af5aa5157440b6 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 4 Oct 2024 17:36:43 +0530 Subject: [PATCH 3/7] fix pytest linting --- pandas/tests/apply/test_frame_apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index b4d3d49da9ba8..b6fb72883c500 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -742,7 +742,7 @@ def test_apply_category_equalness(val): result = df.a.apply(lambda x: x == val) expected = Series( [False if pd.isnull(x) else x == val for x in df_values], name="a" - ) # False since behavior of NaN for categorical dtype has been changed (GH 59966) + ) # False since behavior of NaN for categorical dtype has been changed (GH 59966) tm.assert_series_equal(result, expected) From a8073d216c71929f544f060afaebfbe6105c562f Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 4 Oct 2024 18:02:47 +0530 Subject: [PATCH 4/7] refac failing test_series_apply.py --- pandas/tests/apply/test_series_apply.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 76704de6f2d10..9541b0b7495c7 100644 --- 
a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -236,10 +236,10 @@ def test_apply_categorical_with_nan_values(series, by_row): with pytest.raises(AttributeError, match=msg): s.apply(lambda x: x.split("-")[0], by_row=by_row) return - - result = s.apply(lambda x: x.split("-")[0], by_row=by_row) + # NaN for cat dtype fixed in (GH 59966) + result = s.apply(lambda x: x.split("-")[0] if pd.notna(x) else False, by_row=by_row) result = result.astype(object) - expected = Series(["1", "1", np.nan], dtype="category") + expected = Series(["1", "1", False], dtype="category") expected = expected.astype(object) tm.assert_series_equal(result, expected) From 8840256c29d742c2aac80c4ef2e33451d4f47fa3 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 4 Oct 2024 20:09:42 +0530 Subject: [PATCH 5/7] Trigger CI From be4e2d9e0eb31c50ed88b72750a40147b5f3ffa8 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Sat, 5 Oct 2024 00:21:58 +0530 Subject: [PATCH 6/7] changes post review --- doc/source/whatsnew/v3.0.0.rst | 3 +-- pandas/core/apply.py | 4 ---- pandas/tests/apply/test_frame_apply.py | 3 ++- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 25f90973b3a43..376ce247c7277 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -490,7 +490,6 @@ Other Removals - Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`) - Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`) - Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`) -- Removed support for ``action="ignore"`` for :class:`SeriesApply` in :meth:`SeriesApply.apply_standard`, categorical 
NA value now returns ``False`` (:issue:`59938`) .. --------------------------------------------------------------------------- .. _whatsnew_300.performance: @@ -545,7 +544,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- +- Removed support for ``action="ignore"`` for :class:`SeriesApply` in :meth:`SeriesApply.apply_standard`, categorical NA value now returns ``False`` (:issue:`59938`) - Datetimelike diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2c07944fc1844..1f13459724d78 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1462,10 +1462,6 @@ def curried(x): else: curried = func - - # remove the `na_action="ignore"` as default has been changed in - # Categorical (GH 51645). - # Reference for below fix (GH 59966) mapped = obj._map_values(mapper=curried) if len(mapped) and isinstance(mapped[0], ABCSeries): diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index b6fb72883c500..f0ab01e9e960e 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -742,7 +742,8 @@ def test_apply_category_equalness(val): result = df.a.apply(lambda x: x == val) expected = Series( [False if pd.isnull(x) else x == val for x in df_values], name="a" - ) # False since behavior of NaN for categorical dtype has been changed (GH 59966) + ) + # False since behavior of NaN for categorical dtype has been changed (GH 59966) tm.assert_series_equal(result, expected) From 9526172acdfcfab377830f7e9de6d11c90bd62ad Mon Sep 17 00:00:00 2001 From: saldanhad Date: Sat, 5 Oct 2024 01:19:40 +0530 Subject: [PATCH 7/7] rephrase change log --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 376ce247c7277..52debcc49eb27 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -544,7 +544,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- Removed support for 
``action="ignore"`` for :class:`SeriesApply` in :meth:`SeriesApply.apply_standard`, categorical NA value now returns ``False`` (:issue:`59938`) +- Bug in :meth:`Series.apply` where ``NaN`` values were skipped instead of being passed to the function for :class:`CategoricalDtype` (:issue:`59938`) - Datetimelike