Commit

Merge branch 'main' into fix-constructor-from-mgr

jorisvandenbossche authored Oct 26, 2023
2 parents fd51a03 + aeb3644 commit f972b04
Showing 11 changed files with 126 additions and 53 deletions.
10 changes: 10 additions & 0 deletions doc/source/whatsnew/v2.1.2.rst
@@ -8,18 +8,28 @@ including other versions of pandas.

{{ header }}

.. ---------------------------------------------------------------------------
.. _whatsnew_212.deprecations:

Deprecations
~~~~~~~~~~~~

- Reverted deprecation of ``fill_method=None`` in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`; the values ``'backfill'``, ``'bfill'``, ``'pad'``, and ``'ffill'`` are still deprecated (:issue:`53491`)

.. ---------------------------------------------------------------------------
.. _whatsnew_212.regressions:

Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed regression in :meth:`DataFrame.join` where result has missing values and dtype is arrow backed string (:issue:`55348`)
- Fixed regression in :meth:`~DataFrame.rolling` where non-nanosecond index or ``on`` column would produce incorrect results (:issue:`55026`, :issue:`55106`, :issue:`55299`)
- Fixed regression in :meth:`DataFrame.resample` which was extrapolating back to ``origin`` when ``origin`` was outside its bounds (:issue:`55064`)
- Fixed regression in :meth:`DataFrame.sort_index` which was not sorting correctly when the index was a sliced :class:`MultiIndex` (:issue:`55379`)
- Fixed regression in :meth:`DataFrameGroupBy.agg` and :meth:`SeriesGroupBy.agg` where if the option ``compute.use_numba`` was set to True, groupby methods not supported by the numba engine would raise a ``TypeError`` (:issue:`55520`)
- Fixed performance regression with wide DataFrames, typically involving methods where all columns were accessed individually (:issue:`55256`, :issue:`55245`)
- Fixed regression in :func:`merge_asof` raising ``TypeError`` for ``by`` with datetime and timedelta dtypes (:issue:`55453`)
- Fixed regression in construction of certain DataFrame or Series subclasses (:issue:`54922`)
- Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite when using ``detect_types`` (:issue:`55554`)

.. ---------------------------------------------------------------------------
.. _whatsnew_212.bug_fixes:
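
As an illustration (not part of the diff) of the reverted deprecation noted above, assuming pandas 2.1.2: ``fill_method=None`` no longer warns, while explicit fill values still do.

import warnings

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 2.0, 4.0])

# fill_method=None is no longer deprecated, so no FutureWarning is expected
# even with warnings escalated to errors.
with warnings.catch_warnings():
    warnings.simplefilter("error", FutureWarning)
    s.pct_change(fill_method=None)

# Explicit fill values such as 'ffill' remain deprecated and still warn.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    s.pct_change(fill_method="ffill")
assert any(issubclass(w.category, FutureWarning) for w in caught)
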
1 change: 0 additions & 1 deletion doc/source/whatsnew/v2.2.0.rst
@@ -288,7 +288,6 @@ Other Deprecations
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
- Deprecating downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_220.performance:
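
As a side note on the ``future.no_silent_downcasting`` option referenced in the context lines above, a minimal sketch of opting in (assumes pandas 2.2+; not part of this diff):

import pandas as pd

# Opt in to the future behaviour: object-dtype results of fillna/ffill/bfill
# are no longer silently downcast to a numeric dtype.
pd.set_option("future.no_silent_downcasting", True)

s = pd.Series([1, 2, None], dtype="object")
filled = s.fillna(0)
print(filled.dtype)  # object (previously silently downcast to int64)
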
44 changes: 23 additions & 21 deletions pandas/core/generic.py
@@ -11723,6 +11723,7 @@ def pct_change(
How to handle NAs **before** computing percent changes.
.. deprecated:: 2.1
All options of `fill_method` are deprecated except `fill_method=None`.
limit : int, default None
The number of consecutive NAs to fill before stopping.
@@ -11829,32 +11830,33 @@ def pct_change(
APPL -0.252395 -0.011860 NaN
"""
# GH#53491
if fill_method is not lib.no_default or limit is not lib.no_default:
if fill_method not in (lib.no_default, None) or limit is not lib.no_default:
warnings.warn(
"The 'fill_method' and 'limit' keywords in "
f"{type(self).__name__}.pct_change are deprecated and will be "
"removed in a future version. Call "
f"{'bfill' if fill_method in ('backfill', 'bfill') else 'ffill'} "
"before calling pct_change instead.",
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(self).__name__}.pct_change are deprecated and will be removed "
"in a future version. Either fill in any non-leading NA values prior "
"to calling pct_change or specify 'fill_method=None' to not fill NA "
"values.",
FutureWarning,
stacklevel=find_stack_level(),
)
if fill_method is lib.no_default:
cols = self.items() if self.ndim == 2 else [(None, self)]
for _, col in cols:
mask = col.isna().values
mask = mask[np.argmax(~mask) :]
if mask.any():
warnings.warn(
"The default fill_method='pad' in "
f"{type(self).__name__}.pct_change is deprecated and will be "
"removed in a future version. Call ffill before calling "
"pct_change to retain current behavior and silence this "
"warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
break
if limit is lib.no_default:
cols = self.items() if self.ndim == 2 else [(None, self)]
for _, col in cols:
mask = col.isna().values
mask = mask[np.argmax(~mask) :]
if mask.any():
warnings.warn(
"The default fill_method='pad' in "
f"{type(self).__name__}.pct_change is deprecated and will "
"be removed in a future version. Either fill in any "
"non-leading NA values prior to calling pct_change or "
"specify 'fill_method=None' to not fill NA values.",
FutureWarning,
stacklevel=find_stack_level(),
)
break
fill_method = "pad"
if limit is lib.no_default:
limit = None
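
The default-path warning above fires only when a column contains NA values after its first non-NA entry; a small NumPy sketch of that non-leading-NA check (illustrative values, not part of the diff):

import numpy as np

values = np.array([np.nan, 1.0, np.nan, 3.0])

mask = np.isnan(values)
# np.argmax(~mask) is the position of the first non-NA value; slicing there
# discards leading NAs, which pct_change never fills anyway.
mask = mask[np.argmax(~mask):]
# Any NA left after the first valid value is what triggers the warning.
print(mask.any())  # True for these values
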
25 changes: 14 additions & 11 deletions pandas/core/groupby/groupby.py
@@ -5296,7 +5296,7 @@ def diff(
def pct_change(
self,
periods: int = 1,
fill_method: FillnaOptions | lib.NoDefault = lib.no_default,
fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default,
limit: int | None | lib.NoDefault = lib.no_default,
freq=None,
axis: Axis | lib.NoDefault = lib.no_default,
@@ -5348,23 +5348,26 @@ def pct_change(
goldfish 0.2 0.125
"""
# GH#53491
if fill_method is not lib.no_default or limit is not lib.no_default:
if fill_method not in (lib.no_default, None) or limit is not lib.no_default:
warnings.warn(
"The 'fill_method' and 'limit' keywords in "
f"{type(self).__name__}.pct_change are deprecated and will be "
"removed in a future version. Call "
f"{'bfill' if fill_method in ('backfill', 'bfill') else 'ffill'} "
"before calling pct_change instead.",
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(self).__name__}.pct_change are deprecated and will be removed "
"in a future version. Either fill in any non-leading NA values prior "
"to calling pct_change or specify 'fill_method=None' to not fill NA "
"values.",
FutureWarning,
stacklevel=find_stack_level(),
)
if fill_method is lib.no_default:
if any(grp.isna().values.any() for _, grp in self):
if limit is lib.no_default and any(
grp.isna().values.any() for _, grp in self
):
warnings.warn(
"The default fill_method='ffill' in "
f"{type(self).__name__}.pct_change is deprecated and will be "
"removed in a future version. Call ffill before calling "
"pct_change to retain current behavior and silence this warning.",
f"{type(self).__name__}.pct_change is deprecated and will "
"be removed in a future version. Either fill in any "
"non-leading NA values prior to calling pct_change or "
"specify 'fill_method=None' to not fill NA values.",
FutureWarning,
stacklevel=find_stack_level(),
)
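
A usage sketch (illustrative data, not part of the diff) showing that the grouped ``pct_change`` likewise accepts ``fill_method=None`` without a deprecation warning:

import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "b"], "vals": [1.0, np.nan, 2.0, 4.0]})

# fill_method=None leaves NAs untouched and emits no deprecation warning.
result = df.groupby("key")["vals"].pct_change(fill_method=None)
print(result)
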
10 changes: 9 additions & 1 deletion pandas/core/window/rolling.py
@@ -21,6 +21,7 @@

from pandas._libs.tslibs import (
BaseOffset,
Timedelta,
to_offset,
)
import pandas._libs.window.aggregations as window_aggregations
@@ -112,6 +113,8 @@
from pandas.core.generic import NDFrame
from pandas.core.groupby.ops import BaseGrouper

from pandas.core.arrays.datetimelike import dtype_to_unit


class BaseWindow(SelectionMixin):
"""Provides utilities for performing windowing operations."""
@@ -1887,7 +1890,12 @@ def _validate(self):
self._on.freq.nanos / self._on.freq.n
)
else:
self._win_freq_i8 = freq.nanos
try:
unit = dtype_to_unit(self._on.dtype) # type: ignore[arg-type]
except TypeError:
# if not a datetime dtype, eg for empty dataframes
unit = "ns"
self._win_freq_i8 = Timedelta(freq.nanos).as_unit(unit)._value

# min_periods must be an integer
if self.min_periods is None:
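
A usage sketch of the behaviour fixed by the ``_win_freq_i8`` conversion above (assumes a pandas build with this fix; data is illustrative): rolling over a non-nanosecond datetime index now matches the nanosecond result.

import pandas as pd

idx = pd.date_range("2023-01-01", periods=5, freq="1s")
df = pd.DataFrame({"A": range(5)}, index=idx)
expected = df.rolling("2s").sum()

# Same data, but with the index stored at millisecond resolution.
df_ms = df.copy()
df_ms.index = df_ms.index.as_unit("ms")
result = df_ms.rolling("2s").sum()

# Identical window sums; only the index resolution differs.
assert (result.values == expected.values).all()
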
6 changes: 2 additions & 4 deletions pandas/io/sql.py
@@ -2094,19 +2094,17 @@ def _adapt_time(t) -> str:
# Python 3.12+ doesn't auto-register adapters for us anymore

adapt_date_iso = lambda val: val.isoformat()
adapt_datetime_iso = lambda val: val.isoformat()
adapt_datetime_iso = lambda val: val.isoformat(" ")

sqlite3.register_adapter(time, _adapt_time)

sqlite3.register_adapter(date, adapt_date_iso)
sqlite3.register_adapter(datetime, adapt_datetime_iso)

convert_date = lambda val: date.fromisoformat(val.decode())
convert_datetime = lambda val: datetime.fromisoformat(val.decode())
convert_timestamp = lambda val: datetime.fromtimestamp(int(val))
convert_timestamp = lambda val: datetime.fromisoformat(val.decode())

sqlite3.register_converter("date", convert_date)
sqlite3.register_converter("datetime", convert_datetime)
sqlite3.register_converter("timestamp", convert_timestamp)

def sql_schema(self) -> str:
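
A standalone sqlite3 sketch (mirroring the adapters/converters above; the table and column names are illustrative, not from the diff) of why ``isoformat(" ")`` plus ``fromisoformat`` lets ``detect_types`` round-trip datetimes:

import sqlite3
from datetime import datetime

sqlite3.register_adapter(datetime, lambda val: val.isoformat(" "))
sqlite3.register_converter("timestamp", lambda val: datetime.fromisoformat(val.decode()))

with sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES) as conn:
    conn.execute("CREATE TABLE t (ts TIMESTAMP)")
    conn.execute("INSERT INTO t VALUES (?)", (datetime(2020, 12, 31, 12),))
    (value,) = conn.execute("SELECT ts FROM t").fetchone()

print(value)  # datetime.datetime(2020, 12, 31, 12, 0)
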
16 changes: 8 additions & 8 deletions pandas/tests/frame/methods/test_pct_change.py
@@ -29,7 +29,7 @@ def test_pct_change_with_nas(
obj = frame_or_series(vals)

msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(obj).__name__}.pct_change are deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
@@ -46,7 +46,7 @@ def test_pct_change_numeric(self):
pnl.iat[2, 3] = 60

msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)

@@ -59,12 +59,11 @@

def test_pct_change(self, datetime_frame):
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)

with tm.assert_produces_warning(FutureWarning, match=msg):
rs = datetime_frame.pct_change(fill_method=None)
rs = datetime_frame.pct_change(fill_method=None)
tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1)

rs = datetime_frame.pct_change(2)
@@ -110,7 +109,7 @@ def test_pct_change_periods_freq(
self, datetime_frame, freq, periods, fill_method, limit
):
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)

@@ -144,11 +143,12 @@ def test_pct_change_with_duplicated_indices(fill_method):
{0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3
)

warn = None if fill_method is None else FutureWarning
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
with tm.assert_produces_warning(warn, match=msg):
result = data.pct_change(fill_method=fill_method)

if fill_method is None:
2 changes: 1 addition & 1 deletion pandas/tests/groupby/transform/test_transform.py
@@ -1071,7 +1071,7 @@ def test_pct_change(frame_or_series, freq, periods, fill_method, limit):
expected = expected.to_frame("vals")

msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(gb).__name__}.pct_change are deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
18 changes: 18 additions & 0 deletions pandas/tests/io/test_sql.py
@@ -3368,6 +3368,24 @@ def test_roundtripping_datetimes(sqlite_engine):
assert result == "2020-12-31 12:00:00.000000"


@pytest.fixture
def sqlite_builtin_detect_types():
with contextlib.closing(
sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
) as closing_conn:
with closing_conn as conn:
yield conn


def test_roundtripping_datetimes_detect_types(sqlite_builtin_detect_types):
# https://github.com/pandas-dev/pandas/issues/55554
conn = sqlite_builtin_detect_types
df = DataFrame({"t": [datetime(2020, 12, 31, 12)]}, dtype="datetime64[ns]")
df.to_sql("test", conn, if_exists="replace", index=False)
result = pd.read_sql("select * from test", conn).iloc[0, 0]
assert result == Timestamp("2020-12-31 12:00:00.000000")


@pytest.mark.db
def test_psycopg2_schema_support(postgresql_psycopg2_engine):
conn = postgresql_psycopg2_engine
15 changes: 9 additions & 6 deletions pandas/tests/series/methods/test_pct_change.py
@@ -11,12 +11,11 @@
class TestSeriesPctChange:
def test_pct_change(self, datetime_series):
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"Series.pct_change are deprecated"
)

with tm.assert_produces_warning(FutureWarning, match=msg):
rs = datetime_series.pct_change(fill_method=None)
rs = datetime_series.pct_change(fill_method=None)
tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1)

rs = datetime_series.pct_change(2)
@@ -69,7 +68,7 @@ def test_pct_change_periods_freq(
self, freq, periods, fill_method, limit, datetime_series
):
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"Series.pct_change are deprecated"
)

@@ -101,8 +100,12 @@ def test_pct_change_with_duplicated_indices(fill_method):
# GH30463
s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3)

msg = "The 'fill_method' and 'limit' keywords in Series.pct_change are deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
warn = None if fill_method is None else FutureWarning
msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"Series.pct_change are deprecated"
)
with tm.assert_produces_warning(warn, match=msg):
result = s.pct_change(fill_method=fill_method)

expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3)
32 changes: 32 additions & 0 deletions pandas/tests/window/test_rolling.py
@@ -1950,3 +1950,35 @@ def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
op2 = getattr(rolling2, kernel)
expected = op2(*arg2, numeric_only=numeric_only)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
@pytest.mark.parametrize("tz", [None, "UTC", "Europe/Prague"])
def test_rolling_timedelta_window_non_nanoseconds(unit, tz):
# Test Sum, GH#55106
df_time = DataFrame(
{"A": range(5)}, index=date_range("2013-01-01", freq="1s", periods=5, tz=tz)
)
sum_in_nanosecs = df_time.rolling("1s").sum()
# microseconds / milliseconds should not break the correct rolling
df_time.index = df_time.index.as_unit(unit)
sum_in_microsecs = df_time.rolling("1s").sum()
sum_in_microsecs.index = sum_in_microsecs.index.as_unit("ns")
tm.assert_frame_equal(sum_in_nanosecs, sum_in_microsecs)

# Test max, GH#55026
ref_dates = date_range("2023-01-01", "2023-01-10", unit="ns", tz=tz)
ref_series = Series(0, index=ref_dates)
ref_series.iloc[0] = 1
ref_max_series = ref_series.rolling(Timedelta(days=4)).max()

dates = date_range("2023-01-01", "2023-01-10", unit=unit, tz=tz)
series = Series(0, index=dates)
series.iloc[0] = 1
max_series = series.rolling(Timedelta(days=4)).max()

ref_df = DataFrame(ref_max_series)
df = DataFrame(max_series)
df.index = df.index.as_unit("ns")

tm.assert_frame_equal(ref_df, df)
