Commit

Merge branch 'main' into fix-constructor-from-mgr

jorisvandenbossche authored Oct 26, 2023
2 parents fd51a03 + aeb3644 commit f972b04
Showing 11 changed files with 126 additions and 53 deletions.
10 changes: 10 additions & 0 deletions doc/source/whatsnew/v2.1.2.rst
@@ -8,18 +8,28 @@ including other versions of pandas.

{{ header }}

.. ---------------------------------------------------------------------------
.. _whatsnew_212.deprecations:

Deprecations
~~~~~~~~~~~~

- Reverted deprecation of ``fill_method=None`` in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`; the values ``'backfill'``, ``'bfill'``, ``'pad'``, and ``'ffill'`` are still deprecated (:issue:`53491`)

.. ---------------------------------------------------------------------------
.. _whatsnew_212.regressions:

Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed regression in :meth:`DataFrame.join` where result has missing values and dtype is arrow backed string (:issue:`55348`)
- Fixed regression in :meth:`~DataFrame.rolling` where non-nanosecond index or ``on`` column would produce incorrect results (:issue:`55026`, :issue:`55106`, :issue:`55299`)
- Fixed regression in :meth:`DataFrame.resample` which was extrapolating back to ``origin`` when ``origin`` was outside its bounds (:issue:`55064`)
- Fixed regression in :meth:`DataFrame.sort_index` which was not sorting correctly when the index was a sliced :class:`MultiIndex` (:issue:`55379`)
- Fixed regression in :meth:`DataFrameGroupBy.agg` and :meth:`SeriesGroupBy.agg` where if the option ``compute.use_numba`` was set to True, groupby methods not supported by the numba engine would raise a ``TypeError`` (:issue:`55520`)
- Fixed performance regression with wide DataFrames, typically involving methods where all columns were accessed individually (:issue:`55256`, :issue:`55245`)
- Fixed regression in :func:`merge_asof` raising ``TypeError`` for ``by`` with datetime and timedelta dtypes (:issue:`55453`)
- Fixed regression in construction of certain DataFrame or Series subclasses (:issue:`54922`)
- Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite when using ``detect_types`` (:issue:`55554`)

.. ---------------------------------------------------------------------------
.. _whatsnew_212.bug_fixes:
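
As an illustration (not part of the diff) of the reverted deprecation noted above, assuming pandas 2.1.2: ``fill_method=None`` no longer warns, while explicit fill values still do.

import warnings

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 2.0, 4.0])

# fill_method=None is no longer deprecated, so no FutureWarning is expected
# even with warnings escalated to errors.
with warnings.catch_warnings():
    warnings.simplefilter("error", FutureWarning)
    s.pct_change(fill_method=None)

# Explicit fill values such as 'ffill' remain deprecated and still warn.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    s.pct_change(fill_method="ffill")
assert any(issubclass(w.category, FutureWarning) for w in caught)
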
1 change: 0 additions & 1 deletion doc/source/whatsnew/v2.2.0.rst
@@ -288,7 +288,6 @@ Other Deprecations
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
- Deprecating downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_220.performance:
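
As a side note on the ``future.no_silent_downcasting`` option referenced in the context lines above, a minimal sketch of opting in (assumes pandas 2.2+; not part of this diff):

import pandas as pd

# Opt in to the future behaviour: object-dtype results of fillna/ffill/bfill
# are no longer silently downcast to a numeric dtype.
pd.set_option("future.no_silent_downcasting", True)

s = pd.Series([1, 2, None], dtype="object")
filled = s.fillna(0)
print(filled.dtype)  # object (previously silently downcast to int64)
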
44 changes: 23 additions & 21 deletions pandas/core/generic.py
@@ -11723,6 +11723,7 @@ def pct_change(
How to handle NAs **before** computing percent changes.
.. deprecated:: 2.1
All options of `fill_method` are deprecated except `fill_method=None`.
limit : int, default None
The number of consecutive NAs to fill before stopping.
@@ -11829,32 +11830,33 @@ def pct_change(
APPL -0.252395 -0.011860 NaN
"""
# GH#53491
if fill_method is not lib.no_default or limit is not lib.no_default:
if fill_method not in (lib.no_default, None) or limit is not lib.no_default:
warnings.warn(
"The 'fill_method' and 'limit' keywords in "
f"{type(self).__name__}.pct_change are deprecated and will be "
"removed in a future version. Call "
f"{'bfill' if fill_method in ('backfill', 'bfill') else 'ffill'} "
"before calling pct_change instead.",
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(self).__name__}.pct_change are deprecated and will be removed "
"in a future version. Either fill in any non-leading NA values prior "
"to calling pct_change or specify 'fill_method=None' to not fill NA "
"values.",
FutureWarning,
stacklevel=find_stack_level(),
)
if fill_method is lib.no_default:
cols = self.items() if self.ndim == 2 else [(None, self)]
for _, col in cols:
mask = col.isna().values
mask = mask[np.argmax(~mask) :]
if mask.any():
warnings.warn(
"The default fill_method='pad' in "
f"{type(self).__name__}.pct_change is deprecated and will be "
"removed in a future version. Call ffill before calling "
"pct_change to retain current behavior and silence this "
"warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
break
if limit is lib.no_default:
cols = self.items() if self.ndim == 2 else [(None, self)]
for _, col in cols:
mask = col.isna().values
mask = mask[np.argmax(~mask) :]
if mask.any():
warnings.warn(
"The default fill_method='pad' in "
f"{type(self).__name__}.pct_change is deprecated and will "
"be removed in a future version. Either fill in any "
"non-leading NA values prior to calling pct_change or "
"specify 'fill_method=None' to not fill NA values.",
FutureWarning,
stacklevel=find_stack_level(),
)
break
fill_method = "pad"
if limit is lib.no_default:
limit = None
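
The default-path warning above fires only when a column contains NA values after its first non-NA entry; a small NumPy sketch of that non-leading-NA check (illustrative values, not part of the diff):

import numpy as np

values = np.array([np.nan, 1.0, np.nan, 3.0])

mask = np.isnan(values)
# np.argmax(~mask) is the position of the first non-NA value; slicing there
# discards leading NAs, which pct_change never fills anyway.
mask = mask[np.argmax(~mask):]
# Any NA left after the first valid value is what triggers the warning.
print(mask.any())  # True for these values
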
25 changes: 14 additions & 11 deletions pandas/core/groupby/groupby.py
@@ -5296,7 +5296,7 @@ def diff(
def pct_change(
self,
periods: int = 1,
fill_method: FillnaOptions | lib.NoDefault = lib.no_default,
fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default,
limit: int | None | lib.NoDefault = lib.no_default,
freq=None,
axis: Axis | lib.NoDefault = lib.no_default,
@@ -5348,23 +5348,26 @@ def pct_change(
goldfish 0.2 0.125
"""
# GH#53491
if fill_method is not lib.no_default or limit is not lib.no_default:
if fill_method not in (lib.no_default, None) or limit is not lib.no_default:
warnings.warn(
"The 'fill_method' and 'limit' keywords in "
f"{type(self).__name__}.pct_change are deprecated and will be "
"removed in a future version. Call "
f"{'bfill' if fill_method in ('backfill', 'bfill') else 'ffill'} "
"before calling pct_change instead.",
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(self).__name__}.pct_change are deprecated and will be removed "
"in a future version. Either fill in any non-leading NA values prior "
"to calling pct_change or specify 'fill_method=None' to not fill NA "
"values.",
FutureWarning,
stacklevel=find_stack_level(),
)
if fill_method is lib.no_default:
if any(grp.isna().values.any() for _, grp in self):
if limit is lib.no_default and any(
grp.isna().values.any() for _, grp in self
):
warnings.warn(
"The default fill_method='ffill' in "
f"{type(self).__name__}.pct_change is deprecated and will be "
"removed in a future version. Call ffill before calling "
"pct_change to retain current behavior and silence this warning.",
f"{type(self).__name__}.pct_change is deprecated and will "
"be removed in a future version. Either fill in any "
"non-leading NA values prior to calling pct_change or "
"specify 'fill_method=None' to not fill NA values.",
FutureWarning,
stacklevel=find_stack_level(),
)
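
A usage sketch (illustrative data, not part of the diff) showing that the grouped ``pct_change`` likewise accepts ``fill_method=None`` without a deprecation warning:

import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "b"], "vals": [1.0, np.nan, 2.0, 4.0]})

# fill_method=None leaves NAs untouched and emits no deprecation warning.
result = df.groupby("key")["vals"].pct_change(fill_method=None)
print(result)
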
10 changes: 9 additions & 1 deletion pandas/core/window/rolling.py
@@ -21,6 +21,7 @@

from pandas._libs.tslibs import (
BaseOffset,
Timedelta,
to_offset,
)
import pandas._libs.window.aggregations as window_aggregations
@@ -112,6 +113,8 @@
from pandas.core.generic import NDFrame
from pandas.core.groupby.ops import BaseGrouper

from pandas.core.arrays.datetimelike import dtype_to_unit


class BaseWindow(SelectionMixin):
"""Provides utilities for performing windowing operations."""
@@ -1887,7 +1890,12 @@ def _validate(self):
self._on.freq.nanos / self._on.freq.n
)
else:
self._win_freq_i8 = freq.nanos
try:
unit = dtype_to_unit(self._on.dtype) # type: ignore[arg-type]
except TypeError:
# if not a datetime dtype, eg for empty dataframes
unit = "ns"
self._win_freq_i8 = Timedelta(freq.nanos).as_unit(unit)._value

# min_periods must be an integer
if self.min_periods is None:
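
A usage sketch of the behaviour fixed by the ``_win_freq_i8`` conversion above (assumes a pandas build with this fix; data is illustrative): rolling over a non-nanosecond datetime index now matches the nanosecond result.

import pandas as pd

idx = pd.date_range("2023-01-01", periods=5, freq="1s")
df = pd.DataFrame({"A": range(5)}, index=idx)
expected = df.rolling("2s").sum()

# Same data, but with the index stored at millisecond resolution.
df_ms = df.copy()
df_ms.index = df_ms.index.as_unit("ms")
result = df_ms.rolling("2s").sum()

# Identical window sums; only the index resolution differs.
assert (result.values == expected.values).all()
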
6 changes: 2 additions & 4 deletions pandas/io/sql.py
@@ -2094,19 +2094,17 @@ def _adapt_time(t) -> str:
# Python 3.12+ doesn't auto-register adapters for us anymore

adapt_date_iso = lambda val: val.isoformat()
adapt_datetime_iso = lambda val: val.isoformat()
adapt_datetime_iso = lambda val: val.isoformat(" ")

sqlite3.register_adapter(time, _adapt_time)

sqlite3.register_adapter(date, adapt_date_iso)
sqlite3.register_adapter(datetime, adapt_datetime_iso)

convert_date = lambda val: date.fromisoformat(val.decode())
convert_datetime = lambda val: datetime.fromisoformat(val.decode())
convert_timestamp = lambda val: datetime.fromtimestamp(int(val))
convert_timestamp = lambda val: datetime.fromisoformat(val.decode())

sqlite3.register_converter("date", convert_date)
sqlite3.register_converter("datetime", convert_datetime)
sqlite3.register_converter("timestamp", convert_timestamp)

def sql_schema(self) -> str:
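
A standalone sqlite3 sketch (mirroring the adapters/converters above; the table and column names are illustrative, not from the diff) of why ``isoformat(" ")`` plus ``fromisoformat`` lets ``detect_types`` round-trip datetimes:

import sqlite3
from datetime import datetime

sqlite3.register_adapter(datetime, lambda val: val.isoformat(" "))
sqlite3.register_converter("timestamp", lambda val: datetime.fromisoformat(val.decode()))

with sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES) as conn:
    conn.execute("CREATE TABLE t (ts TIMESTAMP)")
    conn.execute("INSERT INTO t VALUES (?)", (datetime(2020, 12, 31, 12),))
    (value,) = conn.execute("SELECT ts FROM t").fetchone()

print(value)  # datetime.datetime(2020, 12, 31, 12, 0)
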
16 changes: 8 additions & 8 deletions pandas/tests/frame/methods/test_pct_change.py
@@ -29,7 +29,7 @@ def test_pct_change_with_nas(
obj = frame_or_series(vals)

msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(obj).__name__}.pct_change are deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
@@ -46,7 +46,7 @@ def test_pct_change_numeric(self):
pnl.iat[2, 3] = 60

msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)

@@ -59,12 +59,11 @@

def test_pct_change(self, datetime_frame):
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)

with tm.assert_produces_warning(FutureWarning, match=msg):
rs = datetime_frame.pct_change(fill_method=None)
rs = datetime_frame.pct_change(fill_method=None)
tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1)

rs = datetime_frame.pct_change(2)
@@ -110,7 +109,7 @@ def test_pct_change_periods_freq(
self, datetime_frame, freq, periods, fill_method, limit
):
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)

@@ -144,11 +143,12 @@ def test_pct_change_with_duplicated_indices(fill_method):
{0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3
)

warn = None if fill_method is None else FutureWarning
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
with tm.assert_produces_warning(warn, match=msg):
result = data.pct_change(fill_method=fill_method)

if fill_method is None:
2 changes: 1 addition & 1 deletion pandas/tests/groupby/transform/test_transform.py
@@ -1071,7 +1071,7 @@ def test_pct_change(frame_or_series, freq, periods, fill_method, limit):
expected = expected.to_frame("vals")

msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(gb).__name__}.pct_change are deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
18 changes: 18 additions & 0 deletions pandas/tests/io/test_sql.py
@@ -3368,6 +3368,24 @@ def test_roundtripping_datetimes(sqlite_engine):
assert result == "2020-12-31 12:00:00.000000"


@pytest.fixture
def sqlite_builtin_detect_types():
with contextlib.closing(
sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
) as closing_conn:
with closing_conn as conn:
yield conn


def test_roundtripping_datetimes_detect_types(sqlite_builtin_detect_types):
# https://github.com/pandas-dev/pandas/issues/55554
conn = sqlite_builtin_detect_types
df = DataFrame({"t": [datetime(2020, 12, 31, 12)]}, dtype="datetime64[ns]")
df.to_sql("test", conn, if_exists="replace", index=False)
result = pd.read_sql("select * from test", conn).iloc[0, 0]
assert result == Timestamp("2020-12-31 12:00:00.000000")


@pytest.mark.db
def test_psycopg2_schema_support(postgresql_psycopg2_engine):
conn = postgresql_psycopg2_engine
15 changes: 9 additions & 6 deletions pandas/tests/series/methods/test_pct_change.py
@@ -11,12 +11,11 @@
class TestSeriesPctChange:
def test_pct_change(self, datetime_series):
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"Series.pct_change are deprecated"
)

with tm.assert_produces_warning(FutureWarning, match=msg):
rs = datetime_series.pct_change(fill_method=None)
rs = datetime_series.pct_change(fill_method=None)
tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1)

rs = datetime_series.pct_change(2)
@@ -69,7 +68,7 @@ def test_pct_change_periods_freq(
self, freq, periods, fill_method, limit, datetime_series
):
msg = (
"The 'fill_method' and 'limit' keywords in "
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"Series.pct_change are deprecated"
)

@@ -101,8 +100,12 @@ def test_pct_change_with_duplicated_indices(fill_method):
# GH30463
s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3)

msg = "The 'fill_method' and 'limit' keywords in Series.pct_change are deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
warn = None if fill_method is None else FutureWarning
msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"Series.pct_change are deprecated"
)
with tm.assert_produces_warning(warn, match=msg):
result = s.pct_change(fill_method=fill_method)

expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3)
32 changes: 32 additions & 0 deletions pandas/tests/window/test_rolling.py
@@ -1950,3 +1950,35 @@ def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
op2 = getattr(rolling2, kernel)
expected = op2(*arg2, numeric_only=numeric_only)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
@pytest.mark.parametrize("tz", [None, "UTC", "Europe/Prague"])
def test_rolling_timedelta_window_non_nanoseconds(unit, tz):
# Test Sum, GH#55106
df_time = DataFrame(
{"A": range(5)}, index=date_range("2013-01-01", freq="1s", periods=5, tz=tz)
)
sum_in_nanosecs = df_time.rolling("1s").sum()
# microseconds / milliseconds should not break the correct rolling
df_time.index = df_time.index.as_unit(unit)
sum_in_microsecs = df_time.rolling("1s").sum()
sum_in_microsecs.index = sum_in_microsecs.index.as_unit("ns")
tm.assert_frame_equal(sum_in_nanosecs, sum_in_microsecs)

# Test max, GH#55026
ref_dates = date_range("2023-01-01", "2023-01-10", unit="ns", tz=tz)
ref_series = Series(0, index=ref_dates)
ref_series.iloc[0] = 1
ref_max_series = ref_series.rolling(Timedelta(days=4)).max()

dates = date_range("2023-01-01", "2023-01-10", unit=unit, tz=tz)
series = Series(0, index=dates)
series.iloc[0] = 1
max_series = series.rolling(Timedelta(days=4)).max()

ref_df = DataFrame(ref_max_series)
df = DataFrame(max_series)
df.index = df.index.as_unit("ns")

tm.assert_frame_equal(ref_df, df)
