diff --git a/nbs/core.ipynb b/nbs/core.ipynb index fd2d61d88..64c3abfbd 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -942,24 +942,12 @@ " fcsts_df : pandas.DataFrame\n", " DataFrame with insample predictions for all fitted `models`. \n", " \"\"\"\n", - " if not self._fitted:\n", - " raise Exception('The models must be fitted first with `fit` or `cross_validation`.')\n", - "\n", " for model in self.models:\n", " if model.SAMPLING_TYPE == 'recurrent':\n", " warnings.warn(f'Predict insample might not provide accurate predictions for \\\n", " recurrent model {repr(model)} class yet due to scaling.')\n", " print(f'WARNING: Predict insample might not provide accurate predictions for \\\n", " recurrent model {repr(model)} class yet due to scaling.')\n", - " \n", - " cols = []\n", - " count_names = {'model': 0}\n", - " for model in self.models:\n", - " model_name = repr(model)\n", - " count_names[model_name] = count_names.get(model_name, -1) + 1\n", - " if count_names[model_name] > 0:\n", - " model_name += str(count_names[model_name])\n", - " cols += [model_name + n for n in model.loss.output_names]\n", "\n", " # Remove test set from dataset and last dates\n", " test_size = self.models[0].get_test_size()\n", @@ -977,12 +965,70 @@ " else:\n", " trimmed_dataset = self.dataset\n", " times = self.ds\n", + " \n", + " # original y\n", + " original_y = {\n", + " self.id_col: ufp.repeat(self.uids, np.diff(self.dataset.indptr)),\n", + " self.time_col: self.ds,\n", + " self.target_col: self.dataset.temporal[:, 0].numpy(),\n", + " }\n", + "\n", + " return self._predict_sliding(trimmed_dataset, self.uids, times, test_size, original_y, step_size=step_size)\n", + "\n", + "\n", + " def predict_sliding(self, df, step_size : int= 1):\n", + " \"\"\"Sliding window prediction with core.NeuralForecast.\n", + "\n", + " `core.NeuralForecast`'s `predict_sliding` uses stored fitted `models`\n", + " to predict values of a time series given a dataframe.\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pandas or polars DataFrame \n", + " step_size : int (default=1)\n", + " Step size between each window.\n", + "\n", + " Returns\n", + " -------\n", + " fcsts_df : pandas.DataFrame\n", + " DataFrame with insample predictions for all fitted `models`. \n", + " \"\"\"\n", + " for model in self.models:\n", + " if model.SAMPLING_TYPE == 'recurrent':\n", + " warnings.warn(f'Sliding predictions might not provide accurate predictions for \\\n", + " recurrent model {repr(model)} class yet due to scaling.')\n", + " print(f'WARNING: Sliding predictions might not provide accurate predictions for \\\n", + " recurrent model {repr(model)} class yet due to scaling.')\n", + " \n", + " # TODO does sorting need to happen?\n", + " dataset, uids, _, times = TimeSeriesDataset.from_df(df, id_col=self.id_col, time_col=self.time_col, target_col=self.target_col)\n", + " \n", + " original_y = {\n", + " self.id_col: ufp.repeat(uids, np.diff(dataset.indptr)),\n", + " self.time_col: times,\n", + " self.target_col: dataset.temporal[:, 0].numpy(),\n", + " }\n", + "\n", + " return self._predict_sliding(dataset, uids, times, 0, original_y, step_size=step_size)\n", + "\n", + " def _predict_sliding(self, dataset, uids, times, test_size, original_y, step_size: int = 1):\n", + " if not self._fitted:\n", + " raise Exception('The models must be fitted first with `fit` or `cross_validation`.')\n", + "\n", + " cols = []\n", + " count_names = {'model': 0}\n", + " for model in self.models:\n", + " model_name = repr(model)\n", + " count_names[model_name] = count_names.get(model_name, -1) + 1\n", + " if count_names[model_name] > 0:\n", + " model_name += str(count_names[model_name])\n", + " cols += [model_name + n for n in model.loss.output_names]\n", "\n", " # Generate dates\n", " fcsts_df = _insample_times(\n", " times=times,\n", - " uids=self.uids,\n", - " indptr=trimmed_dataset.indptr,\n", + " uids=uids,\n", + " indptr=dataset.indptr,\n", " h=self.h,\n", " freq=self.freq,\n", " step_size=step_size,\n", @@ -995,22 +1041,16 @@ "\n", " for model in self.models:\n", " # Test size is the number of periods to forecast (full size of trimmed dataset)\n", - " model.set_test_size(test_size=trimmed_dataset.max_size)\n", + " model.set_test_size(test_size=dataset.max_size)\n", "\n", " # Predict\n", - " model_fcsts = model.predict(trimmed_dataset, step_size=step_size)\n", + " model_fcsts = model.predict(dataset, step_size=step_size)\n", " # Append predictions in memory placeholder\n", " output_length = len(model.loss.output_names)\n", " fcsts[:,col_idx:(col_idx + output_length)] = model_fcsts\n", " col_idx += output_length \n", " model.set_test_size(test_size=test_size) # Set original test_size\n", "\n", - " # original y\n", - " original_y = {\n", - " self.id_col: ufp.repeat(self.uids, np.diff(self.dataset.indptr)),\n", - " self.time_col: self.ds,\n", - " self.target_col: self.dataset.temporal[:, 0].numpy(),\n", - " }\n", "\n", " # Add predictions to forecasts DataFrame\n", " if isinstance(self.uids, pl_Series):\n", diff --git a/neuralforecast/_modidx.py b/neuralforecast/_modidx.py index a11180a60..643a33286 100644 --- a/neuralforecast/_modidx.py +++ b/neuralforecast/_modidx.py @@ -124,6 +124,8 @@ 'neuralforecast/core.py'), 'neuralforecast.core.NeuralForecast._no_refit_cross_validation': ( 'core.html#neuralforecast._no_refit_cross_validation', 'neuralforecast/core.py'), + 'neuralforecast.core.NeuralForecast._predict_sliding': ( 'core.html#neuralforecast._predict_sliding', + 'neuralforecast/core.py'), 'neuralforecast.core.NeuralForecast._prepare_fit': ( 'core.html#neuralforecast._prepare_fit', 'neuralforecast/core.py'), 'neuralforecast.core.NeuralForecast._reset_models': ( 'core.html#neuralforecast._reset_models', @@ -146,6 +148,8 @@ 'neuralforecast/core.py'), 'neuralforecast.core.NeuralForecast.predict_insample': ( 'core.html#neuralforecast.predict_insample', 'neuralforecast/core.py'), + 'neuralforecast.core.NeuralForecast.predict_sliding': ( 'core.html#neuralforecast.predict_sliding', + 'neuralforecast/core.py'), 'neuralforecast.core.NeuralForecast.save': ('core.html#neuralforecast.save', 'neuralforecast/core.py'), 'neuralforecast.core._id_as_idx': ('core.html#_id_as_idx', 'neuralforecast/core.py'), 'neuralforecast.core._insample_times': ('core.html#_insample_times', 'neuralforecast/core.py'), diff --git a/neuralforecast/core.py b/neuralforecast/core.py index ca6b53a14..e94b79fd8 100644 --- a/neuralforecast/core.py +++ b/neuralforecast/core.py @@ -874,11 +874,6 @@ def predict_insample(self, step_size: int = 1): fcsts_df : pandas.DataFrame DataFrame with insample predictions for all fitted `models`. """ - if not self._fitted: - raise Exception( - "The models must be fitted first with `fit` or `cross_validation`." - ) - for model in self.models: if model.SAMPLING_TYPE == "recurrent": warnings.warn( @@ -890,15 +885,6 @@ def predict_insample(self, step_size: int = 1): recurrent model {repr(model)} class yet due to scaling." ) - cols = [] - count_names = {"model": 0} - for model in self.models: - model_name = repr(model) - count_names[model_name] = count_names.get(model_name, -1) + 1 - if count_names[model_name] > 0: - model_name += str(count_names[model_name]) - cols += [model_name + n for n in model.loss.output_names] - # Remove test set from dataset and last dates test_size = self.models[0].get_test_size() if test_size > 0: @@ -918,11 +904,87 @@ def predict_insample(self, step_size: int = 1): trimmed_dataset = self.dataset times = self.ds + # original y + original_y = { + self.id_col: ufp.repeat(self.uids, np.diff(self.dataset.indptr)), + self.time_col: self.ds, + self.target_col: self.dataset.temporal[:, 0].numpy(), + } + + return self._predict_sliding( + trimmed_dataset, + self.uids, + times, + test_size, + original_y, + step_size=step_size, + ) + + def predict_sliding(self, df, step_size: int = 1): + """Sliding window prediction with core.NeuralForecast. + + `core.NeuralForecast`'s `predict_sliding` uses stored fitted `models` + to predict values of a time series given a dataframe. + + Parameters + ---------- + df : pandas or polars DataFrame + step_size : int (default=1) + Step size between each window. + + Returns + ------- + fcsts_df : pandas.DataFrame + DataFrame with insample predictions for all fitted `models`. + """ + for model in self.models: + if model.SAMPLING_TYPE == "recurrent": + warnings.warn( + f"Sliding predictions might not provide accurate predictions for \ + recurrent model {repr(model)} class yet due to scaling." + ) + print( + f"WARNING: Sliding predictions might not provide accurate predictions for \ + recurrent model {repr(model)} class yet due to scaling." + ) + + # TODO does sorting need to happen? + dataset, uids, _, times = TimeSeriesDataset.from_df( + df, id_col=self.id_col, time_col=self.time_col, target_col=self.target_col + ) + + original_y = { + self.id_col: ufp.repeat(uids, np.diff(dataset.indptr)), + self.time_col: times, + self.target_col: dataset.temporal[:, 0].numpy(), + } + + return self._predict_sliding( + dataset, uids, times, 0, original_y, step_size=step_size + ) + + def _predict_sliding( + self, dataset, uids, times, test_size, original_y, step_size: int = 1 + ): + if not self._fitted: + raise Exception( + "The models must be fitted first with `fit` or `cross_validation`." + ) + + cols = [] + count_names = {"model": 0} + for model in self.models: + model_name = repr(model) + count_names[model_name] = count_names.get(model_name, -1) + 1 + if count_names[model_name] > 0: + model_name += str(count_names[model_name]) + cols += [model_name + n for n in model.loss.output_names] + # Generate dates fcsts_df = _insample_times( times=times, - uids=self.uids, - indptr=trimmed_dataset.indptr, + uids=uids, + indptr=dataset.indptr, h=self.h, freq=self.freq, step_size=step_size, @@ -935,23 +997,16 @@ def predict_insample(self, step_size: int = 1): for model in self.models: # Test size is the number of periods to forecast (full size of trimmed dataset) - model.set_test_size(test_size=trimmed_dataset.max_size) + model.set_test_size(test_size=dataset.max_size) # Predict - model_fcsts = model.predict(trimmed_dataset, step_size=step_size) + model_fcsts = model.predict(dataset, step_size=step_size) # Append predictions in memory placeholder output_length = len(model.loss.output_names) fcsts[:, col_idx : (col_idx + output_length)] = model_fcsts col_idx += output_length model.set_test_size(test_size=test_size) # Set original test_size - # original y - original_y = { - self.id_col: ufp.repeat(self.uids, np.diff(self.dataset.indptr)), - self.time_col: self.ds, - self.target_col: self.dataset.temporal[:, 0].numpy(), - } - # Add predictions to forecasts DataFrame if isinstance(self.uids, pl_Series): fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))