fix_non_monotonic_iq_loss_and_redundant_cv_conformal
elephaint committed Oct 17, 2024
1 parent 8ee4592 commit 96ab536
Showing 2 changed files with 44 additions and 50 deletions.
46 changes: 21 additions & 25 deletions nbs/core.ipynb
@@ -338,6 +338,7 @@
" # Flags and attributes\n",
" self._fitted = False\n",
" self._reset_models()\n",
" self._add_level = False\n",
"\n",
" def _scalers_fit_transform(self, dataset: TimeSeriesDataset) -> None:\n",
" self.scalers_ = {} \n",
@@ -1030,27 +1031,6 @@
" _warn_id_as_idx()\n",
" fcsts_df = fcsts_df.set_index(self.id_col)\n",
"\n",
" # # add prediction intervals or quantiles to models trained with point loss functions via level argument\n",
" # if level is not None or quantiles is not None:\n",
" # model_names = self._get_model_names(add_level=True)\n",
" # if model_names:\n",
" # if self.prediction_intervals is None:\n",
" # raise AttributeError(\n",
" # \"You have trained one or more models with a point loss function (e.g. MAE, MSE). \"\n",
" # \"You then must set `prediction_intervals` during fit to use level or quantiles during predict.\") \n",
" # prediction_interval_method = get_prediction_interval_method(self.prediction_intervals.method)\n",
"\n",
" # fcsts_df = prediction_interval_method(\n",
" # fcsts_df,\n",
" # self._cs_df,\n",
" # model_names=list(model_names),\n",
" # level=level_ if level is not None else None,\n",
" # cs_n_windows=self.prediction_intervals.n_windows,\n",
" # n_series=len(uids),\n",
" # horizon=self.h,\n",
" # quantiles=quantiles_ if quantiles is not None else None,\n",
" # ) \n",
"\n",
" return fcsts_df\n",
"\n",
" def _reset_models(self):\n",
@@ -1111,6 +1091,9 @@
"\n",
" fcsts_list: List = []\n",
" for model in self.models:\n",
" if self._add_level and (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)):\n",
" continue\n",
"\n",
" model.fit(dataset=self.dataset,\n",
" val_size=val_size, \n",
" test_size=test_size)\n",
@@ -1147,7 +1130,7 @@
" self._fitted = True\n",
"\n",
" # Add predictions to forecasts DataFrame\n",
" cols = self._get_model_names()\n",
" cols = self._get_model_names(add_level=self._add_level)\n",
" if isinstance(self.uids, pl_Series):\n",
" fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))\n",
" else:\n",
@@ -1678,6 +1661,7 @@
" \"Please reduce the number of windows, horizon or remove those series.\"\n",
" )\n",
" \n",
" self._add_level = True\n",
" cv_results = self.cross_validation(\n",
" df=df,\n",
" static_df=static_df,\n",
@@ -1686,7 +1670,8 @@
" time_col=time_col,\n",
" target_col=target_col,\n",
" )\n",
" \n",
" self._add_level = False\n",
"\n",
" kept = [time_col, id_col, 'cutoff']\n",
" # conformity score for each model\n",
" for model in self._get_model_names(add_level=True):\n",
@@ -1730,10 +1715,21 @@
" cols.extend(col_names)\n",
" # case 2: IQLoss\n",
" elif quantiles_ is not None and isinstance(model.loss, IQLoss):\n",
" # IQLoss does not give monotonically increasing quantiles, so we apply a hack: compute all quantiles, and take the quantile over the quantiles\n",
" quantiles_iqloss = np.linspace(0.01, 0.99, 20)\n",
" fcsts_list_iqloss = []\n",
" for i, quantile in enumerate(quantiles_iqloss):\n",
" model_fcsts = model.predict(dataset=dataset, quantiles = [quantile], **data_kwargs) \n",
" fcsts_list_iqloss.append(model_fcsts) \n",
" fcsts_iqloss = np.concatenate(fcsts_list_iqloss, axis=-1)\n",
"\n",
" # Get the actual requested quantiles\n",
" model_fcsts = np.quantile(fcsts_iqloss, quantiles_, axis=-1).T\n",
" fcsts_list.append(model_fcsts) \n",
"\n",
" # Get the right column names\n",
" col_names = []\n",
" for i, quantile in enumerate(quantiles_):\n",
" model_fcsts = model.predict(dataset=dataset, quantiles = [quantile], **data_kwargs)\n",
" fcsts_list.append(model_fcsts) \n",
" col_name = self._get_column_name(model_name, quantile, has_level)\n",
" col_names.extend([col_name]) \n",
" cols.extend(col_names)\n",
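Note on the change in _no_refit_cross_validation above: this is the "redundant CV conformal" part of the fix. When self._add_level is set (cross-validation is being run only to collect conformity scores for conformal prediction intervals), models whose loss already yields quantiles, either a multi-output loss (outputsize_multiplier > 1) or IQLoss, are skipped rather than refit. A minimal sketch of that filter follows; the loss and model classes are hypothetical stand-ins, not neuralforecast classes.

# Sketch only: IQLoss/PointLoss/MultiQuantileLoss/_Model are illustrative stand-ins.
class IQLoss:
    outputsize_multiplier = 1  # quantile is an input, so output size stays 1

class PointLoss:
    outputsize_multiplier = 1  # e.g. MAE, MSE

class MultiQuantileLoss:
    outputsize_multiplier = 3  # one output per quantile level

class _Model:
    def __init__(self, loss):
        self.loss = loss

def needs_conformal_refit(model, add_level: bool) -> bool:
    """Mirror the skip condition from this commit: when add_level is set,
    only point-loss models are refit to collect conformity scores."""
    if not add_level:
        return True
    return not (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss))

assert needs_conformal_refit(_Model(PointLoss()), add_level=True)
assert not needs_conformal_refit(_Model(MultiQuantileLoss()), add_level=True)
assert not needs_conformal_refit(_Model(IQLoss()), add_level=True)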
48 changes: 23 additions & 25 deletions neuralforecast/core.py
@@ -270,6 +270,7 @@ def __init__(
# Flags and attributes
self._fitted = False
self._reset_models()
self._add_level = False

def _scalers_fit_transform(self, dataset: TimeSeriesDataset) -> None:
self.scalers_ = {}
@@ -998,27 +999,6 @@ def predict(
_warn_id_as_idx()
fcsts_df = fcsts_df.set_index(self.id_col)

# # add prediction intervals or quantiles to models trained with point loss functions via level argument
# if level is not None or quantiles is not None:
# model_names = self._get_model_names(add_level=True)
# if model_names:
# if self.prediction_intervals is None:
# raise AttributeError(
# "You have trained one or more models with a point loss function (e.g. MAE, MSE). "
# "You then must set `prediction_intervals` during fit to use level or quantiles during predict.")
# prediction_interval_method = get_prediction_interval_method(self.prediction_intervals.method)

# fcsts_df = prediction_interval_method(
# fcsts_df,
# self._cs_df,
# model_names=list(model_names),
# level=level_ if level is not None else None,
# cs_n_windows=self.prediction_intervals.n_windows,
# n_series=len(uids),
# horizon=self.h,
# quantiles=quantiles_ if quantiles is not None else None,
# )

return fcsts_df

def _reset_models(self):
@@ -1082,6 +1062,11 @@ def _no_refit_cross_validation(

fcsts_list: List = []
for model in self.models:
if self._add_level and (
model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)
):
continue

model.fit(dataset=self.dataset, val_size=val_size, test_size=test_size)
model_fcsts = model.predict(
self.dataset, step_size=step_size, **data_kwargs
@@ -1118,7 +1103,7 @@ def _no_refit_cross_validation(
self._fitted = True

# Add predictions to forecasts DataFrame
cols = self._get_model_names()
cols = self._get_model_names(add_level=self._add_level)
if isinstance(self.uids, pl_Series):
fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))
else:
@@ -1678,6 +1663,7 @@ def _conformity_scores(
"Please reduce the number of windows, horizon or remove those series."
)

self._add_level = True
cv_results = self.cross_validation(
df=df,
static_df=static_df,
@@ -1686,6 +1672,7 @@
time_col=time_col,
target_col=target_col,
)
self._add_level = False

kept = [time_col, id_col, "cutoff"]
# conformity score for each model
@@ -1751,12 +1738,23 @@ def _generate_forecasts(
cols.extend(col_names)
# case 2: IQLoss
elif quantiles_ is not None and isinstance(model.loss, IQLoss):
col_names = []
for i, quantile in enumerate(quantiles_):
# IQLoss does not give monotonically increasing quantiles, so we apply a hack: compute all quantiles, and take the quantile over the quantiles
quantiles_iqloss = np.linspace(0.01, 0.99, 20)
fcsts_list_iqloss = []
for i, quantile in enumerate(quantiles_iqloss):
model_fcsts = model.predict(
dataset=dataset, quantiles=[quantile], **data_kwargs
)
fcsts_list.append(model_fcsts)
fcsts_list_iqloss.append(model_fcsts)
fcsts_iqloss = np.concatenate(fcsts_list_iqloss, axis=-1)

# Get the actual requested quantiles
model_fcsts = np.quantile(fcsts_iqloss, quantiles_, axis=-1).T
fcsts_list.append(model_fcsts)

# Get the right column names
col_names = []
for i, quantile in enumerate(quantiles_):
col_name = self._get_column_name(model_name, quantile, has_level)
col_names.extend([col_name])
cols.extend(col_names)
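Note on the last hunk, the "non-monotonic IQLoss" part of the fix: rather than calling predict once per requested quantile (separate calls can return quantiles that cross), the model is queried on a dense grid of 20 levels (np.linspace(0.01, 0.99, 20)) and the requested quantiles are then taken over that grid with np.quantile, which is monotone by construction. A minimal numpy sketch of the idea follows; fake_predict is a purely illustrative stand-in for model.predict.

import numpy as np

rng = np.random.default_rng(0)
n_series, horizon = 2, 4
n_rows = n_series * horizon

def fake_predict(q):
    # Stand-in for an IQLoss model: predictions rise with q on average,
    # but noise means two separate calls can produce crossing quantiles.
    return q + rng.normal(scale=0.3, size=(n_rows, 1))

# Dense grid of quantile levels, as in the commit.
quantiles_iqloss = np.linspace(0.01, 0.99, 20)
fcsts_iqloss = np.concatenate([fake_predict(q) for q in quantiles_iqloss], axis=-1)

# Take the requested quantiles over the grid of quantile forecasts.
requested = [0.1, 0.5, 0.9]
model_fcsts = np.quantile(fcsts_iqloss, requested, axis=-1).T  # (n_rows, 3)

# The corrected quantiles are non-decreasing across the requested levels.
assert np.all(np.diff(model_fcsts, axis=-1) >= 0)

As the in-code comment says, this is a workaround: it costs 20 predict calls per IQLoss model instead of one per requested quantile, but the resulting quantile columns can no longer cross.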
