Commit
use pylint and fix unused arguments (#61)
jmoralez authored Nov 9, 2022
1 parent 8d86148 commit 2e9379d
Showing 15 changed files with 52 additions and 45 deletions.
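The only pylint checks enabled are W0612 (unused-variable) and W0613 (unused-argument). As a minimal sketch of what each one flags and how the fixes in this commit silence them — the functions below are hypothetical, not from the repository:

    def total_sales(rows, verbose):         # W0613: `verbose` is never used
        total = 0
        for i, row in enumerate(rows):      # W0612: `i` is assigned but never used
            total += row
        return total

    def total_sales_fixed(rows, _verbose):  # a leading underscore matches pylint's default
        total = 0                           # ignored-argument-names pattern, so W0613 is quiet
        for row in rows:                    # iterate directly; no unused loop index
            total += row
        return total

Arguments that must stay in a signature to satisfy a calling convention (the `predict_fn` hooks below) get the underscore rename; arguments nothing depends on (`target_col` in `backtest_splits`) are removed outright.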
9 changes: 3 additions & 6 deletions .github/workflows/lint.yaml
@@ -19,10 +19,7 @@ jobs:
       python-version: 3.8
 
     - name: Install linters
-      run: pip install mypy flake8
+      run: pip install mypy flake8 pylint
 
-    - name: mypy
-      run: mypy mlforecast/
-
-    - name: flake8
-      run: flake8 --select=F mlforecast/
+    - name: Lint
+      run: ./action_files/lint
6 changes: 6 additions & 0 deletions .pylintrc
@@ -0,0 +1,6 @@
+[MAIN]
+ignore=_nbdev.py
+
+[MESSAGES CONTROL]
+disable=all
+enable=W0612,W0613
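This configuration disables every message and then re-enables only the two checks this commit targets, so CI fails solely on unused variables and unused arguments; the auto-generated `_nbdev.py` module is excluded via `ignore`.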
5 changes: 3 additions & 2 deletions action_files/lint
@@ -1,3 +1,4 @@
 #!/usr/bin/env bash
-mypy mlforecast
-flake8 --select=F mlforecast
+mypy mlforecast || exit -1
+flake8 --select=F mlforecast || exit -1
+pylint mlforecast
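The workflow now delegates to this script, so CI and local runs execute the same checks. `|| exit -1` aborts at the first failing linter; since shell exit statuses are taken modulo 256, `exit -1` in practice reports 255, and when mypy and flake8 pass, pylint's own exit status becomes the script's.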
1 change: 1 addition & 0 deletions environment.yml
@@ -11,6 +11,7 @@ dependencies:
   - numba
   - pandas
   - pip
+  - pylint
   - scikit-learn
   - window-ops
   - xgboost
10 changes: 5 additions & 5 deletions mlforecast/core.py
@@ -154,9 +154,9 @@ def _build_transform_name(lag, tfm, *args) -> str:
 def simple_predict(
     model,
     new_x: pd.DataFrame,
-    dynamic_dfs: List[pd.DataFrame],
+    _dynamic_dfs: List[pd.DataFrame],
     features_order: List[str],
-    **kwargs,
+    **_kwargs,
 ) -> np.ndarray:
     """Drop the ds column from `new_x` and call `model.predict` on it."""
     new_x = new_x[features_order]
@@ -168,7 +168,7 @@ def merge_predict(
     new_x: pd.DataFrame,
     dynamic_dfs: List[pd.DataFrame],
     features_order: List[str],
-    **kwargs,
+    **_kwargs,
 ) -> np.ndarray:
     """Perform left join on each of `dynamic_dfs` and call model.predict."""
     idx = new_x.index.name
@@ -586,8 +586,8 @@ def predict(
             predictions = predict_fn(
                 model,
                 new_x,
-                dynamic_dfs=dynamic_dfs,
-                features_order=self.features_order_,
+                dynamic_dfs,
+                self.features_order_,
                 **predict_fn_kwargs,
             )
             self._update_y(predictions)
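Renaming the parameters to `_dynamic_dfs` and `_kwargs` changes their keyword names, which is why the call in `predict` switches from keyword to positional arguments. A minimal sketch of the failure mode, with placeholder values:

    def simple_predict(model, new_x, _dynamic_dfs, features_order, **_kwargs):
        return model.predict(new_x[features_order])

    # The old keyword call no longer matches the signature:
    #   simple_predict(model, new_x, dynamic_dfs=dfs, features_order=cols)
    #   -> TypeError: simple_predict() got an unexpected keyword argument 'dynamic_dfs'
    # hence the positional call used above:
    #   simple_predict(model, new_x, dfs, cols)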
2 changes: 1 addition & 1 deletion mlforecast/distributed/forecast.py
@@ -150,7 +150,7 @@ def fit_models(
             Forecast object with trained models.
         """
         self.models_ = []
-        for i, model in enumerate(self.models):
+        for model in self.models:
             self.models_.append(clone(model).fit(X, y))
         return self
 
4 changes: 2 additions & 2 deletions mlforecast/forecast.py
@@ -130,7 +130,7 @@ def fit_models(
             Forecast object with trained models.
         """
         self.models_ = []
-        for i, model in enumerate(self.models):
+        for model in self.models:
             self.models_.append(clone(model).fit(X, y))
         return self
 
@@ -297,7 +297,7 @@ def cross_validation(
             freq = self.freq
 
         for train_end, train, valid in backtest_splits(
-            data, n_windows, window_size, freq, time_col, target_col
+            data, n_windows, window_size, freq, time_col
         ):
             self.fit(
                 train,
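`backtest_splits` never read `target_col`, so rather than an underscore rename the parameter is dropped from its signature in mlforecast/utils.py below, and every call site — this one and the one in LightGBMCV's setup — loses the argument with it.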
12 changes: 7 additions & 5 deletions mlforecast/lgb_cv.py
@@ -132,7 +132,7 @@ def setup(
         static_features: Optional[List[str]] = None,
         dropna: bool = True,
         keep_last_n: Optional[int] = None,
-        weights: Sequence[float] = None,
+        weights: Optional[Sequence[float]] = None,
         metric: Union[str, Callable] = "mape",
     ):
         """Initialize internal data structures to iteratively train the boosters. Use this before calling partial_fit.
@@ -199,7 +199,7 @@ def setup(
         self.time_col = time_col
         self.target_col = target_col
         params = {} if params is None else params
         for _, train, valid in backtest_splits(
-            data, n_windows, window_size, freq, time_col, target_col
+            data, n_windows, window_size, freq, time_col
         ):
             ts = copy.deepcopy(self.ts)
             prep = ts.fit_transform(
@@ -351,13 +351,13 @@ def fit(
         time_col: str,
         target_col: str,
         num_iterations: int = 100,
-        params: Dict[str, Any] = None,
+        params: Optional[Dict[str, Any]] = None,
         static_features: Optional[List[str]] = None,
         dropna: bool = True,
         keep_last_n: Optional[int] = None,
         dynamic_dfs: Optional[List[pd.DataFrame]] = None,
         eval_every: int = 10,
-        weights: Sequence[float] = None,
+        weights: Optional[Sequence[float]] = None,
         metric: Union[str, Callable] = "mape",
         verbose_eval: bool = True,
         early_stopping_evals: int = 2,
@@ -588,4 +588,6 @@ def cv_predict(
         result : pandas DataFrame
             Predictions for each serie and timestep, with one column per window.
         """
-        return self.ts.predict(self.cv_models_, horizon)
+        return self.ts.predict(
+            self.cv_models_, horizon, dynamic_dfs, predict_fn, **predict_fn_kwargs
+        )
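The `weights: Sequence[float] = None` and `params: Dict[str, Any] = None` annotations relied on the implicit-Optional reading of PEP 484 that type checkers now reject; wrapping them in `Optional[...]` makes the annotation match the None default. The last hunk also forwards `dynamic_dfs`, `predict_fn`, and `predict_fn_kwargs`, which `cv_predict` previously accepted but silently dropped — exactly the pattern W0613 exists to catch. A sketch of the annotation fix, with a hypothetical function:

    from typing import Optional, Sequence

    # Before: `weights: Sequence[float] = None` claims a Sequence but defaults to None.
    def weighted_mean(weights: Optional[Sequence[float]] = None) -> float:
        if weights is None:   # checkers now know this branch is reachable
            return 0.0
        return sum(weights) / len(weights)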
7 changes: 2 additions & 5 deletions mlforecast/utils.py
@@ -74,7 +74,7 @@ def generate_daily_series(
 def generate_prices_for_series(
     series: pd.DataFrame, horizon: int = 7, seed: int = 0
 ) -> pd.DataFrame:
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(seed)
     unique_last_dates = series.groupby("unique_id")["ds"].max().nunique()
     if unique_last_dates > 1:
         raise ValueError("series must have equal ends.")
@@ -102,7 +102,6 @@ def _split_info(
     window_size: int,
     freq: Union[pd.offsets.BaseOffset, int],
     time_col: str,
-    target_col: str,
 ):
     # TODO: try computing this once and passing it to this fn
     last_dates = data.groupby(level=0, observed=True)[time_col].transform("max")
@@ -123,12 +122,11 @@ def backtest_splits(
     window_size: int,
     freq: Union[pd.offsets.BaseOffset, int],
     time_col: str = "ds",
-    target_col: str = "y",
 ):
     for i in range(n_windows):
         offset = (n_windows - i) * window_size
         if isinstance(data, pd.DataFrame):
-            splits = _split_info(data, offset, window_size, freq, time_col, target_col)
+            splits = _split_info(data, offset, window_size, freq, time_col)
         else:
             end_dtype = int if isinstance(freq, int) else "datetime64[ns]"
             splits = data.map_partitions(
@@ -137,7 +135,6 @@
                 window_size=window_size,
                 freq=freq,
                 time_col=time_col,
-                target_col=target_col,
                 meta={"train_end": end_dtype, "is_valid": bool},
             )
             train_mask = data[time_col].le(splits["train_end"])
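The `generate_prices_for_series` change fixes a real bug surfaced by W0613: `seed` was accepted but never used, so every call produced the same prices. A before/after sketch (hypothetical names, trimmed to the RNG line):

    import numpy as np

    def prices_before(seed: int = 0):
        rng = np.random.RandomState(0)     # bug: `seed` ignored, W0613 fires
        return rng.rand(3)

    def prices_after(seed: int = 0):
        rng = np.random.RandomState(seed)  # fix: the seed selects the stream
        return rng.rand(3)

    assert (prices_before(1) == prices_before(2)).all()    # identical despite different seeds
    assert not (prices_after(1) == prices_after(2)).all()  # now seed-dependent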
10 changes: 5 additions & 5 deletions nbs/core.ipynb
@@ -645,9 +645,9 @@
 "def simple_predict(\n",
 "    model,\n",
 "    new_x: pd.DataFrame,\n",
-"    dynamic_dfs: List[pd.DataFrame],\n",
+"    _dynamic_dfs: List[pd.DataFrame],\n",
 "    features_order: List[str],\n",
-"    **kwargs,\n",
+"    **_kwargs,\n",
 ") -> np.ndarray:\n",
 "    \"\"\"Drop the ds column from `new_x` and call `model.predict` on it.\"\"\"\n",
 "    new_x = new_x[features_order]\n",
@@ -659,7 +659,7 @@
 "    new_x: pd.DataFrame,\n",
 "    dynamic_dfs: List[pd.DataFrame],\n",
 "    features_order: List[str],\n",
-"    **kwargs,\n",
+"    **_kwargs,\n",
 ") -> np.ndarray:\n",
 "    \"\"\"Perform left join on each of `dynamic_dfs` and call model.predict.\"\"\"\n",
 "    idx = new_x.index.name\n",
@@ -1102,8 +1102,8 @@
 "        predictions = predict_fn(\n",
 "            model,\n",
 "            new_x,\n",
-"            dynamic_dfs=dynamic_dfs,\n",
-"            features_order=self.features_order_,\n",
+"            dynamic_dfs,\n",
+"            self.features_order_,\n",
 "            **predict_fn_kwargs,\n",
 "        )\n",
 "        self._update_y(predictions)\n",
2 changes: 1 addition & 1 deletion nbs/distributed.forecast.ipynb
@@ -220,7 +220,7 @@
 "            Forecast object with trained models.\n",
 "        \"\"\"\n",
 "        self.models_ = []\n",
-"        for i, model in enumerate(self.models):\n",
+"        for model in self.models:\n",
 "            self.models_.append(clone(model).fit(X, y))\n",
 "        return self \n",
 "\n",
4 changes: 2 additions & 2 deletions nbs/forecast.ipynb
@@ -196,7 +196,7 @@
 "            Forecast object with trained models.\n",
 "        \"\"\"\n",
 "        self.models_ = []\n",
-"        for i, model in enumerate(self.models):\n",
+"        for model in self.models:\n",
 "            self.models_.append(clone(model).fit(X, y))\n",
 "        return self\n",
 "\n",
@@ -355,7 +355,7 @@
 "        else:\n",
 "            freq = self.freq\n",
 "\n",
-"        for train_end, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col, target_col):\n",
+"        for train_end, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col):\n",
 "            self.fit(train, 'index', time_col, target_col, static_features, dropna, keep_last_n)\n",
 "            self.cv_models_.append(self.models_)\n",
 "            y_pred = self.predict(\n",
16 changes: 11 additions & 5 deletions nbs/lgb_cv.ipynb
@@ -181,7 +181,7 @@
 "        static_features: Optional[List[str]] = None,\n",
 "        dropna: bool = True,\n",
 "        keep_last_n: Optional[int] = None,\n",
-"        weights: Sequence[float] = None,\n",
+"        weights: Optional[Sequence[float]] = None,\n",
 "        metric: Union[str, Callable] = 'mape',\n",
 "    ):\n",
 "        \"\"\"Initialize internal data structures to iteratively train the boosters. Use this before calling partial_fit.\n",
@@ -245,7 +245,7 @@
 "        self.time_col = time_col\n",
 "        self.target_col = target_col\n",
 "        params = {} if params is None else params\n",
-"        for _, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col, target_col):\n",
+"        for _, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col):\n",
 "            ts = copy.deepcopy(self.ts)\n",
 "            prep = ts.fit_transform(train, 'index', time_col, target_col, static_features, dropna, keep_last_n)\n",
 "            ds = lgb.Dataset(prep.drop(columns=[time_col, target_col]), prep[target_col]).construct()\n",
@@ -371,13 +371,13 @@
 "        time_col: str,\n",
 "        target_col: str,\n",
 "        num_iterations: int = 100,\n",
-"        params: Dict[str, Any] = None,\n",
+"        params: Optional[Dict[str, Any]] = None,\n",
 "        static_features: Optional[List[str]] = None,\n",
 "        dropna: bool = True,\n",
 "        keep_last_n: Optional[int] = None,\n",
 "        dynamic_dfs: Optional[List[pd.DataFrame]] = None,\n",
 "        eval_every: int = 10,\n",
-"        weights: Sequence[float] = None,\n",
+"        weights: Optional[Sequence[float]] = None,\n",
 "        metric: Union[str, Callable] = 'mape',\n",
 "        verbose_eval: bool = True,\n",
 "        early_stopping_evals: int = 2,\n",
@@ -600,7 +600,13 @@
 "        result : pandas DataFrame\n",
 "            Predictions for each serie and timestep, with one column per window.\n",
 "        \"\"\"\n",
-"        return self.ts.predict(self.cv_models_, horizon)"
+"        return self.ts.predict(\n",
+"            self.cv_models_,\n",
+"            horizon,\n",
+"            dynamic_dfs,\n",
+"            predict_fn,\n",
+"            **predict_fn_kwargs\n",
+"        )"
 ]
 },
 {
7 changes: 2 additions & 5 deletions nbs/utils.ipynb
@@ -475,7 +475,7 @@
 "source": [
 "#|export\n",
 "def generate_prices_for_series(series: pd.DataFrame, horizon: int = 7, seed: int = 0) -> pd.DataFrame:\n",
-"    rng = np.random.RandomState(0)\n",
+"    rng = np.random.RandomState(seed)\n",
 "    unique_last_dates = series.groupby('unique_id')['ds'].max().nunique()\n",
 "    if unique_last_dates > 1:\n",
 "        raise ValueError('series must have equal ends.')\n",
@@ -653,7 +653,6 @@
 "    window_size: int,\n",
 "    freq: Union[pd.offsets.BaseOffset, int],\n",
 "    time_col: str,\n",
-"    target_col: str,\n",
 "):\n",
 "    # TODO: try computing this once and passing it to this fn\n",
 "    last_dates = data.groupby(level=0, observed=True)[time_col].transform('max')\n",
@@ -681,12 +680,11 @@
 "    window_size: int,\n",
 "    freq: Union[pd.offsets.BaseOffset, int],\n",
 "    time_col: str = 'ds',\n",
-"    target_col: str = 'y',\n",
 "):\n",
 "    for i in range(n_windows):\n",
 "        offset = (n_windows - i) * window_size\n",
 "        if isinstance(data, pd.DataFrame):\n",
-"            splits = _split_info(data, offset, window_size, freq, time_col, target_col)\n",
+"            splits = _split_info(data, offset, window_size, freq, time_col)\n",
 "        else:\n",
 "            end_dtype = int if isinstance(freq, int) else 'datetime64[ns]'\n",
 "            splits = data.map_partitions(\n",
@@ -695,7 +693,6 @@
 "                window_size=window_size,\n",
 "                freq=freq,\n",
 "                time_col=time_col,\n",
-"                target_col=target_col,\n",
 "                meta={'train_end': end_dtype, 'is_valid': bool}\n",
 "            )\n",
 "            train_mask = data[time_col].le(splits['train_end'])\n",
2 changes: 1 addition & 1 deletion settings.ini
@@ -17,7 +17,7 @@ license = apache2
 status = 3
 requirements = numba pandas scikit-learn window-ops
 distributed_requirements = dask[complete]
-dev_requirements = black datasetsforecast flake8 lightgbm matplotlib mypy nbdev xgboost
+dev_requirements = black datasetsforecast flake8 lightgbm matplotlib mypy nbdev pylint xgboost
 nbs_path = nbs
 doc_path = _docs
 recursive = False
