From 2e9379d3d1a0b12f5d0f9faa486438f8394072d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?=
Date: Tue, 8 Nov 2022 19:57:54 -0600
Subject: [PATCH] use pylint and fix unused arguments (#61)

---
 .github/workflows/lint.yaml        |  9 +++------
 .pylintrc                          |  6 ++++++
 action_files/lint                  |  5 +++--
 environment.yml                    |  1 +
 mlforecast/core.py                 | 10 +++++-----
 mlforecast/distributed/forecast.py |  2 +-
 mlforecast/forecast.py             |  4 ++--
 mlforecast/lgb_cv.py               | 12 +++++++-----
 mlforecast/utils.py                |  7 ++-----
 nbs/core.ipynb                     | 10 +++++-----
 nbs/distributed.forecast.ipynb     |  2 +-
 nbs/forecast.ipynb                 |  4 ++--
 nbs/lgb_cv.ipynb                   | 16 +++++++++++-----
 nbs/utils.ipynb                    |  7 ++-----
 settings.ini                       |  2 +-
 15 files changed, 52 insertions(+), 45 deletions(-)
 create mode 100644 .pylintrc

diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index 61487f83..b3952739 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -19,10 +19,7 @@ jobs:
         python-version: 3.8
 
     - name: Install linters
-      run: pip install mypy flake8
+      run: pip install mypy flake8 pylint
 
-    - name: mypy
-      run: mypy mlforecast/
-
-    - name: flake8
-      run: flake8 --select=F mlforecast/
+    - name: Lint
+      run: ./action_files/lint
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 00000000..b84f4362
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,6 @@
+[MAIN]
+ignore=_nbdev.py
+
+[MESSAGES CONTROL]
+disable=all
+enable=W0612,W0613
diff --git a/action_files/lint b/action_files/lint
index a64248d5..2a7647a9 100755
--- a/action_files/lint
+++ b/action_files/lint
@@ -1,3 +1,4 @@
 #!/usr/bin/env bash
-mypy mlforecast
-flake8 --select=F mlforecast
+mypy mlforecast || exit -1
+flake8 --select=F mlforecast || exit -1
+pylint mlforecast
diff --git a/environment.yml b/environment.yml
index fd872e08..93dcb58a 100644
--- a/environment.yml
+++ b/environment.yml
@@ -11,6 +11,7 @@ dependencies:
   - numba
   - pandas
   - pip
+  - pylint
   - scikit-learn
   - window-ops
   - xgboost
diff --git a/mlforecast/core.py b/mlforecast/core.py
index 68d1bbc9..01ebdafa 100644
--- a/mlforecast/core.py
+++ b/mlforecast/core.py
@@ -154,9 +154,9 @@ def _build_transform_name(lag, tfm, *args) -> str:
 def simple_predict(
     model,
     new_x: pd.DataFrame,
-    dynamic_dfs: List[pd.DataFrame],
+    _dynamic_dfs: List[pd.DataFrame],
     features_order: List[str],
-    **kwargs,
+    **_kwargs,
 ) -> np.ndarray:
     """Drop the ds column from `new_x` and call `model.predict` on it."""
     new_x = new_x[features_order]
@@ -168,7 +168,7 @@ def merge_predict(
     new_x: pd.DataFrame,
     dynamic_dfs: List[pd.DataFrame],
     features_order: List[str],
-    **kwargs,
+    **_kwargs,
 ) -> np.ndarray:
     """Perform left join on each of `dynamic_dfs` and call model.predict."""
     idx = new_x.index.name
@@ -586,8 +586,8 @@ def predict(
             predictions = predict_fn(
                 model,
                 new_x,
-                dynamic_dfs=dynamic_dfs,
-                features_order=self.features_order_,
+                dynamic_dfs,
+                self.features_order_,
                 **predict_fn_kwargs,
             )
             self._update_y(predictions)
diff --git a/mlforecast/distributed/forecast.py b/mlforecast/distributed/forecast.py
index 3613f1ff..3991de22 100644
--- a/mlforecast/distributed/forecast.py
+++ b/mlforecast/distributed/forecast.py
@@ -150,7 +150,7 @@ def fit_models(
             Forecast object with trained models.
         """
         self.models_ = []
-        for i, model in enumerate(self.models):
+        for model in self.models:
             self.models_.append(clone(model).fit(X, y))
         return self
 
diff --git a/mlforecast/forecast.py b/mlforecast/forecast.py
index 0d302d0c..ffb9f690 100644
--- a/mlforecast/forecast.py
+++ b/mlforecast/forecast.py
@@ -130,7 +130,7 @@ def fit_models(
             Forecast object with trained models.
         """
         self.models_ = []
-        for i, model in enumerate(self.models):
+        for model in self.models:
             self.models_.append(clone(model).fit(X, y))
         return self
 
@@ -297,7 +297,7 @@ def cross_validation(
             freq = self.freq
 
         for train_end, train, valid in backtest_splits(
-            data, n_windows, window_size, freq, time_col, target_col
+            data, n_windows, window_size, freq, time_col
         ):
             self.fit(
                 train,
diff --git a/mlforecast/lgb_cv.py b/mlforecast/lgb_cv.py
index e6404efe..25779065 100644
--- a/mlforecast/lgb_cv.py
+++ b/mlforecast/lgb_cv.py
@@ -132,7 +132,7 @@ def setup(
         static_features: Optional[List[str]] = None,
         dropna: bool = True,
         keep_last_n: Optional[int] = None,
-        weights: Sequence[float] = None,
+        weights: Optional[Sequence[float]] = None,
         metric: Union[str, Callable] = "mape",
     ):
         """Initialize internal data structures to iteratively train the boosters. Use this before calling partial_fit.
@@ -199,7 +199,7 @@ def setup(
         self.target_col = target_col
         params = {} if params is None else params
         for _, train, valid in backtest_splits(
-            data, n_windows, window_size, freq, time_col, target_col
+            data, n_windows, window_size, freq, time_col
         ):
             ts = copy.deepcopy(self.ts)
             prep = ts.fit_transform(
                 train, "index", time_col, target_col, static_features, dropna, keep_last_n
             )
@@ -351,13 +351,13 @@ def fit(
         time_col: str,
         target_col: str,
         num_iterations: int = 100,
-        params: Dict[str, Any] = None,
+        params: Optional[Dict[str, Any]] = None,
         static_features: Optional[List[str]] = None,
         dropna: bool = True,
         keep_last_n: Optional[int] = None,
         dynamic_dfs: Optional[List[pd.DataFrame]] = None,
         eval_every: int = 10,
-        weights: Sequence[float] = None,
+        weights: Optional[Sequence[float]] = None,
         metric: Union[str, Callable] = "mape",
         verbose_eval: bool = True,
         early_stopping_evals: int = 2,
@@ -588,4 +588,6 @@ def cv_predict(
         result : pandas DataFrame
             Predictions for each serie and timestep, with one column per window.
         """
-        return self.ts.predict(self.cv_models_, horizon)
+        return self.ts.predict(
+            self.cv_models_, horizon, dynamic_dfs, predict_fn, **predict_fn_kwargs
+        )
diff --git a/mlforecast/utils.py b/mlforecast/utils.py
index f1c09f38..ef093364 100644
--- a/mlforecast/utils.py
+++ b/mlforecast/utils.py
@@ -74,7 +74,7 @@ def generate_daily_series(
 def generate_prices_for_series(
     series: pd.DataFrame, horizon: int = 7, seed: int = 0
 ) -> pd.DataFrame:
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(seed)
     unique_last_dates = series.groupby("unique_id")["ds"].max().nunique()
     if unique_last_dates > 1:
         raise ValueError("series must have equal ends.")
@@ -102,7 +102,6 @@ def _split_info(
     window_size: int,
     freq: Union[pd.offsets.BaseOffset, int],
     time_col: str,
-    target_col: str,
 ):
     # TODO: try computing this once and passing it to this fn
     last_dates = data.groupby(level=0, observed=True)[time_col].transform("max")
@@ -123,12 +122,11 @@ def backtest_splits(
     window_size: int,
     freq: Union[pd.offsets.BaseOffset, int],
     time_col: str = "ds",
-    target_col: str = "y",
 ):
     for i in range(n_windows):
         offset = (n_windows - i) * window_size
         if isinstance(data, pd.DataFrame):
-            splits = _split_info(data, offset, window_size, freq, time_col, target_col)
+            splits = _split_info(data, offset, window_size, freq, time_col)
         else:
             end_dtype = int if isinstance(freq, int) else "datetime64[ns]"
             splits = data.map_partitions(
@@ -137,7 +135,6 @@ def backtest_splits(
                 window_size=window_size,
                 freq=freq,
                 time_col=time_col,
-                target_col=target_col,
                 meta={"train_end": end_dtype, "is_valid": bool},
             )
         train_mask = data[time_col].le(splits["train_end"])
diff --git a/nbs/core.ipynb b/nbs/core.ipynb
index 3b9c12e7..7644824f 100644
--- a/nbs/core.ipynb
+++ b/nbs/core.ipynb
@@ -645,9 +645,9 @@
     "def simple_predict(\n",
     "    model,\n",
     "    new_x: pd.DataFrame,\n",
-    "    dynamic_dfs: List[pd.DataFrame],\n",
+    "    _dynamic_dfs: List[pd.DataFrame],\n",
     "    features_order: List[str],\n",
-    "    **kwargs,\n",
+    "    **_kwargs,\n",
     ") -> np.ndarray:\n",
     "    \"\"\"Drop the ds column from `new_x` and call `model.predict` on it.\"\"\"\n",
     "    new_x = new_x[features_order]\n",
@@ -659,7 +659,7 @@
     "    new_x: pd.DataFrame,\n",
     "    dynamic_dfs: List[pd.DataFrame],\n",
     "    features_order: List[str],\n",
-    "    **kwargs,\n",
+    "    **_kwargs,\n",
     ") -> np.ndarray:\n",
     "    \"\"\"Perform left join on each of `dynamic_dfs` and call model.predict.\"\"\"\n",
     "    idx = new_x.index.name\n",
@@ -1102,8 +1102,8 @@
     "            predictions = predict_fn(\n",
     "                model,\n",
     "                new_x,\n",
-    "                dynamic_dfs=dynamic_dfs,\n",
-    "                features_order=self.features_order_,\n",
+    "                dynamic_dfs,\n",
+    "                self.features_order_,\n",
     "                **predict_fn_kwargs,\n",
     "            )\n",
     "            self._update_y(predictions)\n",
diff --git a/nbs/distributed.forecast.ipynb b/nbs/distributed.forecast.ipynb
index da3952bf..95d83e4b 100644
--- a/nbs/distributed.forecast.ipynb
+++ b/nbs/distributed.forecast.ipynb
@@ -220,7 +220,7 @@
     "            Forecast object with trained models.\n",
     "        \"\"\"\n",
     "        self.models_ = []\n",
-    "        for i, model in enumerate(self.models):\n",
+    "        for model in self.models:\n",
     "            self.models_.append(clone(model).fit(X, y))\n",
     "        return self \n",
     "\n",
diff --git a/nbs/forecast.ipynb b/nbs/forecast.ipynb
index e3e8edf4..44d7a2b9 100644
--- a/nbs/forecast.ipynb
+++ b/nbs/forecast.ipynb
@@ -196,7 +196,7 @@
     "            Forecast object with trained models.\n",
     "        \"\"\"\n",
     "        self.models_ = []\n",
-    "        for i, model in enumerate(self.models):\n",
+    "        for model in self.models:\n",
     "            self.models_.append(clone(model).fit(X, y))\n",
     "        return self\n",
     "\n",
@@ -355,7 +355,7 @@
     "        else:\n",
     "            freq = self.freq\n",
     "\n",
-    "        for train_end, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col, target_col):\n",
+    "        for train_end, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col):\n",
     "            self.fit(train, 'index', time_col, target_col, static_features, dropna, keep_last_n)\n",
     "            self.cv_models_.append(self.models_)\n",
     "            y_pred = self.predict(\n",
diff --git a/nbs/lgb_cv.ipynb b/nbs/lgb_cv.ipynb
index 5b6428dc..ffd52451 100644
--- a/nbs/lgb_cv.ipynb
+++ b/nbs/lgb_cv.ipynb
@@ -181,7 +181,7 @@
     "        static_features: Optional[List[str]] = None,\n",
     "        dropna: bool = True,\n",
     "        keep_last_n: Optional[int] = None,\n",
-    "        weights: Sequence[float] = None,\n",
+    "        weights: Optional[Sequence[float]] = None,\n",
     "        metric: Union[str, Callable] = 'mape',\n",
     "    ):\n",
     "        \"\"\"Initialize internal data structures to iteratively train the boosters. Use this before calling partial_fit.\n",
@@ -245,7 +245,7 @@
     "        self.time_col = time_col\n",
     "        self.target_col = target_col\n",
     "        params = {} if params is None else params\n",
-    "        for _, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col, target_col):\n",
+    "        for _, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col):\n",
     "            ts = copy.deepcopy(self.ts)\n",
     "            prep = ts.fit_transform(train, 'index', time_col, target_col, static_features, dropna, keep_last_n)\n",
     "            ds = lgb.Dataset(prep.drop(columns=[time_col, target_col]), prep[target_col]).construct()\n",
@@ -371,13 +371,13 @@
     "        time_col: str,\n",
     "        target_col: str,\n",
     "        num_iterations: int = 100,\n",
-    "        params: Dict[str, Any] = None,\n",
+    "        params: Optional[Dict[str, Any]] = None,\n",
     "        static_features: Optional[List[str]] = None,\n",
     "        dropna: bool = True,\n",
     "        keep_last_n: Optional[int] = None,\n",
     "        dynamic_dfs: Optional[List[pd.DataFrame]] = None,\n",
     "        eval_every: int = 10,\n",
-    "        weights: Sequence[float] = None,\n",
+    "        weights: Optional[Sequence[float]] = None,\n",
     "        metric: Union[str, Callable] = 'mape',\n",
     "        verbose_eval: bool = True,\n",
     "        early_stopping_evals: int = 2,\n",
@@ -600,7 +600,13 @@
     "        result : pandas DataFrame\n",
     "            Predictions for each serie and timestep, with one column per window.\n",
     "        \"\"\"\n",
-    "        return self.ts.predict(self.cv_models_, horizon)"
+    "        return self.ts.predict(\n",
+    "            self.cv_models_,\n",
+    "            horizon,\n",
+    "            dynamic_dfs,\n",
+    "            predict_fn,\n",
+    "            **predict_fn_kwargs\n",
+    "        )"
    ]
   },
   {
diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb
index b7f021eb..01897b93 100644
--- a/nbs/utils.ipynb
+++ b/nbs/utils.ipynb
@@ -475,7 +475,7 @@
    "source": [
     "#|export\n",
     "def generate_prices_for_series(series: pd.DataFrame, horizon: int = 7, seed: int = 0) -> pd.DataFrame:\n",
-    "    rng = np.random.RandomState(0)\n",
+    "    rng = np.random.RandomState(seed)\n",
     "    unique_last_dates = series.groupby('unique_id')['ds'].max().nunique()\n",
     "    if unique_last_dates > 1:\n",
     "        raise ValueError('series must have equal ends.')\n",
@@ -653,7 +653,6 @@
     "    window_size: int,\n",
     "    freq: Union[pd.offsets.BaseOffset, int],\n",
     "    time_col: str,\n",
-    "    target_col: str,\n",
     "):\n",
     "    # TODO: try computing this once and passing it to this fn\n",
     "    last_dates = data.groupby(level=0, observed=True)[time_col].transform('max')\n",
@@ -681,12 +680,11 @@
     "    window_size: int,\n",
     "    freq: Union[pd.offsets.BaseOffset, int],\n",
     "    time_col: str = 'ds',\n",
-    "    target_col: str = 'y',\n",
     "):\n",
     "    for i in range(n_windows):\n",
     "        offset = (n_windows - i) * window_size\n",
     "        if isinstance(data, pd.DataFrame):\n",
-    "            splits = _split_info(data, offset, window_size, freq, time_col, target_col)\n",
+    "            splits = _split_info(data, offset, window_size, freq, time_col)\n",
     "        else:\n",
     "            end_dtype = int if isinstance(freq, int) else 'datetime64[ns]'\n",
     "            splits = data.map_partitions(\n",
@@ -695,7 +693,6 @@
     "                window_size=window_size,\n",
     "                freq=freq,\n",
     "                time_col=time_col,\n",
-    "                target_col=target_col,\n",
     "                meta={'train_end': end_dtype, 'is_valid': bool}\n",
     "            )\n",
     "        train_mask = data[time_col].le(splits['train_end'])\n",
diff --git a/settings.ini b/settings.ini
index 194c5004..a1bd8e35 100644
--- a/settings.ini
+++ b/settings.ini
@@ -17,7 +17,7 @@ license = apache2
 status = 3
 requirements = numba pandas scikit-learn window-ops
 distributed_requirements = dask[complete]
-dev_requirements = black datasetsforecast flake8 lightgbm matplotlib mypy nbdev xgboost
+dev_requirements = black datasetsforecast flake8 lightgbm matplotlib mypy nbdev pylint xgboost
 nbs_path = nbs
 doc_path = _docs
 recursive = False
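
Note (editorial sketch, not part of the commit): the new .pylintrc disables every check except
W0612 (unused-variable) and W0613 (unused-argument), which is what drives the renames above
(`_dynamic_dfs`, `**_kwargs`) and the dropped `enumerate`. The snippet below is a minimal
illustration of both warnings and of the underscore convention; the function names here are
hypothetical stand-ins, not code from the repo:

    # With the .pylintrc above, `pylint mlforecast` reports only W0612/W0613.

    def fit_models(models, X, y):
        fitted = []
        for i, model in enumerate(models):  # W0612: variable `i` is assigned but never used
            fitted.append(model.fit(X, y))
        return fitted

    def simple_predict(model, new_x, dynamic_dfs, **kwargs):  # W0613: `dynamic_dfs` and
        return model.predict(new_x)                           # `kwargs` are never used

    # A leading underscore matches pylint's default ignored-argument-names pattern
    # (`_.*|^ignored_|^unused_`), so W0613 is silenced while the function keeps the
    # arity it shares with merge_predict-style `predict_fn` callbacks:
    def simple_predict_quiet(model, new_x, _dynamic_dfs, **_kwargs):
        return model.predict(new_x)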