Commit
use pylint and fix unused arguments (#61)
jmoralez authored Nov 9, 2022
1 parent 8d86148 commit 2e9379d
Showing 15 changed files with 52 additions and 45 deletions.
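The only pylint checks enabled are W0612 (unused-variable) and W0613 (unused-argument). As a minimal sketch of what each one flags and how the fixes in this commit silence them — the functions below are hypothetical, not from the repository:

    def total_sales(rows, verbose):         # W0613: `verbose` is never used
        total = 0
        for i, row in enumerate(rows):      # W0612: `i` is assigned but never used
            total += row
        return total

    def total_sales_fixed(rows, _verbose):  # a leading underscore matches pylint's default
        total = 0                           # ignored-argument-names pattern, so W0613 is quiet
        for row in rows:                    # iterate directly; no unused loop index
            total += row
        return total

Arguments that must stay in a signature to satisfy a calling convention (the `predict_fn` hooks below) get the underscore rename; arguments nothing depends on (`target_col` in `backtest_splits`) are removed outright.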
9 changes: 3 additions & 6 deletions .github/workflows/lint.yaml
@@ -19,10 +19,7 @@ jobs:
       python-version: 3.8
 
     - name: Install linters
-      run: pip install mypy flake8
+      run: pip install mypy flake8 pylint
 
-    - name: mypy
-      run: mypy mlforecast/
-
-    - name: flake8
-      run: flake8 --select=F mlforecast/
+    - name: Lint
+      run: ./action_files/lint
6 changes: 6 additions & 0 deletions .pylintrc
@@ -0,0 +1,6 @@
+[MAIN]
+ignore=_nbdev.py
+
+[MESSAGES CONTROL]
+disable=all
+enable=W0612,W0613
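This configuration disables every message and then re-enables only the two checks this commit targets, so CI fails solely on unused variables and unused arguments; the auto-generated `_nbdev.py` module is excluded via `ignore`.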
5 changes: 3 additions & 2 deletions action_files/lint
@@ -1,3 +1,4 @@
 #!/usr/bin/env bash
-mypy mlforecast
-flake8 --select=F mlforecast
+mypy mlforecast || exit -1
+flake8 --select=F mlforecast || exit -1
+pylint mlforecast
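The workflow now delegates to this script, so CI and local runs execute the same checks. `|| exit -1` aborts at the first failing linter; since shell exit statuses are taken modulo 256, `exit -1` in practice reports 255, and when mypy and flake8 pass, pylint's own exit status becomes the script's.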
1 change: 1 addition & 0 deletions environment.yml
@@ -11,6 +11,7 @@ dependencies:
   - numba
   - pandas
   - pip
+  - pylint
   - scikit-learn
   - window-ops
   - xgboost
10 changes: 5 additions & 5 deletions mlforecast/core.py
@@ -154,9 +154,9 @@ def _build_transform_name(lag, tfm, *args) -> str:
 def simple_predict(
     model,
     new_x: pd.DataFrame,
-    dynamic_dfs: List[pd.DataFrame],
+    _dynamic_dfs: List[pd.DataFrame],
     features_order: List[str],
-    **kwargs,
+    **_kwargs,
 ) -> np.ndarray:
     """Drop the ds column from `new_x` and call `model.predict` on it."""
     new_x = new_x[features_order]
@@ -168,7 +168,7 @@ def merge_predict(
     new_x: pd.DataFrame,
     dynamic_dfs: List[pd.DataFrame],
     features_order: List[str],
-    **kwargs,
+    **_kwargs,
 ) -> np.ndarray:
     """Perform left join on each of `dynamic_dfs` and call model.predict."""
     idx = new_x.index.name
@@ -586,8 +586,8 @@ def predict(
             predictions = predict_fn(
                 model,
                 new_x,
-                dynamic_dfs=dynamic_dfs,
-                features_order=self.features_order_,
+                dynamic_dfs,
+                self.features_order_,
                 **predict_fn_kwargs,
             )
             self._update_y(predictions)
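Renaming the parameters to `_dynamic_dfs` and `_kwargs` changes their keyword names, which is why the call in `predict` switches from keyword to positional arguments. A minimal sketch of the failure mode, with placeholder values:

    def simple_predict(model, new_x, _dynamic_dfs, features_order, **_kwargs):
        return model.predict(new_x[features_order])

    # The old keyword call no longer matches the signature:
    #   simple_predict(model, new_x, dynamic_dfs=dfs, features_order=cols)
    #   -> TypeError: simple_predict() got an unexpected keyword argument 'dynamic_dfs'
    # hence the positional call used above:
    #   simple_predict(model, new_x, dfs, cols)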
2 changes: 1 addition & 1 deletion mlforecast/distributed/forecast.py
@@ -150,7 +150,7 @@ def fit_models(
             Forecast object with trained models.
         """
         self.models_ = []
-        for i, model in enumerate(self.models):
+        for model in self.models:
             self.models_.append(clone(model).fit(X, y))
         return self
 
4 changes: 2 additions & 2 deletions mlforecast/forecast.py
@@ -130,7 +130,7 @@ def fit_models(
             Forecast object with trained models.
         """
         self.models_ = []
-        for i, model in enumerate(self.models):
+        for model in self.models:
             self.models_.append(clone(model).fit(X, y))
         return self
 
@@ -297,7 +297,7 @@ def cross_validation(
             freq = self.freq
 
         for train_end, train, valid in backtest_splits(
-            data, n_windows, window_size, freq, time_col, target_col
+            data, n_windows, window_size, freq, time_col
         ):
             self.fit(
                 train,
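`backtest_splits` never read `target_col`, so rather than an underscore rename the parameter is dropped from its signature in mlforecast/utils.py below, and every call site — this one and the one in LightGBMCV's setup — loses the argument with it.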
12 changes: 7 additions & 5 deletions mlforecast/lgb_cv.py
@@ -132,7 +132,7 @@ def setup(
         static_features: Optional[List[str]] = None,
         dropna: bool = True,
         keep_last_n: Optional[int] = None,
-        weights: Sequence[float] = None,
+        weights: Optional[Sequence[float]] = None,
         metric: Union[str, Callable] = "mape",
     ):
         """Initialize internal data structures to iteratively train the boosters. Use this before calling partial_fit.
@@ -199,7 +199,7 @@ def setup(
         self.time_col = time_col
         self.target_col = target_col
         params = {} if params is None else params
         for _, train, valid in backtest_splits(
-            data, n_windows, window_size, freq, time_col, target_col
+            data, n_windows, window_size, freq, time_col
         ):
             ts = copy.deepcopy(self.ts)
             prep = ts.fit_transform(
@@ -351,13 +351,13 @@ def fit(
         time_col: str,
         target_col: str,
         num_iterations: int = 100,
-        params: Dict[str, Any] = None,
+        params: Optional[Dict[str, Any]] = None,
         static_features: Optional[List[str]] = None,
         dropna: bool = True,
         keep_last_n: Optional[int] = None,
         dynamic_dfs: Optional[List[pd.DataFrame]] = None,
         eval_every: int = 10,
-        weights: Sequence[float] = None,
+        weights: Optional[Sequence[float]] = None,
         metric: Union[str, Callable] = "mape",
         verbose_eval: bool = True,
         early_stopping_evals: int = 2,
@@ -588,4 +588,6 @@ def cv_predict(
         result : pandas DataFrame
             Predictions for each serie and timestep, with one column per window.
         """
-        return self.ts.predict(self.cv_models_, horizon)
+        return self.ts.predict(
+            self.cv_models_, horizon, dynamic_dfs, predict_fn, **predict_fn_kwargs
+        )
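The `weights: Sequence[float] = None` and `params: Dict[str, Any] = None` annotations relied on the implicit-Optional reading of PEP 484 that type checkers now reject; wrapping them in `Optional[...]` makes the annotation match the None default. The last hunk also forwards `dynamic_dfs`, `predict_fn`, and `predict_fn_kwargs`, which `cv_predict` previously accepted but silently dropped — exactly the pattern W0613 exists to catch. A sketch of the annotation fix, with a hypothetical function:

    from typing import Optional, Sequence

    # Before: `weights: Sequence[float] = None` claims a Sequence but defaults to None.
    def weighted_mean(weights: Optional[Sequence[float]] = None) -> float:
        if weights is None:   # checkers now know this branch is reachable
            return 0.0
        return sum(weights) / len(weights)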
7 changes: 2 additions & 5 deletions mlforecast/utils.py
@@ -74,7 +74,7 @@ def generate_daily_series(
 def generate_prices_for_series(
     series: pd.DataFrame, horizon: int = 7, seed: int = 0
 ) -> pd.DataFrame:
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(seed)
     unique_last_dates = series.groupby("unique_id")["ds"].max().nunique()
     if unique_last_dates > 1:
         raise ValueError("series must have equal ends.")
@@ -102,7 +102,6 @@ def _split_info(
     window_size: int,
     freq: Union[pd.offsets.BaseOffset, int],
     time_col: str,
-    target_col: str,
 ):
     # TODO: try computing this once and passing it to this fn
     last_dates = data.groupby(level=0, observed=True)[time_col].transform("max")
@@ -123,12 +122,11 @@ def backtest_splits(
     window_size: int,
     freq: Union[pd.offsets.BaseOffset, int],
     time_col: str = "ds",
-    target_col: str = "y",
 ):
     for i in range(n_windows):
         offset = (n_windows - i) * window_size
         if isinstance(data, pd.DataFrame):
-            splits = _split_info(data, offset, window_size, freq, time_col, target_col)
+            splits = _split_info(data, offset, window_size, freq, time_col)
         else:
             end_dtype = int if isinstance(freq, int) else "datetime64[ns]"
             splits = data.map_partitions(
@@ -137,7 +135,6 @@
                 window_size=window_size,
                 freq=freq,
                 time_col=time_col,
-                target_col=target_col,
                 meta={"train_end": end_dtype, "is_valid": bool},
             )
             train_mask = data[time_col].le(splits["train_end"])
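The `generate_prices_for_series` change fixes a real bug surfaced by W0613: `seed` was accepted but never used, so every call produced the same prices. A before/after sketch (hypothetical names, trimmed to the RNG line):

    import numpy as np

    def prices_before(seed: int = 0):
        rng = np.random.RandomState(0)     # bug: `seed` ignored, W0613 fires
        return rng.rand(3)

    def prices_after(seed: int = 0):
        rng = np.random.RandomState(seed)  # fix: the seed selects the stream
        return rng.rand(3)

    assert (prices_before(1) == prices_before(2)).all()    # identical despite different seeds
    assert not (prices_after(1) == prices_after(2)).all()  # now seed-dependent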
10 changes: 5 additions & 5 deletions nbs/core.ipynb
@@ -645,9 +645,9 @@
 "def simple_predict(\n",
 "    model,\n",
 "    new_x: pd.DataFrame,\n",
-"    dynamic_dfs: List[pd.DataFrame],\n",
+"    _dynamic_dfs: List[pd.DataFrame],\n",
 "    features_order: List[str],\n",
-"    **kwargs,\n",
+"    **_kwargs,\n",
 ") -> np.ndarray:\n",
 "    \"\"\"Drop the ds column from `new_x` and call `model.predict` on it.\"\"\"\n",
 "    new_x = new_x[features_order]\n",
@@ -659,7 +659,7 @@
 "    new_x: pd.DataFrame,\n",
 "    dynamic_dfs: List[pd.DataFrame],\n",
 "    features_order: List[str],\n",
-"    **kwargs,\n",
+"    **_kwargs,\n",
 ") -> np.ndarray:\n",
 "    \"\"\"Perform left join on each of `dynamic_dfs` and call model.predict.\"\"\"\n",
 "    idx = new_x.index.name\n",
@@ -1102,8 +1102,8 @@
 "        predictions = predict_fn(\n",
 "            model,\n",
 "            new_x,\n",
-"            dynamic_dfs=dynamic_dfs,\n",
-"            features_order=self.features_order_,\n",
+"            dynamic_dfs,\n",
+"            self.features_order_,\n",
 "            **predict_fn_kwargs,\n",
 "        )\n",
 "        self._update_y(predictions)\n",
2 changes: 1 addition & 1 deletion nbs/distributed.forecast.ipynb
@@ -220,7 +220,7 @@
 "            Forecast object with trained models.\n",
 "        \"\"\"\n",
 "        self.models_ = []\n",
-"        for i, model in enumerate(self.models):\n",
+"        for model in self.models:\n",
 "            self.models_.append(clone(model).fit(X, y))\n",
 "        return self \n",
 "\n",
4 changes: 2 additions & 2 deletions nbs/forecast.ipynb
@@ -196,7 +196,7 @@
 "            Forecast object with trained models.\n",
 "        \"\"\"\n",
 "        self.models_ = []\n",
-"        for i, model in enumerate(self.models):\n",
+"        for model in self.models:\n",
 "            self.models_.append(clone(model).fit(X, y))\n",
 "        return self\n",
 "\n",
@@ -355,7 +355,7 @@
 "        else:\n",
 "            freq = self.freq\n",
 "\n",
-"        for train_end, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col, target_col):\n",
+"        for train_end, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col):\n",
 "            self.fit(train, 'index', time_col, target_col, static_features, dropna, keep_last_n)\n",
 "            self.cv_models_.append(self.models_)\n",
 "            y_pred = self.predict(\n",
16 changes: 11 additions & 5 deletions nbs/lgb_cv.ipynb
@@ -181,7 +181,7 @@
 "        static_features: Optional[List[str]] = None,\n",
 "        dropna: bool = True,\n",
 "        keep_last_n: Optional[int] = None,\n",
-"        weights: Sequence[float] = None,\n",
+"        weights: Optional[Sequence[float]] = None,\n",
 "        metric: Union[str, Callable] = 'mape',\n",
 "    ):\n",
 "        \"\"\"Initialize internal data structures to iteratively train the boosters. Use this before calling partial_fit.\n",
@@ -245,7 +245,7 @@
 "        self.time_col = time_col\n",
 "        self.target_col = target_col\n",
 "        params = {} if params is None else params\n",
-"        for _, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col, target_col):\n",
+"        for _, train, valid in backtest_splits(data, n_windows, window_size, freq, time_col):\n",
 "            ts = copy.deepcopy(self.ts)\n",
 "            prep = ts.fit_transform(train, 'index', time_col, target_col, static_features, dropna, keep_last_n)\n",
 "            ds = lgb.Dataset(prep.drop(columns=[time_col, target_col]), prep[target_col]).construct()\n",
@@ -371,13 +371,13 @@
 "        time_col: str,\n",
 "        target_col: str,\n",
 "        num_iterations: int = 100,\n",
-"        params: Dict[str, Any] = None,\n",
+"        params: Optional[Dict[str, Any]] = None,\n",
 "        static_features: Optional[List[str]] = None,\n",
 "        dropna: bool = True,\n",
 "        keep_last_n: Optional[int] = None,\n",
 "        dynamic_dfs: Optional[List[pd.DataFrame]] = None,\n",
 "        eval_every: int = 10,\n",
-"        weights: Sequence[float] = None,\n",
+"        weights: Optional[Sequence[float]] = None,\n",
 "        metric: Union[str, Callable] = 'mape',\n",
 "        verbose_eval: bool = True,\n",
 "        early_stopping_evals: int = 2,\n",
@@ -600,7 +600,13 @@
 "        result : pandas DataFrame\n",
 "            Predictions for each serie and timestep, with one column per window.\n",
 "        \"\"\"\n",
-"        return self.ts.predict(self.cv_models_, horizon)"
+"        return self.ts.predict(\n",
+"            self.cv_models_,\n",
+"            horizon,\n",
+"            dynamic_dfs,\n",
+"            predict_fn,\n",
+"            **predict_fn_kwargs\n",
+"        )"
 ]
 },
 {
7 changes: 2 additions & 5 deletions nbs/utils.ipynb
@@ -475,7 +475,7 @@
 "source": [
 "#|export\n",
 "def generate_prices_for_series(series: pd.DataFrame, horizon: int = 7, seed: int = 0) -> pd.DataFrame:\n",
-"    rng = np.random.RandomState(0)\n",
+"    rng = np.random.RandomState(seed)\n",
 "    unique_last_dates = series.groupby('unique_id')['ds'].max().nunique()\n",
 "    if unique_last_dates > 1:\n",
 "        raise ValueError('series must have equal ends.')\n",
@@ -653,7 +653,6 @@
 "    window_size: int,\n",
 "    freq: Union[pd.offsets.BaseOffset, int],\n",
 "    time_col: str,\n",
-"    target_col: str,\n",
 "):\n",
 "    # TODO: try computing this once and passing it to this fn\n",
 "    last_dates = data.groupby(level=0, observed=True)[time_col].transform('max')\n",
@@ -681,12 +680,11 @@
 "    window_size: int,\n",
 "    freq: Union[pd.offsets.BaseOffset, int],\n",
 "    time_col: str = 'ds',\n",
-"    target_col: str = 'y',\n",
 "):\n",
 "    for i in range(n_windows):\n",
 "        offset = (n_windows - i) * window_size\n",
 "        if isinstance(data, pd.DataFrame):\n",
-"            splits = _split_info(data, offset, window_size, freq, time_col, target_col)\n",
+"            splits = _split_info(data, offset, window_size, freq, time_col)\n",
 "        else:\n",
 "            end_dtype = int if isinstance(freq, int) else 'datetime64[ns]'\n",
 "            splits = data.map_partitions(\n",
@@ -695,7 +693,6 @@
 "                window_size=window_size,\n",
 "                freq=freq,\n",
 "                time_col=time_col,\n",
-"                target_col=target_col,\n",
 "                meta={'train_end': end_dtype, 'is_valid': bool}\n",
 "            )\n",
 "            train_mask = data[time_col].le(splits['train_end'])\n",
2 changes: 1 addition & 1 deletion settings.ini
@@ -17,7 +17,7 @@ license = apache2
 status = 3
 requirements = numba pandas scikit-learn window-ops
 distributed_requirements = dask[complete]
-dev_requirements = black datasetsforecast flake8 lightgbm matplotlib mypy nbdev xgboost
+dev_requirements = black datasetsforecast flake8 lightgbm matplotlib mypy nbdev pylint xgboost
 nbs_path = nbs
 doc_path = _docs
 recursive = False
