diff --git a/README.rst b/README.rst index 68aa22f..b507fcb 100644 --- a/README.rst +++ b/README.rst @@ -48,7 +48,8 @@ Main Features: * **GASearchCV**: Principal class of the package, holds the evolutionary cross validation optimization routine. * **Algorithms**: Set of different evolutionary algorithms to use as optimization procedure. -* **Callbacks**: Custom evaluation strategies to generate early stopping rules, logging or custom logic. +* **Callbacks**: Custom evaluation strategies to generate early stopping rules, + logging (into TensorBoard, .pkl files, etc) or your custom logic. * **Plots**: Generate pre-defined plots to understand the optimization process. * **MLflow**: Build-in integration with mlflow to log all the hyperparameters, cv-scores and the fitted models. @@ -153,16 +154,13 @@ Contributions are more than welcome! There are lots of opportunities on the on going project, so please get in touch if you would like to help out. Also check the `Contribution guide `_ +Big thanks to the people who are helping this project! + +|Contributors|_ + Testing ####### After installation, you can launch the test suite from outside the source directory:: pytest sklearn_genetic - - -Current Contributors -#################### -Big thanks to the people who are helping this project! - -|Contributors|_ diff --git a/dev-requirements.txt b/dev-requirements.txt index 527febf..68f03f3 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -14,3 +14,4 @@ sphinx_rtd_theme sphinx-copybutton numpydoc nbsphinx +tensorflow>=2.0.0 diff --git a/docs/api/callbacks.rst b/docs/api/callbacks.rst index 13708b9..3326e5e 100644 --- a/docs/api/callbacks.rst +++ b/docs/api/callbacks.rst @@ -9,9 +9,10 @@ Callbacks DeltaThreshold TimerStopping ThresholdStopping - ThresholdStopping + TensorBoard LogbookSaver + .. autoclass:: sklearn_genetic.callbacks.base.BaseCallback :members: :undoc-members: False @@ -32,6 +33,10 @@ Callbacks :members: :undoc-members: False +.. 
autoclass:: TensorBoard + :members: + :undoc-members: False + .. autoclass:: LogbookSaver :members: :undoc-members: False diff --git a/docs/images/tensorboard_log.png b/docs/images/tensorboard_log.png new file mode 100644 index 0000000..7049af6 Binary files /dev/null and b/docs/images/tensorboard_log.png differ diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 5363a07..9dcd2d1 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -12,6 +12,9 @@ This is the current development version, these features are not yet available th Features: ^^^^^^^^^ +* Added the :class:`~sklearn_genetic.callbacks.TensorBoard` callback to log the + generation metrics, watch them in real time while the models are trained, + and compare different runs in your TensorBoard instance. * Added the :class:`~sklearn_genetic.callbacks.TimerStopping` callback to stop the iterations after a total (threshold) fitting time has been elapsed. * Added new parallel coordinates plot in :func:`~sklearn_genetic.plots.plot_parallel_coordinates`. @@ -33,8 +36,9 @@ Docs: ^^^^^ * Edited all demos to be in the jupyter notebook format. -* Added embedded jupyter notebooks examples -* The modules of the package now have a summary of their classes/functions in the docs +* Added embedded Jupyter notebook examples. +* The modules of the package now have a summary of their classes/functions in the docs. +* Updated the callbacks tutorials to add the new TensorBoard callback. What's new in 0.5.0 ------------------- diff --git a/docs/tutorials/callbacks.rst b/docs/tutorials/callbacks.rst index 05ba874..05b22e5 100644 --- a/docs/tutorials/callbacks.rst +++ b/docs/tutorials/callbacks.rst @@ -31,6 +31,8 @@ the data set and model used in :ref:`basic-usage`.
The available callbacks are: * ThresholdStopping +* TensorBoard + * LogbookSaver ConsecutiveStopping @@ -108,6 +110,34 @@ if the 'fitness_max' is above 0.98: evolved_estimator.fit(X, y, callbacks=callback) + +TensorBoard +------------ +It saves at each iteration the fitness metrics into a log folder that can be +read by TensorBoard. + +To use this callback you must install tensorflow first:: + + pip install tensorflow + +You only need to define the folder where you want to log your run and, optionally, a run_id, so +your consecutive runs don't get mixed up. +If the run_id is not provided, it will create a subfolder with the current datetime of your run. + +.. code:: python3 + + from sklearn_genetic.callbacks import TensorBoard + callback = TensorBoard(log_dir="./logs") + + evolved_estimator.fit(X, y, callbacks=callback) + +While the model is being trained you can see in real time the metrics in TensorBoard. +If you have run more than one GASearchCV model and used the TensorBoard callback with +the same log_dir but a different run_id, you can compare the metrics of each run. It looks +like this for the fitness in three different runs: + +..
image:: ../images/tensorboard_log.png + LogbookSaver ------------ It saves at each iteration the Logbook object with all the parameters and diff --git a/sklearn_genetic/callbacks/__init__.py b/sklearn_genetic/callbacks/__init__.py index 49a5ade..c11b37e 100644 --- a/sklearn_genetic/callbacks/__init__.py +++ b/sklearn_genetic/callbacks/__init__.py @@ -4,7 +4,7 @@ ConsecutiveStopping, TimerStopping, ) -from .loggers import LogbookSaver +from .loggers import LogbookSaver, TensorBoard __all__ = [ "DeltaThreshold", @@ -12,4 +12,5 @@ "ConsecutiveStopping", "TimerStopping", "LogbookSaver", + "TensorBoard", ] diff --git a/sklearn_genetic/callbacks/loggers.py b/sklearn_genetic/callbacks/loggers.py index 4cd7caf..2808a85 100644 --- a/sklearn_genetic/callbacks/loggers.py +++ b/sklearn_genetic/callbacks/loggers.py @@ -1,8 +1,20 @@ import logging +import os +import time from copy import deepcopy from joblib import dump from .base import BaseCallback +from ..parameters import Metrics + +logger = logging.getLogger(__name__) # noqa + +try: + import tensorflow as tf +except ModuleNotFoundError: # noqa + logger.error( + "Tensorflow not found, pip install tensorflow to use TensorBoard callback" + ) # noqa class LogbookSaver(BaseCallback): @@ -28,9 +40,46 @@ def on_step(self, record=None, logbook=None, estimator=None): dump_logbook = deepcopy(estimator.logbook.chapters["parameters"]) dump(dump_logbook, self.checkpoint_path, **self.dump_options) except Exception as e: - logging.error("Could not save the Logbook in the checkpoint") + logger.error("Could not save the Logbook in the checkpoint") return False - def __call__(self, record=None, logbook=None, estimator=None): - return self.on_step(record, logbook, estimator) + +class TensorBoard(BaseCallback): + """Log all the fitness metrics to Tensorboard into log_dir/run_id folder""" + + def __init__(self, log_dir="./logs", run_id=None): + """ + Parameters + ---------- + log_dir: str, default="./logs" + Path to the main folder where the 
data will be log + run_id: str, default=None + Subfolder where the data will be log, if None it will create a folder + with the current datetime with format time.strftime("%Y_%m_%d-%H_%M_%S") + """ + + self.log_dir = log_dir + + if run_id is None: + self.run_id = time.strftime("%Y_%m_%d-%H_%M_%S") + else: + self.run_id = run_id + + self.path = os.path.join(log_dir, self.run_id) + + def on_step(self, record=None, logbook=None, estimator=None): + + # Get the last metric value + stats = logbook[-1] + + # Create logs files placeholder + writer = tf.summary.create_file_writer(self.path) + + # Log the metrics + with writer.as_default(): + for metric in Metrics.list(): + tf.summary.scalar(name=metric, data=stats[metric], step=stats["gen"]) + writer.flush() + + return False diff --git a/sklearn_genetic/callbacks/tests/test_callbacks.py b/sklearn_genetic/callbacks/tests/test_callbacks.py index 9096e05..575c40e 100644 --- a/sklearn_genetic/callbacks/tests/test_callbacks.py +++ b/sklearn_genetic/callbacks/tests/test_callbacks.py @@ -1,5 +1,6 @@ import pytest import os +import shutil import logging from deap.tools import Logbook @@ -14,6 +15,7 @@ ConsecutiveStopping, DeltaThreshold, LogbookSaver, + TensorBoard, ) from ..validations import check_stats, check_callback from ..base import BaseCallback @@ -189,3 +191,36 @@ def test_logbook_saver_callback(caplog): callback = LogbookSaver(checkpoint_path="./no_folder/logbook.pkl", estimator=4) callback() assert "Could not save the Logbook in the checkpoint" in caplog.text + + +@pytest.mark.parametrize( + "callback, path", + [ + (TensorBoard(), "./logs"), + (TensorBoard(log_dir="./sklearn_logs"), "./sklearn_logs"), + (TensorBoard(log_dir="./logs", run_id="0"), "./logs/0"), + (TensorBoard(log_dir="./logs", run_id="1"), "./logs/1"), + ], +) +def test_tensorboard_callback(callback, path): + assert check_callback(callback) == [callback] + + clf = DecisionTreeClassifier() + evolved_estimator = GASearchCV( + clf, + cv=3, + 
scoring="accuracy", + generations=2, + param_grid={ + "min_weight_fraction_leaf": Continuous(0, 0.5), + "max_depth": Integer(2, 20), + "max_leaf_nodes": Integer(2, 30), + }, + verbose=False, + ) + + evolved_estimator.fit(X_train, y_train, callbacks=callback) + + assert os.path.exists(path) + + shutil.rmtree(path) diff --git a/sklearn_genetic/tests/test_genetic_search.py b/sklearn_genetic/tests/test_genetic_search.py index b0b7412..1fd7fdc 100644 --- a/sklearn_genetic/tests/test_genetic_search.py +++ b/sklearn_genetic/tests/test_genetic_search.py @@ -15,6 +15,7 @@ DeltaThreshold, ConsecutiveStopping, TimerStopping, + TensorBoard, ) data = load_digits()