From c2498a9fc262c126d88a37920a868aad291000b9 Mon Sep 17 00:00:00 2001 From: Rahul Varma Date: Fri, 25 Jun 2021 23:04:45 -0700 Subject: [PATCH 1/4] Added Parallel Coordinates plot --- sklearn_genetic/plots.py | 64 ++++++++++++++++++++++++++++- sklearn_genetic/tests/test_plots.py | 9 +++- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/sklearn_genetic/plots.py b/sklearn_genetic/plots.py index aca214f..809604c 100644 --- a/sklearn_genetic/plots.py +++ b/sklearn_genetic/plots.py @@ -1,16 +1,19 @@ import logging +logger = logging.getLogger(__name__) # noqa # Check if seaborn is installed as an extra requirement try: import seaborn as sns except ModuleNotFoundError: # noqa - logger = logging.getLogger(__name__) # noqa logger.error( "seaborn not found, pip install seaborn to use plots functions" ) # noqa +import pandas as pd +import numpy as np from .utils import logbook_to_pandas from .parameters import Metrics +from .space import Categorical """ This module contains some useful function to explore the results of the optimization routines @@ -89,3 +92,62 @@ def plot_search_space(estimator, height=2, s=25, features: list = None): ) g = g.map_diag(sns.kdeplot, shade=True, palette="crest", alpha=0.2, color="red") return g + + +def noise(score): + """ + Parameters + ---------- + estimator: Series + The `score` column from the logbook data of :class:`~sklearn_genetic.GASearchCV` + + Returns + ------- + Noise to be added to each element of the score + + """ + score_len = len(score) + score_std = score.std() + noise_ratio = 1e7 + noise = (np.random.random(score_len)*score_std/noise_ratio) - (score_std/2*noise_ratio) + return noise + + +def plot_parallel_coordinates(estimator, features: list = None): + """ + Parameters + ---------- + estimator: estimator object + A fitted estimator from :class:`~sklearn_genetic.GASearchCV` + features: list, default=None + Subset of features to plot, if ``None`` it plots all the features by default + + Returns + ------- + Parallel Coordinates plot of the non-categorical values + + """ + + df = logbook_to_pandas(estimator.logbook) + param_grid = estimator.space.param_grid + score = df["score"] + if features: + non_categorical_features = [] + for feature in features: + if not isinstance(param_grid[feature], Categorical): + non_categorical_features.append(feature) + else: + logger.warning("`%s` is Categorical variable! It was dropped from the plot feature list", feature) + stats = df[non_categorical_features] + else: + non_categorical_variables = [] + for variable, var_type in param_grid.items(): + if not isinstance(var_type, Categorical): + non_categorical_variables.append(variable) + non_categorical_variables.append("score") + stats = df[non_categorical_variables] + + stats["score_quartile"] = pd.qcut(score + noise(score), 4, labels=[1,2,3,4]) + g = pd.plotting.parallel_coordinates(stats, "score_quartile", color=("#8E8E8D", "#4ECDC4", "#C7F464", "#FF0000")) + + return g diff --git a/sklearn_genetic/tests/test_plots.py b/sklearn_genetic/tests/test_plots.py index 2102a3d..2ccd50c 100644 --- a/sklearn_genetic/tests/test_plots.py +++ b/sklearn_genetic/tests/test_plots.py @@ -4,7 +4,7 @@ from sklearn.tree import DecisionTreeRegressor from .. import GASearchCV -from ..plots import plot_fitness_evolution, plot_search_space +from ..plots import plot_fitness_evolution, plot_search_space, plot_parallel_coordinates from ..space import Integer, Categorical, Continuous @@ -61,3 +61,10 @@ def test_plot_space(): plot = plot_search_space( evolved_estimator, features=["ccp_alpha", "max_depth", "min_samples_split"] ) + + +def test_plot_parallel(): + plot = plot_parallel_coordinates(evolved_estimator) + plot = plot_parallel_coordinates( + evolved_estimator, features=["ccp_alpha", "criterion"] + ) From 4fe0ca13de82107a18a647356fdd425771b5ab21 Mon Sep 17 00:00:00 2001 From: Rahul Varma Date: Fri, 25 Jun 2021 23:11:15 -0700 Subject: [PATCH 2/4] Updated comment for noise function --- sklearn_genetic/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn_genetic/plots.py b/sklearn_genetic/plots.py index 809604c..739066f 100644 --- a/sklearn_genetic/plots.py +++ b/sklearn_genetic/plots.py @@ -103,7 +103,7 @@ def noise(score): Returns ------- - Noise to be added to each element of the score + Noise to be added to each element of the score to avoid non-unique bin edges """ score_len = len(score) From bc5831d20fd4271dc87f322004522471a2592631 Mon Sep 17 00:00:00 2001 From: Rahul Varma Date: Fri, 25 Jun 2021 23:14:24 -0700 Subject: [PATCH 3/4] Update plots.py formatting --- sklearn_genetic/plots.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/sklearn_genetic/plots.py b/sklearn_genetic/plots.py index 739066f..a296e4b 100644 --- a/sklearn_genetic/plots.py +++ b/sklearn_genetic/plots.py @@ -1,4 +1,5 @@ import logging + logger = logging.getLogger(__name__) # noqa # Check if seaborn is installed as an extra requirement @@ -109,7 +110,9 @@ def noise(score): score_len = len(score) score_std = score.std() noise_ratio = 1e7 - noise = (np.random.random(score_len)*score_std/noise_ratio) - (score_std/2*noise_ratio) + noise = (np.random.random(score_len) * score_std / noise_ratio) - ( + score_std / 2 * noise_ratio + ) return noise @@ -137,7 +140,10 @@ def plot_parallel_coordinates(estimator, features: list = None): if not isinstance(param_grid[feature], Categorical): non_categorical_features.append(feature) else: - logger.warning("`%s` is Categorical variable! It was dropped from the plot feature list", feature) + logger.warning( + "`%s` is Categorical variable! It was dropped from the plot feature list", + feature, + ) stats = df[non_categorical_features] else: non_categorical_variables = [] @@ -146,8 +152,10 @@ def plot_parallel_coordinates(estimator, features: list = None): non_categorical_variables.append(variable) non_categorical_variables.append("score") stats = df[non_categorical_variables] - - stats["score_quartile"] = pd.qcut(score + noise(score), 4, labels=[1,2,3,4]) - g = pd.plotting.parallel_coordinates(stats, "score_quartile", color=("#8E8E8D", "#4ECDC4", "#C7F464", "#FF0000")) + + stats["score_quartile"] = pd.qcut(score + noise(score), 4, labels=[1, 2, 3, 4]) + g = pd.plotting.parallel_coordinates( + stats, "score_quartile", color=("#8E8E8D", "#4ECDC4", "#C7F464", "#FF0000") + ) return g From b2b729fe70989c14d7c6170bf9b46f390ee1d36b Mon Sep 17 00:00:00 2001 From: rodrigoarenas <31422766+rodrigo-arenas@users.noreply.github.com> Date: Sat, 26 Jun 2021 10:16:01 -0500 Subject: [PATCH 4/4] Docs on plot_parallel_coordinates --- docs/api/plots.rst | 1 + docs/release_notes.rst | 1 + sklearn_genetic/plots.py | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/api/plots.rst b/docs/api/plots.rst index 9e9cff6..4f47622 100644 --- a/docs/api/plots.rst +++ b/docs/api/plots.rst @@ -7,6 +7,7 @@ Plots .. autosummary:: plot_fitness_evolution plot_search_space + plot_parallel_coordinates .. automodule:: sklearn_genetic.plots :members: \ No newline at end of file diff --git a/docs/release_notes.rst b/docs/release_notes.rst index d78ddb4..5363a07 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -14,6 +14,7 @@ Features: * Added the :class:`~sklearn_genetic.callbacks.TimerStopping` callback to stop the iterations after a total (threshold) fitting time has been elapsed. +* Added new parallel coordinates plot in :func:`~sklearn_genetic.plots.plot_parallel_coordinates`. * Now if one or more callbacks decides to stop the algorithm, it will print its class name to know which callbacks were responsible of the stopping. * Added support for extra methods coming from scikit-learn's BaseSearchCV, it is diff --git a/sklearn_genetic/plots.py b/sklearn_genetic/plots.py index a296e4b..1b47113 100644 --- a/sklearn_genetic/plots.py +++ b/sklearn_genetic/plots.py @@ -99,8 +99,8 @@ def noise(score): """ Parameters ---------- - estimator: Series - The `score` column from the logbook data of :class:`~sklearn_genetic.GASearchCV` + score: Series + The `score` column from the logbook data of :class:`~sklearn_genetic.GASearchCV` Returns -------