Skip to content

Commit

Permalink
Merge pull request #39 from Raul9595/master
Browse files Browse the repository at this point in the history
Add Parallel Coordinates plot
  • Loading branch information
rodrigo-arenas authored Jun 26, 2021
2 parents ffffd33 + b2b729f commit a43554f
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/api/plots.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Plots
.. autosummary::
plot_fitness_evolution
plot_search_space
plot_parallel_coordinates

.. automodule:: sklearn_genetic.plots
:members:
1 change: 1 addition & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Features:

* Added the :class:`~sklearn_genetic.callbacks.TimerStopping` callback to stop
the iterations after a total (threshold) fitting time has elapsed.
* Added new parallel coordinates plot in :func:`~sklearn_genetic.plots.plot_parallel_coordinates`.
* Now if one or more callbacks decide to stop the algorithm, it will print
their class names to show which callbacks were responsible for the stopping.
* Added support for extra methods coming from scikit-learn's BaseSearchCV, it is
Expand Down
72 changes: 71 additions & 1 deletion sklearn_genetic/plots.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
import logging

logger = logging.getLogger(__name__) # noqa

# Check if seaborn is installed as an extra requirement
try:
import seaborn as sns
except ModuleNotFoundError: # noqa
logger = logging.getLogger(__name__) # noqa
logger.error(
"seaborn not found, pip install seaborn to use plots functions"
) # noqa
import pandas as pd
import numpy as np

from .utils import logbook_to_pandas
from .parameters import Metrics
from .space import Categorical

"""
This module contains some useful functions to explore the results of the optimization routines
Expand Down Expand Up @@ -89,3 +93,69 @@ def plot_search_space(estimator, height=2, s=25, features: list = None):
)
g = g.map_diag(sns.kdeplot, shade=True, palette="crest", alpha=0.2, color="red")
return g


def noise(score):
    """
    Generate a tiny, zero-centered random jitter for the score column.

    The jitter is drawn uniformly from a band whose total width is
    ``score.std() / 1e7``, so it is negligible compared to the spread of the
    scores but is enough to break ties between identical values.

    Parameters
    ----------
    score: Series
        The `score` column from the logbook data of :class:`~sklearn_genetic.GASearchCV`

    Returns
    -------
    Noise to be added to each element of the score to avoid non-unique bin edges
    """
    score_std = score.std()
    noise_ratio = 1e7
    # Total width of the jitter band, relative to the spread of the scores.
    amplitude = score_std / noise_ratio
    # Shift the uniform [0, 1) samples by 0.5 so the jitter is centered on zero.
    # The previous expression `score_std / 2 * noise_ratio` multiplied by the
    # ratio instead of dividing by it, producing a huge constant offset.
    jitter = (np.random.random(len(score)) - 0.5) * amplitude
    return jitter


def plot_parallel_coordinates(estimator, features: list = None):
    """
    Draw a parallel coordinates plot of the non-categorical hyperparameters
    stored in the estimator's logbook, with one line per logged row, colored
    by the quartile its score falls into.

    Parameters
    ----------
    estimator: estimator object
        A fitted estimator from :class:`~sklearn_genetic.GASearchCV`
    features: list, default=None
        Subset of features to plot, if ``None`` it plots all the features by default.
        Categorical features in the subset are dropped with a warning.

    Returns
    -------
    Parallel Coordinates plot of the non-categorical values
    """

    df = logbook_to_pandas(estimator.logbook)
    param_grid = estimator.space.param_grid
    score = df["score"]

    if features:
        columns = []
        for feature in features:
            if isinstance(param_grid[feature], Categorical):
                logger.warning(
                    "`%s` is Categorical variable! It was dropped from the plot feature list",
                    feature,
                )
            else:
                columns.append(feature)
    else:
        columns = [
            variable
            for variable, var_type in param_grid.items()
            if not isinstance(var_type, Categorical)
        ]
        # Without an explicit subset, the score itself is also drawn as an axis.
        columns.append("score")

    # Take an explicit copy: adding a column to a selection of `df` would
    # otherwise trigger pandas' SettingWithCopyWarning.
    stats = df[columns].copy()

    # A small jitter is added before binning so repeated scores do not
    # produce duplicated quantile edges in `qcut`.
    stats["score_quartile"] = pd.qcut(score + noise(score), 4, labels=[1, 2, 3, 4])

    return pd.plotting.parallel_coordinates(
        stats, "score_quartile", color=("#8E8E8D", "#4ECDC4", "#C7F464", "#FF0000")
    )
9 changes: 8 additions & 1 deletion sklearn_genetic/tests/test_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sklearn.tree import DecisionTreeRegressor

from .. import GASearchCV
from ..plots import plot_fitness_evolution, plot_search_space
from ..plots import plot_fitness_evolution, plot_search_space, plot_parallel_coordinates
from ..space import Integer, Categorical, Continuous


Expand Down Expand Up @@ -61,3 +61,10 @@ def test_plot_space():
plot = plot_search_space(
evolved_estimator, features=["ccp_alpha", "max_depth", "min_samples_split"]
)


def test_plot_parallel():
    """Smoke test: the parallel coordinates plot runs with the default
    feature set and with an explicit feature subset."""
    plot_parallel_coordinates(evolved_estimator)

    selected = ["ccp_alpha", "criterion"]
    plot_parallel_coordinates(evolved_estimator, features=selected)

0 comments on commit a43554f

Please sign in to comment.