From e88eb480f58e5b2bdaf5ac6b57c0d83e94b26c3a Mon Sep 17 00:00:00 2001 From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com> Date: Fri, 21 Jan 2022 11:21:04 -0500 Subject: [PATCH 1/2] sample mechanism in GAFeatureSelectionCV --- docs/release_notes.rst | 11 +++++++++++ sklearn_genetic/_version.py | 2 +- sklearn_genetic/genetic_search.py | 14 ++++++++++++-- sklearn_genetic/utils/random.py | 20 ++++++++++++++++++++ 4 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 sklearn_genetic/utils/random.py diff --git a/docs/release_notes.rst b/docs/release_notes.rst index ae393f1..98edfcf 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -3,6 +3,17 @@ Release Notes Some notes on new features in various releases +What's new in 0.8.1dev0 +----------------------- + +^^^^^^^^^ +Features: +^^^^^^^^^ + +* If the `max_features` parameter from :class:`~sklearn_genetic.GAFeatureSelectionCV` is set, + the initial population is now sampled so that solutions with fewer than `max_features` features are more likely. 
+ + What's new in 0.8.0 ------------------- diff --git a/sklearn_genetic/_version.py b/sklearn_genetic/_version.py index 777f190..fc79097 100644 --- a/sklearn_genetic/_version.py +++ b/sklearn_genetic/_version.py @@ -1 +1 @@ -__version__ = "0.8.0" +__version__ = "0.8.1dev0" diff --git a/sklearn_genetic/genetic_search.py b/sklearn_genetic/genetic_search.py index 55414f9..b375b94 100644 --- a/sklearn_genetic/genetic_search.py +++ b/sklearn_genetic/genetic_search.py @@ -24,6 +24,7 @@ create_gasearch_cv_results_, create_feature_selection_cv_results_, ) +from .utils.random import weighted_choice class GASearchCV(BaseSearchCV): @@ -940,6 +941,7 @@ def __init__( self.n_features = None self.X_ = None self.y_ = None + self.features_proportion = None self.callbacks = None self.best_features_ = None self.best_estimator_ = None @@ -999,7 +1001,11 @@ def _register(self): # Register the array to choose the features # Each binary value represents if the feature is selected or not - self.toolbox.register("features", random.randint, 0, 1) + + if self.features_proportion: + self.toolbox.register("features", weighted_choice, self.features_proportion) + else: + self.toolbox.register("features", random.randint, 0, 1) self.toolbox.register( "individual", @@ -1107,10 +1113,11 @@ def evaluate(self, individual): self.logbook.record(parameters=current_generation_features) # Penalize individuals with more features than the max_features parameter + if self.max_features and ( n_selected_features > self.max_features or n_selected_features == 0 ): - score = -self.criteria_sign * 10000 + score = -self.criteria_sign * 100000 return [score, n_selected_features] @@ -1136,6 +1143,9 @@ def fit(self, X, y, callbacks=None): self.X_, self.y_ = check_X_y(X, y) self.n_features = X.shape[1] + if self.max_features: + self.features_proportion = self.max_features/self.n_features + # Make sure the callbacks are valid self.callbacks = check_callback(callbacks) diff --git a/sklearn_genetic/utils/random.py 
b/sklearn_genetic/utils/random.py new file mode 100644 index 0000000..ea323af --- /dev/null +++ b/sklearn_genetic/utils/random.py @@ -0,0 +1,20 @@ +import random + + +def weighted_choice(weight): + """ + Parameters + ---------- + weight: float + Upper bound of the probability of choosing 1 + + Returns + ------- + 0 or 1, chosen at random (not uniformly) + """ + + # Sample p from [0, weight]; this is meant to avoid generating individuals that all have the same size on average + p = random.uniform(0, weight) + choice = random.choices([0, 1], [1-p, p])[0] + + return choice From 4fc9ce71bb756e1f2971a9aed0699ba53bfcce31 Mon Sep 17 00:00:00 2001 From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com> Date: Wed, 9 Mar 2022 12:02:56 -0500 Subject: [PATCH 2/2] Version 0.8.1 release --- docs/release_notes.rst | 4 ++-- sklearn_genetic/_version.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 98edfcf..a20f504 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -3,8 +3,8 @@ Release Notes Some notes on new features in various releases -What's new in 0.8.1dev0 ------------------------ +What's new in 0.8.1 +------------------- ^^^^^^^^^ Features: diff --git a/sklearn_genetic/_version.py b/sklearn_genetic/_version.py index fc79097..8088f75 100644 --- a/sklearn_genetic/_version.py +++ b/sklearn_genetic/_version.py @@ -1 +1 @@ -__version__ = "0.8.1dev0" +__version__ = "0.8.1"