Skip to content

Commit

Permalink
warm_start_configs for model initialization
Browse files Browse the repository at this point in the history
  • Loading branch information
rodrigo-arenas committed Sep 12, 2024
1 parent 741ef00 commit 5bdb85d
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 4 deletions.
9 changes: 9 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,19 @@ Example: Hyperparameters Tuning
clf = RandomForestClassifier()
# Defines the possible values to search
param_grid = {'min_weight_fraction_leaf': Continuous(0.01, 0.5, distribution='log-uniform'),
'bootstrap': Categorical([True, False]),
'max_depth': Integer(2, 30),
'max_leaf_nodes': Integer(2, 35),
'n_estimators': Integer(100, 300)}
# Seed solutions
warm_start_configs = [
{"min_weight_fraction_leaf": 0.02, "bootstrap": True, "max_depth": None, "n_estimators": 100},
{"min_weight_fraction_leaf": 0.4, "bootstrap": True, "max_depth": 5, "n_estimators": 200},
]
cv = StratifiedKFold(n_splits=3, shuffle=True)
evolved_estimator = GASearchCV(estimator=clf,
Expand All @@ -118,6 +125,8 @@ Example: Hyperparameters Tuning
param_grid=param_grid,
n_jobs=-1,
verbose=True,
use_cache=True,
warm_start_configs=warm_start_configs,
keep_top_k=4)
# Train and optimize the estimator
Expand Down
2 changes: 2 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ sphinx_rtd_theme
sphinx-copybutton
numpydoc
nbsphinx
ipython>=8.27.0
Pygments>=2.18.0
tensorflow>=2.4.0
tqdm>=4.61.1
tk
27 changes: 24 additions & 3 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,30 @@ What's new in 0.11.0dev0
Features:
^^^^^^^^^

* Added a parameter named `use_cache`, which defaults to `True`. If set to true, it avoids re-evaluating solutions that have already been seen,
otherwise it always evaluates the solutions to get the performance metrics.

* Added a parameter `use_cache`, which defaults to ``True``. When enabled, the algorithm will skip re-evaluating solutions that have already been evaluated, retrieving the performance metrics from the cache instead.
If use_cache is set to ``False``, the algorithm will always re-evaluate solutions, even if they have been seen before, to obtain fresh performance metrics.
* Added a parameter in `GAFeatureSelectionCV` named `warm_start_configs`, which defaults to ``None``: a list of predefined hyperparameter configurations used to seed the initial population.
Each element in the list is a dictionary where the keys are the names of the hyperparameters,
and the values are the corresponding hyperparameter values to be used for the individual.

Example:

.. code-block:: python
   :linenos:

   warm_start_configs = [
       {"min_weight_fraction_leaf": 0.02, "bootstrap": True, "max_depth": None, "n_estimators": 100},
       {"min_weight_fraction_leaf": 0.4, "bootstrap": True, "max_depth": 5, "n_estimators": 200},
   ]
The genetic algorithm will initialize part of the population with these configurations to
warm-start the optimization process. The remaining individuals in the population will
be initialized randomly according to the defined hyperparameter space.

This parameter is useful when prior knowledge of good hyperparameter configurations exists,
allowing the algorithm to focus on refining known good solutions while still exploring new
areas of the hyperparameter space. If set to ``None``, the entire population will be initialized
randomly.

What's new in 0.10.1
--------------------
Expand Down
27 changes: 26 additions & 1 deletion sklearn_genetic/genetic_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def __init__(
return_train_score=False,
log_config=None,
use_cache=True,
warm_start_configs=None,
):
self.estimator = estimator
self.cv = cv
Expand All @@ -266,6 +267,7 @@ def __init__(
self.log_config = log_config
self.use_cache = use_cache
self.fitness_cache = {}
self.warm_start_configs = warm_start_configs or []

# Check that the estimator is compatible with scikit-learn
if not is_classifier(self.estimator) and not is_regressor(self.estimator):
Expand Down Expand Up @@ -346,7 +348,7 @@ def _register(self):

self.toolbox.register("evaluate", self.evaluate)

self._pop = self.toolbox.population(n=self.population_size)
self._pop = self._initialize_population()
self._hof = tools.HallOfFame(self.keep_top_k)

self._stats = tools.Statistics(lambda ind: ind.fitness.values)
Expand All @@ -357,6 +359,29 @@ def _register(self):

self.logbook = tools.Logbook()

def _initialize_population(self):
    """
    Build the initial population, seeding it with warm-start configurations.

    Up to ``population_size`` entries from ``warm_start_configs`` are turned
    into individuals; the remainder of the population is generated randomly
    by the DEAP toolbox.
    """
    # Never seed more individuals than the population can hold.
    seed_count = min(len(self.warm_start_configs), self.population_size)

    # Each warm-start config is completed by the space (random values for any
    # missing parameters) and wrapped as a DEAP individual.
    seeded = [
        creator.Individual(list(self.space.sample_warm_start(config).values()))
        for config in self.warm_start_configs[:seed_count]
    ]

    # Top up the population with randomly initialized individuals.
    random_individuals = self.toolbox.population(n=self.population_size - seed_count)

    return seeded + random_individuals

def mutate(self, individual):
"""
This function is responsible for change a randomly selected parameter from an individual
Expand Down
21 changes: 21 additions & 0 deletions sklearn_genetic/space/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,27 @@ def __init__(self, param_grid: dict = None):

self.param_grid = param_grid

def sample_warm_start(self, warm_start_values: dict):
    """
    Build a full parameter configuration from a warm-start dictionary.

    Parameters
    ----------
    warm_start_values : dict
        Predefined values for a subset (or all) of the hyperparameters.

    Returns
    -------
    dict
        One value per hyperparameter in the space: the warm-start value when
        provided, otherwise a fresh random sample from that parameter's
        dimension.
    """
    # The conditional keeps `dimension.sample()` from being called (and thus
    # consuming randomness) when a warm-start value is supplied.
    return {
        name: warm_start_values[name] if name in warm_start_values else dimension.sample()
        for name, dimension in self.param_grid.items()
    }

@property
def dimensions(self):
"""
Expand Down

0 comments on commit 5bdb85d

Please sign in to comment.