From 3200d57ffd8e8159ea26cf0cd2686a7f5d167510 Mon Sep 17 00:00:00 2001
From: Turtle24 <aidan.nell@yahoo.com>
Date: Sun, 27 Jun 2021 11:27:17 +0200
Subject: [PATCH 1/3] mlflow tests

---
 sklearn_genetic/tests/test_mlflow.py | 156 +++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 sklearn_genetic/tests/test_mlflow.py

diff --git a/sklearn_genetic/tests/test_mlflow.py b/sklearn_genetic/tests/test_mlflow.py
new file mode 100644
index 0000000..db013db
--- /dev/null
+++ b/sklearn_genetic/tests/test_mlflow.py
@@ -0,0 +1,156 @@
+import pytest
+import mlflow
+import shutil
+import os
+from urllib.parse import urlparse
+from mlflow.tracking import MlflowClient
+from mlflow.entities import ViewType
+
+from sklearn.datasets import load_digits
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import StratifiedKFold
+
+from ..genetic_search import GASearchCV
+from ..mlflow import MLflowConfig
+from ..space import Integer, Categorical, Continuous
+from ..callbacks import (
+    ThresholdStopping,
+    DeltaThreshold,
+    ConsecutiveStopping,
+    TimerStopping,
+)
+
+
+@pytest.fixture
+def mlflow_resources():
+    uri = mlflow.get_tracking_uri()
+    client = MlflowClient(uri)
+    return (uri, client)
+
+@pytest.fixture
+def mlflow_run(mlflow_resources):
+    _, client = mlflow_resources
+    exp_id = client.get_experiment_by_name("Digits-sklearn-genetic-opt").experiment_id
+    active_run = mlflow.list_run_infos(exp_id, run_view_type=ViewType.ACTIVE_ONLY)
+    runs = [run.run_id for run in active_run]
+    return runs
+
+def test_mlflow_config(mlflow_resources):
+    """
+    Check MLflow config creation. 
+    """
+    uri, _ = mlflow_resources
+    mlflow_config = MLflowConfig(
+        tracking_uri=uri,
+        experiment="Digits-sklearn-genetic-opt",
+        run_name="Decision Tree",
+        save_models=True,
+        tags={"team": "sklearn-genetic-opt", "version": "0.5.0"})
+    assert isinstance(mlflow_config, MLflowConfig)
+
+
+def test_mlruns_file():
+    """
+    Check if the mlruns file is created.
+    """
+    tracking_url_type_store = urlparse(mlflow.get_tracking_uri())
+    assert 'Sklearn-genetic-opt/sklearn_genetic/tests/mlruns' in tracking_url_type_store.geturl()
+
+
+def test_runs(mlflow_resources, mlflow_run):
+    """
+    Check if runs are captured and parameters are true.
+    """
+    uri, client = mlflow_resources
+    mlflow_config = MLflowConfig(
+        tracking_uri=uri,
+        experiment="Digits-sklearn-genetic-opt",
+        run_name="Decision Tree",
+        save_models=True,
+        tags={"team": "sklearn-genetic-opt", "version": "0.5.0"})
+
+    clf = DecisionTreeClassifier()
+
+    data = load_digits()
+    label_names = data["target_names"]
+    y = data["target"]
+    X = data["data"]
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.33, random_state=42
+    )
+
+    params_grid = {
+        "min_weight_fraction_leaf": Continuous(0, 0.5),
+        "criterion": Categorical(["gini", "entropy"]),
+        "max_depth": Integer(2, 20),
+        "max_leaf_nodes": Integer(2, 30)}
+
+    cv = StratifiedKFold(n_splits=3, shuffle=True)
+
+    evolved_estimator = GASearchCV(
+        clf,
+        cv=cv,
+        scoring="accuracy",
+        population_size=3,
+        generations=5,
+        tournament_size=3,
+        elitism=True,
+        crossover_probability=0.9,
+        mutation_probability=0.05,
+        param_grid=params_grid,
+        algorithm="eaMuPlusLambda",
+        n_jobs=-1,
+        verbose=True,
+        log_config=mlflow_config)
+
+    evolved_estimator.fit(X_train, y_train)
+    y_predict_ga = evolved_estimator.predict(X_test)
+    accuracy = accuracy_score(y_test, y_predict_ga)
+    runs = mlflow_run
+    assert len(runs) >= 1 and evolved_estimator.best_params_['min_weight_fraction_leaf']
+
+def test_mlflow_artifacts(mlflow_resources, mlflow_run):
+    _, client = mlflow_resources
+    run_id = mlflow_run[0]
+    run = client.get_run(run_id)
+    assert client.list_artifacts(run_id)[0].path == "model"
+
+def test_mlflow_params(mlflow_resources, mlflow_run):
+    """
+    Test parameters are all in the run and within range.
+    """
+    _, client = mlflow_resources
+    run_id = mlflow_run[0]
+    run = client.get_run(run_id)
+    params = run.data.params
+
+    assert 0 <= float(params['min_weight_fraction_leaf']) <= 0.5
+    assert params['criterion'] == 'gini' or 'entropy'
+    assert 2 <= int(params['max_depth']) <= 20
+    assert 2 <= int(params['max_leaf_nodes']) <= 30
+
+def test_mlflow_after_run(mlflow_resources, mlflow_run):
+    """
+    Check that, after the run ends, the logged artifacts/metrics/hyperparameters exist in the MLflow server.
+    """
+    run_id = mlflow_run[0]
+    mlflow.end_run()
+    _, client = mlflow_resources 
+    run = client.get_run(run_id)
+    params = run.data.params
+
+    assert 0 <= float(params['min_weight_fraction_leaf']) <= 0.5
+    assert params['criterion'] == 'gini' or 'entropy'
+    assert 2 <= int(params['max_depth']) <= 20
+    assert 2 <= int(params['max_leaf_nodes']) <= 30
+    assert client.get_metric_history(run_id, "score")[0].key == "score"
+
+def test_cleanup():
+    """
+    Ensure resources are cleaned up.
+    """
+    shutil.rmtree("mlruns")
+    assert 'mlruns' not in os.listdir(os.getcwd())

From 83ec231b01ed8e15548b1272637f74acfbf43e0f Mon Sep 17 00:00:00 2001
From: Turtle24 <aidan.nell@yahoo.com>
Date: Sun, 27 Jun 2021 11:35:52 +0200
Subject: [PATCH 2/3] linting and checked that all tests passed

---
 sklearn_genetic/tests/test_mlflow.py | 50 +++++++++++++++-------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/sklearn_genetic/tests/test_mlflow.py b/sklearn_genetic/tests/test_mlflow.py
index db013db..5fbefae 100644
--- a/sklearn_genetic/tests/test_mlflow.py
+++ b/sklearn_genetic/tests/test_mlflow.py
@@ -29,6 +29,7 @@ def mlflow_resources():
     client = MlflowClient(uri)
     return (uri, client)
 
+
 @pytest.fixture
 def mlflow_run(mlflow_resources):
     _, client = mlflow_resources
@@ -37,9 +38,10 @@ def mlflow_run(mlflow_resources):
     runs = [run.run_id for run in active_run]
     return runs
 
+
 def test_mlflow_config(mlflow_resources):
     """
-    Check MLflow config creation. 
+    Check MLflow config creation.
     """
     uri, _ = mlflow_resources
     mlflow_config = MLflowConfig(
@@ -47,18 +49,11 @@ def test_mlflow_config(mlflow_resources):
         experiment="Digits-sklearn-genetic-opt",
         run_name="Decision Tree",
         save_models=True,
-        tags={"team": "sklearn-genetic-opt", "version": "0.5.0"})
+        tags={"team": "sklearn-genetic-opt", "version": "0.5.0"},
+    )
     assert isinstance(mlflow_config, MLflowConfig)
 
 
-def test_mlruns_file():
-    """
-    Check if the mlruns file is created.
-    """
-    tracking_url_type_store = urlparse(mlflow.get_tracking_uri())
-    assert 'Sklearn-genetic-opt/sklearn_genetic/tests/mlruns' in tracking_url_type_store.geturl()
-
-
 def test_runs(mlflow_resources, mlflow_run):
     """
     Check if runs are captured and parameters are true.
@@ -69,7 +64,8 @@ def test_runs(mlflow_resources, mlflow_run):
         experiment="Digits-sklearn-genetic-opt",
         run_name="Decision Tree",
         save_models=True,
-        tags={"team": "sklearn-genetic-opt", "version": "0.5.0"})
+        tags={"team": "sklearn-genetic-opt", "version": "0.5.0"},
+    )
 
     clf = DecisionTreeClassifier()
 
@@ -86,7 +82,8 @@ def test_runs(mlflow_resources, mlflow_run):
         "min_weight_fraction_leaf": Continuous(0, 0.5),
         "criterion": Categorical(["gini", "entropy"]),
         "max_depth": Integer(2, 20),
-        "max_leaf_nodes": Integer(2, 30)}
+        "max_leaf_nodes": Integer(2, 30),
+    }
 
     cv = StratifiedKFold(n_splits=3, shuffle=True)
 
@@ -104,13 +101,15 @@ def test_runs(mlflow_resources, mlflow_run):
         algorithm="eaMuPlusLambda",
         n_jobs=-1,
         verbose=True,
-        log_config=mlflow_config)
+        log_config=mlflow_config,
+    )
 
     evolved_estimator.fit(X_train, y_train)
     y_predict_ga = evolved_estimator.predict(X_test)
     accuracy = accuracy_score(y_test, y_predict_ga)
     runs = mlflow_run
-    assert len(runs) >= 1 and evolved_estimator.best_params_['min_weight_fraction_leaf']
+    assert runs and "min_weight_fraction_leaf" in evolved_estimator.best_params_
+
 
 def test_mlflow_artifacts(mlflow_resources, mlflow_run):
     _, client = mlflow_resources
@@ -118,6 +117,7 @@ def test_mlflow_artifacts(mlflow_resources, mlflow_run):
     run = client.get_run(run_id)
     assert client.list_artifacts(run_id)[0].path == "model"
 
+
 def test_mlflow_params(mlflow_resources, mlflow_run):
     """
     Test parameters are all in the run and within range.
@@ -127,10 +127,11 @@ def test_mlflow_params(mlflow_resources, mlflow_run):
     run = client.get_run(run_id)
     params = run.data.params
 
-    assert 0 <= float(params['min_weight_fraction_leaf']) <= 0.5
-    assert params['criterion'] == 'gini' or 'entropy'
-    assert 2 <= int(params['max_depth']) <= 20
-    assert 2 <= int(params['max_leaf_nodes']) <= 30
+    assert 0 <= float(params["min_weight_fraction_leaf"]) <= 0.5
+    assert params["criterion"] in ("gini", "entropy")  # "== a or b" is always truthy
+    assert 2 <= int(params["max_depth"]) <= 20
+    assert 2 <= int(params["max_leaf_nodes"]) <= 30
+
 
 def test_mlflow_after_run(mlflow_resources, mlflow_run):
     """
@@ -138,19 +139,20 @@ def test_mlflow_after_run(mlflow_resources, mlflow_run):
     """
     run_id = mlflow_run[0]
     mlflow.end_run()
-    _, client = mlflow_resources 
+    _, client = mlflow_resources
     run = client.get_run(run_id)
     params = run.data.params
 
-    assert 0 <= float(params['min_weight_fraction_leaf']) <= 0.5
-    assert params['criterion'] == 'gini' or 'entropy'
-    assert 2 <= int(params['max_depth']) <= 20
-    assert 2 <= int(params['max_leaf_nodes']) <= 30
+    assert 0 <= float(params["min_weight_fraction_leaf"]) <= 0.5
+    assert params["criterion"] in ("gini", "entropy")  # "== a or b" is always truthy
+    assert 2 <= int(params["max_depth"]) <= 20
+    assert 2 <= int(params["max_leaf_nodes"]) <= 30
     assert client.get_metric_history(run_id, "score")[0].key == "score"
 
+
 def test_cleanup():
     """
     Ensure resources are cleaned up.
     """
     shutil.rmtree("mlruns")
-    assert 'mlruns' not in os.listdir(os.getcwd())
+    assert "mlruns" not in os.listdir(os.getcwd())

From 953be4279b41da6f9fab9a5aa4ef807fa3d6a140 Mon Sep 17 00:00:00 2001
From: Turtle24 <aidan.nell@yahoo.com>
Date: Sun, 27 Jun 2021 12:14:10 +0200
Subject: [PATCH 3/3] removed unused imports

---
 sklearn_genetic/tests/test_mlflow.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/sklearn_genetic/tests/test_mlflow.py b/sklearn_genetic/tests/test_mlflow.py
index 5fbefae..61ed412 100644
--- a/sklearn_genetic/tests/test_mlflow.py
+++ b/sklearn_genetic/tests/test_mlflow.py
@@ -15,12 +15,6 @@
 from ..genetic_search import GASearchCV
 from ..mlflow import MLflowConfig
 from ..space import Integer, Categorical, Continuous
-from ..callbacks import (
-    ThresholdStopping,
-    DeltaThreshold,
-    ConsecutiveStopping,
-    TimerStopping,
-)
 
 
 @pytest.fixture