From b274fd29b6da3816b25b6d06605cb18773269cd6 Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Mon, 29 Apr 2024 16:21:59 +0200 Subject: [PATCH] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- python_scripts/ensemble_gradient_boosting.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python_scripts/ensemble_gradient_boosting.py b/python_scripts/ensemble_gradient_boosting.py index 206d19756..6e27f79dc 100644 --- a/python_scripts/ensemble_gradient_boosting.py +++ b/python_scripts/ensemble_gradient_boosting.py @@ -6,13 +6,13 @@ # --- # %% [markdown] -# # Gradient-boosting decision tree (GBDT) +# # Gradient-boosting decision tree # -# In this notebook, we present the gradient boosting decision tree algorithm. +# In this notebook, we present the gradient boosting decision tree (GBDT) algorithm. # # Even if AdaBoost and GBDT are both boosting algorithms, they are different in # nature: the former assigns weights to specific samples, whereas GBDT fits -# succesive decision trees on the residual errors (hence the name "gradient") of +# successive decision trees on the residual errors (hence the name "gradient") of # their preceding tree. Therefore, each new tree in the ensemble tries to refine # its predictions by specifically addressing the errors made by the previous # learner, instead of predicting the target directly. @@ -87,8 +87,7 @@ def generate_data(n_samples=50): # %% def plot_decision_tree_with_residuals(y_train, y_train_pred, y_test_pred): - # Create a plot and get the Axes object - fig, ax = plt.subplots() + _fig_, ax = plt.subplots() # plot the data sns.scatterplot( x=data_train["Feature"], y=y_train, color="black", alpha=0.5, ax=ax @@ -109,6 +108,7 @@ def plot_decision_tree_with_residuals(y_train, y_train_pred, y_test_pred): return handles, ax +# %% handles, ax = plot_decision_tree_with_residuals( target_train, target_train_predicted, target_test_predicted ) @@ -259,7 +259,7 @@ def plot_decision_tree_with_residuals(y_train, y_train_pred, y_test_pred): # second tree corrects the first tree's error, while the third tree corrects the # second tree's error and so on). # -# ## First comparison of GBDT vs random forests +# ## First comparison of GBDT vs. random forests # # We now compare the generalization performance of random-forest and gradient # boosting on the California housing dataset.
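
The reworded introduction above describes GBDT as fitting successive decision trees on the residual errors of their preceding tree. As a minimal sketch of that idea (not part of the patched notebook; the synthetic data, tree depth, and shrinkage value below are illustrative assumptions), each new tree is trained on the residuals left by the running ensemble prediction:

import numpy as np
from sklearn.tree import DecisionTreeRegressor

# Toy 1D regression data, a stand-in for the notebook's generate_data() helper.
rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(50, 1))
y = np.sin(X.ravel()) + rng.normal(scale=0.1, size=50)

# Each new tree is fitted on the residuals (errors) of the current ensemble,
# then its shrunk prediction is added to the running prediction.
learning_rate = 0.5  # illustrative shrinkage value, not taken from the notebook
trees, prediction = [], np.zeros_like(y)
for _ in range(3):
    residuals = y - prediction
    tree = DecisionTreeRegressor(max_depth=2, random_state=0).fit(X, residuals)
    trees.append(tree)
    prediction += learning_rate * tree.predict(X)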
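
The renamed section "First comparison of GBDT vs. random forests" compares the generalization performance of both ensembles on the California housing dataset. A hedged sketch of such a comparison, assuming scikit-learn's RandomForestRegressor and GradientBoostingRegressor with default-sized ensembles (the notebook's exact estimator parameters and scoring are not shown in this patch):

from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import cross_validate

data, target = fetch_california_housing(return_X_y=True, as_frame=True)

# Cross-validate both models and report mean absolute error and fit time.
models = {
    "Random forest": RandomForestRegressor(n_estimators=100, random_state=0),
    "Gradient boosting": GradientBoostingRegressor(n_estimators=100, random_state=0),
}
for name, model in models.items():
    cv_results = cross_validate(
        model, data, target, scoring="neg_mean_absolute_error"
    )
    print(
        f"{name}: MAE = {-cv_results['test_score'].mean():.3f}, "
        f"fit time = {cv_results['fit_time'].mean():.1f} s"
    )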