IMMM-SFA · Jaydon2005 · Aug 20, 2024 · Aug 23, 2024 · Jan 3, 2025 · Jan 6, 2025
diff --git a/msdbook/model.py b/msdbook/model.py
diff --git a/msdbook/tests/test_model.py b/msdbook/tests/test_model.py
diff --git a/msdbook/tests/test_utils.py b/msdbook/tests/test_utils.py
@@ -0,0 +1,122 @@
+import pytest
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from msdbook.utils import fit_logit, plot_contour_map
+from statsmodels.base.wrapper import ResultsWrapper
+
+
+@pytest.fixture
+def sample_data():
+    """Return a reproducible 100-row DataFrame for the logit tests.
+
+    'Success' is a random 0/1 outcome; 'Predictor1', 'Predictor2' and
+    'Interaction' are independent standard-normal draws.
+    """
+    # A private RandomState(0) yields the same stream as np.random.seed(0)
+    # without reseeding NumPy's global generator as a side effect.
+    rng = np.random.RandomState(0)
+    n = 100
+
+    return pd.DataFrame({
+        'Success': rng.randint(0, 2, size=n),
+        'Predictor1': rng.randn(n),
+        'Predictor2': rng.randn(n),
+        'Interaction': rng.randn(n),
+    })
+
+
+def test_fit_logit(sample_data):
+    """fit_logit returns a fitted statsmodels result with one term per column."""
+    predictors = ['Predictor1', 'Predictor2']
+    result = fit_logit(sample_data, predictors)
+
+    # Fitted statsmodels results come back wrapped in a ResultsWrapper
+    assert isinstance(result, ResultsWrapper)
+
+    # One coefficient and one p-value per model column, in model order
+    expected_terms = ['Intercept'] + predictors + ['Interaction']
+    assert list(result.params.index) == expected_terms
+    assert list(result.pvalues.index) == expected_terms
+
+    # The fitted result can score the rows it was trained on
+    assert len(result.predict()) == len(sample_data)
+
+
+def test_plot_contour_map(sample_data):
+    """plot_contour_map draws contours plus binned points and sets up the axes."""
+    predictors = ['Predictor1', 'Predictor2']
+    result = fit_logit(sample_data, predictors)
+
+    # Grid padded one unit beyond the data range on both axes
+    xgrid = np.linspace(sample_data['Predictor1'].min() - 1, sample_data['Predictor1'].max() + 1, 50)
+    ygrid = np.linspace(sample_data['Predictor2'].min() - 1, sample_data['Predictor2'].max() + 1, 50)
+    levels = np.linspace(0, 1, 10)
+
+    fig, ax = plt.subplots()
+    try:
+        contourset = plot_contour_map(
+            ax, result, sample_data, 'viridis', 'coolwarm', levels, xgrid, ygrid,
+            'Predictor1', 'Predictor2', base=0
+        )
+
+        # Check if the contour plot is created
+        assert contourset is not None
+
+        # Check if the axis limits and labels are set correctly
+        assert ax.get_xlim() == (np.min(xgrid), np.max(xgrid))
+        assert ax.get_ylim() == (np.min(ygrid), np.max(ygrid))
+        assert ax.get_xlabel() == 'Predictor1'
+        assert ax.get_ylabel() == 'Predictor2'
+
+        # contourf alone already populates ax.collections, so look for the
+        # scatter itself: one point per block of 10 rows.
+        n_points = len(sample_data) // 10
+        assert any(len(c.get_offsets()) == n_points for c in ax.collections)
+    finally:
+        # Close the figure even when an assertion fails
+        plt.close(fig)
+
+
+def test_empty_data():
+    """fit_logit raises ValueError when the DataFrame has no rows.
+
+    Plotting is not exercised here: plot_contour_map needs a fitted
+    result, and fitting is exactly the step that fails.
+    """
+    empty_df = pd.DataFrame({
+        'Success': [],
+        'Predictor1': [],
+        'Predictor2': [],
+        'Interaction': [],
+    })
+    predictors = ['Predictor1', 'Predictor2']
+
+    with pytest.raises(ValueError):
+        fit_logit(empty_df, predictors)
+
+
+def test_invalid_predictors(sample_data):
+    """fit_logit raises KeyError when a predictor column is missing."""
+    invalid_predictors = ['InvalidPredictor1', 'InvalidPredictor2']
+
+    with pytest.raises(KeyError):
+        fit_logit(sample_data, invalid_predictors)
+
+
+def test_logit_with_interaction(sample_data):
+    """fit_logit fits 'Interaction' even though it is not in ``predictors``.
+
+    The column is left out of ``predictors`` on purpose: fit_logit appends
+    it to the model itself.
+    """
+    interaction = sample_data["Predictor1"] * sample_data["Predictor2"]
+    sample_data["Interaction"] = interaction
+    predictors = ['Predictor1', 'Predictor2']
+
+    result = fit_logit(sample_data, predictors)
+
+    # Ensure the interaction term is included in the result ...
+    assert 'Interaction' in result.params.index
+    # ... and that it was estimated from the product column set above
+    assert np.allclose(result.model.exog[:, -1], interaction)
diff --git a/msdbook/utils.py b/msdbook/utils.py
@@ -3,30 +3,30 @@
 import numpy as np
 import statsmodels.api as sm
 
-
 def fit_logit(dta, predictors):
-    """Logistic regression"""
+    """Fit a logit of dta["Success"] on Intercept + predictors + Interaction.
+
+    Adds an "Intercept" column of ones to `dta` in place; returns the fit result.
+    """
 
-    # concatenate intercept column of 1s
     dta["Intercept"] = np.ones(np.shape(dta)[0])
-    # get columns of predictors
-    cols = dta.columns.tolist()[-1:] + predictors + ["Interaction"]
-    # fit logistic regression
-    logit = sm.Logit(dta["Success"], dta[cols], disp=False)
-    result = logit.fit()
-
+    # Name the intercept: [-1:] is wrong once "Intercept" exists mid-frame
+    cols = ["Intercept"] + predictors + ["Interaction"]
+    # disp is a fit() option (not a constructor argument); keep the fit quiet
+    logit = sm.Logit(dta["Success"], dta[cols])
+    result = logit.fit(method="bfgs", disp=False)
+
     return result
 
-
 def plot_contour_map(
     ax, result, dta, contour_cmap, dot_cmap, levels, xgrid, ygrid, xvar, yvar, base
 ):
     """Plot the contour map"""
 
-    # TODO:  see why this warning is being raised about the tight layout
+    # Ignore tight layout warnings
     warnings.filterwarnings("ignore")
 
-    # find probability of success for x=xgrid, y=ygrid
+    # Generate probability of success for x=xgrid, y=ygrid
     X, Y = np.meshgrid(xgrid, ygrid)
     x = X.flatten()
     y = Y.flatten()
@@ -36,9 +36,12 @@ def plot_contour_map(
     Z = np.reshape(z, np.shape(X))
 
     contourset = ax.contourf(X, Y, Z, levels, cmap=contour_cmap, aspect="auto")
+
+    # Plot scatter points based on the data
     xpoints = np.mean(dta[xvar].values.reshape(-1, 10), axis=1)
     ypoints = np.mean(dta[yvar].values.reshape(-1, 10), axis=1)
     colors = np.round(np.mean(dta["Success"].values.reshape(-1, 10), axis=1), 0)
+
     ax.scatter(xpoints, ypoints, s=10, c=colors, edgecolor="none", cmap=dot_cmap)
     ax.set_xlim(np.min(xgrid), np.max(xgrid))
     ax.set_ylim(np.min(ygrid), np.max(ygrid))

diff --git a/pyproject.toml b/pyproject.toml
@@ -38,6 +38,8 @@ dependencies = [
     "scipy>=1.13.1",
     "seaborn>=0.13.2",
     "statsmodels>=0.14.2",
+    "pytest>=7.0.0",
+    "pytest-mock>=3.10",
 ]
 
 [project.optional-dependencies]