Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Creating test_utils.py #113

Open
wants to merge 13 commits into
base: dev
Choose a base branch
from
11 changes: 0 additions & 11 deletions msdbook/model.py

This file was deleted.

18 changes: 0 additions & 18 deletions msdbook/tests/test_model.py

This file was deleted.

153 changes: 153 additions & 0 deletions msdbook/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import pytest
erexer marked this conversation as resolved.
Show resolved Hide resolved
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from msdbook.utils import fit_logit, plot_contour_map
from statsmodels.base.wrapper import ResultsWrapper


# Column names shared by the fixture and the tests in this module.
PREDICTOR_1, PREDICTOR_2, INTERACTION = "Predictor1", "Predictor2", "Interaction"


@pytest.fixture
def sample_data():
    """Return a reproducible 100-row frame of random logit inputs.

    The frame holds a binary ``Success`` outcome (0 or 1) and three
    standard-normal columns named by PREDICTOR_1, PREDICTOR_2 and
    INTERACTION.
    """
    # Seed NumPy's global generator so every test sees the same draws.
    np.random.seed(0)
    row_count = 100

    # The draw order is part of the reproducible output: outcome first, then
    # the three normal columns in the order they appear in the frame.
    outcome = np.random.randint(0, 2, size=row_count)
    first_predictor = np.random.randn(row_count)
    second_predictor = np.random.randn(row_count)
    interaction = np.random.randn(row_count)

    columns = {
        'Success': outcome,
        PREDICTOR_1: first_predictor,
        PREDICTOR_2: second_predictor,
        INTERACTION: interaction,
    }
    return pd.DataFrame(columns)


def test_fit_logit(sample_data):
    """Check that fit_logit returns a structurally and numerically sane fit.

    The fixture's outcome is random noise that is independent of the
    predictors, so no coefficient is expected to be statistically
    significant. The test therefore validates the fitted terms and the
    numerical sanity of the estimates instead of asserting significance,
    which would only pass or fail depending on the random seed.
    """
    predictors = [PREDICTOR_1, PREDICTOR_2]
    result = fit_logit(sample_data, predictors)

    # statsmodels hands fitted results back inside a ResultsWrapper subclass.
    assert isinstance(result, ResultsWrapper)
    assert hasattr(result, "predict")

    # NOTE(review): a reviewer asked whether the fitted values can be checked
    # for correctness. With noise-only data there is no "true" coefficient to
    # compare against, so the checks below pin what can be known: which terms
    # were fitted and that every estimate is a valid number.

    # One coefficient per design column: the intercept, both predictors and
    # the interaction term that fit_logit always appends.
    expected_terms = ["Intercept", PREDICTOR_1, PREDICTOR_2, INTERACTION]
    assert list(result.params.index) == expected_terms
    assert list(result.pvalues.index) == expected_terms

    # Coefficients must be finite and non-zero.
    assert np.all(np.isfinite(result.params))
    assert np.all(np.abs(result.params) > 0)

    # p-values are probabilities: finite and inside [0, 1].
    assert np.all(np.isfinite(result.pvalues))
    assert np.all((result.pvalues >= 0) & (result.pvalues <= 1))

    # In-sample predictions are success probabilities, one per input row.
    fitted = np.asarray(result.predict())
    assert fitted.shape == (len(sample_data),)
    assert np.all((fitted >= 0) & (fitted <= 1))


def test_plot_contour_map(sample_data):
    """Check that plot_contour_map draws contours and scatter points on the axis.

    Verifies the returned contour set, the axis limits and labels, and that
    a scatter collection with the expected number of points was added.
    """
    fig, ax = plt.subplots()
    # Close the figure even when an assertion fails so figures do not
    # accumulate across the test session.
    try:
        # Fit a logit model for the purpose of plotting.
        predictors = [PREDICTOR_1, PREDICTOR_2]
        result = fit_logit(sample_data, predictors)

        # Build grids that extend one unit beyond the observed data range.
        xgrid = np.linspace(
            sample_data[PREDICTOR_1].min() - 1, sample_data[PREDICTOR_1].max() + 1, 50
        )
        ygrid = np.linspace(
            sample_data[PREDICTOR_2].min() - 1, sample_data[PREDICTOR_2].max() + 1, 50
        )
        levels = np.linspace(0, 1, 10)

        contour_cmap = 'viridis'
        dot_cmap = 'coolwarm'

        contourset = plot_contour_map(
            ax,
            result,
            sample_data,
            contour_cmap,
            dot_cmap,
            levels,
            xgrid,
            ygrid,
            PREDICTOR_1,
            PREDICTOR_2,
            base=0,
        )

        # Check if the contour plot is created.
        assert contourset is not None

        # Check if the axis limits and labels are set correctly.
        assert ax.get_xlim() == (np.min(xgrid), np.max(xgrid))
        assert ax.get_ylim() == (np.min(ygrid), np.max(ygrid))
        assert ax.get_xlabel() == PREDICTOR_1
        assert ax.get_ylabel() == PREDICTOR_2

        # plot_contour_map averages the rows in groups of 10 before
        # scattering them, so the scatter holds len(data) / 10 points.
        # Checking the offset count distinguishes the scatter from the
        # collections that contourf adds; a bare "collections is non-empty"
        # check would pass even if no scatter had been drawn.
        expected_points = len(sample_data) // 10
        assert any(
            len(collection.get_offsets()) == expected_points
            for collection in ax.collections
        )
    finally:
        plt.close(fig)


def test_empty_data():
    """Fitting on a frame with no rows must raise ValueError."""
    empty_df = pd.DataFrame({
        'Success': [],
        PREDICTOR_1: [],
        PREDICTOR_2: [],
        INTERACTION: [],
    })

    predictors = [PREDICTOR_1, PREDICTOR_2]

    # No model can be fitted without observations, so there is nothing to
    # plot afterwards; the test ends once the error has been confirmed.
    with pytest.raises(ValueError):
        fit_logit(empty_df, predictors)


def test_invalid_predictors(sample_data):
    """fit_logit must raise KeyError for predictor columns absent from the data."""
    missing_columns = ['InvalidPredictor1', 'InvalidPredictor2']

    # Neither name exists in the fixture frame, so the column lookup fails.
    expect_key_error = pytest.raises(KeyError)
    with expect_key_error:
        fit_logit(sample_data, missing_columns)


def test_logit_with_interaction(sample_data):
    """The fitted model must report a coefficient for the interaction term."""
    # Replace the fixture's random interaction column with the actual product
    # of the two predictors.
    product = sample_data[PREDICTOR_1] * sample_data[PREDICTOR_2]
    sample_data[INTERACTION] = product

    fitted = fit_logit(sample_data, [PREDICTOR_1, PREDICTOR_2])

    # The interaction column must appear among the fitted coefficient names.
    coefficient_names = fitted.params.index
    assert INTERACTION in coefficient_names
23 changes: 13 additions & 10 deletions msdbook/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,30 @@
import numpy as np
import statsmodels.api as sm


def fit_logit(dta, predictors):
    """Fit a logistic regression of ``Success`` on the given predictors.

    The design matrix consists of an intercept column of ones, the requested
    predictor columns and the ``Interaction`` column, in that order.

    Parameters
    ----------
    dta : pandas.DataFrame
        Data holding a ``Success`` outcome column, every column named in
        ``predictors`` and an ``Interaction`` column. An ``Intercept``
        column of ones is added to (or overwritten in) this frame in place.
    predictors : list of str
        Names of the predictor columns to include in the model.

    Returns
    -------
    The fitted statsmodels results object returned by ``Logit.fit``.

    Raises
    ------
    KeyError
        If a required column is missing from ``dta``.
    """
    # Add intercept column of 1s. This writes to the caller's frame, which is
    # kept for backward compatibility.
    dta["Intercept"] = np.ones(np.shape(dta)[0])

    # Name the intercept explicitly rather than taking "the last column":
    # when "Intercept" already exists (for example on a second call with the
    # same frame) the assignment above does not move it to the end, so the
    # last column could be an unrelated one.
    cols = ["Intercept"] + list(predictors) + ["Interaction"]

    logit = sm.Logit(dta["Success"], dta[cols])

    # ``disp`` is an option of ``fit`` (not of the model constructor);
    # passing it here keeps the optimizer from printing convergence output.
    result = logit.fit(method="bfgs", disp=False)

    return result


def plot_contour_map(
ax, result, dta, contour_cmap, dot_cmap, levels, xgrid, ygrid, xvar, yvar, base
):
"""Plot the contour map"""

# TODO: see why this warning is being raised about the tight layout
# Ignore tight layout warnings
warnings.filterwarnings("ignore")

# find probability of success for x=xgrid, y=ygrid
# Generate probability of success for x=xgrid, y=ygrid
X, Y = np.meshgrid(xgrid, ygrid)
x = X.flatten()
y = Y.flatten()
Expand All @@ -36,9 +36,12 @@ def plot_contour_map(
Z = np.reshape(z, np.shape(X))

contourset = ax.contourf(X, Y, Z, levels, cmap=contour_cmap, aspect="auto")

# Plot scatter points based on the data
xpoints = np.mean(dta[xvar].values.reshape(-1, 10), axis=1)
ypoints = np.mean(dta[yvar].values.reshape(-1, 10), axis=1)
colors = np.round(np.mean(dta["Success"].values.reshape(-1, 10), axis=1), 0)

ax.scatter(xpoints, ypoints, s=10, c=colors, edgecolor="none", cmap=dot_cmap)
ax.set_xlim(np.min(xgrid), np.max(xgrid))
ax.set_ylim(np.min(ygrid), np.max(ygrid))
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ dev = [
"sphinx-book-theme>=0.2.0",
"sphinxcontrib-bibtex>=2.4.1",
"twine>=3.4.1",
"pytest>=7.0.0",
"pytest-mock>=3.10",
]

[project.urls]
Expand Down
Loading