Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: plot_density for KDE plotting of point patterns based on statsmodels #118

Merged
merged 4 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ci/envs/310-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ dependencies:
- scikit-learn
- shapely
- geopandas
- statsmodels
- pytest
- pytest-cov
- codecov
- pip
- pip:
- opencv-contrib-python
- KDEpy
2 changes: 2 additions & 0 deletions ci/envs/311-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ dependencies:
- git+https://github.com/pysal/libpysal.git
- scipy
- scikit-learn
- statsmodels
- KDEpy
2 changes: 2 additions & 0 deletions ci/envs/311-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ dependencies:
- scikit-learn
- shapely
- geopandas
- statsmodels
- pytest
- pytest-cov
- codecov
- pip
- pip:
- opencv-contrib-python
- KDEpy
# for docs build action (this env only)
- nbsphinx
- numpydoc
Expand Down
2 changes: 2 additions & 0 deletions ci/envs/39-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ dependencies:
- scikit-learn
- shapely
- geopandas
- statsmodels
- pytest
- pytest-cov
- codecov
- pip
- pip:
- opencv-contrib-python
- KDEpy
11 changes: 11 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ Centrography
skyum
dtot


.. _density_api:

Density
-------

.. autosummary::
:toctree: generated/

plot_density

.. _quadrat_api:

Quadrat Based Statistics
Expand Down
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ def setup(app):
'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
'matplotlib':("https://matplotlib.org/", None),
'opencv-contrib-python':("https://docs.opencv.org/3.4/index.html", None),
'KDEpy':("https://kdepy.readthedocs.io/en/latest/", None),
'statsmodels':("https://www.statsmodels.org/stable/", None),
}


1 change: 1 addition & 0 deletions pointpats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
from .quadrat_statistics import *
from .distance_statistics import *
from .spacetime import *
from .kde import *
149 changes: 149 additions & 0 deletions pointpats/kde.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import numpy as np


def plot_density(
data,
bandwidth,
kernel=None,
resolution=100,
levels=10,
fill=False,
margin=0.1,
**kwargs,
):
"""Plot kernel density of a given point pattern

The KDE can be done either using :class:`statsmodels.nonparametric.KDEMultivariate`,
which is used when ``kernel=None``, or using :class:`KDEpy.FFTKDE` when kernel is
set. :class:`~KDEpy.FFTKDE` tends to be generally faster in most cases but may need
different than ``"gaussian"`` kernel to resolve in some cases. For small data of up
to 10 000 points, the difference is not noticeable. For larger data, specify
``bandwidth`` to enforce the use of :class:`~KDEpy.FFTKDE`. Note that while being
faster, :class:`~KDEpy.FFTKDE` may in some case result in erroneous KDE.

KDE is plotted using matplotlib's :meth:`~matplotlib.pyplot.contour` or
:meth:`~matplotlib.pyplot.contourf` function to plot the density.

If MultiPoints are given, each point is treated as separate observation.

Parameters
----------
data : array or geopandas object
Array with a shape (2, n) containing coordinates of points
or a geopandas object with (Multi)Point geometry. Assumes
projected coordinates, geographical coordinates (latitude, longitude)
are not supported.
bandwidth : float
bandwidth in the units of CRS in which data is
kernel : str | None, optional
The kernel function. If None, defaults to the Gaussian kernel and statsmodels
implementation. If set, uses KDEpy implementation. See
:meth:`KDEpy.FFTKDE._available_kernels.keys()` for choices.
resolution : int | tuple(int, int), optional
resolution of the grid used to evaluate the probability density
function. If tuple, each dimension of the grid is specified separately.
By default 100
levels : int or array-like, optional
Determines the number and positions of the contour lines / regions.
See the documentation of :meth:`~matplotlib.pyplot.contour` for details.
By default 10
fill : bool, optional
Fill the area between contour lines, by default False
margin : float, optional
The factor of the margin by which the extent of the data will be expanded when
creating the grid. 0.1 means 10% on each side, by default 0.1. Only used
with the ``statsmodels`` implementation.
**kwargs
Keyword arguments passed to :meth:`~matplotlib.pyplot.contour` or
:meth:`~matplotlib.pyplot.contourf` used for further
styling of the plot, for example ``cmap``, ``linewidths``, ``linestyles``,
or `alpha`. See the documentation of :meth:`~matplotlib.pyplot.contour` for
details.

martinfleis marked this conversation as resolved.
Show resolved Hide resolved
Returns
-------
matplotlib.pyplot.QuadContourSet
plot
"""
if kernel is None:
try:
import statsmodels.api as sm
except ImportError as err:
raise ImportError(
"statsmodels is required for `plot_density` when kernel"
"is not specified."
) from err

engine = "sm"
else:
try:
from KDEpy import FFTKDE
except ImportError as err:
raise ImportError(
"KDEpy is required for `plot_density` when kernel is not None."
) from err

engine = "kdepy"

try:
import matplotlib.pyplot as plt
except ImportError as err:
raise ImportError("matplotlib is required for `plot_density`") from err

if isinstance(data, np.ndarray):
pass
else: # geopandas
if not data.geom_type.str.contains("Point").all():
raise ValueError(
"data contain non-point geometries. "
"Only (Multi)Points are supported."
)
data = data.get_coordinates().values

if engine == "sm":
dens_u = sm.nonparametric.KDEMultivariate(
data=[data[:, 0], data[:, 1]],
var_type="cc",
bw=[bandwidth, bandwidth],
)

xmax = data[:, 0].max()
xmin = data[:, 0].min()
ymax = data[:, 1].max()
ymin = data[:, 1].min()

# get margin to go beyond the extent to avoid cutting of countour lines
x_margin = (xmax - xmin) * margin
y_margin = (ymax - ymin) * margin

if isinstance(resolution, tuple):
x_res, y_res = resolution
elif isinstance(resolution, (float, int)):
x_res = resolution
y_res = resolution
elif resolution is None:
x_res = 100
y_res = 100
else:
raise ValueError("Unsupported option for `resolution`.")

# create mesh for predicting KDE on with more space around the points
x_mesh, y_mesh = np.meshgrid(
np.linspace(xmin - x_margin, xmax + x_margin, x_res),
np.linspace(ymin - y_margin, ymax + y_margin, y_res),
)

# get the prediction
pred = dens_u.pdf(np.vstack([x_mesh.flatten(), y_mesh.flatten()]).T)
z = pred.reshape(x_mesh.shape)

else:
kde = FFTKDE(bw=bandwidth, kernel=kernel)
grid, points = kde.fit(data).evaluate(resolution)
x_mesh, y_mesh = np.unique(grid[:, 0]), np.unique(grid[:, 1])
z = points.reshape(resolution, resolution).T

if fill:
return plt.contourf(x_mesh, y_mesh, z, levels=levels, **kwargs)
else:
return plt.contour(x_mesh, y_mesh, z, levels=levels, **kwargs)
98 changes: 98 additions & 0 deletions pointpats/tests/test_kde.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import numpy as np
import pytest

from pointpats import plot_density

matplotlib = pytest.importorskip("matplotlib")
statsmodels = pytest.importorskip("statsmodels")
KDEpy = pytest.importorskip("KDEpy")


class TestDensity:
def setup_method(self):
self.points = np.array(
[
[66.22, 32.54],
[22.52, 22.39],
[31.01, 81.21],
[9.47, 31.02],
[30.78, 60.10],
[75.21, 58.93],
[79.26, 7.68],
[8.23, 39.93],
[98.73, 77.17],
[89.78, 42.53],
[65.19, 92.08],
[54.46, 8.48],
]
)

def test_default(self):
qm = plot_density(self.points, 10)
collections = list(qm.collections)
assert len(collections) == 12
for col in collections:
assert col.get_linewidths() == np.array(1.5)
np.testing.assert_array_equal(
collections[5].get_edgecolor(),
np.array([[0.143343, 0.522773, 0.556295, 1.0]]),
)

def test_bandwidth(self):
qm = plot_density(self.points, 1)
collections = list(qm.collections)
assert len(collections) == 10

def test_resolution(self):
qm = plot_density(self.points, 10, resolution=200)
collections = list(qm.collections)
assert len(collections) == 12

def test_margin(self):
qm = plot_density(self.points, 10, margin=.3)
collections = list(qm.collections)
assert len(collections) == 12

def test_kdepy(self):
qm = plot_density(self.points, 10, kernel="gaussian")
collections = list(qm.collections)
assert len(collections) == 12
for col in collections:
assert col.get_linewidths() == np.array(1.5)

def test_levels(self):
qm = plot_density(self.points, 10, levels=5)
collections = list(qm.collections)
assert len(collections) == 7

def test_fill(self):
qm = plot_density(self.points, 10, fill=True)
collections = list(qm.collections)
assert collections[0].get_edgecolor().shape == (0, 4)
np.testing.assert_array_equal(
collections[0].get_facecolor(),
np.array([[0.279566, 0.067836, 0.391917, 1.0]]),
)

def test_geopandas(self):
geopandas = pytest.importorskip("geopandas")

gs = geopandas.GeoSeries.from_xy(*self.points.T)
qm = plot_density(gs, 10)
collections = list(qm.collections)
assert len(collections) == 12
for col in collections:
assert col.get_linewidths() == np.array(1.5)

def test_kwargs(self):
qm = plot_density(
self.points, 10, cmap="magma", linewidths=0.5, linestyles="-."
)
collections = list(qm.collections)
assert len(collections) == 12
for col in collections:
assert col.get_linewidths() == np.array(0.5)
np.testing.assert_array_equal(
collections[5].get_edgecolor(),
np.array([[0.639216, 0.189921, 0.49415, 1.0]]),
)