Skip to content

Commit

Permalink
Merge pull request #118 from martinfleis/kde
Browse files Browse the repository at this point in the history
  • Loading branch information
knaaptime authored Sep 18, 2023
2 parents 8fd02a6 + 26b45b3 commit 0725890
Show file tree
Hide file tree
Showing 9 changed files with 269 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ci/envs/310-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ dependencies:
- scikit-learn
- shapely
- geopandas
- statsmodels
- pytest
- pytest-cov
- codecov
- pip
- pip:
- opencv-contrib-python
- KDEpy
2 changes: 2 additions & 0 deletions ci/envs/311-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ dependencies:
- git+https://github.com/pysal/libpysal.git
- scipy
- scikit-learn
- statsmodels
- KDEpy
2 changes: 2 additions & 0 deletions ci/envs/311-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ dependencies:
- scikit-learn
- shapely
- geopandas
- statsmodels
- pytest
- pytest-cov
- codecov
- pip
- pip:
- opencv-contrib-python
- KDEpy
# for docs build action (this env only)
- nbsphinx
- numpydoc
Expand Down
2 changes: 2 additions & 0 deletions ci/envs/39-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ dependencies:
- scikit-learn
- shapely
- geopandas
- statsmodels
- pytest
- pytest-cov
- codecov
- pip
- pip:
- opencv-contrib-python
- KDEpy
11 changes: 11 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ Centrography
skyum
dtot


.. _density_api:

Density
-------

.. autosummary::
:toctree: generated/

plot_density

.. _quadrat_api:

Quadrat Based Statistics
Expand Down
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ def setup(app):
'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
'matplotlib':("https://matplotlib.org/", None),
'opencv-contrib-python':("https://docs.opencv.org/3.4/index.html", None),
'KDEpy':("https://kdepy.readthedocs.io/en/latest/", None),
'statsmodels':("https://www.statsmodels.org/stable/", None),
}


1 change: 1 addition & 0 deletions pointpats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
from .quadrat_statistics import *
from .distance_statistics import *
from .spacetime import *
from .kde import *
149 changes: 149 additions & 0 deletions pointpats/kde.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import numpy as np


def plot_density(
data,
bandwidth,
kernel=None,
resolution=100,
levels=10,
fill=False,
margin=0.1,
**kwargs,
):
"""Plot kernel density of a given point pattern
The KDE can be done either using :class:`statsmodels.nonparametric.KDEMultivariate`,
which is used when ``kernel=None``, or using :class:`KDEpy.FFTKDE` when kernel is
set. :class:`~KDEpy.FFTKDE` tends to be generally faster in most cases but may need
different than ``"gaussian"`` kernel to resolve in some cases. For small data of up
to 10 000 points, the difference is not noticeable. For larger data, specify
``bandwidth`` to enforce the use of :class:`~KDEpy.FFTKDE`. Note that while being
faster, :class:`~KDEpy.FFTKDE` may in some case result in erroneous KDE.
KDE is plotted using matplotlib's :meth:`~matplotlib.pyplot.contour` or
:meth:`~matplotlib.pyplot.contourf` function to plot the density.
If MultiPoints are given, each point is treated as separate observation.
Parameters
----------
data : array or geopandas object
Array with a shape (2, n) containing coordinates of points
or a geopandas object with (Multi)Point geometry. Assumes
projected coordinates, geographical coordinates (latitude, longitude)
are not supported.
bandwidth : float
bandwidth in the units of CRS in which data is
kernel : str | None, optional
The kernel function. If None, defaults to the Gaussian kernel and statsmodels
implementation. If set, uses KDEpy implementation. See
:meth:`KDEpy.FFTKDE._available_kernels.keys()` for choices.
resolution : int | tuple(int, int), optional
resolution of the grid used to evaluate the probability density
function. If tuple, each dimension of the grid is specified separately.
By default 100
levels : int or array-like, optional
Determines the number and positions of the contour lines / regions.
See the documentation of :meth:`~matplotlib.pyplot.contour` for details.
By default 10
fill : bool, optional
Fill the area between contour lines, by default False
margin : float, optional
The factor of the margin by which the extent of the data will be expanded when
creating the grid. 0.1 means 10% on each side, by default 0.1. Only used
with the ``statsmodels`` implementation.
**kwargs
Keyword arguments passed to :meth:`~matplotlib.pyplot.contour` or
:meth:`~matplotlib.pyplot.contourf` used for further
styling of the plot, for example ``cmap``, ``linewidths``, ``linestyles``,
or `alpha`. See the documentation of :meth:`~matplotlib.pyplot.contour` for
details.
Returns
-------
matplotlib.pyplot.QuadContourSet
plot
"""
if kernel is None:
try:
import statsmodels.api as sm
except ImportError as err:
raise ImportError(
"statsmodels is required for `plot_density` when kernel"
"is not specified."
) from err

engine = "sm"
else:
try:
from KDEpy import FFTKDE
except ImportError as err:
raise ImportError(
"KDEpy is required for `plot_density` when kernel is not None."
) from err

engine = "kdepy"

try:
import matplotlib.pyplot as plt
except ImportError as err:
raise ImportError("matplotlib is required for `plot_density`") from err

if isinstance(data, np.ndarray):
pass
else: # geopandas
if not data.geom_type.str.contains("Point").all():
raise ValueError(
"data contain non-point geometries. "
"Only (Multi)Points are supported."
)
data = data.get_coordinates().values

if engine == "sm":
dens_u = sm.nonparametric.KDEMultivariate(
data=[data[:, 0], data[:, 1]],
var_type="cc",
bw=[bandwidth, bandwidth],
)

xmax = data[:, 0].max()
xmin = data[:, 0].min()
ymax = data[:, 1].max()
ymin = data[:, 1].min()

# get margin to go beyond the extent to avoid cutting of countour lines
x_margin = (xmax - xmin) * margin
y_margin = (ymax - ymin) * margin

if isinstance(resolution, tuple):
x_res, y_res = resolution
elif isinstance(resolution, (float, int)):
x_res = resolution
y_res = resolution
elif resolution is None:
x_res = 100
y_res = 100
else:
raise ValueError("Unsupported option for `resolution`.")

# create mesh for predicting KDE on with more space around the points
x_mesh, y_mesh = np.meshgrid(
np.linspace(xmin - x_margin, xmax + x_margin, x_res),
np.linspace(ymin - y_margin, ymax + y_margin, y_res),
)

# get the prediction
pred = dens_u.pdf(np.vstack([x_mesh.flatten(), y_mesh.flatten()]).T)
z = pred.reshape(x_mesh.shape)

else:
kde = FFTKDE(bw=bandwidth, kernel=kernel)
grid, points = kde.fit(data).evaluate(resolution)
x_mesh, y_mesh = np.unique(grid[:, 0]), np.unique(grid[:, 1])
z = points.reshape(resolution, resolution).T

if fill:
return plt.contourf(x_mesh, y_mesh, z, levels=levels, **kwargs)
else:
return plt.contour(x_mesh, y_mesh, z, levels=levels, **kwargs)
98 changes: 98 additions & 0 deletions pointpats/tests/test_kde.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import numpy as np
import pytest

from pointpats import plot_density

matplotlib = pytest.importorskip("matplotlib")
statsmodels = pytest.importorskip("statsmodels")
KDEpy = pytest.importorskip("KDEpy")


class TestDensity:
def setup_method(self):
self.points = np.array(
[
[66.22, 32.54],
[22.52, 22.39],
[31.01, 81.21],
[9.47, 31.02],
[30.78, 60.10],
[75.21, 58.93],
[79.26, 7.68],
[8.23, 39.93],
[98.73, 77.17],
[89.78, 42.53],
[65.19, 92.08],
[54.46, 8.48],
]
)

def test_default(self):
qm = plot_density(self.points, 10)
collections = list(qm.collections)
assert len(collections) == 12
for col in collections:
assert col.get_linewidths() == np.array(1.5)
np.testing.assert_array_equal(
collections[5].get_edgecolor(),
np.array([[0.143343, 0.522773, 0.556295, 1.0]]),
)

def test_bandwidth(self):
qm = plot_density(self.points, 1)
collections = list(qm.collections)
assert len(collections) == 10

def test_resolution(self):
qm = plot_density(self.points, 10, resolution=200)
collections = list(qm.collections)
assert len(collections) == 12

def test_margin(self):
qm = plot_density(self.points, 10, margin=.3)
collections = list(qm.collections)
assert len(collections) == 12

def test_kdepy(self):
qm = plot_density(self.points, 10, kernel="gaussian")
collections = list(qm.collections)
assert len(collections) == 12
for col in collections:
assert col.get_linewidths() == np.array(1.5)

def test_levels(self):
qm = plot_density(self.points, 10, levels=5)
collections = list(qm.collections)
assert len(collections) == 7

def test_fill(self):
qm = plot_density(self.points, 10, fill=True)
collections = list(qm.collections)
assert collections[0].get_edgecolor().shape == (0, 4)
np.testing.assert_array_equal(
collections[0].get_facecolor(),
np.array([[0.279566, 0.067836, 0.391917, 1.0]]),
)

def test_geopandas(self):
geopandas = pytest.importorskip("geopandas")

gs = geopandas.GeoSeries.from_xy(*self.points.T)
qm = plot_density(gs, 10)
collections = list(qm.collections)
assert len(collections) == 12
for col in collections:
assert col.get_linewidths() == np.array(1.5)

def test_kwargs(self):
qm = plot_density(
self.points, 10, cmap="magma", linewidths=0.5, linestyles="-."
)
collections = list(qm.collections)
assert len(collections) == 12
for col in collections:
assert col.get_linewidths() == np.array(0.5)
np.testing.assert_array_equal(
collections[5].get_edgecolor(),
np.array([[0.639216, 0.189921, 0.49415, 1.0]]),
)

0 comments on commit 0725890

Please sign in to comment.