From 647c3f88503da2f436d222d45413b1aabc2b7497 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sun, 17 Sep 2023 19:19:31 +0200 Subject: [PATCH 1/4] plot_density based on statsmodels --- docs/api.rst | 11 +++++ pointpats/__init__.py | 1 + pointpats/kde.py | 108 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+) create mode 100644 pointpats/kde.py diff --git a/docs/api.rst b/docs/api.rst index 2dcfd03..9d3d295 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -46,6 +46,17 @@ Centrography skyum dtot + +.. _density_api: + +Density +------- + +.. autosummary:: + :toctree: generated/ + + plot_density + .. _quadrat_api: Quadrat Based Statistics diff --git a/pointpats/__init__.py b/pointpats/__init__.py index 483d793..3989773 100644 --- a/pointpats/__init__.py +++ b/pointpats/__init__.py @@ -8,3 +8,4 @@ from .quadrat_statistics import * from .distance_statistics import * from .spacetime import * +from .kde import * diff --git a/pointpats/kde.py b/pointpats/kde.py new file mode 100644 index 0000000..80cfce3 --- /dev/null +++ b/pointpats/kde.py @@ -0,0 +1,108 @@ +import numpy as np + + +def plot_density( + data, bandwidth, resolution=100, levels=10, fill=False, margin=0.1, **kwargs +): + """Plot kernel density of a given point pattern + + This uses s``tatsmodels.nonparametric.KDEMultivariate`` class to create + KDE and matplotlib's ``contour`` or ``contourf` function to plot the + density. + + If MultiPoints are given, each point is treated as a separate + observation. + + Parameters + ---------- + data : array or geopandas object + Array with a shape (2, n) containing coordinates of points + or a geopandas object with (Multi)Point geometry. Assumes + projected coordinates, geographical coordinates (latitude, longitude) + are not supported. + bandwidth : float + bandwidth in the units of CRS in which data is + resolution : int | tuple(int, int), optional + resolution of the grid used to evaluate the probability density + function. If tuple, each dimension of the grid is specified separately. + By default 100 + levels : int or array-like, optional + Determines the number and positions of the contour lines / regions. + See the documentation of ``matplotlib.pyplot.contour`` for details., by default 10 + fill : bool, optional + Fill the area between contour lines, by default False + margin : float, optional + The factor of the margin by which the extent of the data will be expanded when + creating the grid. 0.1 means 10% on each side, by default 0.1. + + Returns + ------- + matplotlib.pyplot.QuadContourSet + plot + """ + try: + import statsmodels.api as sm + except ImportError as err: + raise ImportError("statsmodels is required for `plot_density`") from err + + try: + import matplotlib.pyplot as plt + except ImportError as err: + raise ImportError("matplotlib is required for `plot_density`") from err + + if isinstance(data, np.ndarray): + x = data[:, 0] + y = data[:, 1] + else: # geopandas + if not data.geom_type.str.contains("Point").all(): + raise ValueError( + "data contain non-point geometries. " + "Only (Multi)Points are supported." + ) + coords = data.get_coordinates() + x = coords.x.values + y = coords.y.values + + dens_u = sm.nonparametric.KDEMultivariate( + data=[x, y], + var_type="cc", + bw=[bandwidth, bandwidth], + ) + + xmax = x.max() + xmin = x.min() + ymax = y.max() + ymin = y.min() + + # get margin to go beyond the extent to avoid cutting of countour lines + x_margin = (xmax - xmin) * margin + y_margin = (ymax - ymin) * margin + + if isinstance(resolution, tuple): + x_res, y_res = resolution + elif isinstance(resolution, (float, int)): + x_res = resolution + y_res = resolution + elif resolution is None: + x_res = 100 + y_res = 100 + else: + raise ValueError("Unsupported option for `resolution`.") + + # create mesh for predicting KDE on with more space around the points + x_mesh, y_mesh = np.meshgrid( + np.linspace(xmin - x_margin, xmax + x_margin, x_res), + np.linspace(ymin - y_margin, ymax + y_margin, y_res), + ) + + # get the prediction + pred = dens_u.pdf(np.vstack([x_mesh.flatten(), y_mesh.flatten()]).T) + + if fill: + return plt.contourf( + x_mesh, y_mesh, pred.reshape(x_mesh.shape), levels=levels, **kwargs + ) + else: + return plt.contour( + x_mesh, y_mesh, pred.reshape(x_mesh.shape), levels=levels, **kwargs + ) From b6cdd8c1ca460c442fe1b6a3300880e322ca372c Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sun, 17 Sep 2023 20:39:20 +0200 Subject: [PATCH 2/4] fix docstring --- pointpats/kde.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pointpats/kde.py b/pointpats/kde.py index 80cfce3..78394d8 100644 --- a/pointpats/kde.py +++ b/pointpats/kde.py @@ -6,8 +6,8 @@ def plot_density( ): """Plot kernel density of a given point pattern - This uses s``tatsmodels.nonparametric.KDEMultivariate`` class to create - KDE and matplotlib's ``contour`` or ``contourf` function to plot the + This uses ``statsmodels.nonparametric.KDEMultivariate`` class to create + KDE and matplotlib's ``contour`` or ``contourf`` function to plot the density. If MultiPoints are given, each point is treated as a separate From fd7f60117141568f837667d76d7b1ff19981d8e9 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sun, 17 Sep 2023 22:12:37 +0200 Subject: [PATCH 3/4] KDEpy engine, tests --- ci/envs/310-latest.yaml | 2 + ci/envs/311-dev.yaml | 2 + ci/envs/311-latest.yaml | 2 + ci/envs/39-latest.yaml | 2 + pointpats/kde.py | 142 ++++++++++++++++++++++-------------- pointpats/tests/test_kde.py | 98 +++++++++++++++++++++++++ 6 files changed, 194 insertions(+), 54 deletions(-) create mode 100644 pointpats/tests/test_kde.py diff --git a/ci/envs/310-latest.yaml b/ci/envs/310-latest.yaml index 92d9d59..4221397 100644 --- a/ci/envs/310-latest.yaml +++ b/ci/envs/310-latest.yaml @@ -13,9 +13,11 @@ dependencies: - scikit-learn - shapely - geopandas + - statsmodels - pytest - pytest-cov - codecov - pip - pip: - opencv-contrib-python + - KDEpy diff --git a/ci/envs/311-dev.yaml b/ci/envs/311-dev.yaml index 25d7996..63156f0 100644 --- a/ci/envs/311-dev.yaml +++ b/ci/envs/311-dev.yaml @@ -23,3 +23,5 @@ dependencies: - git+https://github.com/pysal/libpysal.git - scipy - scikit-learn + - statsmodels + - KDEpy diff --git a/ci/envs/311-latest.yaml b/ci/envs/311-latest.yaml index 2d60a52..be53100 100644 --- a/ci/envs/311-latest.yaml +++ b/ci/envs/311-latest.yaml @@ -13,12 +13,14 @@ dependencies: - scikit-learn - shapely - geopandas + - statsmodels - pytest - pytest-cov - codecov - pip - pip: - opencv-contrib-python + - KDEpy # for docs build action (this env only) - nbsphinx - numpydoc diff --git a/ci/envs/39-latest.yaml b/ci/envs/39-latest.yaml index 5d905c5..18551a9 100644 --- a/ci/envs/39-latest.yaml +++ b/ci/envs/39-latest.yaml @@ -13,9 +13,11 @@ dependencies: - scikit-learn - shapely - geopandas + - statsmodels - pytest - pytest-cov - codecov - pip - pip: - opencv-contrib-python + - KDEpy diff --git a/pointpats/kde.py b/pointpats/kde.py index 78394d8..a6ccbcd 100644 --- a/pointpats/kde.py +++ b/pointpats/kde.py @@ -2,16 +2,29 @@ def plot_density( - data, bandwidth, resolution=100, levels=10, fill=False, margin=0.1, **kwargs + data, + bandwidth, + kernel=None, + resolution=100, + levels=10, + fill=False, + margin=0.1, + **kwargs, ): """Plot kernel density of a given point pattern - This uses ``statsmodels.nonparametric.KDEMultivariate`` class to create - KDE and matplotlib's ``contour`` or ``contourf`` function to plot the + The KDE can be done either using ``statsmodels.nonparametric.KDEMultivariate``, + which is used when ``kernel=None``, or using ``KDEpy.FFTKDE`` when kernel + is set. ``FFTKDE`` tends to be generally faster in most cases but may need + different than ``"gaussian"`` kernel to resolve in some cases. For small data of up + to 10 000 points, the difference is not noticeable. For larger data, specify + ``bandwidth`` to enforce the use of ``FFTKDE``. Note that while being faster, + ``FFTKDE`` may in some case result in erroneous KDE. + + KDE is plotted using matplotlib's ``contour`` or ``contourf`` function to plot the density. - If MultiPoints are given, each point is treated as a separate - observation. + If MultiPoints are given, each point is treated as separate observation. Parameters ---------- @@ -22,6 +35,10 @@ def plot_density( are not supported. bandwidth : float bandwidth in the units of CRS in which data is + kernel : str | None, optional + The kernel function. If None, defaults to the Gaussian kernel and statsmodels + implementation. If set, uses KDEpy implementation. See + ``KDEpy.FFTKDE._available_kernels.keys()`` for choices. resolution : int | tuple(int, int), optional resolution of the grid used to evaluate the probability density function. If tuple, each dimension of the grid is specified separately. @@ -33,17 +50,33 @@ def plot_density( Fill the area between contour lines, by default False margin : float, optional The factor of the margin by which the extent of the data will be expanded when - creating the grid. 0.1 means 10% on each side, by default 0.1. + creating the grid. 0.1 means 10% on each side, by default 0.1. Only used + with the ``statsmodels`` implementation. Returns ------- matplotlib.pyplot.QuadContourSet plot """ - try: - import statsmodels.api as sm - except ImportError as err: - raise ImportError("statsmodels is required for `plot_density`") from err + if kernel is None: + try: + import statsmodels.api as sm + except ImportError as err: + raise ImportError( + "statsmodels is required for `plot_density` when kernel" + "is not specified." + ) from err + + engine = "sm" + else: + try: + from KDEpy import FFTKDE + except ImportError as err: + raise ImportError( + "KDEpy is required for `plot_density` when kernel is not None." + ) from err + + engine = "kdepy" try: import matplotlib.pyplot as plt @@ -51,58 +84,59 @@ def plot_density( raise ImportError("matplotlib is required for `plot_density`") from err if isinstance(data, np.ndarray): - x = data[:, 0] - y = data[:, 1] + pass else: # geopandas if not data.geom_type.str.contains("Point").all(): raise ValueError( "data contain non-point geometries. " "Only (Multi)Points are supported." ) - coords = data.get_coordinates() - x = coords.x.values - y = coords.y.values - - dens_u = sm.nonparametric.KDEMultivariate( - data=[x, y], - var_type="cc", - bw=[bandwidth, bandwidth], - ) - - xmax = x.max() - xmin = x.min() - ymax = y.max() - ymin = y.min() - - # get margin to go beyond the extent to avoid cutting of countour lines - x_margin = (xmax - xmin) * margin - y_margin = (ymax - ymin) * margin - - if isinstance(resolution, tuple): - x_res, y_res = resolution - elif isinstance(resolution, (float, int)): - x_res = resolution - y_res = resolution - elif resolution is None: - x_res = 100 - y_res = 100 - else: - raise ValueError("Unsupported option for `resolution`.") + data = data.get_coordinates().values - # create mesh for predicting KDE on with more space around the points - x_mesh, y_mesh = np.meshgrid( - np.linspace(xmin - x_margin, xmax + x_margin, x_res), - np.linspace(ymin - y_margin, ymax + y_margin, y_res), - ) + if engine == "sm": + dens_u = sm.nonparametric.KDEMultivariate( + data=[data[:, 0], data[:, 1]], + var_type="cc", + bw=[bandwidth, bandwidth], + ) - # get the prediction - pred = dens_u.pdf(np.vstack([x_mesh.flatten(), y_mesh.flatten()]).T) + xmax = data[:, 0].max() + xmin = data[:, 0].min() + ymax = data[:, 1].max() + ymin = data[:, 1].min() - if fill: - return plt.contourf( - x_mesh, y_mesh, pred.reshape(x_mesh.shape), levels=levels, **kwargs + # get margin to go beyond the extent to avoid cutting of countour lines + x_margin = (xmax - xmin) * margin + y_margin = (ymax - ymin) * margin + + if isinstance(resolution, tuple): + x_res, y_res = resolution + elif isinstance(resolution, (float, int)): + x_res = resolution + y_res = resolution + elif resolution is None: + x_res = 100 + y_res = 100 + else: + raise ValueError("Unsupported option for `resolution`.") + + # create mesh for predicting KDE on with more space around the points + x_mesh, y_mesh = np.meshgrid( + np.linspace(xmin - x_margin, xmax + x_margin, x_res), + np.linspace(ymin - y_margin, ymax + y_margin, y_res), ) + + # get the prediction + pred = dens_u.pdf(np.vstack([x_mesh.flatten(), y_mesh.flatten()]).T) + z = pred.reshape(x_mesh.shape) + else: - return plt.contour( - x_mesh, y_mesh, pred.reshape(x_mesh.shape), levels=levels, **kwargs - ) + kde = FFTKDE(bw=bandwidth, kernel=kernel) + grid, points = kde.fit(data).evaluate(resolution) + x_mesh, y_mesh = np.unique(grid[:, 0]), np.unique(grid[:, 1]) + z = points.reshape(resolution, resolution).T + + if fill: + return plt.contourf(x_mesh, y_mesh, z, levels=levels, **kwargs) + else: + return plt.contour(x_mesh, y_mesh, z, levels=levels, **kwargs) diff --git a/pointpats/tests/test_kde.py b/pointpats/tests/test_kde.py new file mode 100644 index 0000000..60d4649 --- /dev/null +++ b/pointpats/tests/test_kde.py @@ -0,0 +1,98 @@ +import numpy as np +import pytest + +from pointpats import plot_density + +matplotlib = pytest.importorskip("matplotlib") +statsmodels = pytest.importorskip("statsmodels") +KDEpy = pytest.importorskip("KDEpy") + + +class TestDensity: + def setup_method(self): + self.points = np.array( + [ + [66.22, 32.54], + [22.52, 22.39], + [31.01, 81.21], + [9.47, 31.02], + [30.78, 60.10], + [75.21, 58.93], + [79.26, 7.68], + [8.23, 39.93], + [98.73, 77.17], + [89.78, 42.53], + [65.19, 92.08], + [54.46, 8.48], + ] + ) + + def test_default(self): + qm = plot_density(self.points, 10) + collections = list(qm.collections) + assert len(collections) == 12 + for col in collections: + assert col.get_linewidths() == np.array(1.5) + np.testing.assert_array_equal( + collections[5].get_edgecolor(), + np.array([[0.143343, 0.522773, 0.556295, 1.0]]), + ) + + def test_bandwidth(self): + qm = plot_density(self.points, 1) + collections = list(qm.collections) + assert len(collections) == 10 + + def test_resolution(self): + qm = plot_density(self.points, 10, resolution=200) + collections = list(qm.collections) + assert len(collections) == 12 + + def test_margin(self): + qm = plot_density(self.points, 10, margin=.3) + collections = list(qm.collections) + assert len(collections) == 12 + + def test_kdepy(self): + qm = plot_density(self.points, 10, kernel="gaussian") + collections = list(qm.collections) + assert len(collections) == 12 + for col in collections: + assert col.get_linewidths() == np.array(1.5) + + def test_levels(self): + qm = plot_density(self.points, 10, levels=5) + collections = list(qm.collections) + assert len(collections) == 7 + + def test_fill(self): + qm = plot_density(self.points, 10, fill=True) + collections = list(qm.collections) + assert collections[0].get_edgecolor().shape == (0, 4) + np.testing.assert_array_equal( + collections[0].get_facecolor(), + np.array([[0.279566, 0.067836, 0.391917, 1.0]]), + ) + + def test_geopandas(self): + geopandas = pytest.importorskip("geopandas") + + gs = geopandas.GeoSeries.from_xy(*self.points.T) + qm = plot_density(gs, 10) + collections = list(qm.collections) + assert len(collections) == 12 + for col in collections: + assert col.get_linewidths() == np.array(1.5) + + def test_kwargs(self): + qm = plot_density( + self.points, 10, cmap="magma", linewidths=0.5, linestyles="-." + ) + collections = list(qm.collections) + assert len(collections) == 12 + for col in collections: + assert col.get_linewidths() == np.array(0.5) + np.testing.assert_array_equal( + collections[5].get_edgecolor(), + np.array([[0.639216, 0.189921, 0.49415, 1.0]]), + ) From 26b45b356fa9a2e15d6a571e73040ed31c5b1b5a Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sun, 17 Sep 2023 23:04:34 +0200 Subject: [PATCH 4/4] better docstring --- docs/conf.py | 2 ++ pointpats/kde.py | 25 ++++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 9e48f0a..639b296 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -288,6 +288,8 @@ def setup(app): 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), 'matplotlib':("https://matplotlib.org/", None), 'opencv-contrib-python':("https://docs.opencv.org/3.4/index.html", None), + 'KDEpy':("https://kdepy.readthedocs.io/en/latest/", None), + 'statsmodels':("https://www.statsmodels.org/stable/", None), } diff --git a/pointpats/kde.py b/pointpats/kde.py index a6ccbcd..7e95a49 100644 --- a/pointpats/kde.py +++ b/pointpats/kde.py @@ -13,16 +13,16 @@ def plot_density( ): """Plot kernel density of a given point pattern - The KDE can be done either using ``statsmodels.nonparametric.KDEMultivariate``, - which is used when ``kernel=None``, or using ``KDEpy.FFTKDE`` when kernel - is set. ``FFTKDE`` tends to be generally faster in most cases but may need + The KDE can be done either using :class:`statsmodels.nonparametric.KDEMultivariate`, + which is used when ``kernel=None``, or using :class:`KDEpy.FFTKDE` when kernel is + set. :class:`~KDEpy.FFTKDE` tends to be generally faster in most cases but may need different than ``"gaussian"`` kernel to resolve in some cases. For small data of up to 10 000 points, the difference is not noticeable. For larger data, specify - ``bandwidth`` to enforce the use of ``FFTKDE``. Note that while being faster, - ``FFTKDE`` may in some case result in erroneous KDE. + ``bandwidth`` to enforce the use of :class:`~KDEpy.FFTKDE`. Note that while being + faster, :class:`~KDEpy.FFTKDE` may in some case result in erroneous KDE. - KDE is plotted using matplotlib's ``contour`` or ``contourf`` function to plot the - density. + KDE is plotted using matplotlib's :meth:`~matplotlib.pyplot.contour` or + :meth:`~matplotlib.pyplot.contourf` function to plot the density. If MultiPoints are given, each point is treated as separate observation. @@ -38,20 +38,27 @@ def plot_density( kernel : str | None, optional The kernel function. If None, defaults to the Gaussian kernel and statsmodels implementation. If set, uses KDEpy implementation. See - ``KDEpy.FFTKDE._available_kernels.keys()`` for choices. + :meth:`KDEpy.FFTKDE._available_kernels.keys()` for choices. resolution : int | tuple(int, int), optional resolution of the grid used to evaluate the probability density function. If tuple, each dimension of the grid is specified separately. By default 100 levels : int or array-like, optional Determines the number and positions of the contour lines / regions. - See the documentation of ``matplotlib.pyplot.contour`` for details., by default 10 + See the documentation of :meth:`~matplotlib.pyplot.contour` for details. + By default 10 fill : bool, optional Fill the area between contour lines, by default False margin : float, optional The factor of the margin by which the extent of the data will be expanded when creating the grid. 0.1 means 10% on each side, by default 0.1. Only used with the ``statsmodels`` implementation. + **kwargs + Keyword arguments passed to :meth:`~matplotlib.pyplot.contour` or + :meth:`~matplotlib.pyplot.contourf` used for further + styling of the plot, for example ``cmap``, ``linewidths``, ``linestyles``, + or `alpha`. See the documentation of :meth:`~matplotlib.pyplot.contour` for + details. Returns -------