From 64fb9e079aee4703eae04aae1d82997888491793 Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Wed, 18 Sep 2024 14:09:36 +0200 Subject: [PATCH] Initial version of the DSS --- scoringrules/__init__.py | 2 ++ scoringrules/_dawid_sebastiani.py | 49 +++++++++++++++++++++++++++++++ scoringrules/backend/numpy.py | 14 +++++++++ scoringrules/core/dss/__init__.py | 10 +++++++ scoringrules/core/dss/_gufuncs.py | 19 ++++++++++++ scoringrules/core/dss/_score.py | 21 +++++++++++++ 6 files changed, 115 insertions(+) create mode 100644 scoringrules/_dawid_sebastiani.py create mode 100644 scoringrules/core/dss/__init__.py create mode 100644 scoringrules/core/dss/_gufuncs.py create mode 100644 scoringrules/core/dss/_score.py diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py index 6e27063..ddeabda 100644 --- a/scoringrules/__init__.py +++ b/scoringrules/__init__.py @@ -43,6 +43,7 @@ twenergy_score, vrenergy_score, ) +from scoringrules._dawid_sebastiani import dawid_sebastiani_score from scoringrules._error_spread import error_spread_score from scoringrules._interval import interval_score, weighted_interval_score from scoringrules._logs import ( @@ -113,6 +114,7 @@ "crps_quantile", "crps_t", "crps_uniform", + "dawid_sebastiani_score", "owcrps_ensemble", "twcrps_ensemble", "vrcrps_ensemble", diff --git a/scoringrules/_dawid_sebastiani.py b/scoringrules/_dawid_sebastiani.py new file mode 100644 index 0000000..27f647e --- /dev/null +++ b/scoringrules/_dawid_sebastiani.py @@ -0,0 +1,49 @@ +import typing as tp + +from scoringrules.core import dss +from scoringrules.core.utils import multivariate_array_check + +if tp.TYPE_CHECKING: + from scoringrules.core.typing import Array, Backend + + +def dawid_sebastiani_score( + observations: "Array", + forecasts: "Array", + /, + m_axis: int = -2, + v_axis: int = -1, + *, + backend: "Backend" = None, +) -> "Array": + r"""Compute the Dawid-Sebastiani-Score for a finite multivariate ensemble. + + ## TO-DO ## + + Parameters + ---------- + forecasts: Array + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + observations: Array + The observed values, where the variables dimension is by default the last axis. + m_axis: int + The axis corresponding to the ensemble dimension. Defaults to -2. + v_axis: int + The axis corresponding to the variables dimension. Defaults to -1. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + ds_score: Array + The computed Dawid-Sebastiani-Score. + """ + observations, forecasts = multivariate_array_check( + observations, forecasts, m_axis, v_axis, backend=backend + ) + + if backend == "numba": + return dss._dss_gufunc(observations, forecasts) + + return dss._dss(observations, forecasts, backend=backend) diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index 694b934..75c0cb0 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -224,6 +224,20 @@ def where(self, condition: "NDArray", x1: "NDArray", x2: "NDArray") -> "NDArray" def size(self, x: "NDArray") -> int: return x.size + def inv(self, x: "NDArray") -> "NDArray": + return np.linalg.inv(x) + + def cov(self, x: "NDArray") -> "NDArray": + # TODO: This deserves some axis parameter probably? + if len(x.shape) == 2: + return np.cov(x) + else: + centered = x - x.mean(axis=-2, keepdims=True) + return (centered.transpose(0, 2, 1) @ centered) / (x.shape[-2] - 1) + + def det(self, x: "NDArray") -> "NDArray": + return np.linalg.det(x) + class NumbaBackend(NumpyBackend): """Numba backend.""" diff --git a/scoringrules/core/dss/__init__.py b/scoringrules/core/dss/__init__.py new file mode 100644 index 0000000..13de158 --- /dev/null +++ b/scoringrules/core/dss/__init__.py @@ -0,0 +1,10 @@ +try: + from ._gufuncs import ( + _dss_gufunc, + ) +except ImportError: + _dss_gufunc = None + +from ._score import _ds_score as _dss + +__all__ = ["_dss_gufunc", "_dss"] diff --git a/scoringrules/core/dss/_gufuncs.py b/scoringrules/core/dss/_gufuncs.py new file mode 100644 index 0000000..1697c12 --- /dev/null +++ b/scoringrules/core/dss/_gufuncs.py @@ -0,0 +1,19 @@ +import numpy as np +from numba import guvectorize + + +@guvectorize( + [ + "void(float32[:], float32[:,:], float32[:])", + "void(float64[:], float64[:,:], float64[:])", + ], + "(d),(m,d)->()", +) +def _dss_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): + M = fct.shape[0] + bias = obs - (np.sum(fct, axis=0) / M) + cov = np.cov(fct, rowvar=False).astype(bias.dtype) + prec = np.linalg.inv(cov).astype(bias.dtype) + log_det = np.log(np.linalg.det(cov)) + bias_precision = bias.T @ prec @ bias + out[0] = log_det + bias_precision diff --git a/scoringrules/core/dss/_score.py b/scoringrules/core/dss/_score.py new file mode 100644 index 0000000..58acf0d --- /dev/null +++ b/scoringrules/core/dss/_score.py @@ -0,0 +1,21 @@ +import typing as tp + +from scoringrules.backend import backends + +if tp.TYPE_CHECKING: + from scoringrules.core.typing import Array, Backend + + +def _ds_score( + obs: "Array", # (... D) + fct: "Array", # (... M D) + backend: "Backend" = None, +) -> "Array": + """Compute the Dawid Sebastiani score Score for a multivariate finite ensemble.""" + B = backends.active if backend is None else backends[backend] + cov = B.cov(fct) + precision = B.inv(cov) + log_det = B.log(B.det(cov)) + bias = obs - B.mean(fct, axis=-2) + bias_precision = B.squeeze(bias[:, None, :] @ precision @ bias[:, :, None]) + return log_det + bias_precision