Dawid-Sebastiani Score #75
base: main
@@ -0,0 +1,49 @@
import typing as tp

from scoringrules.core import dss
from scoringrules.core.utils import multivariate_array_check

if tp.TYPE_CHECKING:
    from scoringrules.core.typing import Array, Backend


def dawid_sebastiani_score(
    observations: "Array",
    forecasts: "Array",
    /,
    m_axis: int = -2,
    v_axis: int = -1,
    *,
    backend: "Backend" = None,
) -> "Array":
    r"""Compute the Dawid-Sebastiani-Score for a finite multivariate ensemble.

    ## TO-DO ##

    Parameters
    ----------
    forecasts: Array
        The predicted forecast ensemble, where the ensemble dimension is by default
        represented by the second last axis and the variables dimension by the last axis.
    observations: Array
        The observed values, where the variables dimension is by default the last axis.
    m_axis: int
        The axis corresponding to the ensemble dimension. Defaults to -2.
    v_axis: int
        The axis corresponding to the variables dimension. Defaults to -1.
    backend: str
        The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.

    Returns
    -------
    ds_score: Array
        The computed Dawid-Sebastiani-Score.
    """
    observations, forecasts = multivariate_array_check(
        observations, forecasts, m_axis, v_axis, backend=backend
    )

    if backend == "numba":
        return dss._dss_gufunc(observations, forecasts)

    return dss._dss(observations, forecasts, backend=backend)
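For context, the implementation above evaluates log det(Σ) + (y - μ)ᵀ Σ⁻¹ (y - μ), where μ and Σ are the mean and covariance of the forecast ensemble. A minimal usage sketch (not part of the PR; it assumes the function ends up exported at the package level like the other multivariate scores, and the shapes are made up):

import numpy as np
import scoringrules as sr  # assumed public entry point

rng = np.random.default_rng(42)
obs = rng.normal(size=(100, 3))      # 100 observations of a 3-variable quantity
fct = rng.normal(size=(100, 50, 3))  # a 50-member ensemble for each observation

score = sr.dawid_sebastiani_score(obs, fct)
print(score.shape)  # expected: (100,), one score per observation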
@@ -224,6 +224,20 @@ def where(self, condition: "NDArray", x1: "NDArray", x2: "NDArray") -> "NDArray"

    def size(self, x: "NDArray") -> int:
        return x.size

    def inv(self, x: "NDArray") -> "NDArray":
        return np.linalg.inv(x)
    def cov(self, x: "NDArray") -> "NDArray":
        # TODO: This deserves some axis parameter probably?
        if len(x.shape) == 2:
            # rowvar=False so that rows are samples and columns are variables,
            # consistent with the N-dimensional branch below
            return np.cov(x, rowvar=False)
        else:
            centered = x - x.mean(axis=-2, keepdims=True)
            return (centered.transpose(0, 2, 1) @ centered) / (x.shape[-2] - 1)
Review comment: centered.transpose(0, 2, 1) assumes the array is 3D. Is there a way to generalise this to calculate the covariance matrix for higher dimensions?

Review comment: Merry Christmas :)
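One way to answer that question would be to swap the last two axes instead of hard-coding a 3D transpose, so any number of leading batch dimensions is supported. A standalone sketch of the idea (not part of this PR):

import numpy as np

def cov_nd(x: np.ndarray) -> np.ndarray:
    # Sample covariance over the second-to-last axis:
    # input (..., M, D) -> output (..., D, D), for any leading batch shape.
    centered = x - x.mean(axis=-2, keepdims=True)
    return np.swapaxes(centered, -2, -1) @ centered / (x.shape[-2] - 1)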
    def det(self, x: "NDArray") -> "NDArray":
        return np.linalg.det(x)


class NumbaBackend(NumpyBackend):
    """Numba backend."""
@@ -0,0 +1,10 @@
try:
    from ._gufuncs import (
        _dss_gufunc,
    )
except ImportError:
    _dss_gufunc = None

from ._score import _ds_score as _dss

__all__ = ["_dss_gufunc", "_dss"]
Review comment: I'd be curious to see the performance benefit here compared to plain numpy. Usually numba doesn't optimize much when using a lot of numpy operations.

Review comment: Yes, and the DSS is also by far not as computationally intensive as e.g. ES or VS, so I guess even if we get a good relative speed improvement the user might not feel it.
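A quick way to check that would be a micro-benchmark along these lines (a sketch only; it assumes the numpy backend is the active one so both code paths accept plain numpy arrays, and the shapes are made up):

import timeit
import numpy as np
from scoringrules.core import dss

rng = np.random.default_rng(0)
obs = rng.normal(size=(1000, 5))      # (N, D) observations
fct = rng.normal(size=(1000, 20, 5))  # (N, M, D) ensemble forecasts

dss._dss_gufunc(obs, fct)  # warm-up call so numba compilation is not timed

t_np = timeit.timeit(lambda: dss._dss(obs, fct), number=100)
t_nb = timeit.timeit(lambda: dss._dss_gufunc(obs, fct), number=100)
print(f"numpy backend: {t_np:.3f}s, numba gufunc: {t_nb:.3f}s")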
@@ -0,0 +1,19 @@
import numpy as np
from numba import guvectorize


@guvectorize(
    [
        "void(float32[:], float32[:,:], float32[:])",
        "void(float64[:], float64[:,:], float64[:])",
    ],
    "(d),(m,d)->()",
)
def _dss_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray):
    M = fct.shape[0]
    bias = obs - (np.sum(fct, axis=0) / M)
    cov = np.cov(fct, rowvar=False).astype(bias.dtype)
    prec = np.linalg.inv(cov).astype(bias.dtype)
    log_det = np.log(np.linalg.det(cov))
    bias_precision = bias.T @ prec @ bias
    out[0] = log_det + bias_precision
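The layout signature "(d),(m,d)->()" means the core routine consumes one observation vector of length D and one (M, D) ensemble and writes a single scalar, so the gufunc broadcasts over any extra leading axes. A small shape check (a sketch, assuming numba is installed and the module layout from this PR):

import numpy as np
from scoringrules.core import dss

obs = np.random.randn(4, 7, 3)      # two batch axes, D = 3
fct = np.random.randn(4, 7, 25, 3)  # M = 25 ensemble members per case
print(dss._dss_gufunc(obs, fct).shape)  # expected: (4, 7)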
@@ -0,0 +1,21 @@
import typing as tp

from scoringrules.backend import backends

if tp.TYPE_CHECKING:
    from scoringrules.core.typing import Array, Backend


def _ds_score(
    obs: "Array",  # (... D)
    fct: "Array",  # (... M D)
    backend: "Backend" = None,
) -> "Array":
    """Compute the Dawid-Sebastiani score for a multivariate finite ensemble."""
    B = backends.active if backend is None else backends[backend]
    cov = B.cov(fct)
    precision = B.inv(cov)
    log_det = B.log(B.det(cov))
    bias = obs - B.mean(fct, axis=-2)
    bias_precision = B.squeeze(bias[:, None, :] @ precision @ bias[:, :, None])
    return log_det + bias_precision
Review comment: I suppose this would look more or less the same for the other backends?

Review comment: Pretty much, I guess. I don't have much experience in tf/torch though, so I'm not sure yet about the exact implementation.

Review comment: The det and inv functions should be easy/similar to define in the other backends. For cov, see the sketch below. Although, as mentioned in a separate comment, the transpose function currently assumes we only have 3 dimensions.
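A hypothetical sketch of what such a cov method could look like in a torch backend, following the same (..., M, D) -> (..., D, D) convention as the numpy version above (the class and method placement are assumptions, not part of this PR):

import torch

class TorchBackendSketch:
    # hypothetical backend class, shown only to illustrate how cov could be written
    def cov(self, x: torch.Tensor) -> torch.Tensor:
        # Sample covariance over the second-to-last axis, batched over any leading dims:
        # x has shape (..., M, D) and the result has shape (..., D, D).
        centered = x - x.mean(dim=-2, keepdim=True)
        return centered.transpose(-2, -1) @ centered / (x.shape[-2] - 1)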