diff --git a/docs/api/crps/weighted.md b/docs/api/crps/weighted.md new file mode 100644 index 0000000..ce90cc8 --- /dev/null +++ b/docs/api/crps/weighted.md @@ -0,0 +1,3 @@ +::: scoringrules.owcrps_ensemble +::: scoringrules.twcrps_ensemble +::: scoringrules.vrcrps_ensemble \ No newline at end of file diff --git a/docs/api/energy.md b/docs/api/energy.md deleted file mode 100644 index 4edbe76..0000000 --- a/docs/api/energy.md +++ /dev/null @@ -1,3 +0,0 @@ -# Energy Score - -::: scoringrules.energy_score diff --git a/docs/api/energy/ensemble.md b/docs/api/energy/ensemble.md new file mode 100644 index 0000000..6430394 --- /dev/null +++ b/docs/api/energy/ensemble.md @@ -0,0 +1 @@ +::: scoringrules.energy_score \ No newline at end of file diff --git a/docs/api/energy/index.md b/docs/api/energy/index.md new file mode 100644 index 0000000..68f8bb3 --- /dev/null +++ b/docs/api/energy/index.md @@ -0,0 +1,74 @@ +# Energy Score + +The energy score (ES) is a scoring rule for evaluating multivariate probabilistic forecasts. +It is defined as + +$$\text{ES}(F, \mathbf{y})= \mathbb{E} \| \mathbf{X} - \mathbf{y} \| - \frac{1}{2} \mathbb{E} \| \mathbf{X} - \mathbf{X}^{\prime} \|, $$ + +where $\mathbf{y} \in \mathbb{R}^{d}$ is the multivariate observation ($d > 1$), and +$\mathbf{X}$ and $\mathbf{X}^{\prime}$ are independent random variables that follow the +multivariate forecast distribution $F$ (Gneiting and Raftery, 2007)[@gneiting_strictly_2007]. +If the dimension $d$ were equal to one, the energy score would reduce to the continuous ranked probability score (CRPS). + +
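When the forecast distribution $F$ can be sampled from, the two expectations in this definition can be approximated by Monte Carlo. The sketch below is illustrative only and assumes a bivariate normal forecast; the variable names are not part of the `scoringrules` API. In practice, ensemble forecasts are handled with the estimator described in the next section.

```python
import numpy as np

# Illustrative only: approximate ES(F, y) by Monte Carlo for a bivariate normal F.
rng = np.random.default_rng(42)
mean = np.zeros(2)
cov = np.array([[1.0, 0.5], [0.5, 1.0]])
y = np.array([0.2, -0.1])

# Independent samples X and X' from F, one set for each expectation.
X = rng.multivariate_normal(mean, cov, size=10_000)
Xp = rng.multivariate_normal(mean, cov, size=10_000)

es = np.linalg.norm(X - y, axis=-1).mean() - 0.5 * np.linalg.norm(X - Xp, axis=-1).mean()
print(es)
```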

+ +## Ensemble forecasts + +While multivariate probabilistic forecasts could belong to a parametric family of +distributions, such as a multivariate normal distribution, it is more common in practice +that these forecasts are ensemble forecasts; that is, the forecast comprises a +predictive sample $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$, +where each ensemble member $\mathbf{x}_{m} \in \mathbb{R}^{d}$ for $m = 1, \dots, M$. + +In this case, the expectations in the definition of the energy score can be replaced by +sample means over the ensemble members, yielding the following representation of the energy +score when evaluating an ensemble forecast $F_{ens}$ with $M$ members: + +$$\text{ES}(F_{ens}, \mathbf{y})= \frac{1}{M} \sum_{m=1}^{M} \| \mathbf{x}_{m} - \mathbf{y} \| - \frac{1}{2 M^{2}} \sum_{m=1}^{M} \sum_{j=1}^{M} \| \mathbf{x}_{m} - \mathbf{x}_{j} \|. $$ + +
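This ensemble representation maps directly onto array operations. The following is a minimal NumPy sketch of the formula above for a single forecast case; it is not the `scoringrules` implementation, which is provided by `scoringrules.energy_score` (in this version with ensemble members along the second-to-last axis by default) and also handles batches of forecasts and alternative backends.

```python
import numpy as np

def energy_score_ens(x: np.ndarray, y: np.ndarray) -> float:
    """Ensemble energy score for one case: x has shape (M, D), y has shape (D,)."""
    M = x.shape[0]
    e_1 = np.linalg.norm(x - y, axis=-1).mean()
    e_2 = np.linalg.norm(x[:, None, :] - x[None, :, :], axis=-1).sum() / M**2
    return e_1 - 0.5 * e_2

rng = np.random.default_rng(0)
x = rng.normal(size=(50, 3))  # 50 ensemble members, 3 variables
y = rng.normal(size=3)
print(energy_score_ens(x, y))
```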

+ +## Weighted energy scores + +The energy score provides a measure of overall forecast performance. However, it is often +the case that certain outcomes are of more interest than others, making it desirable to +assign more weight to these outcomes when evaluating forecast performance. This can be +achieved using weighted scoring rules. Weighted scoring rules typically introduce a +weight function into conventional scoring rules, and users can choose the weight function +depending on what outcomes they want to emphasise. Allen et al. (2022)[@allen2022evaluating] +discuss three weighted versions of the energy score. These are all available in `scoringrules`. + +Firstly, the outcome-weighted energy score (originally introduced by Holzmann and Klar (2017)[@holzmann2017focusing]) +is defined as + +$$\text{owES}(F, \mathbf{y}; w)= \frac{1}{\bar{w}} \mathbb{E} \| \mathbf{X} - \mathbf{y} \| w(\mathbf{X}) w(\mathbf{y}) - \frac{1}{2 \bar{w}^{2}} \mathbb{E} \| \mathbf{X} - \mathbf{X}^{\prime} \| w(\mathbf{X})w(\mathbf{X}^{\prime})w(\mathbf{y}), $$ + +where $w : \mathbb{R}^{d} \to [0, \infty)$ is the non-negative weight function used to +target particular multivariate outcomes, and $\bar{w} = \mathbb{E}[w(\mathbf{X})]$. +As before, $\mathbf{X}, \mathbf{X}^{\prime} \sim F$ are independent. + +Secondly, Allen et al. (2022) introduced the threshold-weighted energy score as + +$$\text{twES}(F, \mathbf{y}; v)= \mathbb{E} \| v(\mathbf{X}) - v(\mathbf{y}) \| - \frac{1}{2} \mathbb{E} \| v(\mathbf{X}) - v(\mathbf{X}^{\prime}) \|, $$ + +where $v : \mathbb{R}^{d} \to \mathbb{R}^{d}$ is a so-called chaining function. +The threshold-weighted energy score transforms the forecasts and observations according +to the chaining function $v$, prior to calculating the unweighted energy score. Choosing +a chaining function is generally more difficult than choosing a weight function when +emphasising particular outcomes. + +As an alternative, the vertically re-scaled energy score is defined as + +$$\text{vrES}(F, \mathbf{y}; w, \mathbf{x}_{0})= \mathbb{E} \| \mathbf{X} - \mathbf{y} \| w(\mathbf{X}) w(\mathbf{y}) - \frac{1}{2} \mathbb{E} \| \mathbf{X} - \mathbf{X}^{\prime} \| w(\mathbf{X})w(\mathbf{X}^{\prime}) + \left( \mathbb{E} \| \mathbf{X} - \mathbf{x}_{0} \| w(\mathbf{X}) - \| \mathbf{y} - \mathbf{x}_{0} \| w(\mathbf{y}) \right) \left(\mathbb{E}[w(\mathbf{X})] - w(\mathbf{y}) \right), $$ + +where $w : \mathbb{R}^{d} \to [0, \infty)$ is the non-negative weight function used to +target particular multivariate outcomes, and $\mathbf{x}_{0} \in \mathbb{R}^{d}$. Typically, +$\mathbf{x}_{0}$ is chosen to be the zero vector. + +Each of these weighted energy scores targets particular outcomes in a different way. +Further details regarding the differences between these scoring rules, as well as choices +for the weight and chaining functions, can be found in Allen et al. (2022). The weighted +energy scores can easily be computed for ensemble forecasts by +replacing the expectations with sample means over the ensemble members. + +
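These three scores are exposed in this version as `scoringrules.owenergy_score`, `scoringrules.twenergy_score`, and `scoringrules.vrenergy_score`. The sketch below shows how they might be called, following the signatures introduced in this change; the weight and chaining functions are illustrative choices (emphasising outcomes where all variables are large), not package defaults. In the NumPy backend added here, the weight function is applied to each $d$-dimensional vector and should return a scalar, while the chaining function is applied directly to the forecast and observation arrays.

```python
import numpy as np
import scoringrules as sr

rng = np.random.default_rng(0)
fcts = rng.normal(size=(100, 50, 3))  # (..., M, D): 100 cases, 50 members, 3 variables
obs = rng.normal(size=(100, 3))
t = 1.0

# Smooth weight: close to 1 when every component exceeds t, close to 0 otherwise.
w_func = lambda x, t: float(np.prod(1.0 / (1.0 + np.exp(-(x - t)))))
owes = sr.owenergy_score(fcts, obs, w_func, (t,))
vres = sr.vrenergy_score(fcts, obs, w_func, (t,))

# Chaining function: map each component to max(x, t) before computing the energy score.
v_func = lambda x, t: np.maximum(x, t)
twes = sr.twenergy_score(fcts, obs, v_func, (t,))
```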

\ No newline at end of file diff --git a/docs/api/energy/weighted.md b/docs/api/energy/weighted.md new file mode 100644 index 0000000..2fd5924 --- /dev/null +++ b/docs/api/energy/weighted.md @@ -0,0 +1,3 @@ +::: scoringrules.owenergy_score +::: scoringrules.twenergy_score +::: scoringrules.vrenergy_score \ No newline at end of file diff --git a/docs/api/variogram.md b/docs/api/variogram.md deleted file mode 100644 index 7a64c67..0000000 --- a/docs/api/variogram.md +++ /dev/null @@ -1,3 +0,0 @@ -# Variogram Score [@scheuerer_variogram-based_2015] - -::: scoringrules.variogram_score diff --git a/docs/api/variogram/ensemble.md b/docs/api/variogram/ensemble.md new file mode 100644 index 0000000..d6683de --- /dev/null +++ b/docs/api/variogram/ensemble.md @@ -0,0 +1 @@ +::: scoringrules.variogram_score diff --git a/docs/api/variogram/index.md b/docs/api/variogram/index.md new file mode 100644 index 0000000..f0caa19 --- /dev/null +++ b/docs/api/variogram/index.md @@ -0,0 +1,84 @@ +# Variogram Score + +The variogram score (VS) is a scoring rule for evaluating multivariate probabilistic forecasts. +It is defined as + +$$\text{VS}_{p}(F, \mathbf{y})= \sum_{i=1}^{d} \sum_{j=1}^{d} \left( \mathbb{E} | X_{i} - X_{j} |^{p} - | y_{i} - y_{j} |^{p} \right)^{2}, $$ + +where $p > 0$, $\mathbf{y} = (y_{1}, \dots, y_{d}) \in \mathbb{R}^{d}$ is the multivariate observation ($d > 1$), and +$\mathbf{X} = (X_{1}, \dots, X_{d})$ is a random vector that follows the +multivariate forecast distribution $F$ (Scheuerer and Hamill, 2015)[@scheuerer_variogram-based_2015]. +The exponent $p$ is typically chosen to be 0.5 or 1. + +The variogram score is less sensitive to marginal forecast performance than the energy score, +and Scheuerer and Hamill (2015) argue that it should therefore be more sensitive to errors in the +forecast's dependence structure. + +
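For ensemble forecasts, `scoringrules.variogram_score` computes the sample-based version of this score described in the next section. A minimal usage sketch, assuming the array layout with ensemble members along the second-to-last axis that this change makes the default:

```python
import numpy as np
import scoringrules as sr

rng = np.random.default_rng(0)
fcts = rng.normal(size=(100, 50, 3))  # (..., M, D): 100 cases, 50 members, 3 variables
obs = rng.normal(size=(100, 3))

vs_half = sr.variogram_score(fcts, obs, p=0.5)  # exponent p = 0.5
vs_one = sr.variogram_score(fcts, obs, p=1.0)   # exponent p = 1
```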

+ +## Ensemble forecasts + +While multivariate probabilistic forecasts could belong to a parametric family of +distributions, such as a multivariate normal distribution, it is more common in practice +that these forecasts are ensemble forecasts; that is, the forecast comprises a +predictive sample $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$, +where each ensemble member $\mathbf{x}_{m} = (x_{m, 1}, \dots, x_{m, d}) \in \mathbb{R}^{d}$ for +$m = 1, \dots, M$. + +In this case, the expectation in the definition of the variogram score can be replaced by +a sample mean over the ensemble members, yielding the following representation of the variogram +score when evaluating an ensemble forecast $F_{ens}$ with $M$ members: + +$$\text{VS}_{p}(F_{ens}, \mathbf{y})= \sum_{i=1}^{d} \sum_{j=1}^{d} \left( \frac{1}{M} \sum_{m=1}^{M} | x_{m,i} - x_{m,j} |^{p} - | y_{i} - y_{j} |^{p} \right)^{2}. $$ + +
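As with the energy score, this estimator is straightforward to express with array operations. The sketch below implements the formula above for a single forecast case in plain NumPy; it is illustrative and not the `scoringrules` implementation.

```python
import numpy as np

def variogram_score_ens(x: np.ndarray, y: np.ndarray, p: float = 0.5) -> float:
    """Ensemble variogram score for one case: x has shape (M, D), y has shape (D,)."""
    vx = np.abs(x[:, :, None] - x[:, None, :]) ** p  # (M, D, D) pairwise member variograms
    vy = np.abs(y[:, None] - y[None, :]) ** p         # (D, D) observed variogram
    return float(np.sum((vx.mean(axis=0) - vy) ** 2))

rng = np.random.default_rng(0)
x = rng.normal(size=(50, 3))  # 50 ensemble members, 3 variables
y = rng.normal(size=3)
print(variogram_score_ens(x, y))
```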

+ +## Weighted variogram scores + +It is often the case that certain outcomes are of more interest than others when evaluating +forecast performance. These outcomes can be emphasised by employing weighted scoring rules. +Weighted scoring rules typically introduce a weight function into conventional scoring rules, +and users can choose the weight function depending on what outcomes they want to emphasise. +Allen et al. (2022)[@allen2022evaluating] introduced three weighted versions of the variogram score. +These are all available in `scoringrules`. + +Firstly, the outcome-weighted variogram score (see also Holzmann and Klar (2017)[@holzmann2017focusing]) +is defined as + +$$\text{owVS}_{p}(F, \mathbf{y}; w) = \frac{1}{\bar{w}} \mathbb{E} [ \rho_{p}(\mathbf{X}, \mathbf{y}) w(\mathbf{X}) w(\mathbf{y}) ] - \frac{1}{2 \bar{w}^{2}} \mathbb{E} [ \rho_{p}(\mathbf{X}, \mathbf{X}^{\prime}) w(\mathbf{X}) w(\mathbf{X}^{\prime}) w(\mathbf{y}) ], $$ + +where + +$$ \rho_{p}(\mathbf{x}, \mathbf{z}) = \sum_{i=1}^{d} \sum_{j=1}^{d} \left( |x_{i} - x_{j}|^{p} - |z_{i} - z_{j}|^{p} \right)^{2}, $$ + +for $\mathbf{x} = (x_{1}, \dots, x_{d}) \in \mathbb{R}^{d}$ and $\mathbf{z} = (z_{1}, \dots, z_{d}) \in \mathbb{R}^{d}$. + +Here, $w : \mathbb{R}^{d} \to [0, \infty)$ is the non-negative weight function used to +target particular multivariate outcomes, and $\bar{w} = \mathbb{E}[w(\mathbf{X})]$. +As before, $\mathbf{X}, \mathbf{X}^{\prime} \sim F$ are independent. + +Secondly, Allen et al. (2022) introduced the threshold-weighted variogram score as + +$$\text{twVS}_{p}(F, \mathbf{y}; v)= \sum_{i=1}^{d} \sum_{j=1}^{d} \left( \mathbb{E} | v(\mathbf{X})_{i} - v(\mathbf{X})_{j} |^{p} - | v(\mathbf{y})_{i} - v(\mathbf{y})_{j} |^{p} \right)^{2}, $$ + +where $v : \mathbb{R}^{d} \to \mathbb{R}^{d}$ is a so-called chaining function, so that +$v(\mathbf{X}) = (v(\mathbf{X})_{1}, \dots, v(\mathbf{X})_{d}) \in \mathbb{R}^{d}$. +The threshold-weighted variogram score transforms the forecasts and observations according +to the chaining function $v$, prior to calculating the unweighted variogram score. Choosing +a chaining function is generally more difficult than choosing a weight function when +emphasising particular outcomes. + +As an alternative, the vertically re-scaled variogram score is defined as + +$$\text{vrVS}_{p}(F, \mathbf{y}; w, \mathbf{x}_{0}) = \mathbb{E} [ \rho_{p}(\mathbf{X}, \mathbf{y}) w(\mathbf{X}) w(\mathbf{y}) ] - \frac{1}{2} \mathbb{E} [ \rho_{p}(\mathbf{X}, \mathbf{X}^{\prime}) w(\mathbf{X}) w(\mathbf{X}^{\prime}) ] + \left( \mathbb{E} [ \rho_{p} ( \mathbf{X}, \mathbf{x}_{0} ) w(\mathbf{X}) ] - \rho_{p} ( \mathbf{y}, \mathbf{x}_{0}) w(\mathbf{y}) \right) \left(\mathbb{E}[w(\mathbf{X})] - w(\mathbf{y}) \right), $$ + +where $w$ and $\rho_{p}$ are as defined above, and $\mathbf{x}_{0} \in \mathbb{R}^{d}$. +Typically, $\mathbf{x}_{0}$ is chosen to be the zero vector. + +Each of these weighted variogram scores targets particular outcomes in a different way. +Further details regarding the differences between these scoring rules, as well as choices +for the weight and chaining functions, can be found in Allen et al. (2022). The weighted +variogram scores can easily be computed for ensemble forecasts by +replacing the expectations with sample means over the ensemble members. + +
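These scores are exposed in this version as `scoringrules.owvariogram_score`, `scoringrules.twvariogram_score`, and `scoringrules.vrvariogram_score`. A usage sketch mirroring the weighted energy score example, with the same illustrative weight and chaining functions; note that with the signatures introduced here the order $p$ is passed positionally before the weight or chaining function.

```python
import numpy as np
import scoringrules as sr

rng = np.random.default_rng(0)
fcts = rng.normal(size=(100, 50, 3))  # (..., M, D)
obs = rng.normal(size=(100, 3))
t = 1.0

# Smooth weight attached to each D-dimensional vector (illustrative choice).
w_func = lambda x, t: float(np.prod(1.0 / (1.0 + np.exp(-(x - t)))))
owvs = sr.owvariogram_score(fcts, obs, 0.5, w_func, (t,))
vrvs = sr.vrvariogram_score(fcts, obs, 0.5, w_func, (t,))

# Chaining function applied directly to the forecast and observation arrays.
v_func = lambda x, t: np.maximum(x, t)
twvs = sr.twvariogram_score(fcts, obs, 0.5, v_func, (t,))
```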

\ No newline at end of file diff --git a/docs/api/variogram/weighted.md b/docs/api/variogram/weighted.md new file mode 100644 index 0000000..16f4202 --- /dev/null +++ b/docs/api/variogram/weighted.md @@ -0,0 +1,3 @@ +::: scoringrules.owvariogram_score +::: scoringrules.twvariogram_score +::: scoringrules.vrvariogram_score \ No newline at end of file diff --git a/docs/refs.bib b/docs/refs.bib index 263b4a5..39c842f 100644 --- a/docs/refs.bib +++ b/docs/refs.bib @@ -1,5 +1,30 @@ +@article{allen2022evaluating, + title={Evaluating forecasts for high-impact events using transformed kernel scores}, + author={Allen, Sam and Ginsbourger, David and Ziegel, Johanna}, + journal={arXiv preprint arXiv:2202.12732}, + year={2022} +} + +@article{gneiting_strictly_2007, + title = {Strictly {Proper} {Scoring} {Rules}, {Prediction}, and {Estimation}}, + url = {https://doi.org/10.1198/016214506000001437}, + doi = {10.1198/016214506000001437}, + abstract = {Scoring rules assess the quality of probabilistic forecasts, by assigning a numerical score based on the predictive distribution and on the event or value that materializes. A scoring rule is proper if the forecaster maximizes the expected score for an observation drawn from the distributionF if he or she issues the probabilistic forecast F, rather than G ≠ F. It is strictly proper if the maximum is unique. In prediction problems, proper scoring rules encourage the forecaster to make careful assessments and to be honest. In estimation problems, strictly proper scoring rules provide attractive loss and utility functions that can be tailored to the problem at hand. This article reviews and develops the theory of proper scoring rules on general probability spaces, and proposes and discusses examples thereof. Proper scoring rules derive from convex functions and relate to information measures, entropy functions, and Bregman divergences. In the case of categorical variables, we prove a rigorous version of the Savage representation. Examples of scoring rules for probabilistic forecasts in the form of predictive densities include the logarithmic, spherical, pseudospherical, and quadratic scores. The continuous ranked probability score applies to probabilistic forecasts that take the form of predictive cumulative distribution functions. It generalizes the absolute error and forms a special case of a new and very general type of score, the energy score. Like many other scoring rules, the energy score admits a kernel representation in terms of negative definite functions, with links to inequalities of Hoeffding type, in both univariate and multivariate settings. Proper scoring rules for quantile and interval forecasts are also discussed. We relate proper scoring rules to Bayes factors and to cross-validation, and propose a novel form of cross-validation known as random-fold cross-validation. A case study on probabilistic weather forecasts in the North American Pacific Northwest illustrates the importance of propriety. 
We note optimum score approaches to point and quantile estimation, and propose the intuitively appealing interval score as a utility function in interval estimation that addresses width as well as coverage.}, + journal = {Journal of the American Statistical Association}, + author = {Gneiting, Tilmann and Raftery, Adrian E}, + year = {2007}, +} +@article{holzmann2017focusing, + author = {Holzmann, Hajo and Klar, Bernhard}, + journal = {The Annals of Applied Statistics}, + pages = {2404--2431}, + publisher = {Institute of Mathematical Statistics}, + title = {Focusing on regions of interest in forecast evaluation}, + volume = {11}, + year = {2017} +} @phdthesis{jordan_facets_2016, author = {Jordan, Alexander}, @@ -13,20 +38,16 @@ @phdthesis{jordan_facets_2016 language = {english} } - - -@article{gneiting_strictly_2007, - title = {Strictly {Proper} {Scoring} {Rules}, {Prediction}, and {Estimation}}, - url = {https://doi.org/10.1198/016214506000001437}, - doi = {10.1198/016214506000001437}, - abstract = {Scoring rules assess the quality of probabilistic forecasts, by assigning a numerical score based on the predictive distribution and on the event or value that materializes. A scoring rule is proper if the forecaster maximizes the expected score for an observation drawn from the distributionF if he or she issues the probabilistic forecast F, rather than G ≠ F. It is strictly proper if the maximum is unique. In prediction problems, proper scoring rules encourage the forecaster to make careful assessments and to be honest. In estimation problems, strictly proper scoring rules provide attractive loss and utility functions that can be tailored to the problem at hand. This article reviews and develops the theory of proper scoring rules on general probability spaces, and proposes and discusses examples thereof. Proper scoring rules derive from convex functions and relate to information measures, entropy functions, and Bregman divergences. In the case of categorical variables, we prove a rigorous version of the Savage representation. Examples of scoring rules for probabilistic forecasts in the form of predictive densities include the logarithmic, spherical, pseudospherical, and quadratic scores. The continuous ranked probability score applies to probabilistic forecasts that take the form of predictive cumulative distribution functions. It generalizes the absolute error and forms a special case of a new and very general type of score, the energy score. Like many other scoring rules, the energy score admits a kernel representation in terms of negative definite functions, with links to inequalities of Hoeffding type, in both univariate and multivariate settings. Proper scoring rules for quantile and interval forecasts are also discussed. We relate proper scoring rules to Bayes factors and to cross-validation, and propose a novel form of cross-validation known as random-fold cross-validation. A case study on probabilistic weather forecasts in the North American Pacific Northwest illustrates the importance of propriety. 
We note optimum score approaches to point and quantile estimation, and propose the intuitively appealing interval score as a utility function in interval estimation that addresses width as well as coverage.}, - journal = {Journal of the American Statistical Association}, - author = {Gneiting, Tilmann and Raftery, Adrian E}, - year = {2007}, +@article{scheuerer_variogram-based_2015, + title = {Variogram-{Based} {Proper} {Scoring} {Rules} for {Probabilistic} {Forecasts} of {Multivariate} {Quantities}}, + url = {https://journals.ametsoc.org/view/journals/mwre/143/4/mwr-d-14-00269.1.xml}, + doi = {10.1175/MWR-D-14-00269.1}, + abstract = {Abstract Proper scoring rules provide a theoretically principled framework for the quantitative assessment of the predictive performance of probabilistic forecasts. While a wide selection of such scoring rules for univariate quantities exists, there are only few scoring rules for multivariate quantities, and many of them require that forecasts are given in the form of a probability density function. The energy score, a multivariate generalization of the continuous ranked probability score, is the only commonly used score that is applicable in the important case of ensemble forecasts, where the multivariate predictive distribution is represented by a finite sample. Unfortunately, its ability to detect incorrectly specified correlations between the components of the multivariate quantity is somewhat limited. In this paper the authors present an alternative class of proper scoring rules based on the geostatistical concept of variograms. The sensitivity of these variogram-based scoring rules to incorrectly predicted means, variances, and correlations is studied in a number of examples with simulated observations and forecasts; they are shown to be distinctly more discriminative with respect to the correlation structure. This conclusion is confirmed in a case study with postprocessed wind speed forecasts at five wind park locations in Colorado.}, + journal = {Monthly Weather Review}, + author = {Scheuerer, Michael and Hamill, Thomas M.}, + year = {2015}, } - - @article{taillardat_calibrated_2016, title = {Calibrated {Ensemble} {Forecasts} {Using} {Quantile} {Regression} {Forests} and {Ensemble} {Model} {Output} {Statistics}}, url = {http://journals.ametsoc.org/doi/10.1175/MWR-D-15-0260.1}, @@ -39,9 +60,6 @@ @article{taillardat_calibrated_2016 year = {2016}, } - - - @article{zamo_estimation_2018, title = {Estimation of the {Continuous} {Ranked} {Probability} {Score} with {Limited} {Information} and {Applications} to {Ensemble} {Weather} {Forecasts}}, url = {https://doi.org/10.1007/s11004-017-9709-7}, @@ -50,15 +68,4 @@ @article{zamo_estimation_2018 journal = {Mathematical Geosciences}, author = {Zamo, Michaël and Naveau, Philippe}, year = {2018}, -} - - -@article{scheuerer_variogram-based_2015, - title = {Variogram-{Based} {Proper} {Scoring} {Rules} for {Probabilistic} {Forecasts} of {Multivariate} {Quantities}}, - url = {https://journals.ametsoc.org/view/journals/mwre/143/4/mwr-d-14-00269.1.xml}, - doi = {10.1175/MWR-D-14-00269.1}, - abstract = {Abstract Proper scoring rules provide a theoretically principled framework for the quantitative assessment of the predictive performance of probabilistic forecasts. While a wide selection of such scoring rules for univariate quantities exists, there are only few scoring rules for multivariate quantities, and many of them require that forecasts are given in the form of a probability density function. 
The energy score, a multivariate generalization of the continuous ranked probability score, is the only commonly used score that is applicable in the important case of ensemble forecasts, where the multivariate predictive distribution is represented by a finite sample. Unfortunately, its ability to detect incorrectly specified correlations between the components of the multivariate quantity is somewhat limited. In this paper the authors present an alternative class of proper scoring rules based on the geostatistical concept of variograms. The sensitivity of these variogram-based scoring rules to incorrectly predicted means, variances, and correlations is studied in a number of examples with simulated observations and forecasts; they are shown to be distinctly more discriminative with respect to the correlation structure. This conclusion is confirmed in a case study with postprocessed wind speed forecasts at five wind park locations in Colorado.}, - journal = {Monthly Weather Review}, - author = {Scheuerer, Michael and Hamill, Thomas M.}, - year = {2015}, -} +} \ No newline at end of file diff --git a/mkdocs.yaml b/mkdocs.yaml index a8f3c16..35329c8 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -11,9 +11,16 @@ nav: - api/crps/index.md - Ensemble-based estimation: api/crps/ensemble.md - Analytical formulations: api/crps/analytical.md + - Weighted versions: api/crps/weighted.md - Logarithmic Score: api/logarithmic.md - - Energy Score: api/energy.md - - Variogram Score: api/variogram.md + - Energy Score: + - api/energy/index.md + - Ensemble-based estimation: api/energy/ensemble.md + - Weighted versions: api/energy/weighted.md + - Variogram Score: + - api/variogram/index.md + - Ensemble-based estimation: api/variogram/ensemble.md + - Weighted versions: api/variogram/weighted.md theme: diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py index 5ea2de6..1b3bd5f 100644 --- a/scoringrules/__init__.py +++ b/scoringrules/__init__.py @@ -1,10 +1,13 @@ from importlib.metadata import version from scoringrules._brier import brier_score -from scoringrules._crps import crps_ensemble, crps_lognormal, crps_normal +from scoringrules._crps import crps_ensemble, crps_lognormal, crps_normal, crps_logistic +from scoringrules._wcrps import owcrps_ensemble, twcrps_ensemble, vrcrps_ensemble from scoringrules._energy import energy_score +from scoringrules._wenergy import owenergy_score, twenergy_score, vrenergy_score from scoringrules._logs import logs_normal from scoringrules._variogram import variogram_score +from scoringrules._wvariogram import owvariogram_score, twvariogram_score, vrvariogram_score from scoringrules.backend import register_backend __version__ = version("scoringrules") @@ -15,8 +18,18 @@ "crps_ensemble", "crps_normal", "crps_lognormal", + "crps_logistic", + "owcrps_ensemble", + "twcrps_ensemble", + "vrcrps_ensemble", "logs_normal", "brier_score", "energy_score", + "owenergy_score", + "twenergy_score", + "vrenergy_score", "variogram_score", + "owvariogram_score", + "twvariogram_score", + "vrvariogram_score", ] diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py index ee4e494..d6cd964 100644 --- a/scoringrules/_crps.py +++ b/scoringrules/_crps.py @@ -138,8 +138,49 @@ def crps_lognormal( return srb[backend].crps_lognormal(mulog, sigmalog, observation) +def crps_logistic( + mu: ArrayLike, + sigma: ArrayLike, + observation: ArrayLike, + /, + *, + backend: str = "numpy", +) -> ArrayLike: + r"""Compute the closed form of the CRPS for the logistic distribution. 
+ + It is based on the following formulation from + [Jordan et al. (2019)](https://www.jstatsoft.org/article/view/v090i12): + + $$ \mathrm{CRPS}(\mathcal{L}(\mu, \sigma), y) = \sigma \left\{ \omega - 2 \log F(\omega) - 1 \right\}, $$ + + where $F(\omega)$ is the CDF of the standard logistic distribution at the + normalized prediction error $\omega = \frac{y - \mu}{\sigma}$. + + Parameters + ---------- + mu: ArrayLike + Location parameter of the forecast logistic distribution. + sigma: ArrayLike + Scale parameter of the forecast logistic distribution. + observation: ArrayLike + Observed values. + + Returns + ------- + crps: array_like + The CRPS for the Logistic(mu, sigma) forecasts given the observations. + + Examples + -------- + >>> from scoringrules import crps + >>> crps.logistic(0.1, 0.4, 0.0) + """ + return srb[backend].crps_logistic(mu, sigma, observation) + + __all__ = [ "crps_ensemble", "crps_normal", "crps_lognormal", + "crps_logistic", ] diff --git a/scoringrules/_variogram.py b/scoringrules/_variogram.py index 2c2c6d3..c56fa22 100644 --- a/scoringrules/_variogram.py +++ b/scoringrules/_variogram.py @@ -10,8 +10,8 @@ def variogram_score( forecasts: Array, observations: Array, p: float = 1.0, - m_axis: int = -1, - v_axis: int = -2, + m_axis: int = -2, + v_axis: int = -1, backend="numba", ) -> Array: r"""Compute the Variogram Score for a finite multivariate ensemble. diff --git a/scoringrules/_wcrps.py b/scoringrules/_wcrps.py new file mode 100644 index 0000000..6e9c5c0 --- /dev/null +++ b/scoringrules/_wcrps.py @@ -0,0 +1,187 @@ +import numpy as np +import typing as tp + +from scoringrules.backend import backends as srb +from scoringrules.backend.arrayapi import Array + +ArrayLike = tp.TypeVar("ArrayLike", Array, float) + + +def twcrps_ensemble( + forecasts: Array, + observations: ArrayLike, + /, + v_func: tp.Callable = lambda x, *args: x, + v_funcargs: tuple = (), + *, + axis: int = -1, + backend: str = "numba", +) -> Array: + r"""Estimate the Threshold-Weighted Continuous Ranked Probability Score (twCRPS) for a finite ensemble. + + Computation is performed using the ensemble representation of the twCRPS in + [Allen et al. (2022)](https://arxiv.org/abs/2202.12732): + + $$ \mathrm{twCRPS}(F_{ens}, y) = \frac{1}{M} \sum_{m = 1}^{M} |v(x_{m}) - v(y)| - \frac{1}{2 M^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} |v(x_{m}) - v(x_{j})|,$$ + + where $F_{ens}(x) = \sum_{m=1}^{M} 1 \{ x_{m} \leq x \}/M$ is the empirical + distribution function associated with an ensemble forecast $x_{1}, \dots, x_{M}$ with + $M$ members, and $v$ is the chaining function used to target particular outcomes. + + Parameters + ---------- + forecasts: ArrayLike + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the last axis. + observations: ArrayLike + The observed values. + v_func: tp.Callable + Chaining function used to emphasise particular outcomes. + v_funcargs: tuple + Additional arguments to the chaining function. + axis: int + The axis corresponding to the ensemble. Default is the last axis. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + twcrps: ArrayLike + The twCRPS between the forecast ensemble and obs for the chosen chaining function. 
+ + Examples + -------- + >>> from scoringrules import crps + >>> twcrps.ensemble(pred, obs) + """ + return srb[backend].twcrps_ensemble( + forecasts, + observations, + v_func, + v_funcargs, + axis=axis, + ) + + +def owcrps_ensemble( + forecasts: Array, + observations: ArrayLike, + /, + w_func: tp.Callable = lambda x, *args: np.ones_like(x), + w_funcargs: tuple = (), + *, + axis: int = -1, + backend: str = "numba", +) -> Array: + r"""Estimate the Outcome-Weighted Continuous Ranked Probability Score (owCRPS) for a finite ensemble. + + Computation is performed using the ensemble representation of the owCRPS in + [Allen et al. (2022)](https://arxiv.org/abs/2202.12732): + + $$ \mathrm{owCRPS}(F_{ens}, y) = \frac{1}{M \bar{w}} \sum_{m = 1}^{M} |x_{m} - y|w(x_{m})w(y) - \frac{1}{2 M^{2} \bar{w}^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} |x_{m} - x_{j}|w(x_{m})w(x_{j})w(y),$$ + + where $F_{ens}(x) = \sum_{m=1}^{M} 1\{ x_{m} \leq x \}/M$ is the empirical + distribution function associated with an ensemble forecast $x_{1}, \dots, x_{M}$ with + $M$ members, $w$ is the chosen weight function, and $\bar{w} = \sum_{m=1}^{M}w(x_{m})/M$. + + Parameters + ---------- + forecasts: ArrayLike + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the last axis. + observations: ArrayLike + The observed values. + w_func: tp.Callable + Weight function used to emphasise particular outcomes. + w_funcargs: tuple + Additional arguments to the weight function. + axis: int + The axis corresponding to the ensemble. Default is the last axis. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + owcrps: ArrayLike + The owCRPS between the forecast ensemble and obs for the chosen weight function. + + Examples + -------- + >>> from scoringrules import crps + >>> owcrps.ensemble(pred, obs) + """ + return srb[backend].owcrps_ensemble( + forecasts, + observations, + w_func, + w_funcargs, + axis=axis, + ) + + +def vrcrps_ensemble( + forecasts: Array, + observations: ArrayLike, + /, + w_func: tp.Callable = lambda x, *args: np.ones_like(x), + w_funcargs: tuple = (), + *, + axis: int = -1, + backend: str = "numba", +) -> Array: + r"""Estimate the Vertically Re-scaled Continuous Ranked Probability Score (vrCRPS) for a finite ensemble. + + Computation is performed using the ensemble representation of the vrCRPS in + [Allen et al. (2022)](https://arxiv.org/abs/2202.12732): + + \[ + \begin{split} + \mathrm{vrCRPS}(F_{ens}, y) = & \frac{1}{M} \sum_{m = 1}^{M} |x_{m} - y|w(x_{m})w(y) - \frac{1}{2 M^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} |x_{m} - x_{j}|w(x_{m})w(x_{j}) \\ + & + \left( \frac{1}{M} \sum_{m = 1}^{M} |x_{m}| w(x_{m}) - |y| w(y) \right) \left( \frac{1}{M} \sum_{m = 1}^{M} w(x_{m}) - w(y) \right), + \end{split} + \] + + where $F_{ens}(x) = \sum_{m=1}^{M} 1 \{ x_{m} \leq x \}/M$ is the empirical + distribution function associated with an ensemble forecast $x_{1}, \dots, x_{M}$ with + $M$ members, $w$ is the chosen weight function, and $\bar{w} = \sum_{m=1}^{M}w(x_{m})/M$. + + Parameters + ---------- + forecasts: ArrayLike + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the last axis. + observations: ArrayLike + The observed values. + w_func: tp.Callable + Weight function used to emphasise particular outcomes. + w_funcargs: tuple + Additional arguments to the weight function. + axis: int + The axis corresponding to the ensemble. Default is the last axis. 
+ backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + vrcrps: ArrayLike + The vrCRPS between the forecast ensemble and obs for the chosen weight function. + + Examples + -------- + >>> from scoringrules import crps + >>> vrcrps.ensemble(pred, obs) + """ + return srb[backend].vrcrps_ensemble( + forecasts, + observations, + w_func, + w_funcargs, + axis=axis, + ) + + +__all__ = [ + "twcrps_ensemble", + "owcrps_ensemble", + "vrcrps_ensemble", +] \ No newline at end of file diff --git a/scoringrules/_wenergy.py b/scoringrules/_wenergy.py new file mode 100644 index 0000000..59b1164 --- /dev/null +++ b/scoringrules/_wenergy.py @@ -0,0 +1,182 @@ +import typing as tp + +from scoringrules.backend import backends as srb +from scoringrules.backend.arrayapi import Array + +ArrayLike = tp.TypeVar("ArrayLike", Array, float) + + +def owenergy_score( + forecasts: Array, + observations: Array, + /, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + *, + m_axis: int = -2, + v_axis: int = -1, + backend="numba", +) -> Array: + r"""Compute the Outcome-Weighted Energy Score (owES) for a finite multivariate ensemble. + + Computation is performed using the ensemble representation of the owES in + [Allen et al. (2022)](https://arxiv.org/abs/2202.12732): + + \[ + \mathrm{owES}(F_{ens}, \mathbf{y}) = \frac{1}{M \bar{w}} \sum_{m = 1}^{M} \| \mathbf{x}_{m} - \mathbf{y} \| w(\mathbf{x}_{m}) w(\mathbf{y}) - \frac{1}{2 M^{2} \bar{w}^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} \| \mathbf{x}_{m} - \mathbf{x}_{j} \| w(\mathbf{x}_{m}) w(\mathbf{x}_{j}) w(\mathbf{y}), + \] + + where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with + $M$ members, $\| \cdotp \|$ is the Euclidean distance, $w$ is the chosen weight function, + and $\bar{w} = \sum_{m=1}^{M}w(\mathbf{x}_{m})/M$. + + + Parameters + ---------- + forecasts: ArrayLike of shape (..., M, D) + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + observations: ArrayLike of shape (...,D) + The observed values, where the variables dimension is by default the last axis. + w_func: tp.Callable + Weight function used to emphasise particular outcomes. + w_funcargs: tuple + Additional arguments to the weight function. + m_axis: int + The axis corresponding to the ensemble dimension. Defaults to -2. + v_axis: int or tuple(int) + The axis corresponding to the variables dimension. Defaults to -1. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + owenergy_score: ArrayLike of shape (...) + The computed Outcome-Weighted Energy Score. + """ + return srb[backend].owenergy_score( + forecasts, + observations, + w_func, + w_funcargs, + m_axis=m_axis, + v_axis=v_axis + ) + + +def twenergy_score( + forecasts: Array, + observations: Array, + /, + v_func: tp.Callable = lambda x, *args: x, + v_funcargs: tuple = (), + *, + m_axis: int = -2, + v_axis: int = -1, + backend="numba", +) -> Array: + r"""Compute the Threshold-Weighted Energy Score (twES) for a finite multivariate ensemble. + + Computation is performed using the ensemble representation of the twES in + [Allen et al. 
(2022)](https://arxiv.org/abs/2202.12732): + + \[ + \mathrm{twES}(F_{ens}, \mathbf{y}) = \frac{1}{M} \sum_{m = 1}^{M} \| v(\mathbf{x}_{m}) - v(\mathbf{y}) \| - \frac{1}{2 M^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} \| v(\mathbf{x}_{m}) - v(\mathbf{x}_{j}) \|, + \] + + where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with + $M$ members, $\| \cdotp \|$ is the Euclidean distance, and $v$ is the chaining function + used to target particular outcomes. + + + Parameters + ---------- + forecasts: ArrayLike of shape (..., M, D) + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + observations: ArrayLike of shape (...,D) + The observed values, where the variables dimension is by default the last axis. + v_func: tp.Callable + Chaining function used to emphasise particular outcomes. + v_funcargs: tuple + Additional arguments to the chaining function. + m_axis: int + The axis corresponding to the ensemble dimension. Defaults to -2. + v_axis: int or tuple(int) + The axis corresponding to the variables dimension. Defaults to -1. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + twenergy_score: ArrayLike of shape (...) + The computed Threshold-Weighted Energy Score. + """ + return srb[backend].twenergy_score( + forecasts, + observations, + v_func, + v_funcargs, + m_axis=m_axis, + v_axis=v_axis + ) + + +def vrenergy_score( + forecasts: Array, + observations: Array, + /, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + *, + m_axis: int = -2, + v_axis: int = -1, + backend="numba", +) -> Array: + r"""Compute the Vertically Re-scaled Energy Score (vrES) for a finite multivariate ensemble. + + Computation is performed using the ensemble representation of the twES in + [Allen et al. (2022)](https://arxiv.org/abs/2202.12732): + + \[ + \begin{split} + \mathrm{vrES}(F_{ens}, \mathbf{y}) = & \frac{1}{M} \sum_{m = 1}^{M} \| \mathbf{x}_{m} - \mathbf{y} \| w(\mathbf{x}_{m}) w(\mathbf{y}) - \frac{1}{2 M^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} \| \mathbf{x}_{m} - \mathbf{x}_{j} \| w(\mathbf{x}_{m}) w(\mathbf{x_{j}}) \\ + & + \left( \frac{1}{M} \sum_{m = 1}^{M} \| \mathbf{x}_{m} \| w(\mathbf{x}_{m}) - \| \mathbf{y} \| w(\mathbf{y}) \right) \left( \frac{1}{M} \sum_{m = 1}^{M} w(\mathbf{x}_{m}) - w(\mathbf{y}) \right), + \end{split} + \] + + where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with + $M$ members, and $v$ is the chaining function used to target particular outcomes. + + + Parameters + ---------- + forecasts: ArrayLike of shape (..., M, D) + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + observations: ArrayLike of shape (...,D) + The observed values, where the variables dimension is by default the last axis. + w_func: tp.Callable + Weight function used to emphasise particular outcomes. + w_funcargs: tuple + Additional arguments to the weight function. + m_axis: int + The axis corresponding to the ensemble dimension. Defaults to -2. + v_axis: int or tuple(int) + The axis corresponding to the variables dimension. Defaults to -1. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + vrenergy_score: ArrayLike of shape (...) 
+ The computed Vertically Re-scaled Energy Score. + """ + return srb[backend].vrenergy_score( + forecasts, + observations, + w_func, + w_funcargs, + m_axis=m_axis, + v_axis=v_axis + ) \ No newline at end of file diff --git a/scoringrules/_wvariogram.py b/scoringrules/_wvariogram.py new file mode 100644 index 0000000..4bbecb7 --- /dev/null +++ b/scoringrules/_wvariogram.py @@ -0,0 +1,193 @@ +import typing as tp + +from scoringrules.backend import backends as srb +from scoringrules.backend.arrayapi import Array + +ArrayLike = tp.TypeVar("ArrayLike", Array, float) + + +def owvariogram_score( + forecasts: Array, + observations: Array, + p: float = 1.0, + /, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + *, + m_axis: int = -2, + v_axis: int = -1, + backend="numba", +) -> Array: + r"""Compute the Outcome-Weighted Variogram Score (owVS) for a finite multivariate ensemble. + + Computation is performed using the ensemble representation of the owVS in + [Allen et al. (2022)](https://arxiv.org/abs/2202.12732): + + \[ + \begin{split} + \mathrm{owVS}(F_{ens}, \mathbf{y}) = & \frac{1}{M \bar{w}} \sum_{m=1}^{M} \sum_{i,j=1}^{D}(|y_{i} - y_{j}|^{p} - |x_{m,i} - x_{m,j}|^{p})^{2} w(\mathbf{x}_{m}) w(\mathbf{y}) \\ + & - \frac{1}{2 M^{2} \bar{w}^{2}} \sum_{k,m=1}^{M} \sum_{i,j=1}^{D} \left( |x_{k,i} - x_{k,j}|^{p} - |x_{m,i} - x_{m,j}|^{p} \right)^{2} w(\mathbf{x}_{k}) w(\mathbf{x}_{m}) w(\mathbf{y}), + \end{split} + \] + + where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with + $M$ members, $w$ is the chosen weight function, and $\bar{w} = \sum_{m=1}^{M}w(\mathbf{x}_{m})/M$. + + Parameters + ---------- + forecasts: Array + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + observations: Array + The observed values, where the variables dimension is by default the last axis. + p: float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. + w_func: tp.Callable + Weight function used to emphasise particular outcomes. + w_funcargs: tuple + Additional arguments to the weight function. + m_axis: int + The axis corresponding to the ensemble dimension. Defaults to -2. + v_axis: int + The axis corresponding to the variables dimension. Defaults to -1. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + owvariogram_score: ArrayLike of shape (...) + The computed Outcome-Weighted Variogram Score. + """ + return srb[backend].owvariogram_score( + forecasts, + observations, + p, + w_func, + w_funcargs, + m_axis=m_axis, + v_axis=v_axis + ) + + +def twvariogram_score( + forecasts: Array, + observations: Array, + p: float = 1.0, + /, + v_func: tp.Callable = lambda x, *args: x, + v_funcargs: tuple = (), + *, + m_axis: int = -2, + v_axis: int = -1, + backend="numba", +) -> Array: + r"""Compute the Threshold-Weighted Variogram Score (twVS) for a finite multivariate ensemble. + + Computation is performed using the ensemble representation of the twVS in + [Allen et al. 
(2022)](https://arxiv.org/abs/2202.12732): + + \[ + \mathrm{twVS}(F_{ens}, \mathbf{y}) = \sum_{i,j=1}^{D}(|v(\mathbf{y})_i - v(\mathbf{y})_{j}|^{p} - \frac{1}{M} \sum_{m=1}^{M}|v(\mathbf{x}_{m})_{i} - v(\mathbf{x}_{m})_{j}|^{p})^{2}, + \] + + where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with + $M$ members, and $v$ is the chaining function used to target particular outcomes. + + Parameters + ---------- + forecasts: Array + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + observations: Array + The observed values, where the variables dimension is by default the last axis. + p: float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. + v_func: tp.Callable + Chaining function used to emphasise particular outcomes. + v_funcargs: tuple + Additional arguments to the chaining function. + m_axis: int + The axis corresponding to the ensemble dimension. Defaults to -2. + v_axis: int + The axis corresponding to the variables dimension. Defaults to -1. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + twvariogram_score: ArrayLike of shape (...) + The computed Threshold-Weighted Variogram Score. + """ + return srb[backend].twvariogram_score( + forecasts, + observations, + p, + v_func, + v_funcargs, + m_axis=m_axis, + v_axis=v_axis + ) + + +def vrvariogram_score( + forecasts: Array, + observations: Array, + p: float = 1.0, + /, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + *, + m_axis: int = -2, + v_axis: int = -1, + backend="numba", +) -> Array: + r"""Compute the Vertically Re-scaled Variogram Score (vrVS) for a finite multivariate ensemble. + + Computation is performed using the ensemble representation of the vrVS in + [Allen et al. (2022)](https://arxiv.org/abs/2202.12732): + + \[ + \begin{split} + \mathrm{vrVS}(F_{ens}, \mathbf{y}) = & \frac{1}{M} \sum_{m=1}^{M} \sum_{i,j=1}^{D}(|y_{i} - y_{j}|^{p} - |x_{m,i} - x_{m,j}|^{p})^{2} w(\mathbf{x}_{m}) w(\mathbf{y}) \\ + & - \frac{1}{2 M^{2}} \sum_{k,m=1}^{M} \sum_{i,j=1}^{D} \left( |x_{k,i} - x_{k,j}|^{p} - |x_{m,i} - x_{m,j}|^{p} \right)^{2} w(\mathbf{x}_{k}) w(\mathbf{x}_{m})) \\ + & + \left( \frac{1}{M} \sum_{m = 1}^{M} \sum_{i,j=1}^{D}(|x_{m,i} - x_{m,j}|^{p} w(\mathbf{x}_{m}) - \sum_{i,j=1}^{D}(|y_{i} - y_{j}|^{p} w(\mathbf{y}) \right) \left( \frac{1}{M} \sum_{m = 1}^{M} w(\mathbf{x}_{m}) - w(\mathbf{y}) \right), + \end{split} + \] + + where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with + $M$ members, $w$ is the chosen weight function, and $\bar{w} = \sum_{m=1}^{M}w(\mathbf{x}_{m})/M$. + + Parameters + ---------- + forecasts: Array + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + observations: Array + The observed values, where the variables dimension is by default the last axis. + p: float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. + w_func: tp.Callable + Weight function used to emphasise particular outcomes. + w_funcargs: tuple + Additional arguments to the weight function. + m_axis: int + The axis corresponding to the ensemble dimension. Defaults to -2. + v_axis: int + The axis corresponding to the variables dimension. Defaults to -1. 
+ backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + vrvariogram_score: ArrayLike of shape (...) + The computed Vertically Re-scaled Variogram Score. + """ + return srb[backend].vrvariogram_score( + forecasts, + observations, + p, + w_func, + w_funcargs, + m_axis=m_axis, + v_axis=v_axis + ) \ No newline at end of file diff --git a/scoringrules/backend/array.py b/scoringrules/backend/array.py index 67f1578..5e590e4 100644 --- a/scoringrules/backend/array.py +++ b/scoringrules/backend/array.py @@ -1,7 +1,7 @@ from types import ModuleType -from typing import TypeVar import numpy as np +import typing as tp from .arrayapi import Array, ArrayAPIProtocol from .base import AbstractBackend @@ -9,7 +9,7 @@ INV_SQRT_PI = 1 / np.sqrt(np.pi) EPSILON = 1e-6 -ArrayLike = TypeVar("ArrayLike", Array, float) +ArrayLike = tp.TypeVar("ArrayLike", Array, float) class ArrayAPIBackend(AbstractBackend): @@ -89,6 +89,74 @@ def crps_lognormal( - 1 ) + def crps_logistic(self, mu: ArrayLike, sigma: ArrayLike, obs: ArrayLike) -> Array: + """Compute the CRPS for the normal distribution.""" + ω = (obs - mu) / sigma + return sigma * (ω - 2 * self.np.log(self._logis_cdf(ω)) - 1) + + def owcrps_ensemble( + self, + fcts: Array, + obs: ArrayLike, + w_func: tp.Callable = lambda x, *args: np.ones_like(x), + w_funcargs: tuple = (), + axis: int = -1, + ) -> Array: + """Compute the Outcome-Weighted CRPS for a finite ensemble.""" + obs: Array = self.np.asarray(obs) + + if axis != -1: + fcts = self.np.moveaxis(fcts, axis, -1) + + fw = w_func(fcts, *w_funcargs) + ow = w_func(obs, *w_funcargs) + + out = self._owcrps_ensemble_nrg(fcts, obs, fw, ow) + + return out + + def twcrps_ensemble( + self, + fcts: Array, + obs: ArrayLike, + v_func: tp.Callable = lambda x, *args: x, + v_funcargs: tuple = (), + axis: int = -1, + ) -> Array: + """Compute the Threshold-Weighted CRPS for a finite ensemble.""" + obs: Array = self.np.asarray(obs) + + if axis != -1: + fcts = self.np.moveaxis(fcts, axis, -1) + + v_fcts = v_func(fcts, *v_funcargs) + v_obs = v_func(obs, *v_funcargs) + + out = self._crps_ensemble_nrg(v_fcts, v_obs) + + return out + + def vrcrps_ensemble( + self, + fcts: Array, + obs: ArrayLike, + w_func: tp.Callable = lambda x, *args: np.ones_like(x), + w_funcargs: tuple = (), + axis: int = -1, + ) -> Array: + """Compute the Vertically Re-scaled CRPS for a finite ensemble.""" + obs: Array = self.np.asarray(obs) + + if axis != -1: + fcts = self.np.moveaxis(fcts, axis, -1) + + fw = w_func(fcts, *w_funcargs) + ow = w_func(obs, *w_funcargs) + + out = self._vrcrps_ensemble_nrg(fcts, obs, fw, ow) + + return out + def energy_score(self, fcts: Array, obs: Array, m_axis=-2, v_axis=-1) -> Array: """ Compute the energy score based on a finite ensemble. 
@@ -103,17 +171,111 @@ def energy_score(self, fcts: Array, obs: Array, m_axis=-2, v_axis=-1) -> Array: E_1 = self.np.sum(err_norm, axis=m_axis) / M ens_diff = self.np.expand_dims(fcts, axis=m_axis) - self.np.expand_dims( - fcts, axis=m_axis - 1 + fcts, axis=(m_axis - 1) ) - ens_diff = self.np.where(ens_diff == 0.0, self.np.ones_like(ens_diff), ens_diff) spread_norm = self.np.linalg.norm(ens_diff, axis=v_axis, keepdims=True) - spread_norm = self.np.where( - self.np.eye(M, M, dtype=bool), self.np.zeros_like(spread_norm), spread_norm + + E_2 = self.np.sum(spread_norm, axis=(m_axis, m_axis - 1)) / (M**2) + return self.np.squeeze(E_1 - 0.5 * E_2) + + def owenergy_score( + self, + fcts: Array, + obs: Array, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + m_axis=-2, + v_axis=-1, + ) -> Array: + """Compute the outcome-weighted energy score based on a finite ensemble.""" + M: int = fcts.shape[m_axis] + + w_func_wrap = lambda x: w_func(x, *w_funcargs) + fw = self.np.apply_along_axis(w_func_wrap, axis=-1, arr=fcts) + ow = self.np.apply_along_axis(w_func_wrap, axis=-1, arr=obs) + wbar = self.np.mean(fw, axis=-1, keepdims=True) + + err_norm = self.np.linalg.norm( + fcts - self.np.expand_dims(obs, axis=m_axis), axis=v_axis, keepdims=True ) - E_2 = self.np.sum(spread_norm, axis=(m_axis, m_axis - 1), keepdims=True) / ( - M * (M - 1) + E_1 = self.np.sum(err_norm * + self.np.expand_dims(fw, axis=v_axis) * + self.np.expand_dims(ow, axis=(m_axis, v_axis)), + axis=m_axis) / (M * wbar) + + ens_diff = self.np.expand_dims(fcts, axis=m_axis) - self.np.expand_dims( + fcts, axis=(m_axis - 1) ) + spread_norm = self.np.linalg.norm(ens_diff, axis=v_axis, keepdims=True) + + fw_diff = self.np.expand_dims(fw, axis=m_axis) * self.np.expand_dims(fw, axis=v_axis) + E_2 = self.np.sum(spread_norm * + self.np.expand_dims(fw_diff, axis=-1) * + ow[..., None, None, None], + axis=(m_axis, m_axis - 1)) / (M**2 * wbar**2) return self.np.squeeze(E_1 - 0.5 * E_2) + + def twenergy_score( + self, + fcts: Array, + obs: Array, + v_func: tp.Callable = lambda x, *args: x, + v_funcargs: tuple = (), + m_axis=-2, + v_axis=-1 + ) -> Array: + """ + Compute the threshold-weighted energy score based on a finite ensemble. 
+ """ + if m_axis != -2: + fcts = self.np.moveaxis(fcts, m_axis, -2) + v_axis = v_axis - 1 if v_axis != 0 else v_axis + if v_axis != -1: + fcts = self.np.moveaxis(fcts, v_axis, -1) + obs = self.np.moveaxis(obs, v_axis, -1) + + v_fcts = v_func(fcts, *v_funcargs) + v_obs = v_func(obs, *v_funcargs) + return self.energy_score(v_fcts, v_obs) + + def vrenergy_score( + self, + fcts: Array, + obs: Array, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + m_axis=-2, + v_axis=-1, + ) -> Array: + """Compute the vertically re-scaled energy score based on a finite ensemble.""" + M: int = fcts.shape[m_axis] + + w_func_wrap = lambda x: w_func(x, *w_funcargs) + fw = self.np.apply_along_axis(w_func_wrap, axis=-1, arr=fcts) + ow = self.np.apply_along_axis(w_func_wrap, axis=-1, arr=obs) + wbar = self.np.mean(fw, axis=-1) + + err_norm = self.np.linalg.norm( + fcts - self.np.expand_dims(obs, axis=m_axis), axis=v_axis, keepdims=True + ) + E_1 = self.np.sum(err_norm * + self.np.expand_dims(fw, axis=v_axis) * + self.np.expand_dims(ow, axis=(m_axis, v_axis)), + axis=m_axis) / M + + ens_diff = self.np.expand_dims(fcts, axis=m_axis) - self.np.expand_dims( + fcts, axis=(m_axis - 1) + ) + spread_norm = self.np.linalg.norm(ens_diff, axis=v_axis, keepdims=True) + + fw_diff = self.np.expand_dims(fw, axis=m_axis) * self.np.expand_dims(fw, axis=v_axis) + E_2 = self.np.sum(spread_norm * + self.np.expand_dims(fw_diff, axis=-1), + axis=(m_axis, m_axis - 1)) / (M**2) + + rhobar = self.np.mean(self.np.linalg.norm(fcts, axis=v_axis) * fw, axis=(m_axis + 1)) + E_3 = (rhobar - self.np.linalg.norm(obs, axis=-1) * ow) * (wbar - ow) + return self.np.squeeze(E_1 - 0.5 * E_2 + self.np.expand_dims(E_3, axis=-1)) def brier_score(self, fcts: ArrayLike, obs: ArrayLike) -> Array: """Compute the Brier Score for predicted probabilities of events.""" @@ -130,30 +292,153 @@ def variogram_score( forecasts: Array, observations: Array, p: float = 1, - m_axis: int = -1, - v_axis: int = -2, + m_axis: int = -2, + v_axis: int = -1, ) -> Array: """Compute the Variogram Score for a multivariate finite ensemble.""" - if m_axis != -1: - forecasts = self.np.moveaxis(forecasts, m_axis, -1) + if m_axis != -2: + forecasts = self.np.moveaxis(forecasts, m_axis, -2) v_axis = v_axis - 1 if v_axis != 0 else v_axis - if v_axis != -2: - forecasts = self.np.moveaxis(forecasts, v_axis, -2) + if v_axis != -1: + forecasts = self.np.moveaxis(forecasts, v_axis, -1) observations = self.np.moveaxis(observations, v_axis, -1) - M: int = forecasts.shape[-1] + M: int = forecasts.shape[-2] fct_diff = ( self.np.abs( - self.np.expand_dims(forecasts, -2) - self.np.expand_dims(forecasts, -3) + self.np.expand_dims(forecasts, v_axis) - self.np.expand_dims(forecasts, m_axis) ) ** p ) - vfcts = self.np.sum(fct_diff, axis=-1) / M + vfcts = self.np.sum(fct_diff, axis=(m_axis-1)) / M obs_diff = ( self.np.abs(observations[..., None] - observations[..., None, :]) ** p ) out = self.np.sum((obs_diff - vfcts) ** 2, axis=(-2, -1)) return out + + def owvariogram_score( + self, + forecasts: Array, + observations: Array, + p: float = 1, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Outcome-Weighted Variogram Score for a multivariate finite ensemble.""" + if m_axis != -2: + forecasts = self.np.moveaxis(forecasts, m_axis, -2) + v_axis = v_axis - 1 if v_axis != 0 else v_axis + if v_axis != -1: + forecasts = self.np.moveaxis(forecasts, v_axis, -1) + observations = 
self.np.moveaxis(observations, v_axis, -1) + + w_func_wrap = lambda x: w_func(x, *w_funcargs) + fw = self.np.apply_along_axis(w_func_wrap, axis=-1, arr=forecasts) + ow = self.np.apply_along_axis(w_func_wrap, axis=-1, arr=observations) + wbar = self.np.mean(fw, axis=-1) + + M: int = forecasts.shape[-2] + fct_diff = ( + self.np.abs( + self.np.expand_dims(forecasts, v_axis) - self.np.expand_dims(forecasts, m_axis) + ) + ** p + ) + + obs_diff = ( + self.np.abs(observations[..., None] - observations[..., None, :]) ** p + ) + obs_diff = self.np.expand_dims(obs_diff, axis=-3) + e_1 = (fct_diff - obs_diff) ** 2 + e_1 = self.np.sum(e_1, axis=(-2, -1)) + e_1 = self.np.sum(e_1 * fw * self.np.expand_dims(ow, axis=-1), axis=-1) / (M * wbar) + + e_2 = (self.np.expand_dims(fct_diff, axis=-3) - self.np.expand_dims(fct_diff, axis=-4)) ** 2 + e_2 = self.np.sum(e_2, axis=(-2, -1)) + + fw_diff = self.np.expand_dims(fw, axis=m_axis) * self.np.expand_dims(fw, axis=v_axis) + e_2 = self.np.sum(e_2 * fw_diff * self.np.expand_dims(ow, axis=(-2, -1)), + axis=(-2, -1)) / (M**2 * wbar**2) + + out = e_1 - 0.5 * e_2 + return out + + def twvariogram_score( + self, + forecasts: Array, + observations: Array, + p: float = 1, + v_func: tp.Callable = lambda x, *args: x, + v_funcargs: tuple = (), + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Threshold-Weighted Variogram Score for a multivariate finite ensemble.""" + if m_axis != -2: + forecasts = self.np.moveaxis(forecasts, m_axis, -2) + v_axis = v_axis - 1 if v_axis != 0 else v_axis + if v_axis != -1: + forecasts = self.np.moveaxis(forecasts, v_axis, -1) + observations = self.np.moveaxis(observations, v_axis, -1) + + v_fcts = v_func(forecasts, *v_funcargs) + v_obs = v_func(observations, *v_funcargs) + return self.variogram_score(v_fcts, v_obs, p) + + def vrvariogram_score( + self, + forecasts: Array, + observations: Array, + p: float = 1, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Vertically Re-scaled Variogram Score for a multivariate finite ensemble.""" + if m_axis != -2: + forecasts = self.np.moveaxis(forecasts, m_axis, -2) + v_axis = v_axis - 1 if v_axis != 0 else v_axis + if v_axis != -1: + forecasts = self.np.moveaxis(forecasts, v_axis, -1) + observations = self.np.moveaxis(observations, v_axis, -1) + + w_func_wrap = lambda x: w_func(x, *w_funcargs) + fw = self.np.apply_along_axis(w_func_wrap, axis=-1, arr=forecasts) + ow = self.np.apply_along_axis(w_func_wrap, axis=-1, arr=observations) + wbar = self.np.mean(fw, axis=-1) + + M: int = forecasts.shape[-2] + fct_diff = ( + self.np.abs( + self.np.expand_dims(forecasts, v_axis) - self.np.expand_dims(forecasts, m_axis) + ) + ** p + ) + + obs_diff = ( + self.np.abs(observations[..., None] - observations[..., None, :]) ** p + ) + e_1 = (fct_diff - self.np.expand_dims(obs_diff, axis=-3)) ** 2 + e_1 = self.np.sum(e_1, axis=(-2, -1)) + e_1 = self.np.sum(e_1 * fw * self.np.expand_dims(ow, axis=-1), axis=-1) / M + + e_2 = (self.np.expand_dims(fct_diff, axis=-3) - self.np.expand_dims(fct_diff, axis=-4)) ** 2 + e_2 = self.np.sum(e_2, axis=(-2, -1)) + + fw_diff = self.np.expand_dims(fw, axis=m_axis) * self.np.expand_dims(fw, axis=v_axis) + e_2 = self.np.sum(e_2 * fw_diff, axis=(-2, -1)) / (M**2) + + e_3 = self.np.sum(fct_diff ** 2, axis=(-2, -1)) + e_3 = self.np.sum(e_3 * fw, axis=-1) / M + e_3 -= self.np.sum(obs_diff ** 2, axis=(-2, -1)) * ow + e_3 *= (wbar - ow) + + out = e_1 - 0.5 * e_2 + e_3 + return out def 
logs_normal( self, @@ -198,6 +483,38 @@ def _crps_ensemble_pwm(self, fcts: Array, obs: Array) -> Array: β_1 = self.np.sum(fcts * self.np.arange(M), axis=-1) / (M * (M - 1)) return expected_diff + β_0 - 2.0 * β_1 + def _owcrps_ensemble_nrg(self, fcts: Array, obs: Array, fw: Array, ow: Array) -> Array: + """Outcome-Weighted CRPS estimator based on the energy form.""" + M: int = fcts.shape[-1] + wbar = self.np.mean(fw, axis=-1) + e_1 = self.np.sum(self.np.abs(obs[..., None] - fcts) * fw, axis=-1) * ow / (M * wbar) + e_2 = self.np.sum( + self.np.abs( + self.np.expand_dims(fcts, axis=-1) - self.np.expand_dims(fcts, axis=-2) + ) * ( + self.np.expand_dims(fw, axis=-1) * self.np.expand_dims(fw, axis=-2) + ), + axis=(-1, -2), + ) + e_2 *= ow / (M**2 * wbar**2) + return e_1 - 0.5 * e_2 + + def _vrcrps_ensemble_nrg(self, fcts: Array, obs: Array, fw: Array, ow: Array) -> Array: + """Vertically Re-scaled CRPS estimator based on the energy form.""" + M: int = fcts.shape[-1] + e_1 = self.np.sum(self.np.abs(obs[..., None] - fcts) * fw, axis=-1) * ow / M + e_2 = self.np.sum( + self.np.abs( + self.np.expand_dims(fcts, axis=-1) - self.np.expand_dims(fcts, axis=-2) + ) * ( + self.np.expand_dims(fw, axis=-1) * self.np.expand_dims(fw, axis=-2) + ), + axis=(-1, -2), + ) / (M**2) + e_3 = (self.np.mean(self.np.abs(fcts) * fw, axis=-1) - self.np.abs(obs) * ow) + e_3 *= (self.np.mean(fw, axis=-1) - ow) + return e_1 - 0.5 * e_2 + e_3 + def _norm_cdf(self, x: ArrayLike) -> Array: """Cumulative distribution function for the standard normal distribution.""" return (1.0 + self.scipy.special.erf(x / self.np.sqrt(2.0))) / 2.0 @@ -206,5 +523,9 @@ def _norm_pdf(self, x: ArrayLike) -> Array: """Probability density function for the standard normal distribution.""" return (1.0 / self.np.sqrt(2.0 * self.np.pi)) * self.np.exp(-(x**2) / 2) + def _logis_cdf(self, x: ArrayLike) -> Array: + """Cumulative distribution function for the standard logistic distribution.""" + return 1.0 / (1.0 + self.np.exp(-x)) + def __repr__(self) -> str: return f"NumpyAPIBackend('{self.backend_name}')" diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py index f5e1eb3..7e88c3b 100644 --- a/scoringrules/backend/base.py +++ b/scoringrules/backend/base.py @@ -1,9 +1,9 @@ from abc import abstractmethod -from typing import TypeVar +import typing as tp from .arrayapi import Array -ArrayLike = TypeVar("ArrayLike", Array, float) +ArrayLike = tp.TypeVar("ArrayLike", Array, float) class AbstractBackend: @@ -14,8 +14,8 @@ class AbstractBackend: @abstractmethod def crps_ensemble( self, - fcts: Array, - obs: ArrayLike, + forecasts: Array, + observations: ArrayLike, axis: int = -1, sorted_ensemble: bool = False, estimator: str = "pwm", @@ -33,6 +33,42 @@ def crps_lognormal( """Compute the CRPS for the log normal distribution.""" @abstractmethod + def crps_logistic(self, mu: ArrayLike, sigma: ArrayLike, obs: ArrayLike) -> Array: + """Compute the CRPS for the logistic distribution.""" + + @abstractmethod + def owcrps_ensemble( + self, + forecasts: Array, + observations: ArrayLike, + w_func: tp.Callable, + w_funcargs: tuple, + axis: int = -1, + ) -> Array: + """Compute the Outcome-Weighted CRPS for a finite ensemble.""" + + @abstractmethod + def twcrps_ensemble( + self, + forecasts: Array, + observations: ArrayLike, + v_func: tp.Callable, + v_funcargs: tuple, + axis: int = -1, + ) -> Array: + """Compute the Threshold-Weighted CRPS for a finite ensemble.""" + + @abstractmethod + def vrcrps_ensemble( + self, + forecasts: Array, + observations: ArrayLike, + 
w_func: tp.Callable, + w_funcargs: tuple, + axis: int = -1, + ) -> Array: + """Compute the Vertically Re-scaled CRPS for a finite ensemble.""" + def logs_normal( self, mu: ArrayLike, @@ -46,6 +82,24 @@ def energy_score( self, fcts: Array, obs: Array, m_axis: int = -2, v_axis: int = -1 ) -> Array: """Compute the Energy Score for a multivariate finite ensemble.""" + + @abstractmethod + def owenergy_score( + self, fcts: Array, obs: Array, w_func: tp.Callable, w_funcargs: tuple, m_axis: int = -2, v_axis: int = -1 + ) -> Array: + """Compute the Outcome-Weighted Energy Score for a multivariate finite ensemble.""" + + @abstractmethod + def twenergy_score( + self, fcts: Array, obs: Array, v_func: tp.Callable, v_funcargs: tuple, m_axis: int = -2, v_axis: int = -1 + ) -> Array: + """Compute the Threshold-Weighted Energy Score for a multivariate finite ensemble.""" + + @abstractmethod + def vrenergy_score( + self, fcts: Array, obs: Array, w_func: tp.Callable, w_funcargs: tuple, m_axis: int = -2, v_axis: int = -1 + ) -> Array: + """Compute the Vertically Re-scaled Energy Score for a multivariate finite ensemble.""" @abstractmethod def variogram_score( @@ -57,6 +111,45 @@ def variogram_score( v_axisc: int = -1, ) -> Array: """Compute the Variogram Score for a multivariate finite ensemble.""" + + @abstractmethod + def owvariogram_score( + self, + fcts: Array, + obs: Array, + w_func: tp.Callable, + w_funcargs: tuple, + p: float = 1.0, + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Outcome-Weighted Variogram Score for a multivariate finite ensemble.""" + + @abstractmethod + def twvariogram_score( + self, + fcts: Array, + obs: Array, + v_func: tp.Callable, + v_funcargs: tuple, + p: float = 1.0, + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Threshold-Weighted Variogram Score for a multivariate finite ensemble.""" + + @abstractmethod + def vrvariogram_score( + self, + fcts: Array, + obs: Array, + w_func: tp.Callable, + w_funcargs: tuple, + p: float = 1.0, + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Vertically Re-scaled Variogram Score for a multivariate finite ensemble.""" @abstractmethod def brier_score(self, fcts: ArrayLike, obs: ArrayLike) -> Array: diff --git a/scoringrules/backend/gufuncs.py b/scoringrules/backend/gufuncs.py index 8848383..a44e3e6 100644 --- a/scoringrules/backend/gufuncs.py +++ b/scoringrules/backend/gufuncs.py @@ -24,6 +24,13 @@ def _norm_pdf(x: float) -> float: return out + +@njit(["float32(float32)", "float64(float64)"]) +def _logis_cdf(x: float) -> float: + """Cumulative distribution function for the standard logistic distribution.""" + out: float = 1.0 / (1.0 + math.exp(-x)) + return out + + @vectorize(["float32(float32, float32, float32)", "float64(float64, float64, float64)"]) def _crps_normal_ufunc(mu: float, sigma: float, observation: float) -> float: ω = (observation - mu) / sigma @@ -42,6 +49,13 @@ def _crps_lognormal_ufunc(mulog: float, sigmalog: float, observation: float) -> @vectorize(["float32(float32, float32, float32)", "float64(float64, float64, float64)"]) +def _crps_logistic_ufunc(mu: float, sigma: float, observation: float) -> float: + ω = (observation - mu) / sigma + out: float = sigma * (ω - 2 * np.log(_logis_cdf(ω)) - 1) + return out + + +@vectorize(["float32(float32, float32, float32)", "float64(float64, float64, float64)"]) def _logs_normal_ufunc(mu: np.ndarray, sigma: np.ndarray, observation: np.ndarray): ω = (observation - mu) / sigma return -np.log(_norm_pdf(ω) / sigma) @@ -257,6 +270,80 @@ def _crps_ensemble_akr_circperm_gufunc( out[0] = e_1 / M - 0.5 * 1 / M * e_2 
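For reference, the outcome-weighted estimator that `_owcrps_ensemble_nrg_gufunc` below vectorises can be written in a few lines of plain NumPy for a single forecast case. This is a minimal sketch for checking the algebra; the helper name, weight function and data are illustrative only and not part of the patch.

```python
import numpy as np


def _owcrps_nrg_reference(fcts: np.ndarray, obs: float, fw: np.ndarray, ow: float) -> float:
    """Plain-NumPy sketch of the outcome-weighted CRPS (energy form) for one case."""
    M = fcts.size
    wbar = fw.mean()
    # First term: weighted mean distance between ensemble members and the observation.
    e_1 = np.sum(np.abs(fcts - obs) * fw) * ow / (M * wbar)
    # Second term: weighted mean pairwise distance between ensemble members.
    e_2 = np.sum(
        np.abs(fcts[:, None] - fcts[None, :]) * fw[:, None] * fw[None, :]
    ) * ow / (M * wbar) ** 2
    return e_1 - 0.5 * e_2


# Indicator weight w(x) = 1{x > t}: only members above the threshold t contribute.
rng = np.random.default_rng(0)
ens = rng.normal(size=20)
y, t = 0.3, -1.0
weights = (ens > t).astype(float)
print(_owcrps_nrg_reference(ens, y, weights, float(y > t)))
```

With this weight, the value should agree with the gufunc below up to floating-point error, since both evaluate the same expression term by term.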
+@guvectorize( + [ + "void(float32[:], float32[:], float32[:], float32[:], float32[:])", + "void(float64[:], float64[:], float64[:], float64[:], float64[:])", + ], + "(n),(),(n),()->()", +) +def _owcrps_ensemble_nrg_gufunc( + forecasts: np.ndarray, + observation: np.ndarray, + fw: np.ndarray, + ow: np.ndarray, + out: np.ndarray +): + """Outcome-weighted CRPS estimator based on the energy form.""" + obs = observation[0] + ow = ow[0] + M = forecasts.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0.0 + e_2 = 0.0 + + for i, x_i in enumerate(forecasts): + e_1 += abs(x_i - obs) * fw[i] * ow + for j, x_j in enumerate(forecasts): + e_2 += abs(x_i - x_j) * fw[i] * fw[j] * ow + + wbar = np.mean(fw) + + out[0] = e_1 / (M * wbar) - 0.5 * e_2 / ((M * wbar) ** 2) + + +@guvectorize( + [ + "void(float32[:], float32[:], float32[:], float32[:], float32[:])", + "void(float64[:], float64[:], float64[:], float64[:], float64[:])", + ], + "(n),(),(n),()->()", +) +def _vrcrps_ensemble_nrg_gufunc( + forecasts: np.ndarray, + observation: np.ndarray, + fw: np.ndarray, + ow: np.ndarray, + out: np.ndarray +): + """Vertically re-scaled CRPS estimator based on the energy form.""" + obs = observation[0] + ow = ow[0] + M = forecasts.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0.0 + e_2 = 0.0 + + for i, x_i in enumerate(forecasts): + e_1 += abs(x_i - obs) * fw[i] * ow + for j, x_j in enumerate(forecasts): + e_2 += abs(x_i - x_j) * fw[i] * fw[j] + + wbar = np.mean(fw) + wabs_x = np.mean(np.abs(forecasts) * fw) + wabs_y = abs(obs) * ow + + out[0] = e_1 / M - 0.5 * e_2 / (M ** 2) + (wabs_x - wabs_y)*(wbar - ow) + + @guvectorize( [ "void(float32[:,:], float32[:], float32[:])", @@ -277,7 +364,63 @@ def _energy_score_gufunc( for j in range(M): e_2 += float(np.linalg.norm(forecasts[i] - forecasts[j])) - out[0] = e_1 - 0.5 / (M * (M - 1)) * e_2 + out[0] = e_1 / M - 0.5 / (M**2) * e_2 + + +@guvectorize( + [ + "void(float32[:,:], float32[:], float32[:], float32[:], float32[:])", + "void(float64[:,:], float64[:], float64[:], float64[:], float64[:])", + ], + "(m,d),(d),(m),()->()", +) +def _owenergy_score_gufunc( + forecasts: np.ndarray, observations: np.ndarray, fw: np.ndarray, ow: np.ndarray, out: np.ndarray +): + """Compute the Outcome-Weighted Energy Score for a finite ensemble.""" + M = forecasts.shape[0] + ow = ow[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(np.linalg.norm(forecasts[i] - observations) * fw[i] * ow) + for j in range(M): + e_2 += float(np.linalg.norm(forecasts[i] - forecasts[j]) * fw[i] * fw[j] * ow) + + wbar = np.mean(fw) + + out[0] = e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) + + +@guvectorize( + [ + "void(float32[:,:], float32[:], float32[:], float32[:], float32[:])", + "void(float64[:,:], float64[:], float64[:], float64[:], float64[:])", + ], + "(m,d),(d),(m),()->()", +) +def _vrenergy_score_gufunc( + forecasts: np.ndarray, observations: np.ndarray, fw: np.ndarray, ow: np.ndarray, out: np.ndarray +): + """Compute the Vertically Re-scaled Energy Score for a finite ensemble.""" + M = forecasts.shape[0] + ow = ow[0] + + e_1 = 0.0 + e_2 = 0.0 + wabs_x = 0.0 + for i in range(M): + e_1 += float(np.linalg.norm(forecasts[i] - observations) * fw[i] * ow) + wabs_x += np.linalg.norm(forecasts[i]) * fw[i] + for j in range(M): + e_2 += float(np.linalg.norm(forecasts[i] - forecasts[j]) * fw[i] * fw[j]) + + wabs_x = wabs_x / M + wbar = np.mean(fw) + wabs_y = np.linalg.norm(observations) * ow + + out[0] = e_1 / M - 0.5 * e_2 / (M ** 2) + (wabs_x - 
wabs_y)*(wbar - ow) @vectorize(["float32(float32, float32)", "float64(float64, float64)"]) @@ -298,22 +441,92 @@ def _brier_score_ufunc(forecast, observation): "void(float32[:,:], float32[:], float32, float32[:])", "void(float64[:,:], float64[:], float64, float64[:])", ], - "(d,m),(d),()->()", + "(m,d),(d),()->()", ) def _variogram_score_gufunc(forecasts, observation, p, out): - D = forecasts.shape[-2] - M = forecasts.shape[-1] + M = forecasts.shape[-2] + D = forecasts.shape[-1] out[0] = 0.0 for i in range(D): for j in range(D): vfcts = 0.0 for m in range(M): - vfcts += abs(forecasts[i, m] - forecasts[j, m]) ** p + vfcts += abs(forecasts[m, i] - forecasts[m, j]) ** p vfcts = vfcts / M vobs = abs(observation[i] - observation[j]) ** p out[0] += (vobs - vfcts) ** 2 +@guvectorize( + [ + "void(float32[:,:], float32[:], float32, float32[:], float32, float32[:])", + "void(float64[:,:], float64[:], float64, float64[:], float64, float64[:])", + ], + "(m,d),(d),(),(m),()->()", +) +def _owvariogram_score_gufunc(forecasts, observation, p, fw, ow, out): + M = forecasts.shape[-2] + D = forecasts.shape[-1] + + e_1 = 0.0 + e_2 = 0.0 + for k in range(M): + for i in range(D): + for j in range(D): + rho1 = abs(forecasts[k, i] - forecasts[k, j]) ** p + rho2 = abs(observation[i] - observation[j]) ** p + e_1 += (rho1 - rho2) ** 2 * fw[k] * ow + for m in range(M): + for i in range(D): + for j in range(D): + rho1 = abs(forecasts[k, i] - forecasts[k, j]) ** p + rho2 = abs(forecasts[m, i] - forecasts[m, j]) ** p + e_2 += (rho1 - rho2) ** 2 * fw[k] * fw[m] * ow + + wbar = np.mean(fw) + + out[0] = e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) + + +@guvectorize( + [ + "void(float32[:,:], float32[:], float32, float32[:], float32, float32[:])", + "void(float64[:,:], float64[:], float64, float64[:], float64, float64[:])", + ], + "(m,d),(d),(),(m),()->()", +) +def _vrvariogram_score_gufunc(forecasts, observation, p, fw, ow, out): + M = forecasts.shape[-2] + D = forecasts.shape[-1] + + e_1 = 0.0 + e_2 = 0.0 + e_3_x = 0.0 + for k in range(M): + for i in range(D): + for j in range(D): + rho1 = abs(forecasts[k, i] - forecasts[k, j]) ** p + rho2 = abs(observation[i] - observation[j]) ** p + e_1 += (rho1 - rho2) ** 2 * fw[k] * ow + e_3_x += (rho1) ** 2 * fw[k] + for m in range(M): + for i in range(D): + for j in range(D): + rho1 = abs(forecasts[k, i] - forecasts[k, j]) ** p + rho2 = abs(forecasts[m, i] - forecasts[m, j]) ** p + e_2 += (rho1 - rho2) ** 2 * fw[k] * fw[m] + + e_3_x *= 1/M + wbar = np.mean(fw) + e_3_y = 0.0 + for i in range(D): + for j in range(D): + rho1 = abs(observation[i] - observation[j]) ** p + e_3_y += (rho1) ** 2 * ow + + out[0] = e_1 / M - 0.5 * e_2 / (M ** 2) + (e_3_x - e_3_y)*(wbar - ow) + + __all__ = [ "_crps_ensemble_akr_circperm_gufunc", "_crps_ensemble_akr_gufunc", @@ -324,8 +537,15 @@ def _variogram_score_gufunc(forecasts, observation, p, out): "_crps_ensemble_qd_gufunc", "_crps_normal_ufunc", "_crps_lognormal_ufunc", + "_crps_logistic_ufunc", + "_owcrps_ensemble_nrg_gufunc", + "_vrcrps_ensemble_nrg_gufunc", "_logs_normal_ufunc", "_energy_score_gufunc", + "_owenergy_score_gufunc", + "_vrenergy_score_gufunc", "_brier_score_ufunc", "_variogram_score_gufunc", + "_owvariogram_score_gufunc", + "_vrvariogram_score_gufunc", ] diff --git a/scoringrules/backend/numba.py b/scoringrules/backend/numba.py index 656361b..71cf4e5 100644 --- a/scoringrules/backend/numba.py +++ b/scoringrules/backend/numba.py @@ -1,4 +1,4 @@ -from typing import TypeVar +import typing as tp import numpy as np @@ -15,12 +15,19 @@ 
_crps_ensemble_qd_gufunc, _crps_lognormal_ufunc, _crps_normal_ufunc, + _crps_logistic_ufunc, + _owcrps_ensemble_nrg_gufunc, + _vrcrps_ensemble_nrg_gufunc, _energy_score_gufunc, + _owenergy_score_gufunc, + _vrenergy_score_gufunc, _logs_normal_ufunc, _variogram_score_gufunc, + _owvariogram_score_gufunc, + _vrvariogram_score_gufunc, ) -ArrayLike = TypeVar("ArrayLike", np.ndarray, float) +ArrayLike = tp.TypeVar("ArrayLike", np.ndarray, float) # ArrayLike = Array | float @@ -85,6 +92,59 @@ def crps_lognormal( ) -> Array: """Compute the CRPS for a log normal distribution.""" return _crps_lognormal_ufunc(mulog, sigmalog, observation) + + @staticmethod + def crps_logistic(mu: ArrayLike, sigma: ArrayLike, observation: ArrayLike) -> Array: + """Compute the CRPS for a logistic distribution.""" + return _crps_logistic_ufunc(mu, sigma, observation) + + @staticmethod + def owcrps_ensemble( + forecasts: Array, + observations: ArrayLike, + w_func: tp.Callable = lambda x, *args: np.ones_like(x), + w_funcargs: tuple = (), + axis=-1, + ) -> Array: + """Compute the Outcome-Weighted CRPS for a finite ensemble.""" + if axis != -1: + forecasts = np.moveaxis(forecasts, axis, -1) + + fw = w_func(forecasts, *w_funcargs) + ow = w_func(observations, *w_funcargs) + return _owcrps_ensemble_nrg_gufunc(forecasts, observations, fw, ow) + + @staticmethod + def twcrps_ensemble( + forecasts: Array, + observations: ArrayLike, + v_func: tp.Callable = lambda x, *args: x, + v_funcargs: tuple = (), + axis=-1, + ) -> Array: + """Compute the Threshold-Weighted CRPS for a finite ensemble.""" + if axis != -1: + forecasts = np.moveaxis(forecasts, axis, -1) + + v_forecasts = v_func(forecasts, *v_funcargs) + v_observations = v_func(observations, *v_funcargs) + return _crps_ensemble_nrg_gufunc(v_forecasts, v_observations) + + @staticmethod + def vrcrps_ensemble( + forecasts: Array, + observations: ArrayLike, + w_func: tp.Callable = lambda x, *args: np.ones_like(x), + w_funcargs: tuple = (), + axis=-1, + ) -> Array: + """Compute the Vertically Re-scaled CRPS for a finite ensemble.""" + if axis != -1: + forecasts = np.moveaxis(forecasts, axis, -1) + + fw = w_func(forecasts, *w_funcargs) + ow = w_func(observations, *w_funcargs) + return _vrcrps_ensemble_nrg_gufunc(forecasts, observations, fw, ow) @staticmethod def logs_normal( @@ -110,6 +170,69 @@ def energy_score( observations = np.moveaxis(observations, v_axis, -1) return _energy_score_gufunc(forecasts, observations) + @staticmethod + def owenergy_score( + forecasts: Array, + observations: Array, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Outcome-Weighted Energy Score for a finite multivariate ensemble.""" + if m_axis != -2: + forecasts = np.moveaxis(forecasts, m_axis, -2) + if v_axis != -1: + forecasts = np.moveaxis(forecasts, v_axis, -1) + observations = np.moveaxis(observations, v_axis, -1) + + w_func_wrap = lambda x: w_func(x, *w_funcargs) + fw = np.apply_along_axis(w_func_wrap, axis=-1, arr=forecasts) + ow = np.apply_along_axis(w_func_wrap, axis=-1, arr=observations) + return _owenergy_score_gufunc(forecasts, observations, fw, ow) + + @staticmethod + def twenergy_score( + forecasts: Array, + observations: Array, + v_func: tp.Callable = lambda x, *args: x, + v_funcargs: tuple = (), + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Threshold-Weighted Energy Score for a finite multivariate ensemble.""" + if m_axis != -2: + forecasts = np.moveaxis(forecasts, m_axis, 
-2) + if v_axis != -1: + forecasts = np.moveaxis(forecasts, v_axis, -1) + observations = np.moveaxis(observations, v_axis, -1) + + v_forecasts = v_func(forecasts, *v_funcargs) + v_observations = v_func(observations, *v_funcargs) + return _energy_score_gufunc(v_forecasts, v_observations) + + @staticmethod + def vrenergy_score( + forecasts: Array, + observations: Array, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Vertically Re-scaled Energy Score for a finite multivariate ensemble.""" + if m_axis != -2: + forecasts = np.moveaxis(forecasts, m_axis, -2) + if v_axis != -1: + forecasts = np.moveaxis(forecasts, v_axis, -1) + observations = np.moveaxis(observations, v_axis, -1) + + w_func_wrap = lambda x: w_func(x, *w_funcargs) + fw = np.apply_along_axis(w_func_wrap, axis=-1, arr=forecasts) + ow = np.apply_along_axis(w_func_wrap, axis=-1, arr=observations) + + return _vrenergy_score_gufunc(forecasts, observations, fw, ow) + @staticmethod def brier_score(forecasts: ArrayLike, observations: ArrayLike) -> Array: """Compute the Brier Score for predicted probabilities.""" @@ -120,17 +243,82 @@ def variogram_score( forecasts: Array, observations: Array, p: float = 1.0, - m_axis: int = -1, - v_axis: int = -2, + m_axis: int = -2, + v_axis: int = -1, ) -> Array: """Compute the Variogram Score for a finite multivariate ensemble.""" - if m_axis != -1: - forecasts = np.moveaxis(forecasts, m_axis, -1) - v_axis = v_axis - 1 if v_axis != 0 else v_axis - if v_axis != -2: - forecasts = np.moveaxis(forecasts, v_axis, -2) + if m_axis != -2: + forecasts = np.moveaxis(forecasts, m_axis, -2) + if v_axis != -1: + forecasts = np.moveaxis(forecasts, v_axis, -1) + observations = np.moveaxis(observations, v_axis, -1) return _variogram_score_gufunc(forecasts, observations, p) + + @staticmethod + def owvariogram_score( + forecasts: Array, + observations: Array, + p: float = 1.0, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Outcome-Weighted Variogram Score for a finite multivariate ensemble.""" + if m_axis != -2: + forecasts = np.moveaxis(forecasts, m_axis, -2) + if v_axis != -1: + forecasts = np.moveaxis(forecasts, v_axis, -1) + observations = np.moveaxis(observations, v_axis, -1) + + w_func_wrap = lambda x: w_func(x, *w_funcargs) + fw = np.apply_along_axis(w_func_wrap, axis=-1, arr=forecasts) + ow = np.apply_along_axis(w_func_wrap, axis=-1, arr=observations) + return _owvariogram_score_gufunc(forecasts, observations, p, fw, ow) + + @staticmethod + def twvariogram_score( + forecasts: Array, + observations: Array, + p: float = 1.0, + v_func: tp.Callable = lambda x, *args: x, + v_funcargs: tuple = (), + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute the Threshold-Weighted Variogram Score for a finite multivariate ensemble.""" + if m_axis != -2: + forecasts = np.moveaxis(forecasts, m_axis, -2) + if v_axis != -1: + forecasts = np.moveaxis(forecasts, v_axis, -1) + observations = np.moveaxis(observations, v_axis, -1) + + v_forecasts = v_func(forecasts, *v_funcargs) + v_observations = v_func(observations, *v_funcargs) + return _variogram_score_gufunc(v_forecasts, v_observations, p) + + @staticmethod + def vrvariogram_score( + forecasts: Array, + observations: Array, + p: float = 1.0, + w_func: tp.Callable = lambda x, *args: 1.0, + w_funcargs: tuple = (), + m_axis: int = -2, + v_axis: int = -1, + ) -> Array: + """Compute 
the Vertically Re-scaled Variogram Score for a finite multivariate ensemble.""" + if m_axis != -2: + forecasts = np.moveaxis(forecasts, m_axis, -2) + if v_axis != -1: + forecasts = np.moveaxis(forecasts, v_axis, -1) + observations = np.moveaxis(observations, v_axis, -1) + + w_func_wrap = lambda x: w_func(x, *w_funcargs) + fw = np.apply_along_axis(w_func_wrap, axis=-1, arr=forecasts) + ow = np.apply_along_axis(w_func_wrap, axis=-1, arr=observations) + return _vrvariogram_score_gufunc(forecasts, observations, p, fw, ow) def __repr__(self) -> str: return "NumbaBackend" diff --git a/tests/test_variogram.py b/tests/test_variogram.py index 3bee54f..3ea0c7d 100644 --- a/tests/test_variogram.py +++ b/tests/test_variogram.py @@ -13,7 +13,7 @@ @pytest.mark.parametrize("backend", BACKENDS) def test_variogram_score(backend): obs = np.random.randn(N, N_VARS) - fcts = np.expand_dims(obs, axis=-1) + np.random.randn(N, N_VARS, ENSEMBLE_SIZE) + fcts = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) res = variogram_score(fcts, obs, backend=backend) @@ -44,8 +44,8 @@ def test_variogram_score_correctness(backend): obs = np.array([0.2743836, 0.8146400]) - res = variogram_score(fcts, obs, p=0.5, backend=backend) + res = variogram_score(fcts.T, obs, p=0.5, backend=backend) np.testing.assert_allclose(res, 0.05083489, rtol=1e-5) - res = variogram_score(fcts, obs, p=1.0, backend=backend) + res = variogram_score(fcts.T, obs, p=1.0, backend=backend) np.testing.assert_allclose(res, 0.04856365, rtol=1e-5) diff --git a/tests/test_wcrps.py b/tests/test_wcrps.py new file mode 100644 index 0000000..e171b8d --- /dev/null +++ b/tests/test_wcrps.py @@ -0,0 +1,141 @@ +import numpy as np +import pytest +from scoringrules._crps import crps_ensemble +from scoringrules._wcrps import owcrps_ensemble, twcrps_ensemble, vrcrps_ensemble + +ENSEMBLE_SIZE = 51 +N = 100 + +BACKENDS = ["numpy", "numba", "jax"] + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_owcrps_vs_crps(backend): + obs = np.random.randn(N) + mu = obs + np.random.randn(N) * 0.1 + sigma = abs(np.random.randn(N)) * 0.3 + fcts = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] + + res = crps_ensemble(fcts, obs, backend=backend, estimator="nrg") + resw = owcrps_ensemble(fcts, obs, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_twcrps_vs_crps(backend): + obs = np.random.randn(N) + mu = obs + np.random.randn(N) * 0.1 + sigma = abs(np.random.randn(N)) * 0.3 + fcts = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] + + res = crps_ensemble(fcts, obs, backend=backend, estimator="nrg") + resw = twcrps_ensemble(fcts, obs, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_vrcrps_vs_crps(backend): + obs = np.random.randn(N) + mu = obs + np.random.randn(N) * 0.1 + sigma = abs(np.random.randn(N)) * 0.3 + fcts = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] + + res = crps_ensemble(fcts, obs, backend=backend, estimator="nrg") + resw = vrcrps_ensemble(fcts, obs, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_owcrps_score_correctness(backend): + fcts = np.array( + [[-0.03574194, 0.06873582, 0.03098684, 0.07316138, 0.08498165], + [-0.11957874, 0.26472238, -0.06324622, 0.43026451, -0.25640457], + [-1.31340831, -1.43722153, -1.01696021, -0.70790148, 
-1.20432392], + [ 1.26054027, 1.03477874, 0.61688454, 0.75305795, 1.19364529], + [ 0.63192933, 0.33521695, 0.20626017, 0.43158264, 0.69518928], + [ 0.83305233, 0.71965134, 0.96687378, 1.0000473 , 0.82714425], + [ 0.0128655 , 0.03849841, 0.02459106, -0.02618909, 0.18687008], + [-0.69399216, -0.59432627, -1.43629972, 0.01979857, -0.3286767 ], + [ 1.92958764, 2.2678255 , 1.86036922, 1.84766384, 2.03331138], + [ 0.80847492, 1.11265193, 0.58995365, 1.04838184, 1.10439277]]) + + obs = np.array([ 0.19640722, -0.11300369, -1.0400268 , 0.84306533, 0.36572078, + 0.82487264, 0.14088773, -0.51936113, 1.70706264, 0.75985908]) + + def w_func(x, t): + return (x > t).astype(np.float32) + + t = -1 + res = np.mean(owcrps_ensemble(fcts, obs, w_func, (t,), backend=backend)) + np.testing.assert_allclose(res, 0.09320807, rtol=1e-6) + + def w_func(x, t): + return (x < t).astype(np.float32) + + t = 1.85 + res = np.mean(owcrps_ensemble(fcts, obs, w_func, (t,), backend=backend)) + np.testing.assert_allclose(res, 0.09933139, rtol=1e-6) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_twcrps_score_correctness(backend): + fcts = np.array( + [[-0.03574194, 0.06873582, 0.03098684, 0.07316138, 0.08498165], + [-0.11957874, 0.26472238, -0.06324622, 0.43026451, -0.25640457], + [-1.31340831, -1.43722153, -1.01696021, -0.70790148, -1.20432392], + [ 1.26054027, 1.03477874, 0.61688454, 0.75305795, 1.19364529], + [ 0.63192933, 0.33521695, 0.20626017, 0.43158264, 0.69518928], + [ 0.83305233, 0.71965134, 0.96687378, 1.0000473 , 0.82714425], + [ 0.0128655 , 0.03849841, 0.02459106, -0.02618909, 0.18687008], + [-0.69399216, -0.59432627, -1.43629972, 0.01979857, -0.3286767 ], + [ 1.92958764, 2.2678255 , 1.86036922, 1.84766384, 2.03331138], + [ 0.80847492, 1.11265193, 0.58995365, 1.04838184, 1.10439277]]) + + obs = np.array([ 0.19640722, -0.11300369, -1.0400268 , 0.84306533, 0.36572078, + 0.82487264, 0.14088773, -0.51936113, 1.70706264, 0.75985908]) + + def v_func(x, t): + return np.maximum(x, t) + + t = -1 + res = np.mean(twcrps_ensemble(fcts, obs, v_func, (t,), backend=backend)) + np.testing.assert_allclose(res, 0.09489662, rtol=1e-6) + + def v_func(x, t): + return np.minimum(x, t) + + t = 1.85 + res = np.mean(twcrps_ensemble(fcts, obs, v_func, (t,), backend=backend)) + np.testing.assert_allclose(res, 0.0994809, rtol=1e-6) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_vrcrps_score_correctness(backend): + fcts = np.array( + [[-0.03574194, 0.06873582, 0.03098684, 0.07316138, 0.08498165], + [-0.11957874, 0.26472238, -0.06324622, 0.43026451, -0.25640457], + [-1.31340831, -1.43722153, -1.01696021, -0.70790148, -1.20432392], + [ 1.26054027, 1.03477874, 0.61688454, 0.75305795, 1.19364529], + [ 0.63192933, 0.33521695, 0.20626017, 0.43158264, 0.69518928], + [ 0.83305233, 0.71965134, 0.96687378, 1.0000473 , 0.82714425], + [ 0.0128655 , 0.03849841, 0.02459106, -0.02618909, 0.18687008], + [-0.69399216, -0.59432627, -1.43629972, 0.01979857, -0.3286767 ], + [ 1.92958764, 2.2678255 , 1.86036922, 1.84766384, 2.03331138], + [ 0.80847492, 1.11265193, 0.58995365, 1.04838184, 1.10439277]]) + + obs = np.array([ 0.19640722, -0.11300369, -1.0400268 , 0.84306533, 0.36572078, + 0.82487264, 0.14088773, -0.51936113, 1.70706264, 0.75985908]) + + def w_func(x, t): + return (x > t).astype(np.float32) + + t = -1 + res = np.mean(vrcrps_ensemble(fcts, obs, w_func, (t,), backend=backend)) + np.testing.assert_allclose(res, 0.1003983, rtol=1e-6) + + def w_func(x, t): + return (x < t).astype(np.float32) + + t = 1.85 + res = 
np.mean(vrcrps_ensemble(fcts, obs, w_func, (t,), backend=backend)) + np.testing.assert_allclose(res, 0.1950857, rtol=1e-6) \ No newline at end of file diff --git a/tests/test_wenergy.py b/tests/test_wenergy.py new file mode 100644 index 0000000..2427c1f --- /dev/null +++ b/tests/test_wenergy.py @@ -0,0 +1,85 @@ +import jax +import numpy as np +import pytest +from scoringrules._energy import energy_score +from scoringrules._wenergy import owenergy_score, twenergy_score, vrenergy_score + +ENSEMBLE_SIZE = 51 +N = 100 +N_VARS = 3 + +BACKENDS = ["numpy", "numba", "jax"] + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_owes_vs_es(backend): + obs = np.random.randn(N, N_VARS) + fcts = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + + res = energy_score(fcts, obs, backend=backend) + resw = owenergy_score(fcts, obs, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_twes_vs_es(backend): + obs = np.random.randn(N, N_VARS) + fcts = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + + res = energy_score(fcts, obs, backend=backend) + resw = twenergy_score(fcts, obs, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_vres_vs_es(backend): + obs = np.random.randn(N, N_VARS) + fcts = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + + res = energy_score(fcts, obs, backend=backend) + resw = vrenergy_score(fcts, obs, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_owenergy_score_correctness(backend): + fcts = np.array( + [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] + ).T + obs = np.array([0.2743836, 0.8146400]) + + def w_func(x, t): + return (x > t).all().astype(np.float32) + + t = 0.2 + res = owenergy_score(fcts, obs, w_func, (t,), backend=backend) + np.testing.assert_allclose(res, 0.2274243, rtol=1e-6) + + def w_func(x, t): + return (x < t).all().astype(np.float32) + + t = 1 + res = owenergy_score(fcts, obs, w_func, (t,), backend=backend) + np.testing.assert_allclose(res, 0.3345418, rtol=1e-6) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_twenergy_score_correctness(backend): + fcts = np.array( + [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] + ).T + obs = np.array([0.2743836, 0.8146400]) + + def v_func(x, t): + return np.maximum(x, t) + + t = 0.2 + res = twenergy_score(fcts, obs, v_func, (t,), backend=backend) + np.testing.assert_allclose(res, 0.3116075, rtol=1e-6) + + def v_func(x, t): + return np.minimum(x, t) + + t = 1 + res = twenergy_score(fcts, obs, v_func, (t,), backend=backend) + np.testing.assert_allclose(res, 0.3345418, rtol=1e-6) diff --git a/tests/test_wvariogram.py b/tests/test_wvariogram.py new file mode 100644 index 0000000..61ec85c --- /dev/null +++ b/tests/test_wvariogram.py @@ -0,0 +1,85 @@ +import jax +import numpy as np +import pytest +from scoringrules._variogram import variogram_score +from scoringrules._wvariogram import owvariogram_score, twvariogram_score, vrvariogram_score + +ENSEMBLE_SIZE = 51 +N = 100 +N_VARS = 3 + +BACKENDS = ["numpy", "numba", "jax"] + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_owvs_vs_vs(backend): + obs = np.random.randn(N, N_VARS) + fcts = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + + res = variogram_score(fcts, obs, 
backend=backend) + resw = owvariogram_score(fcts, obs, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_twvs_vs_vs(backend): + obs = np.random.randn(N, N_VARS) + fcts = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + + res = variogram_score(fcts, obs, backend=backend) + resw = twvariogram_score(fcts, obs, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_vrvs_vs_vs(backend): + obs = np.random.randn(N, N_VARS) + fcts = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + + res = variogram_score(fcts, obs, backend=backend) + resw = vrvariogram_score(fcts, obs, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_owvariogram_score_correctness(backend): + fcts = np.array( + [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] + ).T + obs = np.array([0.2743836, 0.8146400]) + + def w_func(x, t): + return (x > t).all().astype(np.float32) + + t = 0.2 + res = owvariogram_score(fcts, obs, 0.5, w_func, (t,), backend=backend) + np.testing.assert_allclose(res, 0.1929739, rtol=1e-6) + + def w_func(x, t): + return (x < t).all().astype(np.float32) + + t = 1 + res = owvariogram_score(fcts, obs, 1, w_func, (t,), backend=backend) + np.testing.assert_allclose(res, 0.04856366, rtol=1e-6) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_twvariogram_score_correctness(backend): + fcts = np.array( + [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] + ).T + obs = np.array([0.2743836, 0.8146400]) + + def v_func(x, t): + return np.maximum(x, t) + + t = 0.2 + res = twvariogram_score(fcts, obs, 0.5, v_func, (t,), backend=backend) + np.testing.assert_allclose(res, 0.07594679, rtol=1e-6) + + def v_func(x, t): + return np.minimum(x, t) + + t = 1 + res = twvariogram_score(fcts, obs, 1, v_func, (t,), backend=backend) + np.testing.assert_allclose(res, 0.04856366, rtol=1e-6)
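Taken together, the tests above suggest the following end-to-end usage of the new weighted scores. This is a sketch assuming the module paths and call signatures exercised in `tests/test_wcrps.py` and `tests/test_wenergy.py`; the thresholds, weight functions and toy data are illustrative.

```python
import numpy as np
from scoringrules._wcrps import owcrps_ensemble, twcrps_ensemble
from scoringrules._wenergy import twenergy_score

# Toy data: 100 forecast cases, 51-member ensembles, 3 variables for the energy score.
rng = np.random.default_rng(42)
obs = rng.normal(size=100)
fcts = obs[:, None] + rng.normal(size=(100, 51))
obs_mv = rng.normal(size=(100, 3))
fcts_mv = obs_mv[:, None, :] + rng.normal(size=(100, 51, 3))

# Emphasise outcomes above t = 0.5, either with an indicator weight (owCRPS)
# or with the chaining function v(x) = max(x, t) (twCRPS / twES).
t = 0.5
w_func = lambda x, t: (x > t).astype(np.float64)
v_func = lambda x, t: np.maximum(x, t)

# nanmean guards against cases in which no ensemble member receives positive weight.
print(np.nanmean(owcrps_ensemble(fcts, obs, w_func, (t,), backend="numba")))
print(np.mean(twcrps_ensemble(fcts, obs, v_func, (t,), backend="numba")))
print(np.mean(twenergy_score(fcts_mv, obs_mv, v_func, (t,), backend="numba")))
```

Passing the default weight and chaining functions instead recovers the unweighted CRPS and energy score, which is exactly the equivalence the `*_vs_*` tests above assert.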