Merge pull request #34 from martinvonk/dev

Update main to v0.4.0
martinvonk · Mar 14, 2024 · 4b8963d · 4b8963d
2 parents a7db309 + 47bc330
commit 4b8963d
Show file tree

Hide file tree

Showing 28 changed files with 1,633 additions and 890 deletions.
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -13,9 +13,9 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: '3.x'
     - name: Install dependencies

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -12,11 +12,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - name: Test suite with py38-ubuntu
-            python: "3.8"
-            os: ubuntu-latest
-            toxenv: py38
-            experimental: false
           - name: Test suite with py39-ubuntu
             python: "3.9"
             os: ubuntu-latest
@@ -33,10 +28,10 @@ jobs:
             toxenv: py311
             experimental: false
           - name: Test suite with py312-ubuntu
-            python: "3.12-dev"
+            python: "3.12"
             os: ubuntu-latest
             toxenv: py312
-            experimental: true
+            experimental: false
           - name: Type check with mypy
             python: "3.9"
             os: ubuntu-latest
@@ -72,10 +67,10 @@ jobs:
       # Pytest
       PYTEST_ADDOPTS: "--color=yes"
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python }}
           check-latest: true

diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,4 @@
 /.pytest_cache
 /htmlcov
 coverage.xml
+*.code-workspace
diff --git a/README.md b/README.md
@@ -43,11 +43,11 @@ To get the development version download or clone the GitHub repository to your l
 
 ## Literature
 
- 1. B. Lloyd-Hughes and M.A. Saunders (2002) - A Drought Climatology for Europe. DOI: 10.1002/joc.846
- 2. S.M. Vicente-Serrano, S. Beguería and J.I. López-Moreno (2010) - A Multi-scalar drought index sensitive to global warming: The Standardized Precipitation Evapotranspiration Index. DOI: 10.1175/2009JCLI2909.1
- 3. J.P.Bloomfield and B.P. Marchant, B. P. (2013) - Analysis of groundwater drought building on the standardised precipitation index approach. DOI: 10.5194/hess-17-4769-2013
- 4. A. Babre, A. Kalvāns, Z. Avotniece, I. Retiķe, J. Bikše, K.P.M. Jemeljanova, A. Zelenkevičs and A. Dēliņa (2022) - The use of predefined drought indices for the assessment of groundwater drought episodes in the Baltic States over the period 1989–2018. DOI: 10.1016/j.ejrh.2022.101049
- 5. E. Tijdeman, K. Stahl and L.M. Tallaksen (2020) - Drought characteristics derived based on the Standardized Streamflow Index: A large sample comparison for parametric and nonparametric methods. DOI: 10.1029/2019WR026315
+1.  B. Lloyd-Hughes and M.A. Saunders (2002) - A Drought Climatology for Europe. DOI: 10.1002/joc.846
+2.  S.M. Vicente-Serrano, S. Beguería and J.I. López-Moreno (2010) - A Multi-scalar drought index sensitive to global warming: The Standardized Precipitation Evapotranspiration Index. DOI: 10.1175/2009JCLI2909.1
+3.  J.P. Bloomfield and B.P. Marchant (2013) - Analysis of groundwater drought building on the standardised precipitation index approach. DOI: 10.5194/hess-17-4769-2013
+4.  A. Babre, A. Kalvāns, Z. Avotniece, I. Retiķe, J. Bikše, K.P.M. Jemeljanova, A. Zelenkevičs and A. Dēliņa (2022) - The use of predefined drought indices for the assessment of groundwater drought episodes in the Baltic States over the period 1989–2018. DOI: 10.1016/j.ejrh.2022.101049
+5.  E. Tijdeman, K. Stahl and L.M. Tallaksen (2020) - Drought characteristics derived based on the Standardized Streamflow Index: A large sample comparison for parametric and nonparametric methods. DOI: 10.1029/2019WR026315
 
 Note that the method for calculating the drought indices does not come from these articles and SciPy is used for deriving the distribution. However the literature is helpful as a reference to understand the context and application of drought indices.
 

diff --git a/doc/_static/logo.png b/doc/_static/logo.png
diff --git a/doc/_static/logo.pptx b/doc/_static/logo.pptx
diff --git a/doc/_static/make_logo.py b/doc/_static/make_logo.py
diff --git a/doc/examples/example01_indices.ipynb b/doc/examples/example01_indices.ipynb
diff --git a/doc/examples/example02_distributions.ipynb b/doc/examples/example02_distributions.ipynb
diff --git a/doc/examples/example03_drought_NL.ipynb b/doc/examples/example03_drought_NL.ipynb
diff --git a/doc/examples/example04_package_comparison.ipynb b/doc/examples/example04_package_comparison.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,11 +9,10 @@ authors = [{ name = "Martin Vonk", email = "[email protected]" }]
 description = "A simple Python package to calculate drought indices for time series such as the SPI, SPEI and SGI."
 readme = "README.md"
 license = { file = "LICENSE" }
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dependencies = ["numpy", "scipy", "matplotlib", "pandas"]
 classifiers = [
         'Programming Language :: Python :: 3 :: Only',
-        'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
         'Programming Language :: Python :: 3.10',
         'Programming Language :: Python :: 3.11',
@@ -63,7 +62,7 @@ line-length = 88
 legacy_tox_ini = """
     [tox]
     requires = tox>=4
-    env_list = format, type, lint, py38, py39, py310, py311, py312
+    env_list = format, type, lint, py39, py310, py311, py312
 
     [testenv]
     description = run unit tests

diff --git a/src/spei/__init__.py b/src/spei/__init__.py
@@ -1,4 +1,4 @@
 # flake8: noqa
-from . import climdex, plot, si, utils
+from . import climdex, dist, plot, si, utils
 from ._version import __version__, show_versions
-from .si import sgi, spei, spi, ssfi
+from .si import SI, sgi, spei, spi, ssfi
diff --git a/src/spei/_version.py b/src/spei/_version.py
@@ -1,10 +1,10 @@
 from importlib import metadata
 from platform import python_version
 
-__version__ = "0.3.5"
+__version__ = "0.4.0"
 
 
-def show_versions() -> None:
+def show_versions() -> str:
     msg = f"Versions\npython: {python_version()}\nspei: {__version__}\n"
 
     requirements = metadata.requires("spei")
@@ -13,4 +13,4 @@ def show_versions() -> None:
         for dep in deps:
             msg += f"{dep}: {metadata.version(dep)}\n"
 
-    print(msg)
+    return msg
diff --git a/src/spei/climdex.py b/src/spei/climdex.py
@@ -33,7 +33,7 @@ def sdii(series: Series, threshold: float = 1.0, period: str = "30D") -> Series:
     return series.loc[w].resample(period).sum() / w.sum()
 
 
-def rnmm(series: Series, threshold: float, period: str = "1Y") -> Series:
+def rnmm(series: Series, threshold: float, period: str = "1YE") -> Series:
     """Annual count of days when precipitation ≥ n mm. n is a user-defined threshold"""
     series = validate_series(series)
     _ = validate_index(series.index)
@@ -43,12 +43,12 @@ def rnmm(series: Series, threshold: float, period: str = "1Y") -> Series:
     return w.resample(period).sum()
 
 
-def r10mm(series: Series, threshold: float = 10.0, period: str = "1Y") -> Series:
+def r10mm(series: Series, threshold: float = 10.0, period: str = "1YE") -> Series:
     """Annual count of days when precipitation ≥ 10 mm"""
     return rnmm(series=series, threshold=threshold, period=period)
 
 
-def r20mm(series: Series, threshold: float = 20.0, period: str = "1Y") -> Series:
+def r20mm(series: Series, threshold: float = 20.0, period: str = "1YE") -> Series:
     """Annual count of days when precipitation ≥ 20 mm"""
     return rnmm(series=series, threshold=threshold, period=period)
 
@@ -75,7 +75,7 @@ def cwd(series: Series, threshold: float = 1.0, period: str = "365D") -> Series:
     return w.diff().rolling(period).sum().dropna().astype(int)
 
 
-def prcptot(series: Series, period: str = "1Y") -> Series:
+def prcptot(series: Series, period: str = "1YE") -> Series:
     """Total precipitation on wet days over a certain period"""
     series = validate_series(series)
     _ = validate_index(series.index)
@@ -84,7 +84,7 @@ def prcptot(series: Series, period: str = "1Y") -> Series:
 
 
 def rnnp(
-    series: Series, quantile: float, threshold: float = 1.0, period: str = "1Y"
+    series: Series, quantile: float, threshold: float = 1.0, period: str = "1YE"
 ) -> Series:
     """Total amount of precipitation on wet days above certain quantile"""
     series = validate_series(series)
@@ -97,21 +97,21 @@ def rnnp(
 
 
 def r95p(
-    series: Series, quantile: float = 0.95, threshold: float = 1.0, period: str = "1Y"
+    series: Series, quantile: float = 0.95, threshold: float = 1.0, period: str = "1YE"
 ) -> Series:
     """Total amount of precipitation on very wet days"""
     return rnnp(series=series, quantile=quantile, threshold=threshold, period=period)
 
 
 def r99p(
-    series: Series, quantile: float = 0.99, threshold: float = 1.0, period: str = "1Y"
+    series: Series, quantile: float = 0.99, threshold: float = 1.0, period: str = "1YE"
 ) -> Series:
     """Total amount of precipitation on extremely wet days"""
     return rnnp(series=series, quantile=quantile, threshold=threshold, period=period)
 
 
 def r95ptot(
-    series: Series, quantile: float = 0.95, threshold: float = 1.0, period: str = "1Y"
+    series: Series, quantile: float = 0.95, threshold: float = 1.0, period: str = "1YE"
 ) -> Series:
     """Contribution to total precipitation from very wet days"""
     r95 = r95p(series=series, quantile=quantile, threshold=threshold, period=period)
@@ -120,7 +120,7 @@ def r95ptot(
 
 
 def r99ptot(
-    series: Series, quantile: float = 0.99, threshold: float = 1.0, period: str = "1Y"
+    series: Series, quantile: float = 0.99, threshold: float = 1.0, period: str = "1YE"
 ) -> Series:
     """Contribution to total precipitation from extremely wet days"""
     r99 = r99p(series=series, quantile=quantile, threshold=threshold, period=period)

diff --git a/src/spei/dist.py b/src/spei/dist.py
@@ -0,0 +1,169 @@
+from dataclasses import dataclass, field
+from typing import List, Literal, Optional, Tuple
+
+from numpy import std
+from pandas import Series
+from scipy.stats import kstest
+
+from ._typing import ContinuousDist
+
+
+@dataclass
+class Dist:
+    """
+    Represents a distribution associated with data.
+
+    Parameters
+    ----------
+    data : Series
+        The input data for fitting the distribution.
+    dist : ContinuousDist
+        The SciPy continuous distribution to be fitted.
+    prob_zero : bool, default=False
+        Flag indicating whether the probability of zero values in the series is
+        calculated by the occurrence.
+    data_window : Optional[Series], default=None
+        Series used for fitting the distribution instead of `data` (if
+        provided). The cdf and pdf are still evaluated on `data`.
+
+    Attributes
+    ----------
+    loc : float
+        Location of the distribution
+    scale : float
+        Scale of the distribution
+    pars : Optional[List[float]]
+        Attribute storing additional distribution parameters (if applicable).
+    p0 : float
+        The probability of zero values in the data. Only calculated if prob_zero=True.
+
+    Methods
+    -------
+    __post_init__(self) -> None
+        Initializes the Dist class and fits the distribution.
+    fit_dist(data: Series, dist: ContinuousDist) -> Tuple
+        Fits a Scipy continuous distribution to the data.
+    cdf(self) -> Series
+        Evaluates the fitted cumulative distribution function on `data`.
+    pdf(self) -> Series
+        Evaluates the fitted probability density function on sorted `data`.
+    ks_test(self, method) -> float
+        Returns the p-value of the Kolmogorov-Smirnov goodness of fit test.
+
+    Notes
+    -----
+    The `fit_dist` method uses the `dist.fit` function from Scipy to estimate
+    distribution parameters. If the fitted distribution requires additional
+    parameters beyond `loc` and `scale`, they are stored in the `pars` attribute.
+    """
+
+    data: Series = field(init=True, repr=False)
+    dist: ContinuousDist
+    loc: float = field(init=False, repr=True)
+    scale: float = field(init=False, repr=True)
+    pars: Optional[List[float]] = field(init=False, repr=False)
+    prob_zero: bool = field(default=False, init=True, repr=False)
+    p0: float = field(default=0.0, init=False, repr=False)
+    data_window: Optional[Series] = field(default=None, init=True, repr=False)
+
+    def __post_init__(self) -> None:
+        """
+        Post initializes the Dist class by fitting the distribution.
+        """
+        # fit on the window when one is given, otherwise on the full data
+        data_fit = self.data_window if self.data_window is not None else self.data
+        pars, loc, scale = self.fit_dist(data=data_fit, dist=self.dist)
+        self.loc = loc
+        self.scale = scale
+        self.pars = pars
+
+        if self.prob_zero:
+            # fraction of exact zeros in the series used for fitting
+            self.p0 = (data_fit == 0.0).sum() / len(data_fit)
+
+    @staticmethod
+    def fit_dist(
+        data: Series, dist: ContinuousDist
+    ) -> Tuple[Optional[List[float]], float, float]:
+        """
+        Fits a Scipy continuous distribution to the data.
+
+        Parameters
+        ----------
+        data : Series
+            The input data for fitting.
+        dist : ContinuousDist
+            The continuous distribution to be fitted.
+
+        Returns
+        -------
+        Tuple
+            Tuple containing distribution parameters (pars, loc, scale). `pars`
+            is None when the fit returns only a location and a scale.
+        """
+        # the standard deviation of the data is the initial guess for the scale
+        fit_tuple = dist.fit(data, scale=std(data))
+        if len(fit_tuple) == 2:
+            loc, scale = fit_tuple
+            pars = None
+        else:
+            # everything before the last two entries is a shape parameter
+            *pars, loc, scale = fit_tuple
+        return pars, loc, scale
+
+    def _shape_args(self) -> Tuple[float, ...]:
+        """Return the fitted shape parameters as a tuple of positional arguments"""
+        return tuple(self.pars) if self.pars is not None else ()
+
+    def cdf(self) -> Series:
+        """Compute cumulative distribution function of a Scipy Continuous
+        Distribution for the data.
+
+        Returns
+        -------
+        Series
+            Cumulative probabilities with the same index as `data`.
+        """
+        # SciPy expects one positional argument per shape parameter, so the
+        # parameters are unpacked instead of being passed as a single list
+        cdf = self.dist.cdf(
+            self.data.values, *self._shape_args(), loc=self.loc, scale=self.scale
+        )
+
+        if self.prob_zero:
+            # rescale to make room for the probability mass at zero and assign
+            # that mass to the zero values themselves
+            cdf = self.p0 + (1 - self.p0) * cdf
+            cdf[self.data.values == 0.0] = self.p0
+
+        return Series(cdf, index=self.data.index, dtype=float)
+
+    def pdf(self) -> Series:
+        """Compute probability density function of a Scipy Continuous
+        Distribution for the data sorted by value.
+
+        Returns
+        -------
+        Series
+            Probability densities indexed by the index of the sorted data.
+        """
+        data_pdf = self.data.sort_values()
+        pdf = self.dist.pdf(
+            data_pdf.values, *self._shape_args(), loc=self.loc, scale=self.scale
+        )
+
+        # TODO: check what to do if prob_zero
+
+        return Series(pdf, index=data_pdf.index, dtype=float)
+
+    def ks_test(
+        self,
+        method: Literal["auto", "exact", "approx", "asymp"] = "auto",
+    ) -> float:
+        """Perform the two-sided Kolmogorov-Smirnov test for
+        goodness of fit of the fitted distribution. The
+        null hypothesis is that the data and distributions
+        are identical, the alternative is that they are
+        not identical. [scipy_2021]_
+
+        Parameters
+        ----------
+        method : Literal['auto', 'exact', 'approx', 'asymp'], optional
+            Defines the distribution used for calculating the p-value. The
+            following options are available (default is 'auto'): 'auto' selects
+            one of the other options, 'exact' uses the exact distribution of
+            test statistic, 'approx' approximates the two-sided probability
+            with twice the one-sided probability, 'asymp' uses asymptotic
+            distribution of test statistic
+
+        Returns
+        -------
+        float
+            p-value
+
+        References
+        ----------
+        .. [scipy_2021] Onnen, H.: Intro to Probability
+        Distributions and Distribution Fitting with Python's
+        SciPy, 2021.
+        """
+        # kstest calls cdf(x, *args), so every shape parameter has to be its own
+        # entry in args, followed by loc and scale
+        args = (*self._shape_args(), self.loc, self.scale)
+        kstest_result = kstest(
+            rvs=self.data, cdf=self.dist.name, args=args, method=method
+        )
+        # the caller decides on rejecting the null hypothesis by comparing the
+        # returned p-value to a significance level
+        return kstest_result.pvalue