Commit 337aac5
Merge branch 'master' into lammps
jaclark5 committed Oct 22, 2024
2 parents 1dd796b + 190308e commit 337aac5
Showing 12 changed files with 205 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
@@ -43,7 +43,7 @@ jobs:
python-version: 3.11

steps:
-  - uses: actions/checkout@v3
+  - uses: actions/checkout@v4

# More info on options: https://github.com/conda-incubator/setup-miniconda
- uses: mamba-org/setup-micromamba@main
6 changes: 3 additions & 3 deletions .github/workflows/deploy.yaml
@@ -29,12 +29,12 @@ jobs:
runs-on: ubuntu-latest

steps:
-  - uses: actions/checkout@v3
+  - uses: actions/checkout@v4
with:
fetch-depth: 0

- name: setup_miniconda
-    uses: conda-incubator/setup-miniconda@v2
+    uses: conda-incubator/setup-miniconda@v3
with:
python-version: "3.10"
auto-update-conda: true
@@ -55,7 +55,7 @@ jobs:
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
-    repository_url: https://test.pypi.org/legacy/
+    repository-url: https://test.pypi.org/legacy/

- name: publish_pypi
if: github.event_name == 'release' && github.event.action == 'published'
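
The last hunk tracks an input rename in pypa/gh-action-pypi-publish: the
underscored `repository_url` input is deprecated in favour of the hyphenated
`repository-url`, so the workflow switches to the supported spelling.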
16 changes: 14 additions & 2 deletions CHANGES
@@ -12,13 +12,24 @@ The rules for this file:
* accompany each entry with github issue/PR number (Issue #xyz)
* release numbers follow "Semantic Versioning" https://semver.org

-??/??/2024 jaclark5
+**/**/**** xiki-tempula, jaclark5

-* 2.4.1
+* 2.5.0

Enhancements
- Parallelise read and preprocess for ABFE workflow. (PR #371)
- Add support for LAMMPS FEP files (Issue #349, PR #348)


09/19/2024 orbeckst, jaclark5

* 2.4.1

Fixes
- [doc] tutorial: use alchemlyb.concat (PR #399)
- Resolve pandas FutureWarnings in bar_.py and mbar_.py (issue #408 PR #407)


09/17/2024 jaclark5, orbeckst

* 2.4.0
@@ -31,6 +42,7 @@ Enhancements
Changes
- modernize build system: replaced setup.py,cfg with pyproject.toml (#385)


08/24/2024 xiki-tempula

* 2.3.2
1 change: 1 addition & 0 deletions devtools/conda-envs/test_env.yaml
@@ -11,6 +11,7 @@ dependencies:
- matplotlib>=3.7
- loguru
- pyarrow
- joblib

# Testing
- pytest
3 changes: 2 additions & 1 deletion docs/visualisation.rst
@@ -177,7 +177,8 @@ and the corresponding error. ::
>>> backward_error.append(estimate.d_delta_f_.iloc[0,-1])

>>> from alchemlyb.visualisation import plot_convergence
->>> ax = plot_convergence(forward, forward_error, backward, backward_error)
+>>> df = pd.DataFrame({'Forward':forward, 'Backward':backward, 'Forward_Error':forward_error, 'Backward_Error':backward_error})
+>>> ax = plot_convergence(df)
>>> ax.figure.savefig('dF_t.pdf')

Will give a plot that looks like this
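
This hunk follows the updated plot_convergence API: instead of four positional
lists, it now takes a single DataFrame with `Forward`, `Forward_Error`,
`Backward` and `Backward_Error` columns, built with pandas as shown above.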
37 changes: 37 additions & 0 deletions docs/workflows/alchemlyb.workflows.ABFE.rst
@@ -153,6 +153,43 @@ to the data generated at each stage of the analysis. ::
>>> # Convergence analysis
>>> workflow.check_convergence(10, dF_t='dF_t.pdf')

Parallelisation of Data Reading and Decorrelation
-------------------------------------------------

The estimation step of the workflow is already parallelised using JAX. The
reading and decorrelation stages can additionally be parallelised using
``joblib``, by passing the number of jobs to run in parallel via the
``n_jobs`` parameter to the following methods:

- :meth:`~alchemlyb.workflows.ABFE.read`
- :meth:`~alchemlyb.workflows.ABFE.preprocess`

To enable parallel execution, specify the ``n_jobs`` parameter; setting
``n_jobs=-1`` uses all available resources. ::

>>> workflow = ABFE(units='kcal/mol', software='GROMACS', dir=dir,
>>>                 prefix='dhdl', suffix='xvg', T=298, outdirectory='./')
>>> workflow.read(n_jobs=-1)
>>> workflow.preprocess(n_jobs=-1)

In fully automated mode, ``n_jobs=-1`` can be passed directly to the
:meth:`~alchemlyb.workflows.ABFE.run` method, which implicitly parallelises
the reading and decorrelation stages. ::

>>> workflow = ABFE(units='kcal/mol', software='GROMACS', dir=dir,
>>>                 prefix='dhdl', suffix='xvg', T=298, outdirectory='./')
>>> workflow.run(n_jobs=-1)

While the default ``joblib`` settings are suitable for most environments, the
parallelisation backend can be customised to match the infrastructure. For
example, the threading backend is selected as follows. ::

>>> import joblib
>>> workflow = ABFE(units='kcal/mol', software='GROMACS', dir=dir,
>>>                 prefix='dhdl', suffix='xvg', T=298, outdirectory='./')
>>> with joblib.parallel_config(backend="threading"):
>>>     workflow.run(n_jobs=-1)
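
Under the hood, the parallelised read stage follows the standard ``joblib``
delayed-call pattern sketched below. This is a minimal illustration rather
than alchemlyb's exact implementation: the file list and temperature are
assumptions, and :func:`~alchemlyb.parsing.gmx.extract_u_nk` is the public
GROMACS parser. ::

>>> import joblib
>>> from alchemlyb.parsing.gmx import extract_u_nk
>>> files = ['dhdl_00.xvg', 'dhdl_01.xvg']  # hypothetical file list
>>> u_nk_list = joblib.Parallel(n_jobs=2)(
>>>     joblib.delayed(extract_u_nk)(f, T=298) for f in files
>>> )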

API Reference
-------------
.. autoclass:: alchemlyb.workflows.ABFE
1 change: 1 addition & 0 deletions environment.yml
@@ -11,3 +11,4 @@ dependencies:
- pyarrow
- matplotlib>=3.7
- loguru
- joblib
1 change: 1 addition & 0 deletions pyproject.toml
@@ -50,6 +50,7 @@ dependencies = [
"matplotlib>=3.7",
"loguru",
"pyarrow",
"joblib",
]


19 changes: 16 additions & 3 deletions src/alchemlyb/estimators/bar_.py
@@ -97,7 +97,11 @@ def fit(self, u_nk):
# group u_nk by lambda states
groups = u_nk.groupby(level=u_nk.index.names[1:])
N_k = [
-    (len(groups.get_group(i)) if i in groups.groups else 0)
+    (
+        len(groups.get_group(i if isinstance(i, tuple) else (i,)))
+        if i in groups.groups
+        else 0
+    )
for i in u_nk.columns
]

@@ -119,12 +123,21 @@ def fit(self, u_nk):
continue

# get us from lambda step k
-uk = groups.get_group(self._states_[k])
+uk = groups.get_group(
+    self._states_[k]
+    if isinstance(self._states_[k], tuple)
+    else (self._states_[k],)
+)
# get w_F
w_f = uk.iloc[:, k + 1] - uk.iloc[:, k]

# get us from lambda step k+1
-uk1 = groups.get_group(self._states_[k + 1])
+uk1 = groups.get_group(
+    self._states_[k + 1]
+    if isinstance(self._states_[k + 1], tuple)
+    else (self._states_[k + 1],)
+)

# get w_R
w_r = uk1.iloc[:, k] - uk1.iloc[:, k + 1]

6 changes: 5 additions & 1 deletion src/alchemlyb/estimators/mbar_.py
@@ -127,7 +127,11 @@ def fit(self, u_nk):

groups = u_nk.groupby(level=u_nk.index.names[1:])
N_k = [
-    (len(groups.get_group(i)) if i in groups.groups else 0)
+    (
+        len(groups.get_group(i if isinstance(i, tuple) else (i,)))
+        if i in groups.groups
+        else 0
+    )
for i in u_nk.columns
]
self._states_ = u_nk.columns.values.tolist()
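
Both estimators carry the same fix for the pandas FutureWarning noted in
CHANGES (issue #408, PR #407): when grouping by a length-1 list of index
levels, newer pandas wants `get_group` keys as one-element tuples, whereas
multi-lambda data already produces tuple keys, so scalars are wrapped as
`(i,)` to cover both cases. A minimal, self-contained sketch of the behaviour
(not alchemlyb code):

import pandas as pd

# One lambda column -> a single-level MultiIndex, as in u_nk grouped by
# u_nk.index.names[1:]; the level name here is illustrative.
idx = pd.MultiIndex.from_tuples([(0.0,), (0.0,), (0.5,)], names=["fep-lambda"])
df = pd.DataFrame({"u": [0.1, 0.2, 0.3]}, index=idx)

groups = df.groupby(level=["fep-lambda"])
print(len(groups.get_group((0.0,))))  # 2 -- tuple key, future-proof
# groups.get_group(0.0) triggers the FutureWarning on pandas >= 2.2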
69 changes: 67 additions & 2 deletions src/alchemlyb/tests/test_workflow_ABFE.py
@@ -1,9 +1,11 @@
import os

import numpy as np
import pandas as pd
import pytest
from alchemtest.amber import load_bace_example
from alchemtest.gmx import load_ABFE
import joblib

import alchemlyb.parsing.amber
from alchemlyb.workflows.abfe import ABFE
@@ -88,15 +90,23 @@ def test_single_estimator(self, workflow, monkeypatch):
monkeypatch.setattr(workflow, "dHdl_sample_list", [])
monkeypatch.setattr(workflow, "estimator", dict())
workflow.run(
uncorr=None, estimators="MBAR", overlap=None, breakdown=True, forwrev=None
uncorr=None,
estimators="MBAR",
overlap=None,
breakdown=True,
forwrev=None,
)
assert "MBAR" in workflow.estimator

@pytest.mark.parametrize("forwrev", [None, False, 0])
def test_no_forwrev(self, workflow, monkeypatch, forwrev):
monkeypatch.setattr(workflow, "convergence", None)
workflow.run(
-    uncorr=None, estimators=None, overlap=None, breakdown=None, forwrev=forwrev
+    uncorr=None,
+    estimators=None,
+    overlap=None,
+    breakdown=None,
+    forwrev=forwrev,
)
assert workflow.convergence is None

@@ -445,3 +455,58 @@ def test_summary(self, workflow):
"""Test if if the summary is right."""
summary = workflow.generate_result()
assert np.isclose(summary["BAR"]["Stages"]["TOTAL"], 1.40405980473, 0.1)


class TestParallel:
@pytest.fixture(scope="class")
def workflow(self, tmp_path_factory):
outdir = tmp_path_factory.mktemp("out")
(outdir / "dhdl_00.xvg").symlink_to(load_ABFE()["data"]["complex"][0])
(outdir / "dhdl_01.xvg").symlink_to(load_ABFE()["data"]["complex"][1])
workflow = ABFE(
units="kcal/mol",
software="GROMACS",
dir=str(outdir),
prefix="dhdl",
suffix="xvg",
T=310,
)
workflow.read()
workflow.preprocess()
return workflow

@pytest.fixture(scope="class")
def parallel_workflow(self, tmp_path_factory):
outdir = tmp_path_factory.mktemp("out")
(outdir / "dhdl_00.xvg").symlink_to(load_ABFE()["data"]["complex"][0])
(outdir / "dhdl_01.xvg").symlink_to(load_ABFE()["data"]["complex"][1])
workflow = ABFE(
units="kcal/mol",
software="GROMACS",
dir=str(outdir),
prefix="dhdl",
suffix="xvg",
T=310,
)
with joblib.parallel_config(backend="threading"):
# The default backend is "loky", which is more robust but somehow didn't
# play well with pytest, but "loky" is perfectly fine outside pytest.
workflow.read(n_jobs=2)
workflow.preprocess(n_jobs=2)
return workflow

def test_read(self, workflow, parallel_workflow):
pd.testing.assert_frame_equal(
workflow.u_nk_list[0], parallel_workflow.u_nk_list[0]
)
pd.testing.assert_frame_equal(
workflow.u_nk_list[1], parallel_workflow.u_nk_list[1]
)

def test_preprocess(self, workflow, parallel_workflow):
pd.testing.assert_frame_equal(
workflow.u_nk_sample_list[0], parallel_workflow.u_nk_sample_list[0]
)
pd.testing.assert_frame_equal(
workflow.u_nk_sample_list[1], parallel_workflow.u_nk_sample_list[1]
)