mbar overlap matrix plot (#107)

* partially addresses #73 * add MBAR overlap matrix to the MBAR estimator * add new visualisation module * add plotting function for overlap matrix (from alchemical-analysis, code used under MIT License) * add matplotlib as dependency * add tests * add docs * update CHANGES * update AUTHORS Co-authored-by: Oliver Beckstein <[email protected]>
alchemistry · Aug 26, 2020 · 7be2e41 · 7be2e41
1 parent 7fcdf7a
commit 7be2e41
Show file tree

Hide file tree

Showing 15 changed files with 217 additions and 6 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 *.pyc
 .vscode
+*.DS_Store
+build
diff --git a/.travis.yml b/.travis.yml
@@ -2,6 +2,10 @@ language: python
 sudo: required
 dist: xenial
 
+env:
+  global:
+    - MPLBACKEND=agg
+
 python:
    - "2.7"
    - "3.5"

diff --git a/AUTHORS b/AUTHORS
@@ -33,4 +33,6 @@ Chronological list of authors
 2019
   - Victoria Lim (@vlim)
   - Hyungro Lee (@lee212)
-  - Mohammad S. Barhaghi (@msoroush)
+  - Mohammad S. Barhaghi (@msoroush)
+2020
+  - Zhiyi Wu (@xiki-tempula)
diff --git a/CHANGES b/CHANGES
@@ -15,11 +15,12 @@ The rules for this file:
 ------------------------------------------------------------------------------
 
 
-??/??/2020 wehs7661, dotsdl
+??/??/2020 wehs7661, dotsdl, xiki-tempula
 
 * 0.?.?
 
 Enhancements
+  - Allow the overlap matrix of the MBAR estimator to be plotted. (PR #107)
 
 Deprecations
 

diff --git a/docs/api_principles.rst b/docs/api_principles.rst
@@ -1,5 +1,5 @@
 API principles
-============
+==============
 
 The following is an overview over the guiding principles and ideas that underpin the API of alchemlyb.
 

diff --git a/docs/conf.py b/docs/conf.py
@@ -50,8 +50,10 @@
 
 # General information about the project.
 project = u'alchemlyb'
-author = u'David Dotson and contributors'
-copyright = u'2017-2019, ' + author
+author = u'''David Dotson, Ian Kenney, Oliver Beckstein, Shuai Liu,
+Travis Jensen, Bryce Allen, Dominik Wille, Victoria Lim, Hyungro Lee,
+Mohammad S. Barhaghi, Zhiyi Wu'''
+copyright = u'2017-2020, ' + author
 
 
 # The version info for the project you're documenting, acts as replacement for

diff --git a/docs/images/O_MBAR.png b/docs/images/O_MBAR.png
diff --git a/docs/index.rst b/docs/index.rst
@@ -60,6 +60,7 @@ Contributions are very welcome. If you have bug reports or feature requests or q
     parsing
     preprocessing
     estimators
+    visualisation
 
 .. toctree::
    :maxdepth: 1

diff --git a/docs/visualisation.rst b/docs/visualisation.rst
@@ -0,0 +1,39 @@
+Visualisation of the results
+============================
+It is quite often that the user want to visualise the results to gain
+confidence on the computed free energy. **alchemlyb** provides various
+visualisation tools to help user to judge the estimate.
+
+.. _plot_overlap_matrix:
+
+Overlap Matrix of the MBAR
+--------------------------
+The accuracy of the :class:`~alchemlyb.estimators.MBAR` estimator depends on
+the overlap between different lambda states. The overlap matrix from the
+:class:`~alchemlyb.estimators.MBAR` estimator could be plotted using
+:func:`~alchemlyb.visualisation.plot_mbar_overlap_matrix` to check
+the degree of overlap. It is recommended that there should be at least
+**0.03** [Klimovich2015]_ overlap between neighboring states. ::
+
+    >>> import pandas as pd
+    >>> from alchemtest.gmx import load_benzene
+    >>> from alchemlyb.parsing.gmx import extract_u_nk
+    >>> from alchemlyb.estimators import MBAR
+
+    >>> bz = load_benzene().data
+    >>> u_nk_coul = pd.concat([extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']])
+    >>> mbar_coul = MBAR()
+    >>> mbar_coul.fit(u_nk_coul)
+
+    >>> from alchemlyb.visualisation import plot_mbar_overlap_matrix
+    >>> ax = plot_mbar_overlap_matrix(mbar_coul.overlap_matrix)
+    >>> ax.figure.savefig('O_MBAR.pdf', bbox_inches='tight', pad_inches=0.0)
+
+
+Will give a plot looks like this
+
+.. image:: images/O_MBAR.png
+
+.. [Klimovich2015] Klimovich, P.V., Shirts, M.R. & Mobley, D.L. Guidelines for
+   the analysis of free energy calculations. J Comput Aided Mol Des 29, 397–411
+   (2015). https://doi.org/10.1007/s10822-015-9840-9
diff --git a/docs/visualisation/alchemlyb.visualisation.plot_mbar_overlap_matrix.rst b/docs/visualisation/alchemlyb.visualisation.plot_mbar_overlap_matrix.rst
@@ -0,0 +1,18 @@
+.. _plot_overlap:
+
+Plot Overlap Matrix from MBAR
+=============================
+
+The function :func:`~alchemlyb.visualisation.plot_mbar_overlap_matrix` allows
+the user to plot the overlap matrix from
+:attr:`~alchemlyb.estimators.MBAR.overlap_matrix`. The user can pass
+:class:`matplotlib.axes.Axes` into the function to have the overlap maxtrix
+drawn on a specific axes. The user could also specify a list of lambda states
+to be skipped when labelling the states.
+
+Please check :ref:`How to plot MBAR overlap matrix <plot_overlap_matrix>` for
+usage.
+
+API Reference
+-------------
+.. autofunction:: alchemlyb.visualisation.plot_mbar_overlap_matrix
diff --git a/setup.py b/setup.py
@@ -29,5 +29,5 @@
       license='BSD',
       long_description=open('README.rst').read(),
       tests_require = ['pytest', 'alchemtest'],
-      install_requires=['numpy', 'pandas>=0.23.0', 'pymbar>=3.0.5', 'scipy', 'scikit-learn']
+      install_requires=['numpy', 'pandas>=0.23.0', 'pymbar>=3.0.5', 'scipy', 'scikit-learn', 'matplotlib']
       )
diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py
@@ -45,6 +45,9 @@ class MBAR(BaseEstimator):
     states_ : list
         Lambda states for which free energy differences were obtained.
 
+    overlap_matrix: numpy.matrix
+        The overlap matrix.
+
     """
 
     def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7,
@@ -98,3 +101,19 @@ def fit(self, u_nk):
 
     def predict(self, u_ln):
         pass
+
+    @property
+    def overlap_matrix(self):
+        r"""MBAR overlap matrix.
+        
+        The estimated state overlap matrix :math:`O_{ij}` is an estimate of the probability 
+        of observing a sample from state :math:`i` in state :math:`j`.
+        
+        The :attr:`overlap_matrix` is computed on-the-fly. Assign it to a variable if
+        you plan to re-use it.
+        
+        See Also
+        ---------
+        pymbar.mbar.MBAR.computeOverlap
+        """
+        return self._mbar.computeOverlap()['matrix']
diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py
@@ -0,0 +1,27 @@
+import matplotlib
+import pandas as pd
+
+from alchemtest.gmx import load_benzene
+from alchemlyb.parsing.gmx import extract_u_nk
+from alchemlyb.estimators import MBAR
+from alchemlyb.visualisation.mbar_matrix import plot_mbar_overlap_matrix
+
+def test_plot_mbar_omatrix():
+    '''Just test if the plot runs'''
+    bz = load_benzene().data
+    u_nk_coul = pd.concat([extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']])
+    mbar_coul = MBAR()
+    mbar_coul.fit(u_nk_coul)
+
+    assert isinstance(plot_mbar_overlap_matrix(mbar_coul.overlap_matrix),
+               matplotlib.axes.Axes)
+    assert isinstance(plot_mbar_overlap_matrix(mbar_coul.overlap_matrix, [1,]),
+               matplotlib.axes.Axes)
+
+    # Bump up coverage
+    overlap_maxtrix = mbar_coul.overlap_matrix
+    overlap_maxtrix[0,0] = 0.0025
+    overlap_maxtrix[-1, -1] = 0.9975
+    assert isinstance(plot_mbar_overlap_matrix(overlap_maxtrix),
+               matplotlib.axes.Axes)
+
diff --git a/src/alchemlyb/visualisation/__init__.py b/src/alchemlyb/visualisation/__init__.py
@@ -0,0 +1 @@
+from .mbar_matrix import plot_mbar_overlap_matrix
diff --git a/src/alchemlyb/visualisation/mbar_matrix.py b/src/alchemlyb/visualisation/mbar_matrix.py
@@ -0,0 +1,95 @@
+"""Functions for Plotting the overlay matrix for the MBAR estimator.
+
+To assess the quality of the MBAR estimator, the overlap matrix between
+the lambda states can be computed and the more overlap is observed between
+the states, the more reliable the estimate is. One way of accessing the
+quality of the overlap matrix is by plotting it.
+
+The code for producing the overlap matrix plot is taken from
+: `Alchemical Analysis <https://github.com/MobleyLab/alchemical-analysis>`_.
+
+"""
+from __future__ import division
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+def plot_mbar_overlap_matrix(matrix, skip_lambda_index=[], ax=None):
+    '''Plot the MBAR overlap matrix.
+
+    Parameters
+    ----------
+    matrix : numpy.matrix
+        DataFrame of the overlap matrix obtained from
+        :attr:`~alchemlyb.estimators.MBAR.overlap_matrix`
+    skip_lambda_index : List
+        list of lambda indices to be omitted from plotting process.
+        Default: [].
+    ax : matplotlib.axes.Axes
+        Matplotlib axes object where the plot will be drawn on. If ax=None,
+        a new axes will be generated.
+
+    Returns
+    -------
+    matplotlib.axes.Axes
+        An axes with the overlap matrix drawn.
+
+    Notes
+    -----
+    The code is taken and modified from
+    : `Alchemical Analysis <https://github.com/MobleyLab/alchemical-analysis>`_
+
+    '''
+    # Compute the size of the figure, if ax is not given.
+    max_prob = matrix.max()
+    size = len(matrix)
+    if ax is None:
+        fig, ax = plt.subplots(figsize=(size / 2, size / 2))
+    ax.set_xticks([])
+    ax.set_yticks([])
+    ax.axis('off')
+    for i in range(size):
+        if i != 0:
+            ax.axvline(x=i, ls='-', lw=0.5, color='k', alpha=0.25)
+            ax.axhline(y=i, ls='-', lw=0.5, color='k', alpha=0.25)
+        for j in range(size):
+            if matrix[j, i] < 0.005:
+                ii = ''
+            elif matrix[j, i] > 0.995:
+                ii = '1.00'
+            else:
+                ii = ("{:.2f}".format(matrix[j, i])[1:])
+            alf = matrix[j, i] / max_prob
+            ax.fill_between([i, i + 1], [size - j, size - j], [size - (j + 1), size - (j + 1)], color='k', alpha=alf)
+            ax.annotate(ii, xy=(i, j), xytext=(i + 0.5, size - (j + 0.5)), size=8, textcoords='data', va='center',
+                        ha='center', color=('k' if alf < 0.5 else 'w'))
+
+    if skip_lambda_index:
+        ks = [int(l) for l in skip_lambda_index]
+        ks = np.delete(np.arange(size + len(ks)), ks)
+    else:
+        ks = range(size)
+    for i in range(size):
+        ax.annotate(ks[i], xy=(i + 0.5, 1), xytext=(i + 0.5, size + 0.5), size=10, textcoords=('data', 'data'),
+                    va='center', ha='center', color='k')
+        ax.annotate(ks[i], xy=(-0.5, size - (size - 0.5)), xytext=(-0.5, size - (i + 0.5)), size=10, textcoords=('data', 'data'),
+                    va='center', ha='center', color='k')
+    ax.annotate('$\lambda$', xy=(-0.5, size - (size - 0.5)), xytext=(-0.5, size + 0.5), size=10, textcoords=('data', 'data'),
+                va='center', ha='center', color='k')
+    ax.plot([0, size], [0, 0], 'k-', lw=4.0, solid_capstyle='butt')
+    ax.plot([size, size], [0, size], 'k-', lw=4.0, solid_capstyle='butt')
+    ax.plot([0, 0], [0, size], 'k-', lw=2.0, solid_capstyle='butt')
+    ax.plot([0, size], [size, size], 'k-', lw=2.0, solid_capstyle='butt')
+
+    cx = np.repeat(range(size + 1), 2)
+    cy = sorted(np.repeat(range(size + 1), 2), reverse=True)
+    ax.plot(cx[2:-1], cy[1:-2], 'k-', lw=2.0)
+    ax.plot(np.array(cx[2:-3]) + 1, cy[1:-4], 'k-', lw=2.0)
+    ax.plot(cx[1:-2], np.array(cy[:-3]) - 1, 'k-', lw=2.0)
+    ax.plot(cx[1:-4], np.array(cy[:-5]) - 2, 'k-', lw=2.0)
+
+    ax.set_xlim(-1, size)
+    ax.set_ylim(0, size + 1)
+    return ax
+
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,10 @@ language: python @@
     sudo: required
     dist: xenial
+    env:
+      global:
+        - MPLBACKEND=agg
     python:
        - "2.7"
        - "3.5"
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .mbar_matrix import plot_mbar_overlap_matrix