Refactor into backend classes and allow batched inputs.

chemle · Dec 9, 2024 · 39d7c53 · 39d7c53
1 parent 4112be1
commit 39d7c53
Show file tree

Hide file tree

Showing 13 changed files with 1,409 additions and 892 deletions.
diff --git a/.gitignore b/.gitignore
@@ -26,3 +26,6 @@ results/
 
 # VSCode config
 .vscode/
+
+# Local test input.
+tests/input/deepmd
diff --git a/emle/_backends/_backend.py b/emle/_backends/_backend.py
@@ -0,0 +1,72 @@
+#######################################################################
+# EMLE-Engine: https://github.com/chemle/emle-engine
+#
+# Copyright: 2023-2024
+#
+# Authors: Lester Hedges   <[email protected]>
+#          Kirill Zinovjev <[email protected]>
+#
+# EMLE-Engine is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# EMLE-Engine is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with EMLE-Engine. If not, see <http://www.gnu.org/licenses/>.
+#####################################################################
+
+"""In-vacuo backend base class."""
+
+__all__ = ["Backend"]
+
+
+class Backend:
+    """
+    Base class for in-vacuo backends.
+
+    This class should not be instantiated directly, but should be subclassed
+    by specific backends.
+    """
+
+    def __init__(self):
+        """
+        Constructor.
+        """
+
+        # Don't allow user to create an instance of the base class.
+        if type(self) is Backend:
+            raise NotImplementedError(
+                "'Backend' is an abstract class and should not be instantiated directly"
+            )
+
+    def calculate(self, atomic_numbers, xyz, forces=True):
+        """
+        Compute the energy and forces.
+
+        Parameters
+        ----------
+
+        atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,)
+            The atomic numbers of the atoms in the QM region.
+
+        xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3)
+            The coordinates of the atoms in the QM region in Angstrom.
+
+        forces: bool
+            Whether to calculate and return forces.
+
+        Returns
+        -------
+
+        energy: float
+            The in-vacuo energy in Eh.
+
+        forces: numpy.ndarray
+            The in-vacuo forces in Eh/Bohr.
+        """
+        raise NotImplementedError
diff --git a/emle/_backends/_deepmd.py b/emle/_backends/_deepmd.py
@@ -20,104 +20,160 @@
 # along with EMLE-Engine. If not, see <http://www.gnu.org/licenses/>.
 #####################################################################
 
-"""DeepMD in-vacuo backend implementation."""
+"""DeePMD in-vacuo backend implementation."""
 
-__all__ = ["calculate_deepmd"]
+__all__ = ["DeePMD"]
 
+import ase as _ase
 import numpy as _np
+import os as _os
 
-from .._constants import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM
+from .._units import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM
 
+from ._backend import Backend as _Backend
 
-def calculate_deepmd(calculator, xyz, elements, gradient=True):
-    """
-    Internal function to compute in vacuo energies and gradients using
-    DeepMD.
-
-    Parameters
-    ----------
-
-    calculator: :class:`emle.calculator.EMLECalculator`
-        The EMLECalculator instance.
-
-    xyz: numpy.array
-        The coordinates of the QM region in Angstrom.
-
-    elements: [str]
-        The list of elements.
 
-    gradient: bool
-        Whether to return the gradient.
-
-    Returns
-    -------
-
-    energy: float
-        The in vacuo ML energy in Eh.
-
-    gradients: numpy.array
-        The in vacuo ML gradient in Eh/Bohr.
+class DeePMD(_Backend):
+    """
+    DeepMD in-vacuo backend implementation.
     """
 
-    if not isinstance(xyz, _np.ndarray):
-        raise TypeError("'xyz' must be of type 'numpy.ndarray'")
-    if xyz.dtype != _np.float64:
-        raise TypeError("'xyz' must have dtype 'float64'.")
-
-    if not isinstance(elements, (list, tuple)):
-        raise TypeError("'elements' must be of type 'list'")
-    if not all(isinstance(element, str) for element in elements):
-        raise TypeError("'elements' must be a 'list' of 'str' types")
-
-    # Reshape to a frames x (natoms x 3) array.
-    xyz = xyz.reshape([1, -1])
-
-    e_list = []
-    f_list = []
-
-    # Run a calculation for each model and take the average.
-    for dp in calculator._deepmd_potential:
-        # Work out the mapping between the elements and the type indices
-        # used by the model.
-        try:
-            mapping = {
-                element: index for index, element in enumerate(dp.get_type_map())
-            }
-        except:
-            raise ValueError(f"DeePMD model doesnt' support element '{element}'")
-
-        # Now determine the atom types based on the mapping.
-        atom_types = [mapping[element] for element in elements]
-
-        e, f, _ = dp.eval(xyz, cells=None, atom_types=atom_types)
-        e_list.append(e)
-        f_list.append(f)
-
-    # Write the maximum DeePMD force deviation to file.
-    if calculator._deepmd_deviation:
-        from deepmd.infer.model_devi import calc_model_devi_f
-
-        max_f_std = calc_model_devi_f(_np.array(f_list))[0][0]
-        if (
-            calculator._deepmd_deviation_threshold
-            and max_f_std > calculator._deepmd_deviation_threshold
-        ):
-            msg = "Force deviation threshold reached!"
-            _logger.error(msg)
-            raise ValueError(msg)
-        with open(calculator._deepmd_deviation, "a") as f:
-            f.write(f"{max_f_std:12.5f}\n")
-        # To be written to qm_xyz_file.
-        calculator._max_f_std = max_f_std
-
-    # Take averages and return. (Gradient equals minus the force.)
-    e_mean = _np.mean(_np.array(e_list), axis=0)
-    grad_mean = -_np.mean(_np.array(f_list), axis=0)
-    return (
-        (
-            e_mean[0][0] * _EV_TO_HARTREE,
-            grad_mean[0] * _EV_TO_HARTREE * _BOHR_TO_ANGSTROM,
+    def __init__(self, model, deviation=None, deviation_threshold=None):
+        # We support a str, or list/tuple of strings.
+        if not isinstance(model, (str, list, tuple)):
+            raise TypeError("'model' must be of type 'str', or a list of 'str' types")
+        else:
+            # Make sure all values are strings.
+            if isinstance(model, (list, tuple)):
+                for m in model:
+                    if not isinstance(m, str):
+                        raise TypeError(
+                            "'model' must be of type 'str', or a list of 'str' types"
+                        )
+            # Convert to a list.
+            else:
+                model = [model]
+
+            # Make sure all of the model files exist.
+            for m in model:
+                if not _os.path.isfile(m):
+                    raise IOError(f"Unable to locate DeePMD model file: '{m}'")
+
+            # Validate the deviation file.
+            if deviation is not None:
+                if not isinstance(deviation, str):
+                    raise TypeError("'deviation' must be of type 'str'")
+
+                self._deviation = deviation
+
+                if deviation_threshold is not None:
+                    try:
+                        deviation_threshold = float(deviation_threshold)
+                    except:
+                        raise TypeError("'deviation_threshold' must be of type 'float'")
+
+                self._deviation_threshold = deviation_threshold
+            else:
+                self._deviation = None
+                self._deviation_threshold = None
+
+            # Store the list of model files, removing any duplicates.
+            self._model = list(set(model))
+            if len(self._model) == 1 and deviation:
+                raise IOError(
+                    "More that one DeePMD model needed to calculate the deviation!"
+                )
+
+            # Initialise DeePMD backend attributes.
+            try:
+                from deepmd.infer import DeepPot as _DeepPot
+
+                self._potential = [_DeepPot(m) for m in self._model]
+                self._z_map = []
+                for dp in self._potential:
+                    self._z_map.append(
+                        {
+                            element: index
+                            for index, element in enumerate(dp.get_type_map())
+                        }
+                    )
+            except:
+                raise RuntimeError("Unable to create the DeePMD potentials!")
+
+        self._max_f_std = None
+
+    def calculate(self, atomic_numbers, xyz, forces=True):
+        """
+        Compute the energy and forces.
+
+        Parameters
+        ----------
+
+        atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,)
+            The atomic numbers of the atoms in the QM region.
+
+        xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3)
+            The coordinates of the atoms in the QM region in Angstrom.
+
+        forces: bool
+            Whether to calculate and return forces.
+
+        Returns
+        -------
+
+        energy: float
+            The in-vacuo energy in Eh.
+
+        forces: numpy.ndarray
+            The in-vacuo forces in Eh/Bohr.
+        """
+
+        if not isinstance(atomic_numbers, _np.ndarray):
+            raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'")
+        if not isinstance(xyz, _np.ndarray):
+            raise TypeError("'xyz' must be of type 'numpy.ndarray'")
+
+        if len(atomic_numbers) != len(xyz):
+            raise ValueError(
+                f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not "
+                f"match length of 'xyz' ({len(xyz)})"
+            )
+
+        e_list = []
+        f_list = []
+
+        # Run a calculation for each model and take the average.
+        for i, dp in enumerate(self._potential):
+            # Assume all frames have the same number of atoms.
+            atom_types = [
+                self._z_map[i][_ase.Atom(z).symbol] for z in atomic_numbers[0]
+            ]
+            e, f, _ = dp.eval(xyz, cells=None, atom_types=atom_types)
+            e_list.append(e)
+            f_list.append(f)
+
+        # Write the maximum DeePMD force deviation to file.
+        if self._deviation:
+            from deepmd.infer.model_devi import calc_model_devi_f
+
+            max_f_std = calc_model_devi_f(_np.array(f_list))[0][0]
+            if self._deviation_threshold and max_f_std > self._deviation_threshold:
+                msg = "Force deviation threshold reached!"
+                self._max_f_std = max_f_std
+                raise ValueError(msg)
+            with open(self._deviation, "a") as f:
+                f.write(f"{max_f_std:12.5f}\n")
+            # To be written to qm_xyz_file.
+            self._max_f_std = max_f_std
+
+        # Take averages over models and return.
+        e_mean = _np.mean(_np.array(e_list), axis=0)
+        f_mean = -_np.mean(_np.array(f_list), axis=0)
+
+        # Covert units.
+        e, f = (
+            e_mean.flatten() * _EV_TO_HARTREE,
+            f_mean * _EV_TO_HARTREE * _BOHR_TO_ANGSTROM,
         )
-        if gradient
-        else e_mean[0][0] * _EV_TO_HARTREE
-    )
+
+        return e, f if forces else e