Skip to content

Commit

Permalink
Refactor into backend classes and allow batched inputs.
Browse files Browse the repository at this point in the history
  • Loading branch information
lohedges committed Dec 9, 2024
1 parent 4112be1 commit 39d7c53
Show file tree
Hide file tree
Showing 13 changed files with 1,409 additions and 892 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,6 @@ results/

# VSCode config
.vscode/

# Local test input.
tests/input/deepmd
72 changes: 72 additions & 0 deletions emle/_backends/_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#######################################################################
# EMLE-Engine: https://github.com/chemle/emle-engine
#
# Copyright: 2023-2024
#
# Authors: Lester Hedges <[email protected]>
# Kirill Zinovjev <[email protected]>
#
# EMLE-Engine is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# EMLE-Engine is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with EMLE-Engine. If not, see <http://www.gnu.org/licenses/>.
#####################################################################

"""In-vacuo backend base class."""

__all__ = ["Backend"]


class Backend:
"""
Base class for in-vacuo backends.
This class should not be instantiated directly, but should be subclassed
by specific backends.
"""

def __init__(self):
"""
Constructor.
"""

# Don't allow user to create an instance of the base class.
if type(self) is Backend:
raise NotImplementedError(
"'Backend' is an abstract class and should not be instantiated directly"
)

def calculate(self, atomic_numbers, xyz, forces=True):
"""
Compute the energy and forces.
Parameters
----------
atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,)
The atomic numbers of the atoms in the QM region.
xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3)
The coordinates of the atoms in the QM region in Angstrom.
forces: bool
Whether to calculate and return forces.
Returns
-------
energy: float
The in-vacuo energy in Eh.
forces: numpy.ndarray
The in-vacuo forces in Eh/Bohr.
"""
raise NotImplementedError
238 changes: 147 additions & 91 deletions emle/_backends/_deepmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,104 +20,160 @@
# along with EMLE-Engine. If not, see <http://www.gnu.org/licenses/>.
#####################################################################

"""DeepMD in-vacuo backend implementation."""
"""DeePMD in-vacuo backend implementation."""

__all__ = ["calculate_deepmd"]
__all__ = ["DeePMD"]

import ase as _ase
import numpy as _np
import os as _os

from .._constants import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM
from .._units import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM

from ._backend import Backend as _Backend

def calculate_deepmd(calculator, xyz, elements, gradient=True):
"""
Internal function to compute in vacuo energies and gradients using
DeepMD.
Parameters
----------
calculator: :class:`emle.calculator.EMLECalculator`
The EMLECalculator instance.
xyz: numpy.array
The coordinates of the QM region in Angstrom.
elements: [str]
The list of elements.

gradient: bool
Whether to return the gradient.
Returns
-------
energy: float
The in vacuo ML energy in Eh.
gradients: numpy.array
The in vacuo ML gradient in Eh/Bohr.
class DeePMD(_Backend):
"""
DeepMD in-vacuo backend implementation.
"""

if not isinstance(xyz, _np.ndarray):
raise TypeError("'xyz' must be of type 'numpy.ndarray'")
if xyz.dtype != _np.float64:
raise TypeError("'xyz' must have dtype 'float64'.")

if not isinstance(elements, (list, tuple)):
raise TypeError("'elements' must be of type 'list'")
if not all(isinstance(element, str) for element in elements):
raise TypeError("'elements' must be a 'list' of 'str' types")

# Reshape to a frames x (natoms x 3) array.
xyz = xyz.reshape([1, -1])

e_list = []
f_list = []

# Run a calculation for each model and take the average.
for dp in calculator._deepmd_potential:
# Work out the mapping between the elements and the type indices
# used by the model.
try:
mapping = {
element: index for index, element in enumerate(dp.get_type_map())
}
except:
raise ValueError(f"DeePMD model doesnt' support element '{element}'")

# Now determine the atom types based on the mapping.
atom_types = [mapping[element] for element in elements]

e, f, _ = dp.eval(xyz, cells=None, atom_types=atom_types)
e_list.append(e)
f_list.append(f)

# Write the maximum DeePMD force deviation to file.
if calculator._deepmd_deviation:
from deepmd.infer.model_devi import calc_model_devi_f

max_f_std = calc_model_devi_f(_np.array(f_list))[0][0]
if (
calculator._deepmd_deviation_threshold
and max_f_std > calculator._deepmd_deviation_threshold
):
msg = "Force deviation threshold reached!"
_logger.error(msg)
raise ValueError(msg)
with open(calculator._deepmd_deviation, "a") as f:
f.write(f"{max_f_std:12.5f}\n")
# To be written to qm_xyz_file.
calculator._max_f_std = max_f_std

# Take averages and return. (Gradient equals minus the force.)
e_mean = _np.mean(_np.array(e_list), axis=0)
grad_mean = -_np.mean(_np.array(f_list), axis=0)
return (
(
e_mean[0][0] * _EV_TO_HARTREE,
grad_mean[0] * _EV_TO_HARTREE * _BOHR_TO_ANGSTROM,
def __init__(self, model, deviation=None, deviation_threshold=None):
# We support a str, or list/tuple of strings.
if not isinstance(model, (str, list, tuple)):
raise TypeError("'model' must be of type 'str', or a list of 'str' types")
else:
# Make sure all values are strings.
if isinstance(model, (list, tuple)):
for m in model:
if not isinstance(m, str):
raise TypeError(
"'model' must be of type 'str', or a list of 'str' types"
)
# Convert to a list.
else:
model = [model]

# Make sure all of the model files exist.
for m in model:
if not _os.path.isfile(m):
raise IOError(f"Unable to locate DeePMD model file: '{m}'")

# Validate the deviation file.
if deviation is not None:
if not isinstance(deviation, str):
raise TypeError("'deviation' must be of type 'str'")

self._deviation = deviation

if deviation_threshold is not None:
try:
deviation_threshold = float(deviation_threshold)
except:
raise TypeError("'deviation_threshold' must be of type 'float'")

self._deviation_threshold = deviation_threshold
else:
self._deviation = None
self._deviation_threshold = None

# Store the list of model files, removing any duplicates.
self._model = list(set(model))
if len(self._model) == 1 and deviation:
raise IOError(
"More that one DeePMD model needed to calculate the deviation!"
)

# Initialise DeePMD backend attributes.
try:
from deepmd.infer import DeepPot as _DeepPot

self._potential = [_DeepPot(m) for m in self._model]
self._z_map = []
for dp in self._potential:
self._z_map.append(
{
element: index
for index, element in enumerate(dp.get_type_map())
}
)
except:
raise RuntimeError("Unable to create the DeePMD potentials!")

self._max_f_std = None

def calculate(self, atomic_numbers, xyz, forces=True):
"""
Compute the energy and forces.
Parameters
----------
atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,)
The atomic numbers of the atoms in the QM region.
xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3)
The coordinates of the atoms in the QM region in Angstrom.
forces: bool
Whether to calculate and return forces.
Returns
-------
energy: float
The in-vacuo energy in Eh.
forces: numpy.ndarray
The in-vacuo forces in Eh/Bohr.
"""

if not isinstance(atomic_numbers, _np.ndarray):
raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'")
if not isinstance(xyz, _np.ndarray):
raise TypeError("'xyz' must be of type 'numpy.ndarray'")

if len(atomic_numbers) != len(xyz):
raise ValueError(
f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not "
f"match length of 'xyz' ({len(xyz)})"
)

e_list = []
f_list = []

# Run a calculation for each model and take the average.
for i, dp in enumerate(self._potential):
# Assume all frames have the same number of atoms.
atom_types = [
self._z_map[i][_ase.Atom(z).symbol] for z in atomic_numbers[0]
]
e, f, _ = dp.eval(xyz, cells=None, atom_types=atom_types)
e_list.append(e)
f_list.append(f)

# Write the maximum DeePMD force deviation to file.
if self._deviation:
from deepmd.infer.model_devi import calc_model_devi_f

max_f_std = calc_model_devi_f(_np.array(f_list))[0][0]
if self._deviation_threshold and max_f_std > self._deviation_threshold:
msg = "Force deviation threshold reached!"
self._max_f_std = max_f_std
raise ValueError(msg)
with open(self._deviation, "a") as f:
f.write(f"{max_f_std:12.5f}\n")
# To be written to qm_xyz_file.
self._max_f_std = max_f_std

# Take averages over models and return.
e_mean = _np.mean(_np.array(e_list), axis=0)
f_mean = -_np.mean(_np.array(f_list), axis=0)

# Covert units.
e, f = (
e_mean.flatten() * _EV_TO_HARTREE,
f_mean * _EV_TO_HARTREE * _BOHR_TO_ANGSTROM,
)
if gradient
else e_mean[0][0] * _EV_TO_HARTREE
)

return e, f if forces else e
Loading

0 comments on commit 39d7c53

Please sign in to comment.