From e5ccd503f1f2ddde0b496d0ab4fc4af4c2613be5 Mon Sep 17 00:00:00 2001 From: jac16 Date: Mon, 5 Feb 2024 10:52:13 -0500 Subject: [PATCH 01/59] Add lammps file generation and parsing --- src/alchemlyb/parsing/lammps.py | 837 ++++++++++++++++++++++++++++++++ 1 file changed, 837 insertions(+) create mode 100644 src/alchemlyb/parsing/lammps.py diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py new file mode 100644 index 00000000..df22b36b --- /dev/null +++ b/src/alchemlyb/parsing/lammps.py @@ -0,0 +1,837 @@ +""" Parsers for extracting alchemical data from [LAMMPS](https://docs.lammps.org/Manual.html) output files. + +For clarity, we would like to distinguish the difference between $\lambda$ and $\lambda'$. We refer to $\lambda$ as +the potential scaling of the equilibrated system, so that when this value is changed, the system undergoes another equilibration +step. On the other hand, $\lambda'$ is the value used to scale the potentials for the configurations of the system equilibrated +for $\lambda$. The value of $\lambda'$ is used in two instances. First, in thermodynamic integration (TI), values of $\lambda'$ +that are very close to $\lambda$ can be used to calculate the derivative. This is needed because LAMMPS does not compute +explicit derivatives; although one should check whether an explicit expression can be derived, one cannot be derived for changes of +$\lambda'$ in the soft Lennard-Jones (LJ) potential. + +The parsers featured in this module are constructed to parse LAMMPS output files written using the +[`fix ave/time command`](https://docs.lammps.org/fix_ave_time.html), containing data for given potential energy values (an +approximation of the Hamiltonian) at specified values of $\lambda$ and $\lambda'$, $U_{\lambda,\lambda'}$. Because generating +the input files can be cumbersome, functions have been included to generate the appropriate sections. 
If a linear approximation +can be made to calculate $U_{\lambda,\lambda'}$ from $U_{\lambda}$ in post-processing, we recommend using +:func:`alchemlyb.parsing.generate_input_linear_approximation()`. If a linear approximation cannot be made (such as changing +$\lambda$ in the soft-LJ potential) we recommend running a loop over all values of $\lambda$ saving frames spaced to be +independent samples, and an output file with small perturbations with $\lambda'$ to calculate the derivative for TI in +post-processing. This is achieved with `alchemlyb.parsing.generate_traj_input()`. After this first simulation, we then +recommend the files needed for MBAR are generated using the [rerun](https://docs.lammps.org/rerun.html) feature in LAMMPS. +Breaking up the computation like this will allow one to add additional points to their MBAR analysis without repeating the +points from an initial simulation. Generating the file for a rerun is achieved with +:func:`alchemlyb.parsing.generate_rerun_mbar()`. Notice that the output files do not contain the header information expected +in LAMMPS as that is system specific and left to the user. + +Note that in LAMMPS, [fix adapt/fep](https://docs.lammps.org/fix_adapt_fep.html) changes $\lambda$ and +[compute fep](https://docs.lammps.org/compute_fep.html) changes $\lambda'$. + +.. versionadded:: 1.0.0 + +""" + +import os +import warnings +import numpy as np +import pandas as pd +import glob + +from . 
import _init_attrs +from ..postprocessors.units import R_kJmol, kJ2kcal + +k_b = R_kJmol * kJ2kcal + +def _isfloat(x): + try: + float(x) + return True + except (TypeError, ValueError): # TypeError: None, lists, ... are not floats either + return False + + +def generate_input_linear_approximation(parameter, parameter_range, parameter_change, pair_style, types_solvent, types_solute, + output_file=None, parameter2=None, parameter2_value=None, pair_style2=None): + """ Outputs the section of a LAMMPS input file that separates the Coulomb, nonbonded, and bond/angle/torsional contributions + of the solute and solvent. As long as the parameter being changed is linearly dependent on the potential energy, these files for + each value of the parameter can be used for thermodynamic integration (TI) or multi-state Bennett acceptance ratio (MBAR). + + The input data file for this script should be an equilibrated frame in the NPT ensemble. Notice that the input file contains + the following keywords that you might replace with the values for your simulation using `sed`: TEMP, PRESS + + Parameters + ---------- + parameter : str + Parameter being varied, see table in `compute fep `_ for the options in + your pair-potential + parameter_range : list[float] + Range of parameter values to be changed where the first value should be the value with which the system has been + equilibrated. + parameter_change : float + The size of the step between parameter values. Take care that number of points needed to traverse the given range + should result in an integer, otherwise LAMMPS will not end at the desired value. 
+ pair_style : str + String with LAMMPS pair style being altered + types_solvent : str + String defining atom types in the solvent (with no spaces, e.g., *4) + types_solute : str + String defining atom types in the solute (with no spaces, e.g., 5*9) + output_file : str, default=None + File name and path for optional output file + parameter2 : str, default=None + Parameter that has been varied and is set to another value in this simulation, e.g., lambda when the Coulomb potential + is set to zero. Using this feature avoids complications with writing the pair potential information in the data file. + See table in `compute fep `_ for the options in your pair-potential + pair_style2 : str, default=None + String with LAMMPS pair style for ``parameter2`` + parameter2_value : float, default=None + Value to set ``parameter2`` + + Returns + ------- + file : list[str] + List of strings representing lines in a file + + """ + nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change + if nblocks % 1 > 0: + raise ValueError("The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( + parameter_range, parameter_change)) + else: + nblocks = int(nblocks) + + if (any([x is not None for x in [parameter2, pair_style2, parameter2_value]]) and + not all([x is not None for x in [parameter2, pair_style2, parameter2_value]])): + raise ValueError((f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " + + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}")) + name1 = "-".join([pair_style.replace("/","-"), parameter]) + file = [ + "\n# Variables and System Conditions\n", + "variable freq equal 1000 # Consider changing\n", + "variable runtime equal 1000000\n", + f"variable delta equal {parameter_change} \n", + f"variable nblocks equal {nblocks} \n", + f"variable paramstart equal {parameter_range[0]}\n", + "variable TK equal TEMP\n", + "variable PBAR equal PRESS\n", + 
"fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening factors according to your system\n", + "thermo ${freq}\n", + "\n# Group atoms\n", + f"group solute type {types_solute}\n", + f"group solvent type {types_solvent}\n", + "\n# Set-up Loop\n", + "variable runid loop 0 ${nblocks} pad\n", + " label runloop1\n", + "\n# Adjust param for the box and equilibrate\n", + " variable param equal v_paramstart-v_runid*v_delta\n", + ' if "${runid} == 0" then &\n', + ' "jump SELF skipequil"\n', + " variable param0 equal v_paramstart-(v_runid-1)*v_delta\n", + " variable paramramp equal ramp(v_param0,v_param)\n", + " fix ADAPT all adapt/fep ${freq} &\n", + f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_paramramp\n", + " thermo_style custom v_vstep v_time v_paramramp temp press pe evdwl enthalpy\n", + " run ${runtime} # Run Ramp\n", + " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " run ${runtime} # Run Equil\n", + "\n label skipequil\n\n", + f" write_data files/npt_{name1}_"+"${param}.data\n", + "\n # Initialize computes\n", + " ## Compute PE for contributions for bonds, angles, dihedrals, and impropers\n", + " compute pe_solute_bond solute pe/atom bond angle dihedral improper # PE from nonpair/noncharged intramolecular interactions\n", + " compute pe_solute_1 solute reduce sum c_pe_solute_bond\n", + " compute pe_solvent_bond solvent pe/atom bond angle dihedral improper # PE from nonpair/noncharged intramolecular interactions\n", + " compute pe_solvent_1 solvent reduce sum c_pe_solvent_bond\n", + "\n ## Compute PE for contributions for pair and charges\n", + " compute pe_solute_2 solute group/group solute pair yes kspace no\n", + " compute pe_solute_3 solute group/group solute pair no kspace yes\n", + " compute pe_solvent_2 solvent group/group solvent pair yes kspace no\n", + " compute pe_solvent_3 solvent group/group solvent pair no kspace yes\n", + " compute pe_inter_2 solute group/group solvent pair yes 
kspace no\n", + " compute pe_inter_3 solute group/group solvent pair no kspace yes\n", + " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy &\n", + " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3\n", + " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n", + " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3 &\n", + f" file files/linear_{name1}_"+"${param}.txt\n", + "\n run ${runtime}\n\n", + " uncompute pe_solute_bond\n", + " uncompute pe_solute_1\n", + " uncompute pe_solvent_bond\n", + " uncompute pe_solvent_1\n", + " uncompute pe_solute_2\n", + " uncompute pe_solute_3\n", + " uncompute pe_solvent_2\n", + " uncompute pe_solvent_3\n", + " uncompute pe_inter_2\n", + " uncompute pe_inter_3\n", + ' if "${runid} != 0" then &\n', + ' "unfix ADAPT"\n', + " unfix FEPout\n", + "\n next runid\n", + " jump SELF runloop1\n", + "write_data npt.data nocoeff\n", + ] + + if parameter2 is not None: + name2 = "-".join([pair_style2.replace("/","-"), parameter2]) + file2 = [ + "\n# Set Previous Change\n", + f"variable param2 equal {parameter2_value}\n", + "fix ADAPT2 all adapt/fep 1 &\n", + f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_param2\n", + ] + file[13:13] = file2 + file[-1:-1] = ["unfix ADAPT2\n"] # wrap in a list: a bare str is spliced in one character at a time + ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] + file[ind] = " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_param2 v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n" + file[ind+2] = f" file files/linear_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.txt\n" + ind = [ii for ii, x in enumerate(file) if "write_data files/npt" in x][0] + file[ind] = f" write_data files/npt_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.data\n" + + if output_file is not None: + with open(output_file, "w") as f: + for line in file: + 
f.write(line) + + return file + + +def generate_traj_input(parameter, parameter_range, parameter_change, pair_style, types_solvent, types_solute, del_parameter=0.01, + output_file=None, parameter2=None, parameter2_value=None, pair_style2=None, del_parameter2=None): + """ Outputs the section of a LAMMPS input file that loops over the values of parameter being changed (e.g., lambda). + Small perturbations in the potential energy are also output so that the derivative can be calculated for thermodynamic + integration. Trajectories are produced so that files for MBAR analysis may be generated in post-processing. + + The input data file for this script should be an equilibrated frame in the NPT ensemble. Notice that the input file contains + the following keywords that you might replace with the values for your simulation using `sed`: TEMP, PRESS + + Parameters + ---------- + parameter : str + Parameter being varied, see table in `compute fep `_ for the options in + your pair-potential + parameter_range : list[float] + Range of parameter values to be changed where the first value should be the value with which the system has been + equilibrated. + parameter_change : float + The size of the step between parameter values. Take care that number of points needed to traverse the given range + should result in an integer, otherwise LAMMPS will not end at the desired value. + pair_style : str + String of LAMMPS pair style being changed + types_solvent : str + String defining atom types in the solvent (no spaces) + types_solute : str + String defining atom types in the solute (no spaces) + del_parameter : float, default=0.01 + Change used to calculate the forward and backward difference used to compute the derivative through a central difference + approximation. 
+ output_file : str, default=None + File name and path for optional output file + parameter2 : str, default=None + Parameter that has been varied and is set to another value in this simulation, e.g., lambda when the Coulomb potential + is set to zero. Using this feature avoids complications with writing the pair potential information in the data file. + See table in `compute fep `_ for the options in your pair-potential + pair_style2 : str, default=None + String with LAMMPS pair style being set for ``parameter2`` + parameter2_value : float, default=None + Value to set ``parameter2`` + del_parameter2 : float, default=None + Change used to calculate the forward and backward difference used to compute the derivative through a central difference + approximation for parameter2. + + Returns + ------- + file : list[str] + List of strings representing lines in a file + + """ + nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change + if nblocks % 1 > 0: + raise ValueError(f"The number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer") + else: + nblocks = int(nblocks) + + if (any([x is not None for x in [parameter2, pair_style2, parameter2_value, del_parameter2]]) and + not all([x is not None for x in [parameter2, pair_style2, parameter2_value, del_parameter2]])): + raise ValueError((f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " + + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}, del_parameter2={del_parameter2}")) + name1 = "-".join([pair_style.replace("/","-"), parameter]) + file = [ + "\n# Variables and System Conditions\n", + "variable freq equal 1000 # Consider changing\n", + "variable runtime equal 1000000\n", + f"variable delta equal {parameter_change} \n", + f"variable nblocks equal {nblocks} \n", + f"variable deltacdm equal {del_parameter} # delta used in central different method for derivative in TI\n", + 
f"variable paramstart equal {parameter_range[0]}\n", + "variable TK equal TEMP\n", + "variable PBAR equal PRESS\n", + "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening factors according to your system\n", + "thermo ${freq}\n", + "\n# Set-up Loop\n", + "variable nblocks equal 1/v_delta\n", + "variable runid loop 0 ${nblocks} pad\n", + " label runloop1\n", + "\n# Adjust param for the box and equilibrate\n", + " variable param equal v_paramstart-v_runid*v_delta\n", + ' if "${runid} == 0" then &\n', + ' "jump SELF skipequil"\n', + " variable param0 equal v_paramstart-(v_runid-1)*v_delta\n", + " variable paramramp equal ramp(v_param0,v_param)\n", + " fix ADAPT all adapt/fep ${freq} &\n", + f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_paramramp\n", + " thermo_style custom v_vstep v_time v_paramramp temp press pe evdwl enthalpy\n", + " run ${runtime} # Run Ramp\n", + " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " run ${runtime} # Run Equil\n", + "\n label skipequil\n\n", + f" write_data files/npt_{name1}_"+"${param}.data\n", + "\n # Initialize computes\n", + " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " variable deltacdm2 equal -v_deltacdm\n", + " compute FEPdb all fep ${TK} &\n", + f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm2\n", + " compute FEPdf all fep ${TK} &\n", + f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm\n", + " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n", + f" c_FEPdb[1] c_FEPdf[1] file files/ti_{name1}_"+"${param}.txt\n", + "\n dump TRAJ all custom ${freq} "+f"files/dump_{name1}_"+"${param}.lammpstrj id mol type element xu yu zu\n", + "\n run ${runtime}\n\n", + " uncompute FEPdb\n", + " uncompute FEPdf\n", + ' if "${runid} != 0" then &\n', + ' "unfix ADAPT"\n', + " unfix FEPout\n", + " undump TRAJ\n", + "\n next 
runid\n", + " jump SELF runloop1\n", + "write_data npt.data nocoeff\n", + ] + + if parameter2 is not None: + name2 = "-".join([pair_style2.replace("/","-"), parameter2]) + file[6:6] = f"variable delta2cdm equal {del_parameter2}\n", + file2 = [ + "\n# Set Previous Change\n", + f"variable param2 equal {parameter2_value}\n", + "fix ADAPT2 all adapt/fep 1 &\n", + f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_param2\n", + "variable delta2cdm2 equal -v_delta2cdm\n", + "compute FEP2db all fep ${TK} &\n", + f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_delta2cdm2\n", + "compute FEP2df all fep ${TK} &\n", + f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_delta2cdm\n", + ] + file[11:11] = file2 + file[-1:-1] = ["unfix ADAPT2\n"] # wrap in a list: a bare str is spliced in one character at a time + file[-1:-1] = ["uncompute FEP2db\n"] + file[-1:-1] = ["uncompute FEP2df\n"] + ind = [ii for ii, x in enumerate(file) if "write_data files/npt" in x][0] + file[ind] = f" write_data files/npt_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.data\n" + ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] + file[ind] = " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n" + file[ind+1] = f" c_FEPdb[1] c_FEPdf[1] c_FEP2db[1] c_FEP2df[1] file files/ti_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.txt\n" + file[ind+2] = "\n dump TRAJ all custom ${freq} "+f"files/dump_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.lammpstrj id mol type element xu yu zu\n" + + if output_file is not None: + with open(output_file, "w") as f: + for line in file: + f.write(line) + + return file + + +def generate_rerun_mbar(parameter_value, parameter, parameter_range, parameter_change, pair_style, types_solvent, + types_solute, output_file=None, parameter2=None, pair_style2=None, parameter2_value=None): + """ Outputs the section of a LAMMPS input file that reruns trajectories for different lambda values and 
calculates + the potential energy for all other lambda values with this set of configurations. + + Parameters + ---------- + parameter_value : float + Value of parameter being varied (e.g., lambda) + parameter : str + Parameter being varied, see table in `compute fep `_ for the options in + your pair-potential + parameter_range : list[float] + Range of parameter values to be changed where the first value should be the value with which the system has been + equilibrated. + parameter_change : float + The size of the step between parameter values. Take care that number of points needed to traverse the given range + should result in an integer, otherwise LAMMPS will not end at the desired value. + pair_style : str + String of LAMMPS pair style being changed + types_solvent : str + String defining atom types in the solvent (no spaces) + types_solute : str + String defining atom types in the solute (no spaces) + output_file : str, default=None + File name and path for optional output file + parameter2 : str, default=None + Parameter that has been varied and is set to another value in this simulation, e.g., lambda when the Coulomb potential + is set to zero. Using this feature avoids complications with writing the pair potential information in the data file. 
+ See table in `compute fep `_ for the options in your pair-potential + pair_style2 : str, default=None + String with LAMMPS pair style being set for ``parameter2`` + parameter2_value : float, default=None + Value to set ``parameter2`` + + Returns + ------- + file : list[str] + List of strings representing lines in a file + + """ + nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change + if nblocks % 1 > 0: + raise ValueError("The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( + parameter_range, parameter_change)) + else: + nblocks = int(nblocks) + + if (any([x is not None for x in [parameter2, pair_style2, parameter2_value]]) and + not all([x is not None for x in [parameter2, pair_style2, parameter2_value]])): + raise ValueError((f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " + + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}")) + + if np.isclose(parameter_range[0], 0): + prec = int(np.abs(int(np.log10(np.abs(parameter_change))))) + else: + prec = max(int(np.abs(int(np.log10(np.abs(parameter_range[0]))))), int(np.abs(int(np.log10(np.abs(parameter_change)))+1))) + name1 = "-".join([pair_style.replace("/","-"), parameter]) + file = [ + "\n# Variables and System Conditions\n", + f"variable param equal {parameter_value}\n", + "variable freq equal 1000 # Consider changing\n", + "variable runtime equal 1000000\n", + f"variable delta equal {parameter_change}\n", + "variable TK equal TEMP\n", + "\nthermo ${freq}\n", + f"read_data files/npt_{name1}_"+"${param}.data\n", + "\n# Initialize computes\n", + ] + if parameter2 is not None: + file2 = [ + "\n# Set Previous Change\n", + f"variable param2 equal {parameter2_value}\n", + "fix ADAPT2 all adapt/fep 1 &\n", + f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_param2\n", + ] + file[8:8] = file2 + name2 = "-".join([pair_style2.replace("/","-"), parameter2]) + ind = 
[ii for ii, x in enumerate(file) if "read_data files/npt" in x][0] + file[ind] = f"read_data files/npt_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.data\n" + + for i in range(nblocks): + value2 = parameter_range[0] + parameter_change * i + delta = value2 - parameter_value + tmp = "variable delta{0:0d}".format(i)+" equal {0:."+str(prec)+"f}\n" + tmp = [ + tmp.format(delta), + "compute FEP{0:03d} all fep ".format(i) + "${TK} &\n", + f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_delta{i}\n", + "variable param{0:03d} equal v_param+v_delta{0:0d}\n".format(i), + "fix FEPout{0:03d} all".format(i)+" ave/time ${freq} 1 ${freq} "+"v_time v_param v_param{0:03d} &\n".format(i), + " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i)+f" file files/mbar_{name1}"+"_${param}_${param"+str( + "{0:03d}".format(i))+"}.txt\n\n", + ] + if parameter2 is not None: + ind = [ii for ii, x in enumerate(tmp) if "fix FEPout" in x][0] + tmp[ind:ind+2] = [ + "fix FEPout{0:03d} all".format(i)+" ave/time ${freq} 1 ${freq} "+"v_time v_param v_param{0:03d} v_param2 &\n".format(i), + " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i)+f" file files/mbar_{name1}"+"_${param}_${param"+str( + "{0:03d}".format(i))+"}_"+"{}_{}.txt\n\n".format(name2, parameter2_value), + ] + file.extend(tmp) + + if parameter2 is not None: + file.append(f"\nrerun files/dump_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.lammpstrj " + + "every ${freq} dump xu yu zu\n\n") + else: + file.append(f"\nrerun files/dump_{name1}"+"_${param}.lammpstrj every ${freq} dump xu yu zu\n\n") + + if output_file is not None: + with open(output_file, "w") as f: + for line in file: + f.write(line) + + return file + + +def _get_bar_lambdas(fep_files, indices=[2,3]): + """Retrieves all lambda values from FEP filenames. + + Parameters + ---------- + fep_files: str or list of str + Path(s) to fepout files to extract data from. 
+ indices : list[int], default=[2,3] + In provided file names, using underscore as a separator, these indices mark the part of the filename + containing the lambda information. + + Returns + ------- + lambda_values : list + Sorted list of the unique lambda values found in the file names. + lambda_pairs : list + List of tuples containing two floats, lambda and lambda'. + + """ + + def tuple_from_filename(filename, separator="_", indices=[2,3]): + name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) + if not _isfloat(name_array[indices[0]]): + raise ValueError(f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}") + if not _isfloat(name_array[indices[1]]): + raise ValueError(f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}") + return (float(name_array[indices[0]]), float(name_array[indices[1]])) + def lambda2_from_filename(filename, separator="_", index=-1): + name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) + if not _isfloat(name_array[index]): + raise ValueError(f"Entry, {index} in filename cannot be converted to float: {name_array[index]}") + return float(name_array[index]) + + lambda_pairs = [tuple_from_filename(y, indices=indices) for y in fep_files] + if len(indices) == 3: + lambda2 = list(set([lambda2_from_filename(y, index=indices[2]) for y in fep_files])) + if len(lambda2) > 1: + raise ValueError("More than one value of lambda2 is present in the provided files." 
+ f" Restrict filename input to one of: {lambda2}") + else: + lambda2 = None + + lambda_values = sorted(list(set([x for y in lambda_pairs for x in y]))) + check_float = [x for x in lambda_values if not _isfloat(x)] + if check_float: + raise ValueError("Lambda values must be convertible to floats: {}".format(check_float)) + if [x for x in lambda_values if float(x) < 0]: + raise ValueError("Lambda values must be positive: {}".format(lambda_values)) + + # check that all needed lamba combinations are present + lamda_dict = {x: [y[1] for y in lambda_pairs if y[0] == x] for x in lambda_values} + + # Check for MBAR content + missing_combinations_mbar = [] + missing_combinations_bar = [] + for lambda_value, lambda_array in lamda_dict.items(): + missing_combinations_mbar.extend([(lambda_value, x) for x in lambda_values if x not in lambda_array]) + + if missing_combinations_mbar: + warnings.warn( + "The following combinations of lambda and lambda prime are missing for MBAR analysis: {}".format(missing_combinations_mbar) + ) + else: + return lambda_values, lambda_pairs, lambda2 + + # Check for BAR content + missing_combinations_bar = [] + extra_combinations_bar = [] + lambda_values.sort() + for ind, (lambda_value, lambda_array) in enumerate(lamda_dict.items()): + if ind == 0: + tmp_array = [lambda_values[ind], lambda_values[ind+1]] + elif ind == len(lamda_dict) - 1: + tmp_array = [lambda_values[ind-1], lambda_values[ind]] + else: + tmp_array = [lambda_values[ind-1], lambda_values[ind], lambda_values[ind+1]] + + missing_combinations_bar.extend([(lambda_value, x) for x in tmp_array if x not in lambda_array]) + extra_combinations_bar.extend([(lambda_value, x) for x in lambda_array if x not in tmp_array]) + + if missing_combinations_bar: + raise ValueError( + "BAR calculation cannot be performed without the following lambda-lambda prime combinations: {}".format(missing_combinations_bar) + ) + if extra_combinations_bar: + warnings.warn( + "The following combinations of lambda and 
lambda prime are extra and being discarded for BAR analysis: {}".format(extra_combinations_bar) + ) + lambda_pairs = [x for x in lambda_pairs if x not in extra_combinations_bar] + + return lambda_values, lambda_pairs, lambda2 + +@_init_attrs +def extract_u_nk(fep_files, T, columns_lambda1=[2,3], column_u_nk=4, column_lambda2=None, indices=[1,2], units="real", + vdw_lambda=1): + """ This function will go into alchemlyb.parsing.lammps + + Each file is imported as a data frame where the columns kept are either: + [0, columns_lambda1[0] columns_lambda1[1], column_u_nk] + or if columns_lambda2 is not None: + [0, columns_lambda1[0] columns_lambda1[1], column_lambda2, column_u_nk] + + Parameters + ---------- + filenames : str + Path to fepout file(s) to extract data from. Filenames and paths are + aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*_*.txt". + temperature : float + Temperature in Kelvin at which the simulation was sampled. + columns_lambda1 : list[int] + Indices for columns (column number minus one) representing (1) the lambda at which the system is equilibrated and (2) the lambda used + in the computation of the potential energy. + column_u_nk : int, default=4 + Index for the column (column number minus one) representing the potential energy + column_lambda2 : int + Index for column (column number minus one) for the unchanging value of lambda for another potential. + If ``None`` then we do not expect two lambda values being varied. + indices : list[int], default=[1,2] + In provided file names, using underscore as a separator, these indices mark the part of the filename + containing the lambda information for :func:`alchemlyb.parsing._get_bar_lambdas`. If ``column_lambda2 != None`` + this list should be of length three, where the last value represents the invariant lambda. + units : str, default="real" + Unit system used in LAMMPS calculation. 
Currently supported: "real" and "lj" + vdw_lambda : int, default=1 + In the case that ``column_lambda2 is not None``, this integer represents which lambda represents vdw interactions. + + Results + ------- + u_nk_df : pandas.Dataframe + Dataframe of potential energy for each alchemical state (k) for each frame (n). + Note that the units for timestamps are not considered in the calculation. + + Attributes + + - temperature in K + - energy unit in kT + + """ + + # Collect Files + files = glob.glob(fep_files) + if not files: + raise ValueError(f"No files have been found that match: {fep_files}") + + if units == "real": + beta = 1 / (k_b * T) + elif units == "lj": + beta = 1 / T + else: + raise ValueError(f"LAMMPS unit type, {units}, is not supported. Supported types are: real and lj") + + if len(columns_lambda1) != 2: + raise ValueError(f"Provided columns for lambda1 must have a length of two, columns_lambda1: {columns_lambda1}") + if not np.all([isinstance(x, int) for x in columns_lambda1]): + raise ValueError( + f"Provided column for columns_lambda1 must be type int. columns_lambda1: {columns_lambda1}, type: {[type(x) for x in columns_lambda1]}" + ) + if column_lambda2 is not None and not isinstance(column_lambda2, int): + raise ValueError( + f"Provided column for lambda2 must be type int. column_lambda2: {column_lambda2}, type: {type(column_lambda2)}" + ) + if not isinstance(column_u_nk, int): + raise ValueError( + f"Provided column for u_nk must be type int. 
column_u_nk: {column_u_nk}, type: {type(column_u_nk)}" + ) + + lambda_values, _, lambda2 = _get_bar_lambdas(files, indices=indices) + + if column_lambda2 is None: + u_nk = pd.DataFrame(columns=["time", "fep-lambda"]+lambda_values) + lc = len(lambda_values) + col_indices = [0] + list(columns_lambda1) + [column_u_nk] + else: + u_nk = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda"]) + lc = len(lambda_values) # one zero-filled column per entry of columns_b + col_indices = [0] + list(columns_lambda1) + [column_lambda2, column_u_nk] + + for file in files: + if not os.path.isfile(file): + raise ValueError("File not found: {}".format(file)) + + data = pd.read_csv(file, sep=" ", comment="#") + lx = len(data.columns) + if [False for x in col_indices if x >= lx]: # valid positional indices are 0..lx-1 + raise ValueError("Number of columns, {}, is less than index: {}".format(lx, col_indices)) + data = data.iloc[ :, col_indices] + if column_lambda2 is None: + data.columns=["time", "fep-lambda", "fep-lambda2", "u_nk"] + lambda1_col, lambda1_2_col = "fep-lambda", "fep-lambda2" + columns_a = ["time", "fep-lambda"] + columns_b = lambda_values + else: + columns_a = ["time", "coul-lambda", "vdw-lambda"] + if vdw_lambda == 1: + data.columns=["time", "vdw-lambda", "vdw-lambda2", "coul-lambda", "u_nk"] + lambda1_col, lambda1_2_col = "vdw-lambda", "vdw-lambda2" + columns_b = [(lambda2, x) for x in lambda_values] + elif vdw_lambda == 2: + data.columns=["time", "coul-lambda", "coul-lambda2", "vdw-lambda", "u_nk"] + lambda1_col, lambda1_2_col = "coul-lambda", "coul-lambda2" + columns_b = [(x, lambda2) for x in lambda_values] + else: + raise ValueError(f"'vdw_lambda must be either 1 or 2, not: {vdw_lambda}'") + + for lambda1 in list(data[lambda1_col].unique()): + tmp_df = data.loc[ data[lambda1_col] == lambda1 ] + + for lambda12 in list(tmp_df[lambda1_2_col].unique()): + tmp_df2 = tmp_df.loc[tmp_df[lambda1_2_col] == lambda12 ] + + lr = tmp_df2.shape[0] + if u_nk[u_nk[lambda1_col] == lambda1].shape[0] == 0: + u_nk = pd.concat([ + u_nk, + pd.concat([ + 
@_init_attrs
def extract_dHdl(
    fep_files,
    T,
    column_lambda1=2,
    column_dlambda1=3,
    column_lambda2=None,
    column_dlambda2=None,
    columns_derivative1=(10, 11),
    columns_derivative2=(12, 13),
    index=-1,
    units="real",
):
    """Return reduced dH/dlambda from LAMMPS ``fix ave/time`` output files.

    The derivative is estimated with a central difference from the backward
    and forward energy perturbations stored in each file::

        dH/dl = (dU_forw - dU_back) / (2 * dlambda)

    Each file is read as a space separated table (lines starting with ``#``
    are ignored) and the following columns are kept, in this order:

    - one lambda (``column_lambda2 is None``):
      ``[0, column_lambda1, column_dlambda1, columns_derivative1[0], columns_derivative1[1]]``
    - two lambdas:
      ``[0, column_lambda2, column_lambda1, column_dlambda1, column_dlambda2,
      columns_derivative1[0], columns_derivative1[1],
      columns_derivative2[0], columns_derivative2[1]]``

    In the two lambda case, lambda1 is labelled ``vdw`` and lambda2 is
    labelled ``coul`` in the returned frame.

    Parameters
    ----------
    fep_files : str
        Glob pattern matching the file(s) to extract data from, e.g.
        "/path/to/files/something_*_*.txt".
    T : float
        Temperature at which the simulation was sampled (Kelvin for
        ``units="real"``, reduced temperature for ``units="lj"``).
    column_lambda1 : int, default=2
        Index of the column (column number minus one) holding the lambda at
        which the system is equilibrated.
    column_dlambda1 : int, default=3
        Index of the column holding the perturbation size for lambda1.
    column_lambda2 : int, default=None
        Index of the column holding a second lambda. If ``None`` only one
        lambda is expected.
    column_dlambda2 : int, default=None
        Index of the column holding the perturbation size for lambda2.
        Required when ``column_lambda2`` is given.
    columns_derivative1 : sequence of two int, default=(10, 11)
        Indices of the columns holding the backward and forward energy
        differences (in that order) for lambda1.
    columns_derivative2 : sequence of two int, default=(12, 13)
        Indices of the columns holding the backward and forward energy
        differences (in that order) for lambda2. Validated always, used only
        when ``column_lambda2`` is given.
    index : int, default=-1
        Currently unused by this function; kept for interface compatibility.
    units : str, default="real"
        Unit system used in the LAMMPS calculation. Supported: "real", "lj".

    Returns
    -------
    dHdl : pandas.DataFrame
        Reduced derivative(s) of the potential energy with respect to lambda,
        one row per frame. Indexed by ``["time", "fep-lambda"]`` with column
        ``"fep"``, or by ``["time", "coul-lambda", "vdw-lambda"]`` with columns
        ``"coul"`` and ``"vdw"``. Values are multiplied by beta (kT units).
        The units of the time column are not considered.

        NOTE(review): the ``_init_attrs`` decorator presumably attaches the
        temperature / energy-unit attributes; confirm against its definition.

    Raises
    ------
    ValueError
        If no file matches, a matched path is not a file, ``units`` is not
        supported, a column argument has the wrong type or length, or a file
        has fewer columns than requested.

    """
    # Collect files
    files = glob.glob(fep_files)
    if not files:
        raise ValueError("No files have been found that match: {}".format(fep_files))

    if units == "real":
        beta = 1 / (k_b * T)
    elif units == "lj":
        beta = 1 / T
    else:
        raise ValueError(
            "LAMMPS unit type, {}, is not supported. Supported types are: real and lj".format(units)
        )

    # Scalar column arguments: (name, value, may be None)
    for name, value, optional in (
        ("column_lambda1", column_lambda1, False),
        ("column_lambda2", column_lambda2, True),
        ("column_dlambda1", column_dlambda1, False),
        ("column_dlambda2", column_dlambda2, True),
    ):
        if optional and value is None:
            continue
        if not isinstance(value, int):
            raise ValueError(
                "Provided {} must be type 'int', instead: {}".format(name, type(value))
            )

    # Pairs of (backward, forward) derivative columns
    for name, columns in (
        ("columns_derivative1", columns_derivative1),
        ("columns_derivative2", columns_derivative2),
    ):
        if len(columns) != 2:
            raise ValueError(
                "Provided columns for derivative values must have a length of two, {}: {}".format(
                    name, columns
                )
            )
        if not all(isinstance(x, int) for x in columns):
            raise ValueError(
                "Provided column for {0} must be type int. {0}: {1}, type: {2}".format(
                    name, columns, [type(x) for x in columns]
                )
            )

    two_lambdas = column_lambda2 is not None
    if two_lambdas and column_dlambda2 is None:
        # Without this check ``None`` would end up in the ``iloc`` index list.
        raise ValueError(
            "column_dlambda2 must be provided when column_lambda2 is provided"
        )

    if two_lambdas:
        index_columns = ["time", "coul-lambda", "vdw-lambda"]
        value_columns = ["coul", "vdw"]
        raw_columns = [
            "time", "coul-lambda", "vdw-lambda", "dlambda_vdw", "dlambda_coul",
            "dU_back_vdw", "dU_forw_vdw", "dU_back_coul", "dU_forw_coul",
        ]
        col_indices = (
            [0, column_lambda2, column_lambda1, column_dlambda1, column_dlambda2]
            + list(columns_derivative1)
            + list(columns_derivative2)
        )
    else:
        index_columns = ["time", "fep-lambda"]
        value_columns = ["fep"]
        raw_columns = ["time", "fep-lambda", "dlambda", "dU_back", "dU_forw"]
        col_indices = [0, column_lambda1, column_dlambda1] + list(columns_derivative1)

    frames = []
    for file in files:
        if not os.path.isfile(file):
            raise ValueError("File not found: {}".format(file))

        # header=None: the "#" header lines are dropped as comments, so the
        # first remaining row is data and must not be consumed as a header.
        data = pd.read_csv(file, sep=" ", comment="#", header=None)
        lx = len(data.columns)
        # Valid positional indices are 0..lx-1, hence ">=".
        if any(x >= lx for x in col_indices):
            raise ValueError(
                "Number of columns, {}, is less than or equal to a requested index: {}".format(
                    lx, col_indices
                )
            )

        data = data.iloc[:, col_indices].copy()
        data.columns = raw_columns
        if two_lambdas:
            data["coul"] = (data["dU_forw_coul"] - data["dU_back_coul"]) / (
                2 * data["dlambda_coul"]
            )
            data["vdw"] = (data["dU_forw_vdw"] - data["dU_back_vdw"]) / (
                2 * data["dlambda_vdw"]
            )
        else:
            data["fep"] = (data["dU_forw"] - data["dU_back"]) / (2 * data["dlambda"])

        # Keep only the index and derivative columns, for both branches.
        frames.append(data[index_columns + value_columns])

    dHdl = pd.concat(frames, axis=0, sort=False)
    dHdl.set_index(index_columns, inplace=True)

    # ``DataFrame.mul`` is not in-place: return its result so the values are
    # actually converted to reduced (kT) units.
    return dHdl.mul(beta)
jac16 Date: Mon, 5 Feb 2024 12:24:38 -0500 Subject: [PATCH 02/59] Update formatting --- src/alchemlyb/parsing/lammps.py | 681 ++++++++++++++++++++++---------- 1 file changed, 475 insertions(+), 206 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index df22b36b..2259b6c6 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -41,33 +41,44 @@ k_b = R_kJmol * kJ2kcal + def _isfloat(x): try: float(x) return True except ValueError: return False - - -def generate_input_linear_approximation(parameter, parameter_range, parameter_change, pair_style, types_solvent, types_solute, - output_file=None, parameter2=None, parameter2_value=None, pair_style2=None): - """ Outputs the section of a LAMMPS input file that separates the Coulomb, nonbonded, and bond/angle/torsional contributions + + +def generate_input_linear_approximation( + parameter, + parameter_range, + parameter_change, + pair_style, + types_solvent, + types_solute, + output_file=None, + parameter2=None, + parameter2_value=None, + pair_style2=None, +): + """Outputs the section of a LAMMPS input file that separates the Coulomb, nonbonded, and bond/angle/torsional contributions of the solute and solvent. As long as the parameter being changed is linearly dependent on the potential energy, these files for each value of the parameter can be used for thermodynamic integration (TI) or multi-state Bennett acceptance ratio (MBAR). - The input data file for this script should be an equilibrated frame in the NPT ensemble. Notice that the input file contains + The input data file for this script should be an equilibrated frame in the NPT ensemble. 
Notice that the input file contains the following keywords that you might replace with the values for your simulation using `sed`: TEMP, PRESS Parameters ---------- parameter : str - Parameter being varied, see table in `compute fep `_ for the options in + Parameter being varied, see table in `compute fep `_ for the options in your pair-potential parameter_range : list[float] - Range of parameter values to be changed where the first value should be the value with which the system has been + Range of parameter values to be changed where the first value should be the value with which the system has been equilibrated. parameter_change : float - The size of the step between parameter values. Take care that number of points needed to traverse the given range + The size of the step between parameter values. Take care that number of points needed to traverse the given range should result in an integer, otherwise LAMMPS will not end at the desired value. pair_style : str String with LAMMPS pair style being altered @@ -90,20 +101,28 @@ def generate_input_linear_approximation(parameter, parameter_range, parameter_ch ------- file : list[str] List of strings representing lines in a file - + """ nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change if nblocks % 1 > 0: - raise ValueError("The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( - parameter_range, parameter_change)) + raise ValueError( + "The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( + parameter_range, parameter_change + ) + ) else: nblocks = int(nblocks) - - if (any([x is not None for x in [parameter2, pair_style2, parameter2_value]]) and - not all([x is not None for x in [parameter2, pair_style2, parameter2_value]])): - raise ValueError((f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " - + 
f"parameter2_value={parameter2_value}, pair_style2={pair_style2}")) - name1 = "-".join([pair_style.replace("/","-"), parameter]) + + if any( + [x is not None for x in [parameter2, pair_style2, parameter2_value]] + ) and not all([x is not None for x in [parameter2, pair_style2, parameter2_value]]): + raise ValueError( + ( + f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " + + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}" + ) + ) + name1 = "-".join([pair_style.replace("/", "-"), parameter]) file = [ "\n# Variables and System Conditions\n", "variable freq equal 1000 # Consider changing\n", @@ -134,7 +153,7 @@ def generate_input_linear_approximation(parameter, parameter_range, parameter_ch " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", " run ${runtime} # Run Equil\n", "\n label skipequil\n\n", - f" write_data files/npt_{name1}_"+"${param}.data\n", + f" write_data files/npt_{name1}_" + "${param}.data\n", "\n # Initialize computes\n", " ## Compute PE for contributions for bonds, angles, dihedrals, and impropers\n", " compute pe_solute_bond solute pe/atom bond angle dihedral improper # PE from nonpair/noncharged intramolecular interactions\n", @@ -152,7 +171,7 @@ def generate_input_linear_approximation(parameter, parameter_range, parameter_ch " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3\n", " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n", " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3 &\n", - f" file files/linear_{name1}_"+"${param}.txt\n", + f" file files/linear_{name1}_" + "${param}.txt\n", "\n run ${runtime}\n\n", " uncompute pe_solute_bond\n", " uncompute pe_solute_1\n", @@ -171,9 +190,9 @@ def generate_input_linear_approximation(parameter, parameter_range, parameter_ch " 
jump SELF runloop1\n", "write_data npt.data nocoeff\n", ] - + if parameter2 is not None: - name2 = "-".join([pair_style2.replace("/","-"), parameter2]) + name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) file2 = [ "\n# Set Previous Change\n", f"variable param2 equal {parameter2_value}\n", @@ -183,38 +202,60 @@ def generate_input_linear_approximation(parameter, parameter_range, parameter_ch file[13:13] = file2 file[-1:-1] = "unfix ADAPT2\n" ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] - file[ind] = " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_param2 v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n" - file[ind+2] = f" file files/linear_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.txt\n" + file[ind] = ( + " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_param2 v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n" + ) + file[ind + 2] = ( + f" file files/linear_{name1}_" + + "${param}_" + + f"{name2}_{parameter2_value}.txt\n" + ) ind = [ii for ii, x in enumerate(file) if "write_data files/npt" in x][0] - file[ind] = f" write_data files/npt_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.data\n" + file[ind] = ( + f" write_data files/npt_{name1}_" + + "${param}_" + + f"{name2}_{parameter2_value}.data\n" + ) if output_file is not None: with open(output_file, "w") as f: for line in file: f.write(line) - return file - - -def generate_traj_input(parameter, parameter_range, parameter_change, pair_style, types_solvent, types_solute, del_parameter=0.01, - output_file=None, parameter2=None, parameter2_value=None, pair_style2=None, del_parameter2=None): - """ Outputs the section of a LAMMPS input file that loops over the values of parameter being changed (e.g., lambda) - Small perturbations in the potential energy are also output so that the derivative can be calculated for thermodynamic + return file + + +def generate_traj_input( + parameter, + parameter_range, + parameter_change, + pair_style, + types_solvent, 
+ types_solute, + del_parameter=0.01, + output_file=None, + parameter2=None, + parameter2_value=None, + pair_style2=None, + del_parameter2=None, +): + """Outputs the section of a LAMMPS input file that loops over the values of parameter being changed (e.g., lambda) + Small perturbations in the potential energy are also output so that the derivative can be calculated for thermodynamic integration. Trajectories are produces so that files for MBAR analysis may be generated in post-processing. - The input data file for this script should be an equilibrated frame in the NPT ensemble. Notice that the input file contains + The input data file for this script should be an equilibrated frame in the NPT ensemble. Notice that the input file contains the following keywords that you might replace with the values for your simulation using `sed`: TEMP, PRESS Parameters ---------- parameter : str - Parameter being varied, see table in `compute fep `_ for the options in + Parameter being varied, see table in `compute fep `_ for the options in your pair-potential parameter_range : list[float] - Range of parameter values to be changed where the first value should be the value with which the system has been + Range of parameter values to be changed where the first value should be the value with which the system has been equilibrated. parameter_change : float - The size of the step between parameter values. Take care that number of points needed to traverse the given range + The size of the step between parameter values. Take care that number of points needed to traverse the given range should result in an integer, otherwise LAMMPS will not end at the desired value. 
pair_style : str String of LAMMPS pair style being changes @@ -223,7 +264,7 @@ def generate_traj_input(parameter, parameter_range, parameter_change, pair_style types_solute : str String defining atom types in the solute (not spaces) del_parameter : float, default=0.1 - Change used to calculate the forward and backward difference used to compute the derivative through a central difference + Change used to calculate the forward and backward difference used to compute the derivative through a central difference approximation. output_file : str, default=None File name and path for optional output file @@ -236,26 +277,41 @@ def generate_traj_input(parameter, parameter_range, parameter_change, pair_style parameter2_value : float, default=None Value to set ``parameter2`` del_parameter2 : float, default=None - Change used to calculate the forward and backward difference used to compute the derivative through a central difference + Change used to calculate the forward and backward difference used to compute the derivative through a central difference approximation for parameter2. 
Returns ------- file : list[str] List of strings representing lines in a file - + """ nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change if nblocks % 1 > 0: - raise ValueError(f"The number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer") + raise ValueError( + f"The number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer" + ) else: nblocks = int(nblocks) - - if (any([x is not None for x in [parameter2, pair_style2, parameter2_value, del_parameter2]]) and - not all([x is not None for x in [parameter2, pair_style2, parameter2_value, del_parameter2]])): - raise ValueError((f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " - + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}, del_parameter2={del_parameter2}")) - name1 = "-".join([pair_style.replace("/","-"), parameter]) + + if any( + [ + x is not None + for x in [parameter2, pair_style2, parameter2_value, del_parameter2] + ] + ) and not all( + [ + x is not None + for x in [parameter2, pair_style2, parameter2_value, del_parameter2] + ] + ): + raise ValueError( + ( + f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " + + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}, del_parameter2={del_parameter2}" + ) + ) + name1 = "-".join([pair_style.replace("/", "-"), parameter]) file = [ "\n# Variables and System Conditions\n", "variable freq equal 1000 # Consider changing\n", @@ -285,7 +341,7 @@ def generate_traj_input(parameter, parameter_range, parameter_change, pair_style " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", " run ${runtime} # Run Equil\n", "\n label skipequil\n\n", - f" write_data files/npt_{name1}_"+"${param}.data\n", + f" write_data files/npt_{name1}_" + "${param}.data\n", "\n # Initialize 
computes\n", " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", " variable deltacdm2 equal -v_deltacdm\n", @@ -294,8 +350,10 @@ def generate_traj_input(parameter, parameter_range, parameter_change, pair_style " compute FEPdf all fep ${TK} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm\n", " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n", - f" c_FEPdb[1] c_FEPdf[1] file files/ti_{name1}_"+"${param}.txt\n", - "\n dump TRAJ all custom ${freq} "+f"files/dump_{name1}_"+"${param}.lammpstrj id mol type element xu yu zu\n", + f" c_FEPdb[1] c_FEPdf[1] file files/ti_{name1}_" + "${param}.txt\n", + "\n dump TRAJ all custom ${freq} " + + f"files/dump_{name1}_" + + "${param}.lammpstrj id mol type element xu yu zu\n", "\n run ${runtime}\n\n", " uncompute FEPdb\n", " uncompute FEPdf\n", @@ -307,10 +365,10 @@ def generate_traj_input(parameter, parameter_range, parameter_change, pair_style " jump SELF runloop1\n", "write_data npt.data nocoeff\n", ] - + if parameter2 is not None: - name2 = "-".join([pair_style2.replace("/","-"), parameter2]) - file[6:6] = f"variable delta2cdm equal {del_parameter2}\n", + name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) + file[6:6] = (f"variable delta2cdm equal {del_parameter2}\n",) file2 = [ "\n# Set Previous Change\n", f"variable param2 equal {parameter2_value}\n", @@ -327,37 +385,63 @@ def generate_traj_input(parameter, parameter_range, parameter_change, pair_style file[-1:-1] = "uncompute FEP2db\n" file[-1:-1] = "uncompute FEP2df\n" ind = [ii for ii, x in enumerate(file) if "write_data files/npt" in x][0] - file[ind] = f" write_data files/npt_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.data\n" + file[ind] = ( + f" write_data files/npt_{name1}_" + + "${param}_" + + f"{name2}_{parameter2_value}.data\n" + ) ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] - file[ind] = " fix FEPout 
all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n" - file[ind+1] = f" c_FEPdb[1] c_FEPdf[1] c_FEP2db[1] c_FEP2df[1] file files/ti_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.txt\n" - file[ind+2] = "\n dump TRAJ all custom ${freq} "+f"files/dump_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.lammpstrj id mol type element xu yu zu\n" + file[ind] = ( + " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n" + ) + file[ind + 1] = ( + f" c_FEPdb[1] c_FEPdf[1] c_FEP2db[1] c_FEP2df[1] file files/ti_{name1}_" + + "${param}_" + + f"{name2}_{parameter2_value}.txt\n" + ) + file[ind + 2] = ( + "\n dump TRAJ all custom ${freq} " + + f"files/dump_{name1}_" + + "${param}_" + + f"{name2}_{parameter2_value}.lammpstrj id mol type element xu yu zu\n" + ) if output_file is not None: with open(output_file, "w") as f: for line in file: f.write(line) - return file - - -def generate_rerun_mbar(parameter_value, parameter, parameter_range, parameter_change, pair_style, types_solvent, - types_solute, output_file=None, parameter2=None, pair_style2=None, parameter2_value=None): - """ Outputs the section of a LAMMPS input file that reruns trajectories for different lambda values and calculates - the potential energy for all other lambda values with this set of configurations. + return file + + +def generate_rerun_mbar( + parameter_value, + parameter, + parameter_range, + parameter_change, + pair_style, + types_solvent, + types_solute, + output_file=None, + parameter2=None, + pair_style2=None, + parameter2_value=None, +): + """Outputs the section of a LAMMPS input file that reruns trajectories for different lambda values and calculates + the potential energy for all other lambda values with this set of configurations. 
Parameters ---------- parameter_value : float Value of parameter being varied (e.g., lambda) parameter : str - Parameter being varied, see table in `compute fep `_ for the options in + Parameter being varied, see table in `compute fep `_ for the options in your pair-potential parameter_range : list[float] - Range of parameter values to be changed where the first value should be the value with which the system has been + Range of parameter values to be changed where the first value should be the value with which the system has been equilibrated. parameter_change : float - The size of the step between parameter values. Take care that number of points needed to traverse the given range + The size of the step between parameter values. Take care that number of points needed to traverse the given range should result in an integer, otherwise lammps will not end at the desired value. pair_style : str String of LAMMPS pair style being changes @@ -380,25 +464,36 @@ def generate_rerun_mbar(parameter_value, parameter, parameter_range, parameter_c ------- file : list[str] List of strings representing lines in a file - + """ nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change if nblocks % 1 > 0: - raise ValueError("The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( - parameter_range, parameter_change)) + raise ValueError( + "The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( + parameter_range, parameter_change + ) + ) else: nblocks = int(nblocks) - - if (any([x is not None for x in [parameter2, pair_style2, parameter2_value]]) and - not all([x is not None for x in [parameter2, pair_style2, parameter2_value]])): - raise ValueError((f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " - + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}")) - + + if any( + [x is not None for x in 
[parameter2, pair_style2, parameter2_value]] + ) and not all([x is not None for x in [parameter2, pair_style2, parameter2_value]]): + raise ValueError( + ( + f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " + + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}" + ) + ) + if np.isclose(parameter_range[0], 0): prec = int(np.abs(int(np.log10(np.abs(parameter_change))))) else: - prec = max(int(np.abs(int(np.log10(np.abs(parameter_range[0]))))), int(np.abs(int(np.log10(np.abs(parameter_change)))+1))) - name1 = "-".join([pair_style.replace("/","-"), parameter]) + prec = max( + int(np.abs(int(np.log10(np.abs(parameter_range[0]))))), + int(np.abs(int(np.log10(np.abs(parameter_change))) + 1)), + ) + name1 = "-".join([pair_style.replace("/", "-"), parameter]) file = [ "\n# Variables and System Conditions\n", f"variable param equal {parameter_value}\n", @@ -407,7 +502,7 @@ def generate_rerun_mbar(parameter_value, parameter, parameter_range, parameter_c f"variable delta equal {parameter_change}\n", "variable TK equal TEMP\n", "\nthermo ${freq}\n", - f"read_data files/npt_{name1}_"+"${param}.data\n", + f"read_data files/npt_{name1}_" + "${param}.data\n", "\n# Initialize computes\n", ] if parameter2 is not None: @@ -418,47 +513,69 @@ def generate_rerun_mbar(parameter_value, parameter, parameter_range, parameter_c f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_param2\n", ] file[8:8] = file2 - name2 = "-".join([pair_style2.replace("/","-"), parameter2]) + name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) ind = [ii for ii, x in enumerate(file) if "read_data files/npt" in x][0] - file[ind] = f"read_data files/npt_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.data\n" - + file[ind] = ( + f"read_data files/npt_{name1}_" + + "${param}_" + + f"{name2}_{parameter2_value}.data\n" + ) + for i in range(nblocks): value2 = parameter_range[0] + parameter_change * i delta = value2 - 
parameter_value - tmp = "variable delta{0:0d}".format(i)+" equal {0:."+str(prec)+"f}\n" + tmp = "variable delta{0:0d}".format(i) + " equal {0:." + str(prec) + "f}\n" tmp = [ tmp.format(delta), "compute FEP{0:03d} all fep ".format(i) + "${TK} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_delta{i}\n", "variable param{0:03d} equal v_param+v_delta{0:0d}\n".format(i), - "fix FEPout{0:03d} all".format(i)+" ave/time ${freq} 1 ${freq} "+"v_time v_param v_param{0:03d} &\n".format(i), - " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i)+f" file files/mbar_{name1}"+"_${param}_${param"+str( - "{0:03d}".format(i))+"}.txt\n\n", + "fix FEPout{0:03d} all".format(i) + + " ave/time ${freq} 1 ${freq} " + + "v_time v_param v_param{0:03d} &\n".format(i), + " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) + + f" file files/mbar_{name1}" + + "_${param}_${param" + + str("{0:03d}".format(i)) + + "}.txt\n\n", ] if parameter2 is not None: ind = [ii for ii, x in enumerate(tmp) if "fix FEPout" in x][0] - tmp[ind:ind+2] = [ - "fix FEPout{0:03d} all".format(i)+" ave/time ${freq} 1 ${freq} "+"v_time v_param v_param{0:03d} v_param2 &\n".format(i), - " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i)+f" file files/mbar_{name1}"+"_${param}_${param"+str( - "{0:03d}".format(i))+"}_"+"{}_{}.txt\n\n".format(name2, parameter2_value), + tmp[ind : ind + 2] = [ + "fix FEPout{0:03d} all".format(i) + + " ave/time ${freq} 1 ${freq} " + + "v_time v_param v_param{0:03d} v_param2 &\n".format(i), + " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) + + f" file files/mbar_{name1}" + + "_${param}_${param" + + str("{0:03d}".format(i)) + + "}_" + + "{}_{}.txt\n\n".format(name2, parameter2_value), ] file.extend(tmp) - + if parameter2 is not None: - file.append(f"\nrerun files/dump_{name1}_"+"${param}_"+f"{name2}_{parameter2_value}.lammpstrj " - + "every ${freq} dump xu yu zu\n\n") + file.append( + f"\nrerun files/dump_{name1}_" + + "${param}_" + + 
f"{name2}_{parameter2_value}.lammpstrj " + + "every ${freq} dump xu yu zu\n\n" + ) else: - file.append(f"\nrerun files/dump_{name1}"+"_${param}.lammpstrj every ${freq} dump xu yu zu\n\n") + file.append( + f"\nrerun files/dump_{name1}" + + "_${param}.lammpstrj every ${freq} dump xu yu zu\n\n" + ) if output_file is not None: with open(output_file, "w") as f: for line in file: f.write(line) - return file + return file -def _get_bar_lambdas(fep_files, indices=[2,3]): +def _get_bar_lambdas(fep_files, indices=[2, 3]): """Retrieves all lambda values from FEP filenames. Parameters @@ -475,35 +592,52 @@ def _get_bar_lambdas(fep_files, indices=[2,3]): List of tuples lambda values contained in the file. lambda_pairs : list List of tuples containing two floats, lambda and lambda'. - + """ - def tuple_from_filename(filename, separator="_", indices=[2,3]): - name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) + def tuple_from_filename(filename, separator="_", indices=[2, 3]): + name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split( + separator + ) if not _isfloat(name_array[indices[0]]): - raise ValueError(f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}") + raise ValueError( + f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}" + ) if not _isfloat(name_array[indices[1]]): - raise ValueError(f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}") + raise ValueError( + f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}" + ) return (float(name_array[indices[0]]), float(name_array[indices[1]])) + def lambda2_from_filename(filename, separator="_", index=-1): - name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) + name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split( + separator + ) if not _isfloat(name_array[index]): - raise 
ValueError(f"Entry, {index} in filename cannot be converted to float: {name_array[index]}") + raise ValueError( + f"Entry, {index} in filename cannot be converted to float: {name_array[index]}" + ) return float(name_array[index]) - + lambda_pairs = [tuple_from_filename(y, indices=indices) for y in fep_files] if len(indices) == 3: - lambda2 = list(set([lambda2_from_filename(y, index=indices[2]) for y in fep_files])) + lambda2 = list( + set([lambda2_from_filename(y, index=indices[2]) for y in fep_files]) + ) if len(lambda2) > 1: - raise ValueError("More than one value of lambda2 is present in the provided files." - f" Restrict filename input to one of: {lambda2}") + raise ValueError( + "More than one value of lambda2 is present in the provided files." + f" Restrict filename input to one of: {lambda2}" + ) else: lambda2 = None lambda_values = sorted(list(set([x for y in lambda_pairs for x in y]))) check_float = [x for x in lambda_values if not _isfloat(x)] if check_float: - raise ValueError("Lambda values must be convertible to floats: {}".format(check_float)) + raise ValueError( + "Lambda values must be convertible to floats: {}".format(check_float) + ) if [x for x in lambda_values if float(x) < 0]: raise ValueError("Lambda values must be positive: {}".format(lambda_values)) @@ -514,48 +648,73 @@ def lambda2_from_filename(filename, separator="_", index=-1): missing_combinations_mbar = [] missing_combinations_bar = [] for lambda_value, lambda_array in lamda_dict.items(): - missing_combinations_mbar.extend([(lambda_value, x) for x in lambda_values if x not in lambda_array]) - + missing_combinations_mbar.extend( + [(lambda_value, x) for x in lambda_values if x not in lambda_array] + ) + if missing_combinations_mbar: warnings.warn( - "The following combinations of lambda and lambda prime are missing for MBAR analysis: {}".format(missing_combinations_mbar) + "The following combinations of lambda and lambda prime are missing for MBAR analysis: {}".format( + 
missing_combinations_mbar + ) ) else: return lambda_values, lambda_pairs, lambda2 - + # Check for BAR content missing_combinations_bar = [] extra_combinations_bar = [] lambda_values.sort() for ind, (lambda_value, lambda_array) in enumerate(lamda_dict.items()): if ind == 0: - tmp_array = [lambda_values[ind], lambda_values[ind+1]] + tmp_array = [lambda_values[ind], lambda_values[ind + 1]] elif ind == len(lamda_dict) - 1: - tmp_array = [lambda_values[ind-1], lambda_values[ind]] + tmp_array = [lambda_values[ind - 1], lambda_values[ind]] else: - tmp_array = [lambda_values[ind-1], lambda_values[ind], lambda_values[ind+1]] + tmp_array = [ + lambda_values[ind - 1], + lambda_values[ind], + lambda_values[ind + 1], + ] + + missing_combinations_bar.extend( + [(lambda_value, x) for x in tmp_array if x not in lambda_array] + ) + extra_combinations_bar.extend( + [(lambda_value, x) for x in lambda_array if x not in tmp_array] + ) - missing_combinations_bar.extend([(lambda_value, x) for x in tmp_array if x not in lambda_array]) - extra_combinations_bar.extend([(lambda_value, x) for x in lambda_array if x not in tmp_array]) - if missing_combinations_bar: raise ValueError( - "BAR calculation cannot be performed without the following lambda-lambda prime combinations: {}".format(missing_combinations_bar) + "BAR calculation cannot be performed without the following lambda-lambda prime combinations: {}".format( + missing_combinations_bar + ) ) if extra_combinations_bar: warnings.warn( - "The following combinations of lambda and lambda prime are extra and being discarded for BAR analysis: {}".format(extra_combinations_bar) + "The following combinations of lambda and lambda prime are extra and being discarded for BAR analysis: {}".format( + extra_combinations_bar + ) ) lambda_pairs = [x for x in lambda_pairs if x not in extra_combinations_bar] return lambda_values, lambda_pairs, lambda2 -@_init_attrs -def extract_u_nk(fep_files, T, columns_lambda1=[2,3], column_u_nk=4, column_lambda2=None, 
indices=[1,2], units="real", - vdw_lambda=1): - """ This function will go into alchemlyb.parsing.lammps - Each file is imported as a data frame where the columns kept are either: +@_init_attrs +def extract_u_nk( + fep_files, + T, + columns_lambda1=[2, 3], + column_u_nk=4, + column_lambda2=None, + indices=[1, 2], + units="real", + vdw_lambda=1, +): + """This function will go into alchemlyb.parsing.lammps + + Each file is imported as a data frame where the columns kept are either: [0, columns_lambda1[0] columns_lambda1[1], column_u_nk] or if columns_lambda2 is not None: [0, columns_lambda1[0] columns_lambda1[1], column_lambda2, column_u_nk] @@ -580,7 +739,7 @@ def extract_u_nk(fep_files, T, columns_lambda1=[2,3], column_u_nk=4, column_lamb containing the lambda information for :func:`alchemlyb.parsing._get_bar_lambdas`. If ``column_lambda2 != None`` this list should be of length three, where the last value represents the invariant lambda. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" vdw_lambda : int, default=1 In the case that ``column_lambda2 is not None``, this integer represents which lambda represents vdw interactions. @@ -594,27 +753,31 @@ def extract_u_nk(fep_files, T, columns_lambda1=[2,3], column_u_nk=4, column_lamb - temperature in K - energy unit in kT - + """ - # Collect Files + # Collect Files files = glob.glob(fep_files) if not files: raise ValueError(f"No files have been found that match: {fep_files}") if units == "real": - beta = 1 / (k_b * T) + beta = 1 / (k_b * T) elif units == "lj": beta = 1 / T else: - raise ValueError(f"LAMMPS unit type, {units}, is not supported. Supported types are: real and lj") + raise ValueError( + f"LAMMPS unit type, {units}, is not supported. 
Supported types are: real and lj" + ) if len(columns_lambda1) != 2: - raise ValueError(f"Provided columns for lambda1 must have a length of two, columns_lambda1: {columns_lambda1}") + raise ValueError( + f"Provided columns for lambda1 must have a length of two, columns_lambda1: {columns_lambda1}" + ) if not np.all([isinstance(x, int) for x in columns_lambda1]): raise ValueError( f"Provided column for columns_lambda1 must be type int. columns_lambda1: {columns_lambda1}, type: {[type(x) for x in columns_lambda1]}" - ) + ) if column_lambda2 is not None and not isinstance(column_lambda2, int): raise ValueError( f"Provided column for u_nk must be type int. column_u_nk: {column_lambda2}, type: {type(column_lambda2)}" @@ -627,76 +790,118 @@ def extract_u_nk(fep_files, T, columns_lambda1=[2,3], column_u_nk=4, column_lamb lambda_values, _, lambda2 = _get_bar_lambdas(files, indices=indices) if column_lambda2 is None: - u_nk = pd.DataFrame(columns=["time", "fep-lambda"]+lambda_values) + u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) lc = len(lambda_values) col_indices = [0] + list(columns_lambda1) + [column_u_nk] else: u_nk = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda"]) - lc = len(lambda_values)**2 + lc = len(lambda_values) ** 2 col_indices = [0] + list(columns_lambda1) + [column_lambda2, column_u_nk] for file in files: if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) - + data = pd.read_csv(file, sep=" ", comment="#") lx = len(data.columns) if [False for x in col_indices if x > lx]: - raise ValueError("Number of columns, {}, is less than index: {}".format(lx, col_indices)) - data = data.iloc[ :, col_indices] + raise ValueError( + "Number of columns, {}, is less than index: {}".format(lx, col_indices) + ) + data = data.iloc[:, col_indices] if column_lambda2 is None: - data.columns=["time", "fep-lambda", "fep-lambda2", "u_nk"] + data.columns = ["time", "fep-lambda", "fep-lambda2", "u_nk"] lambda1_col, 
lambda1_2_col = "fep-lambda", "fep-lambda2" columns_a = ["time", "fep-lambda"] columns_b = lambda_values else: columns_a = ["time", "coul-lambda", "vdw-lambda"] if vdw_lambda == 1: - data.columns=["time", "vdw-lambda", "vdw-lambda2", "coul-lambda", "u_nk"] + data.columns = [ + "time", + "vdw-lambda", + "vdw-lambda2", + "coul-lambda", + "u_nk", + ] lambda1_col, lambda1_2_col = "vdw-lambda", "vdw-lambda2" columns_b = [(lambda2, x) for x in lambda_values] elif vdw_lambda == 2: - data.columns=["time", "coul-lambda", "coul-lambda2", "vdw-lambda", "u_nk"] + data.columns = [ + "time", + "coul-lambda", + "coul-lambda2", + "vdw-lambda", + "u_nk", + ] lambda1_col, lambda1_2_col = "coul-lambda", "coul-lambda2" columns_b = [(x, lambda2) for x in lambda_values] else: - raise ValueError(f"'vdw_lambda must be either 1 or 2, not: {vdw_lambda}'") - + raise ValueError( + f"'vdw_lambda must be either 1 or 2, not: {vdw_lambda}'" + ) + for lambda1 in list(data[lambda1_col].unique()): - tmp_df = data.loc[ data[lambda1_col] == lambda1 ] + tmp_df = data.loc[data[lambda1_col] == lambda1] for lambda12 in list(tmp_df[lambda1_2_col].unique()): - tmp_df2 = tmp_df.loc[tmp_df[lambda1_2_col] == lambda12 ] - + tmp_df2 = tmp_df.loc[tmp_df[lambda1_2_col] == lambda12] + lr = tmp_df2.shape[0] if u_nk[u_nk[lambda1_col] == lambda1].shape[0] == 0: - u_nk = pd.concat([ - u_nk, - pd.concat([ - tmp_df2[columns_a], - pd.DataFrame( - np.zeros((lr,lc)), - columns=columns_b, - ) - ], axis=1) - ], axis=0, sort=False) + u_nk = pd.concat( + [ + u_nk, + pd.concat( + [ + tmp_df2[columns_a], + pd.DataFrame( + np.zeros((lr, lc)), + columns=columns_b, + ), + ], + axis=1, + ), + ], + axis=0, + sort=False, + ) column_name = lambda_values[ - [ii for ii,x in enumerate(lambda_values) if float(x) == lambda12][0] + [ii for ii, x in enumerate(lambda_values) if float(x) == lambda12][ + 0 + ] ] if column_lambda2 is not None: - column_name = (lambda2, column_name) if vdw_lambda == 1 else (column_name, lambda2) + column_name = ( + 
(lambda2, column_name) + if vdw_lambda == 1 + else (column_name, lambda2) + ) if u_nk.loc[u_nk[lambda1_col] == lambda1, column_name][0] != 0: - raise ValueError("Energy values already available for lambda, {}, lambda', {}.".format(lambda1, lambda12)) - - if u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[0] != tmp_df2["u_nk"].shape[0]: - raise ValueError("Number of energy values in file, {}, N={}, inconsistent with previous files of length, {}.".format( - file, - tmp_df2["u_nk"].shape[0], - u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[0], - )) - - u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = beta * tmp_df2["u_nk"] + raise ValueError( + "Energy values already available for lambda, {}, lambda', {}.".format( + lambda1, lambda12 + ) + ) + + if ( + u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[0] + != tmp_df2["u_nk"].shape[0] + ): + raise ValueError( + "Number of energy values in file, {}, N={}, inconsistent with previous files of length, {}.".format( + file, + tmp_df2["u_nk"].shape[0], + u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[ + 0 + ], + ) + ) + + u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = ( + beta * tmp_df2["u_nk"] + ) if column_lambda2 is None: u_nk.set_index(["time", "fep-lambda"], inplace=True) @@ -705,12 +910,23 @@ def extract_u_nk(fep_files, T, columns_lambda1=[2,3], column_u_nk=4, column_lamb return u_nk -@_init_attrs -def extract_dHdl(fep_files, T, column_lambda1=2, column_dlambda1=3, column_lambda2=None, - column_dlambda2=None, columns_derivative1=[10,11], columns_derivative2=[12,13], index=-1, units="real"): - """ This function will go into alchemlyb.parsing.lammps - Each file is imported as a data frame where the columns kept are either: +@_init_attrs +def extract_dHdl( + fep_files, + T, + column_lambda1=2, + column_dlambda1=3, + column_lambda2=None, + column_dlambda2=None, + columns_derivative1=[10, 11], + columns_derivative2=[12, 13], + index=-1, + units="real", +): + """This 
function will go into alchemlyb.parsing.lammps + + Each file is imported as a data frame where the columns kept are either: [0, column_lambda, column_dlambda1, columns_derivative[0], columns_derivative[1]] or if columns_lambda2 is not None: [ @@ -728,12 +944,12 @@ def extract_dHdl(fep_files, T, column_lambda1=2, column_dlambda1=3, column_lambd column_lambda1 : int, default=2 Index for column (column number minus one) representing the lambda at which the system is equilibrated. column_dlambda1 : int, default=3 - Index for column (column number minus one) for the change in lambda. + Index for column (column number minus one) for the change in lambda. column_lambda2 : int, default=None - Index for column (column number minus one) for a second value of lambda. + Index for column (column number minus one) for a second value of lambda. If this array is ``None`` then we do not expect two lambda values. column_dlambda2 : int, default=None - Index for column (column number minus one) for the change in lambda2. + Index for column (column number minus one) for the change in lambda2. columns_derivative : list[int], default=[10,11] Indices for columns (column number minus one) representing the lambda at which to find the forward and backward distance. @@ -741,7 +957,7 @@ def extract_dHdl(fep_files, T, column_lambda1=2, column_dlambda1=3, column_lambd In provided file names, using underscore as a separator, this index marks the part of the filename containing the lambda information for :func:`alchemlyb.parsing._get_ti_lambdas`. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + Unit system used in LAMMPS calculation. 
Currently supported: "real" and "lj" Results ------- @@ -753,80 +969,133 @@ def extract_dHdl(fep_files, T, column_lambda1=2, column_dlambda1=3, column_lambd - temperature in K or dimensionless - energy unit in kT - + """ - # Collect Files + # Collect Files files = glob.glob(fep_files) if not files: raise ValueError("No files have been found that match: {}".format(fep_files)) if units == "real": - beta = 1 / (k_b * T) + beta = 1 / (k_b * T) elif units == "lj": beta = 1 / T else: - raise ValueError("LAMMPS unit type, {}, is not supported. Supported types are: real and lj".format(units)) + raise ValueError( + "LAMMPS unit type, {}, is not supported. Supported types are: real and lj".format( + units + ) + ) if not isinstance(column_lambda1, int): - raise ValueError("Provided column_lambda1 must be type 'int', instead: {}".format(type(column_lambda1))) + raise ValueError( + "Provided column_lambda1 must be type 'int', instead: {}".format( + type(column_lambda1) + ) + ) if column_lambda2 is not None and not isinstance(column_lambda2, int): - raise ValueError("Provided column_lambda2 must be type 'int', instead: {}".format(type(column_lambda2))) + raise ValueError( + "Provided column_lambda2 must be type 'int', instead: {}".format( + type(column_lambda2) + ) + ) if not isinstance(column_dlambda1, int): - raise ValueError("Provided column_dlambda1 must be type 'int', instead: {}".format(type(column_dlambda1))) + raise ValueError( + "Provided column_dlambda1 must be type 'int', instead: {}".format( + type(column_dlambda1) + ) + ) if column_dlambda2 is not None and not isinstance(column_dlambda2, int): - raise ValueError("Provided column_dlambda2 must be type 'int', instead: {}".format(type(column_dlambda2))) + raise ValueError( + "Provided column_dlambda2 must be type 'int', instead: {}".format( + type(column_dlambda2) + ) + ) if len(columns_derivative1) != 2: - raise ValueError("Provided columns for derivative values must have a length of two, columns_derivative1: 
{}".format( - columns_derivative1)) + raise ValueError( + "Provided columns for derivative values must have a length of two, columns_derivative1: {}".format( + columns_derivative1 + ) + ) if not np.all([isinstance(x, int) for x in columns_derivative1]): raise ValueError( "Provided column for columns_derivative1 must be type int. columns_derivative1: {}, type: {}".format( columns_derivative1, type([type(x) for x in columns_derivative1]) - )) + ) + ) if len(columns_derivative2) != 2: - raise ValueError("Provided columns for derivative values must have a length of two, columns_derivative2: {}".format( - columns_derivative2)) + raise ValueError( + "Provided columns for derivative values must have a length of two, columns_derivative2: {}".format( + columns_derivative2 + ) + ) if not np.all([isinstance(x, int) for x in columns_derivative2]): raise ValueError( "Provided column for columns_derivative1 must be type int. columns_derivative1: {}, type: {}".format( columns_derivative2, type([type(x) for x in columns_derivative2]) - )) + ) + ) if column_lambda2 is None: dHdl = pd.DataFrame(columns=["time", "fep-lambda", "fep"]) col_indices = [0, column_lambda1, column_dlambda1] + list(columns_derivative1) else: - dHdl = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda", "coul", "vdw"]) - col_indices = ([0, column_lambda2, column_lambda1, column_dlambda1, column_dlambda2] - + list(columns_derivative1) + list(columns_derivative2)) + dHdl = pd.DataFrame( + columns=["time", "coul-lambda", "vdw-lambda", "coul", "vdw"] + ) + col_indices = ( + [0, column_lambda2, column_lambda1, column_dlambda1, column_dlambda2] + + list(columns_derivative1) + + list(columns_derivative2) + ) for file in files: if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) - + data = pd.read_csv(file, sep=" ", comment="#") lx = len(data.columns) if [False for x in col_indices if x > lx]: - raise ValueError("Number of columns, {}, is less than index: {}".format(lx, 
col_indices)) - + raise ValueError( + "Number of columns, {}, is less than index: {}".format(lx, col_indices) + ) + data = data.iloc[:, col_indices] if column_lambda2 is None: data.columns = ["time", "fep-lambda", "dlambda", "dU_back", "dU_forw"] - data["fep"] = ( data.dU_forw - data.dU_back ) / ( 2 * data.dlambda ) + data["fep"] = (data.dU_forw - data.dU_back) / (2 * data.dlambda) data.drop(columns=["dlambda", "dU_back", "dU_forw"], inplace=True) - dHdl = pd.concat([ dHdl, data], axis=0, sort=False) + dHdl = pd.concat([dHdl, data], axis=0, sort=False) else: data.columns = [ - "time", "coul-lambda", "vdw-lambda", "dlambda_vdw", "dlambda_coul", - "dU_back_vdw", "dU_forw_vdw", "dU_back_coul", "dU_forw_coul", - ] - data["coul"] = ( data.dU_forw_coul - data.dU_back_coul ) / ( 2 * data.dlambda_coul ) - data["vdw"] = ( data.dU_forw_vdw - data.dU_back_vdw ) / ( 2 * data.dlambda_vdw ) - data.drop(columns=["dlambda_vdw", "dlambda_coul", "dU_back_coul", "dU_forw_coul", - "dU_back_vdw", "dU_forw_vdw"], inplace=True) - + "time", + "coul-lambda", + "vdw-lambda", + "dlambda_vdw", + "dlambda_coul", + "dU_back_vdw", + "dU_forw_vdw", + "dU_back_coul", + "dU_forw_coul", + ] + data["coul"] = (data.dU_forw_coul - data.dU_back_coul) / ( + 2 * data.dlambda_coul + ) + data["vdw"] = (data.dU_forw_vdw - data.dU_back_vdw) / (2 * data.dlambda_vdw) + data.drop( + columns=[ + "dlambda_vdw", + "dlambda_coul", + "dU_back_coul", + "dU_forw_coul", + "dU_back_vdw", + "dU_forw_vdw", + ], + inplace=True, + ) + if column_lambda2 is None: dHdl.set_index(["time", "fep-lambda"], inplace=True) dHdl.mul({"fep": beta}) @@ -834,4 +1103,4 @@ def extract_dHdl(fep_files, T, column_lambda1=2, column_dlambda1=3, column_lambd dHdl.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) dHdl.mul({"coul": beta, "vdw": beta}) - return dHdl \ No newline at end of file + return dHdl From 606d7f17e79bbefd8ed387586b4c3eac996ac95d Mon Sep 17 00:00:00 2001 From: jac16 Date: Mon, 5 Feb 2024 13:48:39 -0500 Subject: 
[PATCH 03/59] Update variables in generated files --- src/alchemlyb/parsing/lammps.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 2259b6c6..d6271e54 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -132,6 +132,9 @@ def generate_input_linear_approximation( f"variable paramstart equal {parameter_range[0]}\n", "variable TK equal TEMP\n", "variable PBAR equal PRESS\n", + "variable pinst equal press\n", + "variable tinst equal temp\n", + "variable pe equal pe\n", "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening factors according to your system\n", "thermo ${freq}\n", "\n# Group atoms\n", @@ -169,7 +172,7 @@ def generate_input_linear_approximation( " compute pe_inter_3 solute group/group solvent pair no kspace yes\n", " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy &\n", " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3\n", - " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n", + " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_tinst v_pinst v_pe &\n", " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3 &\n", f" file files/linear_{name1}_" + "${param}.txt\n", "\n run ${runtime}\n\n", @@ -203,7 +206,7 @@ def generate_input_linear_approximation( file[-1:-1] = "unfix ADAPT2\n" ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] file[ind] = ( - " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_param2 v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n" + " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_param2 v_tinst v_pinst v_pe&\n" ) file[ind + 2] = ( f" file files/linear_{name1}_" @@ -322,6 +325,9 @@ def generate_traj_input( f"variable 
paramstart equal {parameter_range[0]}\n", "variable TK equal TEMP\n", "variable PBAR equal PRESS\n", + "variable pinst equal press\n", + "variable tinst equal temp\n", + "variable pe equal pe\n", "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening factors according to your system\n", "thermo ${freq}\n", "\n# Set-up Loop\n", @@ -349,7 +355,7 @@ def generate_traj_input( f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm2\n", " compute FEPdf all fep ${TK} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm\n", - " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n", + " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_tinst v_pinst v_pe &\n", f" c_FEPdb[1] c_FEPdf[1] file files/ti_{name1}_" + "${param}.txt\n", "\n dump TRAJ all custom ${freq} " + f"files/dump_{name1}_" @@ -392,7 +398,7 @@ def generate_traj_input( ) ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] file[ind] = ( - " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe v_evdwl v_enthalpy &\n" + " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe &\n" ) file[ind + 1] = ( f" c_FEPdb[1] c_FEPdf[1] c_FEP2db[1] c_FEP2df[1] file files/ti_{name1}_" From ed085b64d2a9e43d816a687be3b33ef26ed279f9 Mon Sep 17 00:00:00 2001 From: jac16 Date: Mon, 5 Feb 2024 14:48:21 -0500 Subject: [PATCH 04/59] Add input for mbar --- src/alchemlyb/parsing/lammps.py | 239 +++++++++++++++++++++++++++++++- 1 file changed, 238 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index d6271e54..c51ae303 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -328,13 +328,14 @@ def generate_traj_input( "variable pinst equal press\n", 
"variable tinst equal temp\n", "variable pe equal pe\n", + "\n", "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening factors according to your system\n", "thermo ${freq}\n", "\n# Set-up Loop\n", "variable nblocks equal 1/v_delta", "variable runid loop 0 ${nblocks} pad\n", " label runloop1\n", - "\n# Adjust param for the box and equilibrate\n", + "\n # Adjust param for the box and equilibrate\n", " variable param equal v_paramstart-v_runid*v_delta\n", ' if "${runid} == 0" then &\n', ' "jump SELF skipequil"\n', @@ -420,6 +421,242 @@ def generate_traj_input( return file +def generate_mbar_input( + parameter, + parameter_range, + parameter_change, + pair_style, + types_solvent, + types_solute, + del_parameter=0.01, + output_file=None, + parameter2=None, + parameter2_value=None, + pair_style2=None, + del_parameter2=None, +): + """Outputs the section of a LAMMPS input file that loops over the values of parameter being changed (e.g., lambda) + Small perturbations in the potential energy are also output so that the derivative can be calculated for thermodynamic + integration. Trajectories are produces so that files for MBAR analysis may be generated in post-processing. + + The input data file for this script should be an equilibrated frame in the NPT ensemble. Notice that the input file contains + the following keywords that you might replace with the values for your simulation using `sed`: TEMP, PRESS + + Parameters + ---------- + parameter : str + Parameter being varied, see table in `compute fep `_ for the options in + your pair-potential + parameter_range : list[float] + Range of parameter values to be changed where the first value should be the value with which the system has been + equilibrated. + parameter_change : float + The size of the step between parameter values. Take care that number of points needed to traverse the given range + should result in an integer, otherwise LAMMPS will not end at the desired value. 
+ pair_style : str + String of LAMMPS pair style being changes + types_solvent : str + String defining atom types in the solvent (not spaces) + types_solute : str + String defining atom types in the solute (not spaces) + del_parameter : float, default=0.1 + Change used to calculate the forward and backward difference used to compute the derivative through a central difference + approximation. + output_file : str, default=None + File name and path for optional output file + parameter2 : str, default=None + Parameter that has been varied and is set to another value in this simulation, e.g., lambda when the Coulomb potential + is set to zero. Using this feature avoids complications with writing the pair potential information in the data file. + See table in `compute fep `_ for the options in your pair-potential + pair_style2 : str, default=None + String with LAMMPS pair style being set for ``parameter2`` + parameter2_value : float, default=None + Value to set ``parameter2`` + del_parameter2 : float, default=None + Change used to calculate the forward and backward difference used to compute the derivative through a central difference + approximation for parameter2. 
+ + Returns + ------- + file : list[str] + List of strings representing lines in a file + + """ + nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change + if nblocks % 1 > 0: + raise ValueError( + f"The number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer" + ) + else: + nblocks = int(nblocks) + + if any( + [ + x is not None + for x in [parameter2, pair_style2, parameter2_value, del_parameter2] + ] + ) and not all( + [ + x is not None + for x in [parameter2, pair_style2, parameter2_value, del_parameter2] + ] + ): + raise ValueError( + ( + f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " + + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}, del_parameter2={del_parameter2}" + ) + ) + name1 = "-".join([pair_style.replace("/", "-"), parameter]) + file = [ + "\n# Variables and System Conditions\n", + "variable freq equal 1000 # Consider changing\n", + "variable runtime equal 1000000\n", + f"variable delta equal {parameter_change} \n", + f"variable nblocks equal {nblocks} \n", + f"variable deltacdm equal {del_parameter} # delta used in central different method for derivative in TI\n", + f"variable paramstart equal {parameter_range[0]}\n", + "variable TK equal TEMP\n", + "variable PBAR equal PRESS\n", + "variable pinst equal press\n", + "variable tinst equal temp\n", + "variable pe equal pe\n", + "\n", + "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening factors according to your system\n", + "thermo ${freq}\n", + "\n# Set-up Loop\n", + "variable nblocks equal 1/v_delta", + "variable runid loop 0 ${nblocks} pad\n", + " label runloop1\n", + "\n # Adjust param for the box and equilibrate\n", + " variable param equal v_paramstart-v_runid*v_delta\n", + ' if "${runid} == 0" then &\n', + ' "jump SELF skipequil"\n', + " variable param0 equal v_paramstart-(v_runid-1)*v_delta\n", + " variable paramramp 
equal ramp(v_param0,v_param)\n", + " fix ADAPT all adapt/fep ${freq} &\n", + f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_paramramp\n", + " thermo_style custom v_vstep v_time v_paramramp temp press pe evdwl enthalpy\n", + " run ${runtime} # Run Ramp\n", + " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " run ${runtime} # Run Equil\n", + "\n label skipequil\n\n", + f" write_data files/npt_{name1}_" + "${param}.data\n", + "\n # Initialize computes\n", + " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " variable deltacdm2 equal -v_deltacdm\n", + " compute FEPdb all fep ${TK} &\n", + f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm2\n", + " compute FEPdf all fep ${TK} &\n", + f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm\n", + " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_tinst v_pinst v_pe &\n", + f" c_FEPdb[1] c_FEPdf[1] file files/ti_{name1}_" + "${param}.txt\n", + "\n dump TRAJ all custom ${freq} " + + f"files/dump_{name1}_" + + "${param}.lammpstrj id mol type element xu yu zu\n", + "\n run ${runtime}\n\n", + " uncompute FEPdb\n", + " uncompute FEPdf\n", + ' if "${runid} != 0" then &\n', + ' "unfix ADAPT"\n', + " unfix FEPout\n", + " undump TRAJ\n", + "\n next runid\n", + " jump SELF runloop1\n", + "write_data npt.data nocoeff\n", + ] + + file2 = [] + for i in range(nblocks): + tmp = [ + " variable delta{0:0d} ".format(i) + f"(v_runid-{i})*v_delta\n", + " compute FEP{0:03d} all fep ".format(i) + "${TK} &\n", + f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_delta{i}\n", + " variable param{0:03d} equal v_param+v_delta{0:0d}\n".format(i), + " fix FEPout{0:03d} all".format(i) + + " ave/time ${freq} 1 ${freq} " + + "v_time v_param v_param{0:03d} &\n".format(i), + " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) + + f" file files/mbar_{name1}" + + "_${param}_${param" + 
+ str("{0:03d}".format(i)) + + "}.txt\n\n", + ] + if parameter2 is not None: + ind = [ii for ii, x in enumerate(tmp) if "fix FEPout" in x][0] + tmp[ind : ind + 2] = [ + " fix FEPout{0:03d} all".format(i) + + " ave/time ${freq} 1 ${freq} " + + "v_time v_param v_param{0:03d} v_param2 &\n".format(i), + " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) + + f" file files/mbar_{name1}" + + "_${param}_${param" + + str("{0:03d}".format(i)) + + "}_" + + "{}_{}.txt\n\n".format(name2, parameter2_value), + ] + file2.extend(tmp) + file[39:39] = file2 + + file2 = [] + for i in range(nblocks): + file2.extend( + [ + " uncompute FEP{0:03d}\n".format(i), + " unfix FEPout{0:03d}\n".format(i), + ] + ) + print(file2) + file[-7:-7] = file2 + + if parameter2 is not None: + name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) + file[6:6] = (f"variable delta2cdm equal {del_parameter2}\n",) + file2 = [ + "\n# Set Previous Change\n", + f"variable param2 equal {parameter2_value}\n", + "fix ADAPT2 all adapt/fep 1 &\n", + f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_param2\n", + "variable delta2cdm2 equal -v_delta2cdm\n", + "compute FEP2db all fep ${TK} &\n", + f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_delta2cdm2\n", + "compute FEP2df all fep ${TK} &\n", + f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_delta2cdm\n", + ] + file[14:14] = file2 + file[-1:-1] = "unfix ADAPT2\n" + file[-1:-1] = "uncompute FEP2db\n" + file[-1:-1] = "uncompute FEP2df\n" + ind = [ii for ii, x in enumerate(file) if "write_data files/npt" in x][0] + file[ind] = ( + f" write_data files/npt_{name1}_" + + "${param}_" + + f"{name2}_{parameter2_value}.data\n" + ) + ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] + file[ind] = ( + " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe &\n" + ) + file[ind + 1] = ( + f" c_FEPdb[1] c_FEPdf[1] c_FEP2db[1] 
c_FEP2df[1] file files/ti_{name1}_" + + "${param}_" + + f"{name2}_{parameter2_value}.txt\n" + ) + file[ind + 2] = ( + "\n dump TRAJ all custom ${freq} " + + f"files/dump_{name1}_" + + "${param}_" + + f"{name2}_{parameter2_value}.lammpstrj id mol type element xu yu zu\n" + ) + + if output_file is not None: + with open(output_file, "w") as f: + for line in file: + f.write(line) + + return file + + def generate_rerun_mbar( parameter_value, parameter, From d64101adf819e854f1ed7606ce3cf7ed77d946b6 Mon Sep 17 00:00:00 2001 From: jac16 Date: Mon, 5 Feb 2024 14:49:28 -0500 Subject: [PATCH 05/59] Remove print --- src/alchemlyb/parsing/lammps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index c51ae303..445fdfc5 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -606,7 +606,6 @@ def generate_mbar_input( " unfix FEPout{0:03d}\n".format(i), ] ) - print(file2) file[-7:-7] = file2 if parameter2 is not None: From df567ea56cd8a81b4e12e0b75dc3e1a02b3893f4 Mon Sep 17 00:00:00 2001 From: jac16 Date: Mon, 5 Feb 2024 15:15:48 -0500 Subject: [PATCH 06/59] Bug fix file outputs --- src/alchemlyb/parsing/lammps.py | 35 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 445fdfc5..39652d9e 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -151,9 +151,9 @@ def generate_input_linear_approximation( " variable paramramp equal ramp(v_param0,v_param)\n", " fix ADAPT all adapt/fep ${freq} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_paramramp\n", - " thermo_style custom v_vstep v_time v_paramramp temp press pe evdwl enthalpy\n", + " thermo_style custom v_paramramp temp press pe evdwl enthalpy\n", " run ${runtime} # Run Ramp\n", - " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " 
thermo_style custom v_param temp press pe evdwl enthalpy\n", " run ${runtime} # Run Equil\n", "\n label skipequil\n\n", f" write_data files/npt_{name1}_" + "${param}.data\n", @@ -170,7 +170,7 @@ def generate_input_linear_approximation( " compute pe_solvent_3 solvent group/group solvent pair no kspace yes\n", " compute pe_inter_2 solute group/group solvent pair yes kspace no\n", " compute pe_inter_3 solute group/group solvent pair no kspace yes\n", - " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy &\n", + " thermo_style custom v_param temp press pe evdwl enthalpy &\n", " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3\n", " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_tinst v_pinst v_pe &\n", " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3 &\n", @@ -206,7 +206,7 @@ def generate_input_linear_approximation( file[-1:-1] = "unfix ADAPT2\n" ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] file[ind] = ( - " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_param2 v_tinst v_pinst v_pe&\n" + " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_param2 v_tinst v_pinst v_pe&\n" ) file[ind + 2] = ( f" file files/linear_{name1}_" @@ -332,7 +332,6 @@ def generate_traj_input( "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening factors according to your system\n", "thermo ${freq}\n", "\n# Set-up Loop\n", - "variable nblocks equal 1/v_delta", "variable runid loop 0 ${nblocks} pad\n", " label runloop1\n", "\n # Adjust param for the box and equilibrate\n", @@ -343,20 +342,20 @@ def generate_traj_input( " variable paramramp equal ramp(v_param0,v_param)\n", " fix ADAPT all adapt/fep ${freq} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_paramramp\n", - " thermo_style custom v_vstep v_time v_paramramp temp press pe evdwl enthalpy\n", + " 
thermo_style custom v_paramramp temp press pe evdwl enthalpy\n", " run ${runtime} # Run Ramp\n", - " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " thermo_style custom v_param temp press pe evdwl enthalpy\n", " run ${runtime} # Run Equil\n", "\n label skipequil\n\n", f" write_data files/npt_{name1}_" + "${param}.data\n", "\n # Initialize computes\n", - " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " thermo_style custom v_param temp press pe evdwl enthalpy\n", " variable deltacdm2 equal -v_deltacdm\n", " compute FEPdb all fep ${TK} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm2\n", " compute FEPdf all fep ${TK} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm\n", - " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_tinst v_pinst v_pe &\n", + " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_deltacdm v_tinst v_pinst v_pe &\n", f" c_FEPdb[1] c_FEPdf[1] file files/ti_{name1}_" + "${param}.txt\n", "\n dump TRAJ all custom ${freq} " + f"files/dump_{name1}_" @@ -399,7 +398,7 @@ def generate_traj_input( ) ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] file[ind] = ( - " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe &\n" + " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe &\n" ) file[ind + 1] = ( f" c_FEPdb[1] c_FEPdf[1] c_FEP2db[1] c_FEP2df[1] file files/ti_{name1}_" @@ -525,7 +524,7 @@ def generate_mbar_input( "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening factors according to your system\n", "thermo ${freq}\n", "\n# Set-up Loop\n", - "variable nblocks equal 1/v_delta", + "variable nblocks equal 1/v_delta\n", "variable runid loop 0 ${nblocks} pad\n", " label runloop1\n", "\n # Adjust param for the box and equilibrate\n", @@ -536,20 +535,20 
@@ def generate_mbar_input( " variable paramramp equal ramp(v_param0,v_param)\n", " fix ADAPT all adapt/fep ${freq} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_paramramp\n", - " thermo_style custom v_vstep v_time v_paramramp temp press pe evdwl enthalpy\n", + " thermo_style custom v_paramramp temp press pe evdwl enthalpy\n", " run ${runtime} # Run Ramp\n", - " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " thermo_style custom v_param temp press pe evdwl enthalpy\n", " run ${runtime} # Run Equil\n", "\n label skipequil\n\n", f" write_data files/npt_{name1}_" + "${param}.data\n", "\n # Initialize computes\n", - " thermo_style custom v_vstep v_time v_param temp press pe evdwl enthalpy\n", + " thermo_style custom v_param temp press pe evdwl enthalpy\n", " variable deltacdm2 equal -v_deltacdm\n", " compute FEPdb all fep ${TK} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm2\n", " compute FEPdf all fep ${TK} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm\n", - " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_tinst v_pinst v_pe &\n", + " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_deltacdm v_tinst v_pinst v_pe &\n", f" c_FEPdb[1] c_FEPdf[1] file files/ti_{name1}_" + "${param}.txt\n", "\n dump TRAJ all custom ${freq} " + f"files/dump_{name1}_" @@ -569,7 +568,7 @@ def generate_mbar_input( file2 = [] for i in range(nblocks): tmp = [ - " variable delta{0:0d} ".format(i) + f"(v_runid-{i})*v_delta\n", + " variable delta{0:0d} equal ".format(i) + f"(v_runid-{i})*v_delta\n", " compute FEP{0:03d} all fep ".format(i) + "${TK} &\n", f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_delta{i}\n", " variable param{0:03d} equal v_param+v_delta{0:0d}\n".format(i), @@ -596,7 +595,7 @@ def generate_mbar_input( + "{}_{}.txt\n\n".format(name2, parameter2_value), ] file2.extend(tmp) - file[39:39] = file2 + 
file[40:40] = file2 file2 = [] for i in range(nblocks): @@ -634,7 +633,7 @@ def generate_mbar_input( ) ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] file[ind] = ( - " fix FEPout all ave/time ${freq} 1 ${freq} v_vstep v_time v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe &\n" + " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe &\n" ) file[ind + 1] = ( f" c_FEPdb[1] c_FEPdf[1] c_FEP2db[1] c_FEP2df[1] file files/ti_{name1}_" From b92901e9bb8c216e56750877d947fef8996b3ad0 Mon Sep 17 00:00:00 2001 From: jac16 Date: Mon, 5 Feb 2024 15:18:21 -0500 Subject: [PATCH 07/59] Bug fix file format --- src/alchemlyb/parsing/lammps.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 39652d9e..7c2a432d 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -574,7 +574,7 @@ def generate_mbar_input( " variable param{0:03d} equal v_param+v_delta{0:0d}\n".format(i), " fix FEPout{0:03d} all".format(i) + " ave/time ${freq} 1 ${freq} " - + "v_time v_param v_param{0:03d} &\n".format(i), + + "v_param v_param{0:03d} &\n".format(i), " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) + f" file files/mbar_{name1}" + "_${param}_${param" @@ -586,7 +586,7 @@ def generate_mbar_input( tmp[ind : ind + 2] = [ " fix FEPout{0:03d} all".format(i) + " ave/time ${freq} 1 ${freq} " - + "v_time v_param v_param{0:03d} v_param2 &\n".format(i), + + "v_param v_param{0:03d} v_param2 &\n".format(i), " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) + f" file files/mbar_{name1}" + "_${param}_${param" @@ -773,7 +773,7 @@ def generate_rerun_mbar( "variable param{0:03d} equal v_param+v_delta{0:0d}\n".format(i), "fix FEPout{0:03d} all".format(i) + " ave/time ${freq} 1 ${freq} " - + "v_time v_param v_param{0:03d} &\n".format(i), + + "v_param v_param{0:03d} &\n".format(i), " c_FEP{0:03d}[1] 
c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) + f" file files/mbar_{name1}" + "_${param}_${param" @@ -785,7 +785,7 @@ def generate_rerun_mbar( tmp[ind : ind + 2] = [ "fix FEPout{0:03d} all".format(i) + " ave/time ${freq} 1 ${freq} " - + "v_time v_param v_param{0:03d} v_param2 &\n".format(i), + + "v_param v_param{0:03d} v_param2 &\n".format(i), " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) + f" file files/mbar_{name1}" + "_${param}_${param" From a3c53556fd1fb9e755aa0c578fa923ef915c2e7c Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 7 Feb 2024 15:24:16 -0500 Subject: [PATCH 08/59] Bug fix param start, add dataframe for potential --- src/alchemlyb/parsing/lammps.py | 140 ++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 6 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 7c2a432d..1a92c1ff 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -129,7 +129,7 @@ def generate_input_linear_approximation( "variable runtime equal 1000000\n", f"variable delta equal {parameter_change} \n", f"variable nblocks equal {nblocks} \n", - f"variable paramstart equal {parameter_range[0]}\n", + f"variable paramstart equal {parameter_range[1]}\n", "variable TK equal TEMP\n", "variable PBAR equal PRESS\n", "variable pinst equal press\n", @@ -322,7 +322,7 @@ def generate_traj_input( f"variable delta equal {parameter_change} \n", f"variable nblocks equal {nblocks} \n", f"variable deltacdm equal {del_parameter} # delta used in central different method for derivative in TI\n", - f"variable paramstart equal {parameter_range[0]}\n", + f"variable paramstart equal {parameter_range[1]}\n", "variable TK equal TEMP\n", "variable PBAR equal PRESS\n", "variable pinst equal press\n", @@ -514,7 +514,7 @@ def generate_mbar_input( f"variable delta equal {parameter_change} \n", f"variable nblocks equal {nblocks} \n", f"variable deltacdm equal {del_parameter} # delta used in central different 
method for derivative in TI\n", - f"variable paramstart equal {parameter_range[0]}\n", + f"variable paramstart equal {parameter_range[1]}\n", "variable TK equal TEMP\n", "variable PBAR equal PRESS\n", "variable pinst equal press\n", @@ -582,6 +582,7 @@ def generate_mbar_input( + "}.txt\n\n", ] if parameter2 is not None: + name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) ind = [ii for ii, x in enumerate(tmp) if "fix FEPout" in x][0] tmp[ind : ind + 2] = [ " fix FEPout{0:03d} all".format(i) @@ -1203,8 +1204,8 @@ def extract_dHdl( Results ------- dHdl : pandas.Dataframe - Dataframe of potential energy for each alchemical state (k) for each frame (n). - Note that the units for timestamps are not considered in the calculation. + Dataframe of the derivative for the potential energy for each alchemical state (k) + for each frame (n). Note that the units for timestamps are not considered in the calculation. Attributes @@ -1305,10 +1306,10 @@ def extract_dHdl( data = data.iloc[:, col_indices] if column_lambda2 is None: + # dU_back: U(l-dl) - U(l); dU_forw: U(l+dl) - U(l) data.columns = ["time", "fep-lambda", "dlambda", "dU_back", "dU_forw"] data["fep"] = (data.dU_forw - data.dU_back) / (2 * data.dlambda) data.drop(columns=["dlambda", "dU_back", "dU_forw"], inplace=True) - dHdl = pd.concat([dHdl, data], axis=0, sort=False) else: data.columns = [ "time", @@ -1336,6 +1337,7 @@ def extract_dHdl( ], inplace=True, ) + dHdl = pd.concat([dHdl, data], axis=0, sort=False) if column_lambda2 is None: dHdl.set_index(["time", "fep-lambda"], inplace=True) @@ -1345,3 +1347,129 @@ def extract_dHdl( dHdl.mul({"coul": beta, "vdw": beta}) return dHdl + + +@_init_attrs +def extract_H( + fep_files, + T, + column_lambda1=2, + column_pe=5, + column_lambda2=None, + units="real", +): + """This function will go into alchemlyb.parsing.lammps + + Each file is imported as a data frame where the columns kept are either: + [0, column_lambda, column_dlambda1, columns_derivative[0], 
columns_derivative[1]] + or if columns_lambda2 is not None: + [ + 0, column_lambda, column_dlambda1, column_lambda2, column_dlambda2, + columns_derivative1[0], columns_derivative1[1], columns_derivative2[0], columns_derivative2[1] + ] + + Parameters + ---------- + filenames : str + Path to fepout file(s) to extract data from. Filenames and paths are + aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*_*.txt". + temperature : float + Temperature in Kelvin at which the simulation was sampled. + column_lambda1 : int, default=2 + Index for column (column number minus one) representing the lambda at which the system is equilibrated. + column_pe : int, default=5 + Index for column (column number minus one) representing the potential energy of the system. + column_lambda2 : int, default=None + Index for column (column number minus one) for a second value of lambda. + If this array is ``None`` then we do not expect two lambda values. + units : str, default="real" + Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + + Results + ------- + H : pandas.Dataframe + Dataframe of potential energy for each alchemical state (k) for each frame (n). + Note that the units for timestamps are not considered in the calculation. + + Attributes + + - temperature in K or dimensionless + - energy unit in kT + + """ + + # Collect Files + files = glob.glob(fep_files) + if not files: + raise ValueError("No files have been found that match: {}".format(fep_files)) + + if units == "real": + beta = 1 / (k_b * T) + elif units == "lj": + beta = 1 / T + else: + raise ValueError( + "LAMMPS unit type, {}, is not supported. 
Supported types are: real and lj".format( + units + ) + ) + + if not isinstance(column_lambda1, int): + raise ValueError( + "Provided column_lambda1 must be type 'int', instead: {}".format( + type(column_lambda1) + ) + ) + if not isinstance(column_pe, int): + raise ValueError( + "Provided column_pe must be type 'int', instead: {}".format( + type(column_pe) + ) + ) + if column_lambda2 is not None and not isinstance(column_lambda2, int): + raise ValueError( + "Provided column_lambda2 must be type 'int', instead: {}".format( + type(column_lambda2) + ) + ) + + if column_lambda2 is None: + df_H = pd.DataFrame(columns=["time", "fep-lambda", "U"]) + col_indices = [0, column_lambda1, column_pe] + else: + df_H = pd.DataFrame( + columns=["time", "coul-lambda", "vdw-lambda", "U"] + ) + col_indices = [0, column_lambda2, column_lambda1, column_pe] + + for file in files: + if not os.path.isfile(file): + raise ValueError("File not found: {}".format(file)) + + data = pd.read_csv(file, sep=" ", comment="#") + lx = len(data.columns) + if [False for x in col_indices if x > lx]: + raise ValueError( + "Number of columns, {}, is less than index: {}".format(lx, col_indices) + ) + + data = data.iloc[:, col_indices] + if column_lambda2 is None: + data.columns = ["time", "fep-lambda", "U"] + else: + data.columns = [ + "time", + "coul-lambda", + "vdw-lambda", + "U", + ] + df_H = pd.concat([df_H, data], axis=0, sort=False) + + + if column_lambda2 is None: + df_H.set_index(["time", "fep-lambda"], inplace=True) + else: + df_H.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) + df_H.mul({"U": beta}) + + return df_H \ No newline at end of file From 0508cbc35462b8c3360369363f5651e7e75bf4ed Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 7 Feb 2024 16:02:19 -0500 Subject: [PATCH 09/59] Bug fix loops in mbar generation --- src/alchemlyb/parsing/lammps.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py 
b/src/alchemlyb/parsing/lammps.py index 1a92c1ff..8c5ab74d 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -566,7 +566,7 @@ def generate_mbar_input( ] file2 = [] - for i in range(nblocks): + for i in range(nblocks+1): tmp = [ " variable delta{0:0d} equal ".format(i) + f"(v_runid-{i})*v_delta\n", " compute FEP{0:03d} all fep ".format(i) + "${TK} &\n", @@ -599,7 +599,7 @@ def generate_mbar_input( file[40:40] = file2 file2 = [] - for i in range(nblocks): + for i in range(nblocks+1): file2.extend( [ " uncompute FEP{0:03d}\n".format(i), @@ -763,7 +763,7 @@ def generate_rerun_mbar( + f"{name2}_{parameter2_value}.data\n" ) - for i in range(nblocks): + for i in range(nblocks+1): value2 = parameter_range[0] + parameter_change * i delta = value2 - parameter_value tmp = "variable delta{0:0d}".format(i) + " equal {0:." + str(prec) + "f}\n" From 79c7d7d13afc406292001756b7a28cfacaf9a95f Mon Sep 17 00:00:00 2001 From: jac16 Date: Tue, 20 Feb 2024 10:47:34 -0500 Subject: [PATCH 10/59] Add precision values for processing lammps input files --- src/alchemlyb/parsing/lammps.py | 128 ++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 39 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 8c5ab74d..ce11d16d 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -2,7 +2,7 @@ For clarity, we would like to distinguish the difference between $\lambda$ and $\lambda'$. We refer to $\lambda$ as the potential scaling of the equilibrated system, so that when this value is changed, the system undergoes another equilibration -step. One the otherhand, $\lambda'$ is the value used to scaled the potentials for the configurations of the system equilibrated +step. One the other hand, $\lambda'$ is the value used to scaled the potentials for the configurations of the system equilibrated for $\lambda$. The value of $\lambda'$ is used in two instances. 
First, in thermodynamic integration (TI), values of $\lambda'$ that are very close to $\lambda$ can be used to calculate the derivative. This is needed because LAMMPS does not compute explicit derivatives, although one should check whether they can derive an explicit expression, they cannot for changes of @@ -816,8 +816,69 @@ def generate_rerun_mbar( return file +def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): + """ Pull a tuple representing the lambda values used, as defined by the filenames. -def _get_bar_lambdas(fep_files, indices=[2, 3]): + Parameters + ---------- + filename : str + Filename and path + separator : str, default="_" + Separator used to breakup the filename. The choice in ``indices`` is dependent on this choice. + indices : list, default=[2, 3] + Indices used to pull :math:`\lambda` and :math:`\lambda'` + prec : int, default=4 + Number of decimal points in the output. + + Returns + ------- + tuple[float] + Tuple of lambda values + + """ + + name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) + if not _isfloat(name_array[indices[0]]): + raise ValueError( + f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}" + ) + if not _isfloat(name_array[indices[1]]): + raise ValueError( + f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}" + ) + return (round(float(name_array[indices[0]]), prec), round(float(name_array[indices[1]]), prec)) + +def _lambda2_from_filename(filename, separator="_", index=-1, prec=4): + """ Pull the :math:`\lambda'` value, as defined by the filenames. + + Here :math:`\lambda'` is the scaling value applied to a configuration that is equilibrated to + a different value of :math:`\lambda`. + + Parameters + ---------- + filename : str + Filename and path + separator : str, default="_" + Separator used to breakup the filename. The choice in ``index`` is dependent on this choice. 
+ index : list, default=1 + Index used to pull :math:`\lambda'` + prec : int, default=4 + Number of decimal points in the output. + + Returns + ------- + float + Lambda prime value + + """ + name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) + if not _isfloat(name_array[index]): + raise ValueError( + f"Entry, {index} in filename cannot be converted to float: {name_array[index]}" + ) + return round(float(name_array[index]), prec) + +def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4): """Retrieves all lambda values from FEP filenames. Parameters @@ -827,6 +888,8 @@ def _get_bar_lambdas(fep_files, indices=[2, 3]): indices : list[int], default=[1,2] In provided file names, using underscore as a separator, these indices mark the part of the filename containing the lambda information. + prec : int, default=4 + Number of decimal places defined used in ``round()`` function. Returns ------- @@ -837,34 +900,10 @@ def _get_bar_lambdas(fep_files, indices=[2, 3]): """ - def tuple_from_filename(filename, separator="_", indices=[2, 3]): - name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split( - separator - ) - if not _isfloat(name_array[indices[0]]): - raise ValueError( - f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}" - ) - if not _isfloat(name_array[indices[1]]): - raise ValueError( - f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}" - ) - return (float(name_array[indices[0]]), float(name_array[indices[1]])) - - def lambda2_from_filename(filename, separator="_", index=-1): - name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split( - separator - ) - if not _isfloat(name_array[index]): - raise ValueError( - f"Entry, {index} in filename cannot be converted to float: {name_array[index]}" - ) - return float(name_array[index]) - - lambda_pairs = [tuple_from_filename(y, indices=indices) for y in fep_files] + lambda_pairs = 
[_tuple_from_filename(y, indices=indices, prec=prec) for y in fep_files] if len(indices) == 3: lambda2 = list( - set([lambda2_from_filename(y, index=indices[2]) for y in fep_files]) + set([_lambda2_from_filename(y, index=indices[2], prec=prec) for y in fep_files]) ) if len(lambda2) > 1: raise ValueError( @@ -880,7 +919,7 @@ def lambda2_from_filename(filename, separator="_", index=-1): raise ValueError( "Lambda values must be convertible to floats: {}".format(check_float) ) - if [x for x in lambda_values if float(x) < 0]: + if [x for x in lambda_values if round(float(x), prec) < 0]: raise ValueError("Lambda values must be positive: {}".format(lambda_values)) # check that all needed lamba combinations are present @@ -947,12 +986,13 @@ def lambda2_from_filename(filename, separator="_", index=-1): def extract_u_nk( fep_files, T, - columns_lambda1=[2, 3], + columns_lambda1=[1,2], column_u_nk=4, column_lambda2=None, indices=[1, 2], units="real", vdw_lambda=1, + prec=4, ): """This function will go into alchemlyb.parsing.lammps @@ -968,7 +1008,7 @@ def extract_u_nk( aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*_*.txt". temperature : float Temperature in Kelvin at which the simulation was sampled. - columns_lambda1 : list[int] + columns_lambda1 : list[int], default=[1,2] Indices for columns (column number minus one) representing (1) the lambda at which the system is equilibrated and (2) the lambda used in the computation of the potential energy. column_u_nk : int, default=4 @@ -984,7 +1024,9 @@ def extract_u_nk( Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" vdw_lambda : int, default=1 In the case that ``column_lambda2 is not None``, this integer represents which lambda represents vdw interactions. - + prec : int, default=4 + Number of decimal places defined used in ``round()`` function. 
+ Results ------- u_nk_df : pandas.Dataframe @@ -1029,7 +1071,7 @@ def extract_u_nk( f"Provided column for u_nk must be type int. column_u_nk: {column_u_nk}, type: {type(column_u_nk)}" ) - lambda_values, _, lambda2 = _get_bar_lambdas(files, indices=indices) + lambda_values, _, lambda2 = _get_bar_lambdas(files, indices=indices, prec=prec) if column_lambda2 is None: u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) @@ -1056,6 +1098,9 @@ def extract_u_nk( lambda1_col, lambda1_2_col = "fep-lambda", "fep-lambda2" columns_a = ["time", "fep-lambda"] columns_b = lambda_values + data[[lambda1_col, lambda1_2_col]] = data[[lambda1_col, lambda1_2_col]].apply( + lambda x: round(x, prec) + ) else: columns_a = ["time", "coul-lambda", "vdw-lambda"] if vdw_lambda == 1: @@ -1082,6 +1127,9 @@ def extract_u_nk( raise ValueError( f"'vdw_lambda must be either 1 or 2, not: {vdw_lambda}'" ) + data[columns_a[1:]+[lambda1_2_col]] = data[columns_a[1:]+[lambda1_2_col]].apply( + lambda x: round(x, prec) + ) for lambda1 in list(data[lambda1_col].unique()): tmp_df = data.loc[data[lambda1_col] == lambda1] @@ -1108,12 +1156,14 @@ def extract_u_nk( axis=0, sort=False, ) - - column_name = lambda_values[ - [ii for ii, x in enumerate(lambda_values) if float(x) == lambda12][ - 0 - ] - ] + + column_list = [ii for ii, x in enumerate(lambda_values) if round(float(x), prec) == lambda12] + if not column_list: + raise ValueError("Lambda values found in files do not align with those in the filenames. 
" \ + "Check that 'column_indices' are defined correctly.") + else: + column_name = lambda_values[column_list[0]] + if column_lambda2 is not None: column_name = ( (lambda2, column_name) From 7d01677e40d70e5bcd9825393f18de73987f1618 Mon Sep 17 00:00:00 2001 From: jac16 Date: Tue, 20 Feb 2024 14:42:00 -0500 Subject: [PATCH 11/59] Add error catch for column definition in extract_u_nk --- src/alchemlyb/parsing/lammps.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index ce11d16d..a1602bd1 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -987,7 +987,7 @@ def extract_u_nk( fep_files, T, columns_lambda1=[1,2], - column_u_nk=4, + column_u_nk=3, column_lambda2=None, indices=[1, 2], units="real", @@ -1160,7 +1160,7 @@ def extract_u_nk( column_list = [ii for ii, x in enumerate(lambda_values) if round(float(x), prec) == lambda12] if not column_list: raise ValueError("Lambda values found in files do not align with those in the filenames. 
" \ - "Check that 'column_indices' are defined correctly.") + "Check that 'columns_lambda' are defined correctly.") else: column_name = lambda_values[column_list[0]] @@ -1194,6 +1194,9 @@ def extract_u_nk( u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = ( beta * tmp_df2["u_nk"] ) + if lambda1 == lambda12 and u_nk.loc[u_nk[lambda1_col] == lambda1, column_name][0] != 0: + raise ValueError(f"The difference in PE should be zero when lambda = lambda', {lambda1} = {lambda12}," \ + " Check that 'column_u_nk' was defined correctly.") if column_lambda2 is None: u_nk.set_index(["time", "fep-lambda"], inplace=True) @@ -1207,8 +1210,8 @@ def extract_u_nk( def extract_dHdl( fep_files, T, - column_lambda1=2, - column_dlambda1=3, + column_lambda1=1, + column_dlambda1=2, column_lambda2=None, column_dlambda2=None, columns_derivative1=[10, 11], From 1288fd01ce6e55e30f33e9f13b9eca76fe6fda32 Mon Sep 17 00:00:00 2001 From: jac16 Date: Thu, 22 Feb 2024 15:51:54 -0500 Subject: [PATCH 12/59] Add abs to nblocks calc --- src/alchemlyb/parsing/lammps.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index a1602bd1..5971f58f 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -103,7 +103,7 @@ def generate_input_linear_approximation( List of strings representing lines in a file """ - nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change + nblocks = abs(parameter_range[1] - parameter_range[0]) / parameter_change if nblocks % 1 > 0: raise ValueError( "The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( @@ -289,7 +289,7 @@ def generate_traj_input( List of strings representing lines in a file """ - nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change + nblocks = abs(parameter_range[1] - parameter_range[0]) / parameter_change if nblocks % 1 > 0: raise ValueError( f"The 
number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer" @@ -481,7 +481,7 @@ def generate_mbar_input( List of strings representing lines in a file """ - nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change + nblocks = abs(parameter_range[1] - parameter_range[0]) / parameter_change if nblocks % 1 > 0: raise ValueError( f"The number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer" @@ -708,7 +708,7 @@ def generate_rerun_mbar( List of strings representing lines in a file """ - nblocks = (parameter_range[1] - parameter_range[0]) / parameter_change + nblocks = abs(parameter_range[1] - parameter_range[0]) / parameter_change if nblocks % 1 > 0: raise ValueError( "The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( From 304b92402a9a2a1a1bea9c99b22038b30bdb9874 Mon Sep 17 00:00:00 2001 From: jac16 Date: Thu, 22 Feb 2024 15:55:31 -0500 Subject: [PATCH 13/59] Bug fix, nblocks --- src/alchemlyb/parsing/lammps.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 5971f58f..c2bb2d61 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -103,7 +103,7 @@ def generate_input_linear_approximation( List of strings representing lines in a file """ - nblocks = abs(parameter_range[1] - parameter_range[0]) / parameter_change + nblocks = round(abs(parameter_range[1] - parameter_range[0]) / parameter_change, 6) if nblocks % 1 > 0: raise ValueError( "The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( @@ -289,7 +289,7 @@ def generate_traj_input( List of strings representing lines in a file """ - nblocks = abs(parameter_range[1] - parameter_range[0]) / parameter_change + 
nblocks = round(abs(parameter_range[1] - parameter_range[0]) / parameter_change, 6) if nblocks % 1 > 0: raise ValueError( f"The number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer" @@ -481,7 +481,7 @@ def generate_mbar_input( List of strings representing lines in a file """ - nblocks = abs(parameter_range[1] - parameter_range[0]) / parameter_change + nblocks = round(abs(parameter_range[1] - parameter_range[0]) / parameter_change, 6) if nblocks % 1 > 0: raise ValueError( f"The number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer" @@ -708,7 +708,7 @@ def generate_rerun_mbar( List of strings representing lines in a file """ - nblocks = abs(parameter_range[1] - parameter_range[0]) / parameter_change + nblocks = round(abs(parameter_range[1] - parameter_range[0]) / parameter_change, 6) if nblocks % 1 > 0: raise ValueError( "The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( From 552c0d3f1093dad33cad71216ab7e0857c2ff96b Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 13 Mar 2024 10:01:38 -0400 Subject: [PATCH 14/59] Add calculation of beta based on lammps units Add generation of u_nk and dHdl from linearly related dependencies --- src/alchemlyb/parsing/lammps.py | 1095 +++++++++---------------------- 1 file changed, 293 insertions(+), 802 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index c2bb2d61..6ea00078 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -11,7 +11,7 @@ The parsers featured in this module are constructed to parse LAMMPS output files output using the [`fix ave/time command`](https://docs.lammps.org/fix_ave_time.html), containing data for given potential energy values (an approximation of the Hamiltonian) at specified values of $\lambda$ and $\lambda'$, 
$U_{\lambda,\lambda'}$. Because generating -the input files can be combersome, functions have been included to generate the appropriate sections. If a linear approximation +the input files can be cumbersome, functions have been included to generate the appropriate sections. If a linear approximation can be made to calculate $U_{\lambda,\lambda'}$ from $U_{\lambda}$ in post-processing, we recommend using :func:`alchemlyb.parsing.generate_input_linear_approximation()`. If a linear approximation cannot be made (such as changing $\lambda$ in the soft-LJ potential) we recommend running a loop over all values of $\lambda$ saving frames spaced to be @@ -35,13 +35,11 @@ import numpy as np import pandas as pd import glob +from scipy import constants from . import _init_attrs from ..postprocessors.units import R_kJmol, kJ2kcal -k_b = R_kJmol * kJ2kcal - - def _isfloat(x): try: float(x) @@ -49,772 +47,59 @@ def _isfloat(x): except ValueError: return False - -def generate_input_linear_approximation( - parameter, - parameter_range, - parameter_change, - pair_style, - types_solvent, - types_solute, - output_file=None, - parameter2=None, - parameter2_value=None, - pair_style2=None, -): - """Outputs the section of a LAMMPS input file that separates the Coulomb, nonbonded, and bond/angle/torsional contributions - of the solute and solvent. As long as the parameter being changed is linearly dependent on the potential energy, these files for - each value of the parameter can be used for thermodynamic integration (TI) or multi-state Bennett acceptance ratio (MBAR). - - The input data file for this script should be an equilibrated frame in the NPT ensemble. 
Notice that the input file contains - the following keywords that you might replace with the values for your simulation using `sed`: TEMP, PRESS - - Parameters - ---------- - parameter : str - Parameter being varied, see table in `compute fep `_ for the options in - your pair-potential - parameter_range : list[float] - Range of parameter values to be changed where the first value should be the value with which the system has been - equilibrated. - parameter_change : float - The size of the step between parameter values. Take care that number of points needed to traverse the given range - should result in an integer, otherwise LAMMPS will not end at the desired value. - pair_style : str - String with LAMMPS pair style being altered - types_solvent : str - String defining atom types in the solvent (with no spaces, e.g., *4) - types_solute : str - String defining atom types in the solute (with no spaces, e.g., 5*9) - output_file : str, default=None - File name and path for optional output file - parameter2 : str, default=None - Parameter that has been varied and is set to another value in this simulation, e.g., lambda when the Coulomb potential - is set to zero. Using this feature avoids complications with writing the pair potential information in the data file. 
- See table in `compute fep `_ for the options in your pair-potential - pair_style2 : str, default=None - String with LAMMPS pair style for ``parameter2`` - parameter2_value : float, default=None - Value to set ``parameter2`` - - Returns - ------- - file : list[str] - List of strings representing lines in a file - - """ - nblocks = round(abs(parameter_range[1] - parameter_range[0]) / parameter_change, 6) - if nblocks % 1 > 0: - raise ValueError( - "The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( - parameter_range, parameter_change - ) - ) - else: - nblocks = int(nblocks) - - if any( - [x is not None for x in [parameter2, pair_style2, parameter2_value]] - ) and not all([x is not None for x in [parameter2, pair_style2, parameter2_value]]): - raise ValueError( - ( - f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " - + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}" - ) - ) - name1 = "-".join([pair_style.replace("/", "-"), parameter]) - file = [ - "\n# Variables and System Conditions\n", - "variable freq equal 1000 # Consider changing\n", - "variable runtime equal 1000000\n", - f"variable delta equal {parameter_change} \n", - f"variable nblocks equal {nblocks} \n", - f"variable paramstart equal {parameter_range[1]}\n", - "variable TK equal TEMP\n", - "variable PBAR equal PRESS\n", - "variable pinst equal press\n", - "variable tinst equal temp\n", - "variable pe equal pe\n", - "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening factors according to your system\n", - "thermo ${freq}\n", - "\n# Group atoms\n", - f"group solute type {types_solute}\n", - f"group solvent type {types_solvent}\n", - "\n# Set-up Loop\n", - "variable runid loop 0 ${nblocks} pad\n", - " label runloop1\n", - "\n# Adjust param for the box and equilibrate\n", - " variable param equal v_paramstart-v_runid*v_delta\n", - ' if "${runid} == 0" then 
&\n', - ' "jump SELF skipequil"\n', - " variable param0 equal v_paramstart-(v_runid-1)*v_delta\n", - " variable paramramp equal ramp(v_param0,v_param)\n", - " fix ADAPT all adapt/fep ${freq} &\n", - f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_paramramp\n", - " thermo_style custom v_paramramp temp press pe evdwl enthalpy\n", - " run ${runtime} # Run Ramp\n", - " thermo_style custom v_param temp press pe evdwl enthalpy\n", - " run ${runtime} # Run Equil\n", - "\n label skipequil\n\n", - f" write_data files/npt_{name1}_" + "${param}.data\n", - "\n # Initialize computes\n", - " ## Compute PE for contributions for bonds, angles, dihedrals, and impropers\n", - " compute pe_solute_bond solute pe/atom bond angle dihedral improper # PE from nonpair/noncharged intramolecular interactions\n", - " compute pe_solute_1 solute reduce sum c_pe_solute_bond\n", - " compute pe_solvent_bond solvent pe/atom bond angle dihedral improper # PE from nonpair/noncharged intramolecular interactions\n", - " compute pe_solvent_1 solvent reduce sum c_pe_solvent_bond\n", - "\n ## Compute PE for contributions for pair and charges\n", - " compute pe_solute_2 solute group/group solute pair yes kspace no\n", - " compute pe_solute_3 solute group/group solute pair no kspace yes\n", - " compute pe_solvent_2 solvent group/group solvent pair yes kspace no\n", - " compute pe_solvent_3 solvent group/group solvent pair no kspace yes\n", - " compute pe_inter_2 solute group/group solvent pair yes kspace no\n", - " compute pe_inter_3 solute group/group solvent pair no kspace yes\n", - " thermo_style custom v_param temp press pe evdwl enthalpy &\n", - " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3\n", - " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_tinst v_pinst v_pe &\n", - " c_pe_solute_1 c_pe_solute_2 c_pe_solute_3 c_pe_solvent_1 c_pe_solvent_2 c_pe_solvent_3 c_pe_inter_2 c_pe_inter_3 &\n", - f" file 
files/linear_{name1}_" + "${param}.txt\n", - "\n run ${runtime}\n\n", - " uncompute pe_solute_bond\n", - " uncompute pe_solute_1\n", - " uncompute pe_solvent_bond\n", - " uncompute pe_solvent_1\n", - " uncompute pe_solute_2\n", - " uncompute pe_solute_3\n", - " uncompute pe_solvent_2\n", - " uncompute pe_solvent_3\n", - " uncompute pe_inter_2\n", - " uncompute pe_inter_3\n", - ' if "${runid} != 0" then &\n', - ' "unfix ADAPT"\n', - " unfix FEPout\n", - "\n next runid\n", - " jump SELF runloop1\n", - "write_data npt.data nocoeff\n", - ] - - if parameter2 is not None: - name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) - file2 = [ - "\n# Set Previous Change\n", - f"variable param2 equal {parameter2_value}\n", - "fix ADAPT2 all adapt/fep 1 &\n", - f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_param2\n", - ] - file[13:13] = file2 - file[-1:-1] = "unfix ADAPT2\n" - ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] - file[ind] = ( - " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_param2 v_tinst v_pinst v_pe&\n" - ) - file[ind + 2] = ( - f" file files/linear_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.txt\n" - ) - ind = [ii for ii, x in enumerate(file) if "write_data files/npt" in x][0] - file[ind] = ( - f" write_data files/npt_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.data\n" - ) - - if output_file is not None: - with open(output_file, "w") as f: - for line in file: - f.write(line) - - return file - - -def generate_traj_input( - parameter, - parameter_range, - parameter_change, - pair_style, - types_solvent, - types_solute, - del_parameter=0.01, - output_file=None, - parameter2=None, - parameter2_value=None, - pair_style2=None, - del_parameter2=None, -): - """Outputs the section of a LAMMPS input file that loops over the values of parameter being changed (e.g., lambda) - Small perturbations in the potential energy are also output so that the derivative can be calculated for 
thermodynamic - integration. Trajectories are produces so that files for MBAR analysis may be generated in post-processing. - - The input data file for this script should be an equilibrated frame in the NPT ensemble. Notice that the input file contains - the following keywords that you might replace with the values for your simulation using `sed`: TEMP, PRESS - - Parameters - ---------- - parameter : str - Parameter being varied, see table in `compute fep `_ for the options in - your pair-potential - parameter_range : list[float] - Range of parameter values to be changed where the first value should be the value with which the system has been - equilibrated. - parameter_change : float - The size of the step between parameter values. Take care that number of points needed to traverse the given range - should result in an integer, otherwise LAMMPS will not end at the desired value. - pair_style : str - String of LAMMPS pair style being changes - types_solvent : str - String defining atom types in the solvent (not spaces) - types_solute : str - String defining atom types in the solute (not spaces) - del_parameter : float, default=0.1 - Change used to calculate the forward and backward difference used to compute the derivative through a central difference - approximation. - output_file : str, default=None - File name and path for optional output file - parameter2 : str, default=None - Parameter that has been varied and is set to another value in this simulation, e.g., lambda when the Coulomb potential - is set to zero. Using this feature avoids complications with writing the pair potential information in the data file. 
- See table in `compute fep `_ for the options in your pair-potential - pair_style2 : str, default=None - String with LAMMPS pair style being set for ``parameter2`` - parameter2_value : float, default=None - Value to set ``parameter2`` - del_parameter2 : float, default=None - Change used to calculate the forward and backward difference used to compute the derivative through a central difference - approximation for parameter2. - - Returns - ------- - file : list[str] - List of strings representing lines in a file - - """ - nblocks = round(abs(parameter_range[1] - parameter_range[0]) / parameter_change, 6) - if nblocks % 1 > 0: - raise ValueError( - f"The number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer" - ) - else: - nblocks = int(nblocks) - - if any( - [ - x is not None - for x in [parameter2, pair_style2, parameter2_value, del_parameter2] - ] - ) and not all( - [ - x is not None - for x in [parameter2, pair_style2, parameter2_value, del_parameter2] - ] - ): - raise ValueError( - ( - f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " - + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}, del_parameter2={del_parameter2}" - ) - ) - name1 = "-".join([pair_style.replace("/", "-"), parameter]) - file = [ - "\n# Variables and System Conditions\n", - "variable freq equal 1000 # Consider changing\n", - "variable runtime equal 1000000\n", - f"variable delta equal {parameter_change} \n", - f"variable nblocks equal {nblocks} \n", - f"variable deltacdm equal {del_parameter} # delta used in central different method for derivative in TI\n", - f"variable paramstart equal {parameter_range[1]}\n", - "variable TK equal TEMP\n", - "variable PBAR equal PRESS\n", - "variable pinst equal press\n", - "variable tinst equal temp\n", - "variable pe equal pe\n", - "\n", - "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening 
factors according to your system\n", - "thermo ${freq}\n", - "\n# Set-up Loop\n", - "variable runid loop 0 ${nblocks} pad\n", - " label runloop1\n", - "\n # Adjust param for the box and equilibrate\n", - " variable param equal v_paramstart-v_runid*v_delta\n", - ' if "${runid} == 0" then &\n', - ' "jump SELF skipequil"\n', - " variable param0 equal v_paramstart-(v_runid-1)*v_delta\n", - " variable paramramp equal ramp(v_param0,v_param)\n", - " fix ADAPT all adapt/fep ${freq} &\n", - f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_paramramp\n", - " thermo_style custom v_paramramp temp press pe evdwl enthalpy\n", - " run ${runtime} # Run Ramp\n", - " thermo_style custom v_param temp press pe evdwl enthalpy\n", - " run ${runtime} # Run Equil\n", - "\n label skipequil\n\n", - f" write_data files/npt_{name1}_" + "${param}.data\n", - "\n # Initialize computes\n", - " thermo_style custom v_param temp press pe evdwl enthalpy\n", - " variable deltacdm2 equal -v_deltacdm\n", - " compute FEPdb all fep ${TK} &\n", - f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm2\n", - " compute FEPdf all fep ${TK} &\n", - f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm\n", - " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_deltacdm v_tinst v_pinst v_pe &\n", - f" c_FEPdb[1] c_FEPdf[1] file files/ti_{name1}_" + "${param}.txt\n", - "\n dump TRAJ all custom ${freq} " - + f"files/dump_{name1}_" - + "${param}.lammpstrj id mol type element xu yu zu\n", - "\n run ${runtime}\n\n", - " uncompute FEPdb\n", - " uncompute FEPdf\n", - ' if "${runid} != 0" then &\n', - ' "unfix ADAPT"\n', - " unfix FEPout\n", - " undump TRAJ\n", - "\n next runid\n", - " jump SELF runloop1\n", - "write_data npt.data nocoeff\n", - ] - - if parameter2 is not None: - name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) - file[6:6] = (f"variable delta2cdm equal {del_parameter2}\n",) - file2 = [ - "\n# Set Previous Change\n", - f"variable 
param2 equal {parameter2_value}\n", - "fix ADAPT2 all adapt/fep 1 &\n", - f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_param2\n", - "variable delta2cdm2 equal -v_delta2cdm\n", - "compute FEP2db all fep ${TK} &\n", - f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_delta2cdm2\n", - "compute FEP2df all fep ${TK} &\n", - f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_delta2cdm\n", - ] - file[11:11] = file2 - file[-1:-1] = "unfix ADAPT2\n" - file[-1:-1] = "uncompute FEP2db\n" - file[-1:-1] = "uncompute FEP2df\n" - ind = [ii for ii, x in enumerate(file) if "write_data files/npt" in x][0] - file[ind] = ( - f" write_data files/npt_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.data\n" - ) - ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] - file[ind] = ( - " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe &\n" - ) - file[ind + 1] = ( - f" c_FEPdb[1] c_FEPdf[1] c_FEP2db[1] c_FEP2df[1] file files/ti_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.txt\n" - ) - file[ind + 2] = ( - "\n dump TRAJ all custom ${freq} " - + f"files/dump_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.lammpstrj id mol type element xu yu zu\n" - ) - - if output_file is not None: - with open(output_file, "w") as f: - for line in file: - f.write(line) - - return file - - -def generate_mbar_input( - parameter, - parameter_range, - parameter_change, - pair_style, - types_solvent, - types_solute, - del_parameter=0.01, - output_file=None, - parameter2=None, - parameter2_value=None, - pair_style2=None, - del_parameter2=None, -): - """Outputs the section of a LAMMPS input file that loops over the values of parameter being changed (e.g., lambda) - Small perturbations in the potential energy are also output so that the derivative can be calculated for thermodynamic - integration. 
Trajectories are produces so that files for MBAR analysis may be generated in post-processing. - - The input data file for this script should be an equilibrated frame in the NPT ensemble. Notice that the input file contains - the following keywords that you might replace with the values for your simulation using `sed`: TEMP, PRESS - - Parameters - ---------- - parameter : str - Parameter being varied, see table in `compute fep `_ for the options in - your pair-potential - parameter_range : list[float] - Range of parameter values to be changed where the first value should be the value with which the system has been - equilibrated. - parameter_change : float - The size of the step between parameter values. Take care that number of points needed to traverse the given range - should result in an integer, otherwise LAMMPS will not end at the desired value. - pair_style : str - String of LAMMPS pair style being changes - types_solvent : str - String defining atom types in the solvent (not spaces) - types_solute : str - String defining atom types in the solute (not spaces) - del_parameter : float, default=0.1 - Change used to calculate the forward and backward difference used to compute the derivative through a central difference - approximation. - output_file : str, default=None - File name and path for optional output file - parameter2 : str, default=None - Parameter that has been varied and is set to another value in this simulation, e.g., lambda when the Coulomb potential - is set to zero. Using this feature avoids complications with writing the pair potential information in the data file. 
- See table in `compute fep `_ for the options in your pair-potential - pair_style2 : str, default=None - String with LAMMPS pair style being set for ``parameter2`` - parameter2_value : float, default=None - Value to set ``parameter2`` - del_parameter2 : float, default=None - Change used to calculate the forward and backward difference used to compute the derivative through a central difference - approximation for parameter2. - - Returns - ------- - file : list[str] - List of strings representing lines in a file - - """ - nblocks = round(abs(parameter_range[1] - parameter_range[0]) / parameter_change, 6) - if nblocks % 1 > 0: - raise ValueError( - f"The number of steps needed to traverse the parameter range, {parameter_range}, with a step size of, {parameter_change} is not an integer" - ) - else: - nblocks = int(nblocks) - - if any( - [ - x is not None - for x in [parameter2, pair_style2, parameter2_value, del_parameter2] - ] - ) and not all( - [ - x is not None - for x in [parameter2, pair_style2, parameter2_value, del_parameter2] - ] - ): - raise ValueError( - ( - f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " - + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}, del_parameter2={del_parameter2}" - ) - ) - name1 = "-".join([pair_style.replace("/", "-"), parameter]) - file = [ - "\n# Variables and System Conditions\n", - "variable freq equal 1000 # Consider changing\n", - "variable runtime equal 1000000\n", - f"variable delta equal {parameter_change} \n", - f"variable nblocks equal {nblocks} \n", - f"variable deltacdm equal {del_parameter} # delta used in central different method for derivative in TI\n", - f"variable paramstart equal {parameter_range[1]}\n", - "variable TK equal TEMP\n", - "variable PBAR equal PRESS\n", - "variable pinst equal press\n", - "variable tinst equal temp\n", - "variable pe equal pe\n", - "\n", - "fix 1 all npt temp ${TK} ${TK} 1.0 iso ${PBAR} ${PBAR} # Change dampening 
factors according to your system\n", - "thermo ${freq}\n", - "\n# Set-up Loop\n", - "variable nblocks equal 1/v_delta\n", - "variable runid loop 0 ${nblocks} pad\n", - " label runloop1\n", - "\n # Adjust param for the box and equilibrate\n", - " variable param equal v_paramstart-v_runid*v_delta\n", - ' if "${runid} == 0" then &\n', - ' "jump SELF skipequil"\n', - " variable param0 equal v_paramstart-(v_runid-1)*v_delta\n", - " variable paramramp equal ramp(v_param0,v_param)\n", - " fix ADAPT all adapt/fep ${freq} &\n", - f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_paramramp\n", - " thermo_style custom v_paramramp temp press pe evdwl enthalpy\n", - " run ${runtime} # Run Ramp\n", - " thermo_style custom v_param temp press pe evdwl enthalpy\n", - " run ${runtime} # Run Equil\n", - "\n label skipequil\n\n", - f" write_data files/npt_{name1}_" + "${param}.data\n", - "\n # Initialize computes\n", - " thermo_style custom v_param temp press pe evdwl enthalpy\n", - " variable deltacdm2 equal -v_deltacdm\n", - " compute FEPdb all fep ${TK} &\n", - f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm2\n", - " compute FEPdf all fep ${TK} &\n", - f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_deltacdm\n", - " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_deltacdm v_tinst v_pinst v_pe &\n", - f" c_FEPdb[1] c_FEPdf[1] file files/ti_{name1}_" + "${param}.txt\n", - "\n dump TRAJ all custom ${freq} " - + f"files/dump_{name1}_" - + "${param}.lammpstrj id mol type element xu yu zu\n", - "\n run ${runtime}\n\n", - " uncompute FEPdb\n", - " uncompute FEPdf\n", - ' if "${runid} != 0" then &\n', - ' "unfix ADAPT"\n', - " unfix FEPout\n", - " undump TRAJ\n", - "\n next runid\n", - " jump SELF runloop1\n", - "write_data npt.data nocoeff\n", - ] - - file2 = [] - for i in range(nblocks+1): - tmp = [ - " variable delta{0:0d} equal ".format(i) + f"(v_runid-{i})*v_delta\n", - " compute FEP{0:03d} all fep ".format(i) + "${TK} 
&\n", - f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_delta{i}\n", - " variable param{0:03d} equal v_param+v_delta{0:0d}\n".format(i), - " fix FEPout{0:03d} all".format(i) - + " ave/time ${freq} 1 ${freq} " - + "v_param v_param{0:03d} &\n".format(i), - " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) - + f" file files/mbar_{name1}" - + "_${param}_${param" - + str("{0:03d}".format(i)) - + "}.txt\n\n", - ] - if parameter2 is not None: - name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) - ind = [ii for ii, x in enumerate(tmp) if "fix FEPout" in x][0] - tmp[ind : ind + 2] = [ - " fix FEPout{0:03d} all".format(i) - + " ave/time ${freq} 1 ${freq} " - + "v_param v_param{0:03d} v_param2 &\n".format(i), - " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) - + f" file files/mbar_{name1}" - + "_${param}_${param" - + str("{0:03d}".format(i)) - + "}_" - + "{}_{}.txt\n\n".format(name2, parameter2_value), - ] - file2.extend(tmp) - file[40:40] = file2 - - file2 = [] - for i in range(nblocks+1): - file2.extend( - [ - " uncompute FEP{0:03d}\n".format(i), - " unfix FEPout{0:03d}\n".format(i), - ] - ) - file[-7:-7] = file2 - - if parameter2 is not None: - name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) - file[6:6] = (f"variable delta2cdm equal {del_parameter2}\n",) - file2 = [ - "\n# Set Previous Change\n", - f"variable param2 equal {parameter2_value}\n", - "fix ADAPT2 all adapt/fep 1 &\n", - f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_param2\n", - "variable delta2cdm2 equal -v_delta2cdm\n", - "compute FEP2db all fep ${TK} &\n", - f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_delta2cdm2\n", - "compute FEP2df all fep ${TK} &\n", - f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_delta2cdm\n", - ] - file[14:14] = file2 - file[-1:-1] = "unfix ADAPT2\n" - file[-1:-1] = "uncompute FEP2db\n" - file[-1:-1] = "uncompute FEP2df\n" - ind = [ii for ii, x in 
enumerate(file) if "write_data files/npt" in x][0] - file[ind] = ( - f" write_data files/npt_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.data\n" - ) - ind = [ii for ii, x in enumerate(file) if "fix FEPout" in x][0] - file[ind] = ( - " fix FEPout all ave/time ${freq} 1 ${freq} v_param v_deltacdm v_param2 v_delta2cdm v_tinst v_pinst v_pe &\n" - ) - file[ind + 1] = ( - f" c_FEPdb[1] c_FEPdf[1] c_FEP2db[1] c_FEP2df[1] file files/ti_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.txt\n" - ) - file[ind + 2] = ( - "\n dump TRAJ all custom ${freq} " - + f"files/dump_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.lammpstrj id mol type element xu yu zu\n" - ) - - if output_file is not None: - with open(output_file, "w") as f: - for line in file: - f.write(line) - - return file - - -def generate_rerun_mbar( - parameter_value, - parameter, - parameter_range, - parameter_change, - pair_style, - types_solvent, - types_solute, - output_file=None, - parameter2=None, - pair_style2=None, - parameter2_value=None, -): - """Outputs the section of a LAMMPS input file that reruns trajectories for different lambda values and calculates - the potential energy for all other lambda values with this set of configurations. +def beta_from_units(T, units): + """Output value of beta from temperature and units. Parameters ---------- - parameter_value : float - Value of parameter being varied (e.g., lambda) - parameter : str - Parameter being varied, see table in `compute fep `_ for the options in - your pair-potential - parameter_range : list[float] - Range of parameter values to be changed where the first value should be the value with which the system has been - equilibrated. - parameter_change : float - The size of the step between parameter values. Take care that number of points needed to traverse the given range - should result in an integer, otherwise lammps will not end at the desired value. 
- pair_style : str - String of LAMMPS pair style being changes - types_solvent : str - String defining atom types in the solvent (not spaces) - types_solute : str - String defining atom types in the solute (not spaces) - output_file : str, default=None - File name and path for optional output file - parameter2 : str, default=None - Parameter that has been varied and is set to another value in this simulation, e.g., lambda for the coulombic potential - is set to zero. Using this feature avoids complicaitons with writing the pair potential information in the data file. - See table in `compute fep `_ for the options in your pair-potential - pair_style2 : str, default=None - String with LAMMPS pair style being set for ``parameter2`` - parameter2_value : float, default=None - Value to set ``parameter2`` + T : float + Temperature that the system was run with + units : str + LAMMPS style unit Returns ------- - file : list[str] - List of strings representing lines in a file - + beta : float + Value of beta used to scale the potential energy. + + Raises + ------ + ValueError + If unit string is not recognized. + + .. versionadded:: 1.?? """ - nblocks = round(abs(parameter_range[1] - parameter_range[0]) / parameter_change, 6) - if nblocks % 1 > 0: - raise ValueError( - "The number of steps needed to traverse the parameter range, {}, with a step size of, {} is not an integer".format( - parameter_range, parameter_change - ) - ) + if units == "real": # E in kcal/mol, T in K + beta = 1 / (R_kJmol * kJ2kcal * T) + elif units == "lj": # Nondimensional E and T scaled by epsilon + beta = 1 / T + elif units == "metal": # E in eV, T in K + beta = 1 / (R_kJmol * kJ2kcal * T) # NoteHere!!!! 
+ elif units == "si": # E in J, T in K + beta = 1 / ( + constants.R * T * + constants.physical_constants["electron volt"][0] + ) + elif units == "cgs": # E in ergs, T in K + beta = 1 / (constants.R * T * 1e-7) + elif units == "electron": # E in Hartrees, T in K + beta = 1 / ( + constants.R * T * + constants.physical_constants["Hartree energy"][0] + ) + elif units == "micro": # E in epicogram-micrometer^2/microsecond^2, T in K + beta = 1 / (constants.R * T * 1e-15) + elif units == "nano": # E in attogram-nanometer^2/nanosecond^2, T in K + beta = 1 / (constants.R * T * 1e-21) else: - nblocks = int(nblocks) - - if any( - [x is not None for x in [parameter2, pair_style2, parameter2_value]] - ) and not all([x is not None for x in [parameter2, pair_style2, parameter2_value]]): raise ValueError( - ( - f"If any values for 'parameter2' are provided, all must be provided: parameter2={parameter2}, " - + f"parameter2_value={parameter2_value}, pair_style2={pair_style2}" + "LAMMPS unit type, {}, is not supported. 
Supported types are: real and lj".format( + units ) ) + + return beta - if np.isclose(parameter_range[0], 0): - prec = int(np.abs(int(np.log10(np.abs(parameter_change))))) - else: - prec = max( - int(np.abs(int(np.log10(np.abs(parameter_range[0]))))), - int(np.abs(int(np.log10(np.abs(parameter_change))) + 1)), - ) - name1 = "-".join([pair_style.replace("/", "-"), parameter]) - file = [ - "\n# Variables and System Conditions\n", - f"variable param equal {parameter_value}\n", - "variable freq equal 1000 # Consider changing\n", - "variable runtime equal 1000000\n", - f"variable delta equal {parameter_change}\n", - "variable TK equal TEMP\n", - "\nthermo ${freq}\n", - f"read_data files/npt_{name1}_" + "${param}.data\n", - "\n# Initialize computes\n", - ] - if parameter2 is not None: - file2 = [ - "\n# Set Previous Change\n", - "variable param2 equal {parameter2_value}\n", - "fix ADAPT2 all adapt/fep 1 &\n", - f" pair {pair_style2} {parameter2} {types_solute} {types_solvent} v_param2\n", - ] - file[8:8] = file2 - name2 = "-".join([pair_style2.replace("/", "-"), parameter2]) - ind = [ii for ii, x in enumerate(file) if "read_data files/npt" in x][0] - file[ind] = ( - f"read_data files/npt_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.data\n" - ) - - for i in range(nblocks+1): - value2 = parameter_range[0] + parameter_change * i - delta = value2 - parameter_value - tmp = "variable delta{0:0d}".format(i) + " equal {0:." 
+ str(prec) + "f}\n" - tmp = [ - tmp.format(delta), - "compute FEP{0:03d} all fep ".format(i) + "${TK} &\n", - f" pair {pair_style} {parameter} {types_solute} {types_solvent} v_delta{i}\n", - "variable param{0:03d} equal v_param+v_delta{0:0d}\n".format(i), - "fix FEPout{0:03d} all".format(i) - + " ave/time ${freq} 1 ${freq} " - + "v_param v_param{0:03d} &\n".format(i), - " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) - + f" file files/mbar_{name1}" - + "_${param}_${param" - + str("{0:03d}".format(i)) - + "}.txt\n\n", - ] - if parameter2 is not None: - ind = [ii for ii, x in enumerate(tmp) if "fix FEPout" in x][0] - tmp[ind : ind + 2] = [ - "fix FEPout{0:03d} all".format(i) - + " ave/time ${freq} 1 ${freq} " - + "v_param v_param{0:03d} v_param2 &\n".format(i), - " c_FEP{0:03d}[1] c_FEP{0:03d}[2] c_FEP{0:03d}[3]".format(i) - + f" file files/mbar_{name1}" - + "_${param}_${param" - + str("{0:03d}".format(i)) - + "}_" - + "{}_{}.txt\n\n".format(name2, parameter2_value), - ] - file.extend(tmp) - - if parameter2 is not None: - file.append( - f"\nrerun files/dump_{name1}_" - + "${param}_" - + f"{name2}_{parameter2_value}.lammpstrj " - + "every ${freq} dump xu yu zu\n\n" - ) - else: - file.append( - f"\nrerun files/dump_{name1}" - + "_${param}.lammpstrj every ${freq} dump xu yu zu\n\n" - ) - - if output_file is not None: - with open(output_file, "w") as f: - for line in file: - f.write(line) - - return file def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): """ Pull a tuple representing the lambda values used, as defined by the filenames. @@ -835,6 +120,7 @@ def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): tuple[float] Tuple of lambda values + .. versionadded:: 1.?? 
""" name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) @@ -848,7 +134,7 @@ def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): ) return (round(float(name_array[indices[0]]), prec), round(float(name_array[indices[1]]), prec)) -def _lambda2_from_filename(filename, separator="_", index=-1, prec=4): +def _lambda_from_filename(filename, separator="_", index=-1, prec=4): """ Pull the :math:`\lambda'` value, as defined by the filenames. Here :math:`\lambda'` is the scaling value applied to a configuration that is equilibrated to @@ -870,6 +156,7 @@ def _lambda2_from_filename(filename, separator="_", index=-1, prec=4): float Lambda prime value + .. versionadded:: 1.?? """ name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) if not _isfloat(name_array[index]): @@ -878,7 +165,7 @@ def _lambda2_from_filename(filename, separator="_", index=-1, prec=4): ) return round(float(name_array[index]), prec) -def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4): +def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): """Retrieves all lambda values from FEP filenames. Parameters @@ -890,6 +177,8 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4): containing the lambda information. prec : int, default=4 Number of decimal places defined used in ``round()`` function. + force : bool, default=False + If ``True`` the dataframe will be created, even if not all lambda and lambda prime combinations are available. Returns ------- @@ -898,12 +187,13 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4): lambda_pairs : list List of tuples containing two floats, lambda and lambda'. + .. versionadded:: 1.?? 
""" lambda_pairs = [_tuple_from_filename(y, indices=indices, prec=prec) for y in fep_files] if len(indices) == 3: lambda2 = list( - set([_lambda2_from_filename(y, index=indices[2], prec=prec) for y in fep_files]) + set([_lambda_from_filename(y, index=indices[2], prec=prec) for y in fep_files]) ) if len(lambda2) > 1: raise ValueError( @@ -965,13 +255,13 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4): [(lambda_value, x) for x in lambda_array if x not in tmp_array] ) - if missing_combinations_bar: + if missing_combinations_bar and not force: raise ValueError( "BAR calculation cannot be performed without the following lambda-lambda prime combinations: {}".format( missing_combinations_bar ) ) - if extra_combinations_bar: + if extra_combinations_bar and not force: warnings.warn( "The following combinations of lambda and lambda prime are extra and being discarded for BAR analysis: {}".format( extra_combinations_bar @@ -982,6 +272,139 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4): return lambda_values, lambda_pairs, lambda2 +@_init_attrs +def extract_u_nk_from_u_n( + fep_files, + T, + column_lambda, + column_u_cross, + dependence=lambda x : (x), + units="real", + index=-1, + prec=4, +): + """ Produce u_nk from files containing u_n given a separable dependence on lambda. + + Parameters + ---------- + filenames : str + Path to fepout file(s) to extract data from. Filenames and paths are + aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*.txt". + temperature : float + Temperature in Kelvin at which the simulation was sampled. + columns_lambda : int + Indices for columns (file column number minus one) representing the lambda at which the system is equilibrated + column_cross : int + Index for the column (file column number minus one) representing the potential energy of the cross interactions + between the solute and solvent. 
+ dependence : func, default=`lambda x : (x)` + Dependence of changing variable on the potential energy, which must be separable. + index : int, default=-1 + In provided file names, using underscore as a separator, these indices mark the part of the filename + containing the lambda information for :func:`alchemlyb.parsing._get_bar_lambdas`. If ``column_lambda2 != None`` + this list should be of length three, where the last value represents the invariant lambda. + units : str, default="real" + Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + prec : int, default=4 + Number of decimal places defined used in ``round()`` function. + + Returns + ------- + u_nk_df : pandas.Dataframe + Dataframe of potential energy for each alchemical state (k) for each frame (n). + Note that the units for timestamps are not considered in the calculation. + + Attributes + + - temperature in K + - energy unit in kT + + .. versionadded:: 1.?? + """ + # Collect Files + files = glob.glob(fep_files) + if not files: + raise ValueError(f"No files have been found that match: {fep_files}") + + beta = beta_from_units(T, units) + + if not isinstance(column_lambda, int): + raise ValueError( + f"Provided column for lambda must be type int. column_u_lambda: {column_lambda}, type: {type(column_lambda)}" + ) + if not isinstance(column_u_cross, int): + raise ValueError( + f"Provided column for u_cross must be type int. 
column_u_cross: {column_u_cross}, type: {type(column_u_cross)}" + ) + + lambda_values = list( + set([_lambda_from_filename(y, index=index, prec=prec) for y in files]) + ) + + u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) + lc = len(lambda_values) + col_indices = [0, column_lambda, column_u_cross] + + for file in files: + if not os.path.isfile(file): + raise ValueError("File not found: {}".format(file)) + + data = pd.read_csv(file, sep=" ", comment="#", header=None) + lx = len(data.columns) + if [False for x in col_indices if x > lx]: + raise ValueError( + "Number of columns, {}, is less than index: {}".format(lx, col_indices) + ) + data = data.iloc[:, col_indices] + data.columns = ["time", "fep-lambda", "u_cross"] + lambda1_col = "fep-lambda" + data[[lambda1_col]] = data[[lambda1_col]].apply( + lambda x: round(x, prec) + ) + + for lambda1 in list(data[lambda1_col].unique()): + tmp_df = data.loc[data[lambda1_col] == lambda1] + lr = tmp_df.shape[0] + for lambda12 in lambda_values: + if u_nk[u_nk[lambda1_col] == lambda1].shape[0] == 0: + u_nk = pd.concat( + [ + u_nk, + pd.concat( + [ + tmp_df[["time", "fep-lambda"]], + pd.DataFrame( + np.zeros((lr, lc)), + columns=lambda_values, + ), + ], + axis=1, + ), + ], + axis=0, + sort=False, + ) + + if u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12][0] != 0: + raise ValueError( + "Energy values already available for lambda, {}, lambda', {}.".format( + lambda1, lambda12 + ) + ) + + u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12] = ( + beta * tmp_df["u_cross"] * (dependence(lambda12) / dependence(lambda1) - 1) + ) + + if lambda1 == lambda12 and u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12][0] != 0: + raise ValueError(f"The difference in PE should be zero when lambda = lambda', {lambda1} = {lambda12}," \ + " Check that the 'column_u_n' was defined correctly.") + + u_nk.set_index(["time", "fep-lambda"], inplace=True) + + return u_nk + + @_init_attrs def extract_u_nk( fep_files, @@ -993,6 +416,7 @@ def 
extract_u_nk( units="real", vdw_lambda=1, prec=4, + force=False, ): """This function will go into alchemlyb.parsing.lammps @@ -1026,6 +450,8 @@ def extract_u_nk( In the case that ``column_lambda2 is not None``, this integer represents which lambda represents vdw interactions. prec : int, default=4 Number of decimal places defined used in ``round()`` function. + force : bool, default=False + If ``True`` the dataframe will be created, even if not all lambda and lambda prime combinations are available. Results ------- @@ -1038,6 +464,7 @@ def extract_u_nk( - temperature in K - energy unit in kT + .. versionadded:: 1.?? """ # Collect Files @@ -1045,14 +472,7 @@ def extract_u_nk( if not files: raise ValueError(f"No files have been found that match: {fep_files}") - if units == "real": - beta = 1 / (k_b * T) - elif units == "lj": - beta = 1 / T - else: - raise ValueError( - f"LAMMPS unit type, {units}, is not supported. Supported types are: real and lj" - ) + beta = beta_from_units(T, units) if len(columns_lambda1) != 2: raise ValueError( @@ -1064,14 +484,14 @@ def extract_u_nk( ) if column_lambda2 is not None and not isinstance(column_lambda2, int): raise ValueError( - f"Provided column for u_nk must be type int. column_u_nk: {column_lambda2}, type: {type(column_lambda2)}" + f"Provided column for lambda must be type int. column_lambda2: {column_lambda2}, type: {type(column_lambda2)}" ) if not isinstance(column_u_nk, int): raise ValueError( f"Provided column for u_nk must be type int. 
column_u_nk: {column_u_nk}, type: {type(column_u_nk)}" ) - lambda_values, _, lambda2 = _get_bar_lambdas(files, indices=indices, prec=prec) + lambda_values, _, lambda2 = _get_bar_lambdas(files, indices=indices, prec=prec, force=force) if column_lambda2 is None: u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) @@ -1086,7 +506,7 @@ def extract_u_nk( if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) - data = pd.read_csv(file, sep=" ", comment="#") + data = pd.read_csv(file, sep=" ", comment="#", header=None) lx = len(data.columns) if [False for x in col_indices if x > lx]: raise ValueError( @@ -1170,9 +590,10 @@ def extract_u_nk( if vdw_lambda == 1 else (column_name, lambda2) ) - if u_nk.loc[u_nk[lambda1_col] == lambda1, column_name][0] != 0: + + if u_nk.loc[u_nk[lambda1_col] == lambda1, column_name][0] != abs(0): raise ValueError( - "Energy values already available for lambda, {}, lambda', {}.".format( + "Energy values already available for lambda, {}, lambda', {}. Check for a duplicate file.".format( lambda1, lambda12 ) ) @@ -1206,6 +627,96 @@ def extract_u_nk( return u_nk +@_init_attrs +def extract_dHdl_from_u_n( + fep_files, + T, + column_lambda=None, + column_u_cross=None, + dependence=lambda x : (1/x), + units="real", +): + """Produce dHdl dataframe from sparated contributions of the potential energy. + + Each file is imported as a data frame where the columns are: + [0, column_lambda, column_solvent, column_solute, column_cross] + + Parameters + ---------- + filenames : str + Path to fepout file(s) to extract data from. Filenames and paths are + aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*.txt". + T : float + Temperature in Kelvin at which the simulation was sampled. 
+ columns_lambda : int, default=None + Indices for columns (file column number minus one) representing the lambda at which the system is equilibrated + column_u : int, default=None + Index for the column (file column number minus one) representing the potential energy of the system + dependence : func, default=`lambda x : (1/x)` + Transform of lambda needed to convert the potential energy into the derivative of the potential energy with respect to lambda, which must be separable. + For example, for the LJ potential U = eps * f(sig, r), dU/deps = f(sig, r), so we need a dependence function of 1/eps to convert the + potential energy to the derivative with respect to eps. + units : str, default="real" + Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + + Results + ------- + dHdl : pandas.Dataframe + Dataframe of the derivative for the potential energy for each alchemical state (k) + for each frame (n). Note that the units for timestamps are not considered in the calculation. + + Attributes + + - temperature in K or dimensionless + - energy unit in kT + + .. versionadded:: 1.?? + """ + + # Collect Files + files = glob.glob(fep_files) + if not files: + raise ValueError(f"No files have been found that match: {fep_files}") + + beta = beta_from_units(T, units) + + if not isinstance(column_lambda, int): + raise ValueError( + f"Provided column for lambda must be type int. column_lambda: {column_lambda}, type: {type(column_lambda)}" + ) + if not isinstance(column_u_cross, int): + raise ValueError( + f"Provided column for u_cross must be type int. 
column_u_cross: {column_u_cross}, type: {type(column_u_cross)}" + ) + + dHdl = pd.DataFrame(columns=["time", "fep-lambda", "fep"]) + col_indices = [0, column_lambda, column_u_cross] + + for file in files: + if not os.path.isfile(file): + raise ValueError("File not found: {}".format(file)) + + data = pd.read_csv(file, sep=" ", comment="#", header=None) + lx = len(data.columns) + if [False for x in col_indices if x > lx]: + raise ValueError( + "Number of columns, {}, is less than index: {}".format(lx, col_indices) + ) + + data = data.iloc[:, col_indices] + + data.columns = ["time", "fep-lambda", "U"] + data["fep"] = dependence(data.loc[:, "fep-lambda"]) * data.U + data.drop( columns=["U"], inplace=True) + + dHdl = pd.concat([dHdl, data], axis=0, sort=False) + + dHdl.set_index(["time", "fep-lambda"], inplace=True) + dHdl.mul({"fep": beta}) + + return dHdl + + @_init_attrs def extract_dHdl( fep_files, @@ -1216,7 +727,6 @@ def extract_dHdl( column_dlambda2=None, columns_derivative1=[10, 11], columns_derivative2=[12, 13], - index=-1, units="real", ): """This function will go into alchemlyb.parsing.lammps @@ -1248,9 +758,6 @@ def extract_dHdl( columns_derivative : list[int], default=[10,11] Indices for columns (column number minus one) representing the lambda at which to find the forward and backward distance. - index : int, default=-1 - In provided file names, using underscore as a separator, this index marks the part of the filename - containing the lambda information for :func:`alchemlyb.parsing._get_ti_lambdas`. units : str, default="real" Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" @@ -1265,6 +772,7 @@ def extract_dHdl( - temperature in K or dimensionless - energy unit in kT + .. versionadded:: 1.?? 
""" # Collect Files @@ -1272,16 +780,7 @@ def extract_dHdl( if not files: raise ValueError("No files have been found that match: {}".format(fep_files)) - if units == "real": - beta = 1 / (k_b * T) - elif units == "lj": - beta = 1 / T - else: - raise ValueError( - "LAMMPS unit type, {}, is not supported. Supported types are: real and lj".format( - units - ) - ) + beta = beta_from_units(T, units) if not isinstance(column_lambda1, int): raise ValueError( @@ -1350,7 +849,7 @@ def extract_dHdl( if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) - data = pd.read_csv(file, sep=" ", comment="#") + data = pd.read_csv(file, sep=" ", comment="#", header=None) lx = len(data.columns) if [False for x in col_indices if x > lx]: raise ValueError( @@ -1449,6 +948,7 @@ def extract_H( - temperature in K or dimensionless - energy unit in kT + .. versionadded:: 1.?? """ # Collect Files @@ -1456,16 +956,7 @@ def extract_H( if not files: raise ValueError("No files have been found that match: {}".format(fep_files)) - if units == "real": - beta = 1 / (k_b * T) - elif units == "lj": - beta = 1 / T - else: - raise ValueError( - "LAMMPS unit type, {}, is not supported. 
Supported types are: real and lj".format( - units - ) - ) + beta = beta_from_units(T, units) if not isinstance(column_lambda1, int): raise ValueError( @@ -1499,7 +990,7 @@ def extract_H( if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) - data = pd.read_csv(file, sep=" ", comment="#") + data = pd.read_csv(file, sep=" ", comment="#", header=None) lx = len(data.columns) if [False for x in col_indices if x > lx]: raise ValueError( From 32daa0780a1f3dc925e2e1d14358fd451a98eb1d Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 13 Mar 2024 10:13:41 -0400 Subject: [PATCH 15/59] Update module docstring --- src/alchemlyb/parsing/lammps.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 6ea00078..9bea2966 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -10,20 +10,8 @@ The parsers featured in this module are constructed to parse LAMMPS output files output using the [`fix ave/time command`](https://docs.lammps.org/fix_ave_time.html), containing data for given potential energy values (an -approximation of the Hamiltonian) at specified values of $\lambda$ and $\lambda'$, $U_{\lambda,\lambda'}$. Because generating -the input files can be cumbersome, functions have been included to generate the appropriate sections. If a linear approximation -can be made to calculate $U_{\lambda,\lambda'}$ from $U_{\lambda}$ in post-processing, we recommend using -:func:`alchemlyb.parsing.generate_input_linear_approximation()`. If a linear approximation cannot be made (such as changing -$\lambda$ in the soft-LJ potential) we recommend running a loop over all values of $\lambda$ saving frames spaced to be -independent samples, and an output file with small perturbations with $\lambda'$ to calculate the derivative for TI in -post-processing. This is achieved with `alchemlyb.parsing.generate_traj_input()`. 
After this first simulation, we then -recommend the files needed for MBAR are generated using the [rerun](https://docs.lammps.org/rerun.html) feature in LAMMPS. -Breaking up the computation like this will allow one to add additional points to their MBAR analysis without repeating the -points from an initial simulation. Generating the file for a rerun is achieved with -:func:`alchemlyb.parsing.generate_rerun_mbar()`. Notice that the output files do not contain the header information expected -in LAMMPS as that is system specific and left to the user. - -Note that in LAMMPS, [fix adapt/fep](https://docs.lammps.org/fix_adapt_fep.html) changes $\lambda$ and +approximation of the Hamiltonian) at specified values of $\lambda$ and $\lambda'$, $U_{\lambda,\lambda'}$. Note that in +LAMMPS, [fix adapt/fep](https://docs.lammps.org/fix_adapt_fep.html) changes $\lambda$ and [compute fep](https://docs.lammps.org/compute_fep.html) changes $\lambda'$. .. versionadded:: 1.0.0 From 5ca4199076b6ad3e48a6de74fcf60626f5d3e379 Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 13 Mar 2024 10:19:20 -0400 Subject: [PATCH 16/59] Add name to author list --- AUTHORS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index 19ee3a57..bea72e89 100644 --- a/AUTHORS +++ b/AUTHORS @@ -49,3 +49,5 @@ Chronological list of authors - Domenico Marson (@DrDomenicoMarson) 2023 - Haoxi Li (@hl2500) +2024 + - Jennifer A. 
Clark (@jaclark5) From d9b62d20379ac150d0787445887ae5d55d35ef6f Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 13 Mar 2024 10:21:39 -0400 Subject: [PATCH 17/59] Added to changelog --- CHANGES | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 71fd2912..f2091e03 100644 --- a/CHANGES +++ b/CHANGES @@ -14,7 +14,7 @@ The rules for this file: ------------------------------------------------------------------------------ -*/*/2023 hl2500, xiki-tempula +*/*/2023 hl2500, xiki-tempula, jaclark5 * 2.2.0 @@ -22,6 +22,7 @@ Changes - Require pandas >= 2.1 (PR #340) - For pandas>=2.1, metadata will be loaded from the parquet file (issue #331, PR #340). - add support for Python 3.12, remove Python 3.8 support (issue #341, PR #304). + - Add support for LAMMPS FEP files (Issue #349, PR #348) Enhancements - Add a TI estimator using gaussian quadrature to calculate the free energy. From 03f55b86cc920071a3645f4a3cc7408ff9580a77 Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 13 Mar 2024 13:30:46 -0400 Subject: [PATCH 18/59] Update docs --- docs/parsing.rst | 1 + docs/parsing/alchemlyb.parsing.lammps.rst | 14 ++++++ src/alchemlyb/parsing/lammps.py | 57 ++++++++++++++--------- 3 files changed, 49 insertions(+), 23 deletions(-) create mode 100644 docs/parsing/alchemlyb.parsing.lammps.rst diff --git a/docs/parsing.rst b/docs/parsing.rst index 73de3b68..0b494e3e 100644 --- a/docs/parsing.rst +++ b/docs/parsing.rst @@ -238,4 +238,5 @@ See the documentation for the package you are using for more details on parser u namd gomc parquet + lammps diff --git a/docs/parsing/alchemlyb.parsing.lammps.rst b/docs/parsing/alchemlyb.parsing.lammps.rst new file mode 100644 index 00000000..c75408c2 --- /dev/null +++ b/docs/parsing/alchemlyb.parsing.lammps.rst @@ -0,0 +1,14 @@ +LAMMPS Parsing +============= +.. automodule:: alchemlyb.parsing.lammps + +API Reference +------------- +This submodule includes these parsing functions: + +.. 
autofunction:: alchemlyb.parsing.lammps.extract_dHdl +.. autofunction:: alchemlyb.parsing.lammps.extract_u_nk +.. autofunction:: alchemlyb.parsing.lammps.extract_dHdl_from_u_n +.. autofunction:: alchemlyb.parsing.lammps.extract_u_nk_from_u_n +.. autofunction:: alchemlyb.parsing.lammps.extract_H +.. autofunction:: alchemlyb.parsing.lammps.beta_from_units diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 9bea2966..e65b8972 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -1,18 +1,18 @@ -""" Parsers for extracting alchemical data from [LAMMPS](https://docs.lammps.org/Manual.html) output files. +""" Parsers for extracting alchemical data from `LAMMPS `_ output files. -For clarity, we would like to distinguish the difference between $\lambda$ and $\lambda'$. We refer to $\lambda$ as +For clarity, we would like to distinguish the difference between :math:`\lambda` and :math:`\lambda'`. We refer to :math:`\lambda` as the potential scaling of the equilibrated system, so that when this value is changed, the system undergoes another equilibration -step. One the other hand, $\lambda'$ is the value used to scaled the potentials for the configurations of the system equilibrated -for $\lambda$. The value of $\lambda'$ is used in two instances. First, in thermodynamic integration (TI), values of $\lambda'$ -that are very close to $\lambda$ can be used to calculate the derivative. This is needed because LAMMPS does not compute +step. One the other hand, :math:`\lambda'` is the value used to scaled the potentials for the configurations of the system equilibrated +for :math:`\lambda`. The value of :math:`\lambda'` is used in two instances. First, in thermodynamic integration (TI), values of :math:`\lambda'` +that are very close to :math:`\lambda` can be used to calculate the derivative. 
This is needed because LAMMPS does not compute explicit derivatives, although one should check whether they can derive an explicit expression, they cannot for changes of -$\lambda'$ in the soft Lennard-Jones (LJ) potential. +:math:`\lambda'` in the soft Lennard-Jones (LJ) potential. The parsers featured in this module are constructed to parse LAMMPS output files output using the -[`fix ave/time command`](https://docs.lammps.org/fix_ave_time.html), containing data for given potential energy values (an -approximation of the Hamiltonian) at specified values of $\lambda$ and $\lambda'$, $U_{\lambda,\lambda'}$. Note that in -LAMMPS, [fix adapt/fep](https://docs.lammps.org/fix_adapt_fep.html) changes $\lambda$ and -[compute fep](https://docs.lammps.org/compute_fep.html) changes $\lambda'$. +`fix ave/time command `_, containing data for given potential energy values (an +approximation of the Hamiltonian) at specified values of :math:`\lambda` and :math:`\lambda'`, :math:`U_{\lambda,\lambda'}`. Note that in +LAMMPS, `fix adapt/fep `_ changes :math:`\lambda` and +`compute fep `_ changes :math:`\lambda'`. .. versionadded:: 1.0.0 @@ -277,7 +277,7 @@ def extract_u_nk_from_u_n( ---------- filenames : str Path to fepout file(s) to extract data from. Filenames and paths are - aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*.txt". + aggregated using `glob `_. For example, "/path/to/files/something_*.txt". temperature : float Temperature in Kelvin at which the simulation was sampled. 
columns_lambda : int @@ -408,16 +408,19 @@ def extract_u_nk( ): """This function will go into alchemlyb.parsing.lammps - Each file is imported as a data frame where the columns kept are either: + Each file is imported as a data frame where the columns kept are either:: + [0, columns_lambda1[0] columns_lambda1[1], column_u_nk] - or if columns_lambda2 is not None: + + or if columns_lambda2 is not None:: + [0, columns_lambda1[0] columns_lambda1[1], column_lambda2, column_u_nk] Parameters ---------- filenames : str Path to fepout file(s) to extract data from. Filenames and paths are - aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*_*.txt". + aggregated using `glob `_. For example, "/path/to/files/something_*_*.txt". temperature : float Temperature in Kelvin at which the simulation was sampled. columns_lambda1 : list[int], default=[1,2] @@ -626,14 +629,15 @@ def extract_dHdl_from_u_n( ): """Produce dHdl dataframe from sparated contributions of the potential energy. - Each file is imported as a data frame where the columns are: + Each file is imported as a data frame where the columns are:: + [0, column_lambda, column_solvent, column_solute, column_cross] Parameters ---------- filenames : str Path to fepout file(s) to extract data from. Filenames and paths are - aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*.txt". + aggregated using `glob `_. For example, "/path/to/files/something_*.txt". T : float Temperature in Kelvin at which the simulation was sampled. 
columns_lambda : int, default=None @@ -719,19 +723,23 @@ def extract_dHdl( ): """This function will go into alchemlyb.parsing.lammps - Each file is imported as a data frame where the columns kept are either: + Each file is imported as a data frame where the columns kept are either:: + [0, column_lambda, column_dlambda1, columns_derivative[0], columns_derivative[1]] - or if columns_lambda2 is not None: + + or if columns_lambda2 is not None:: + [ 0, column_lambda, column_dlambda1, column_lambda2, column_dlambda2, - columns_derivative1[0], columns_derivative1[1], columns_derivative2[0], columns_derivative2[1] + columns_derivative1[0], columns_derivative1[1], columns_derivative2[0], + columns_derivative2[1] ] Parameters ---------- filenames : str Path to fepout file(s) to extract data from. Filenames and paths are - aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*_*.txt". + aggregated using `glob `_. For example, "/path/to/files/something_*_*.txt". temperature : float Temperature in Kelvin at which the simulation was sampled. column_lambda1 : int, default=2 @@ -900,9 +908,12 @@ def extract_H( ): """This function will go into alchemlyb.parsing.lammps - Each file is imported as a data frame where the columns kept are either: + Each file is imported as a data frame where the columns kept are either:: + [0, column_lambda, column_dlambda1, columns_derivative[0], columns_derivative[1]] - or if columns_lambda2 is not None: + + or if columns_lambda2 is not None:: + [ 0, column_lambda, column_dlambda1, column_lambda2, column_dlambda2, columns_derivative1[0], columns_derivative1[1], columns_derivative2[0], columns_derivative2[1] @@ -912,7 +923,7 @@ def extract_H( ---------- filenames : str Path to fepout file(s) to extract data from. Filenames and paths are - aggregated using [glob](https://docs.python.org/3/library/glob.html). For example, "/path/to/files/something_*_*.txt". + aggregated using `glob `_. 
For example, "/path/to/files/something_*_*.txt". temperature : float Temperature in Kelvin at which the simulation was sampled. column_lambda1 : int, default=2 From 22a62b0c82531a4b9c53b4a5a43e1a6139ce3a15 Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 13 Mar 2024 13:32:28 -0400 Subject: [PATCH 19/59] Run Black --- src/alchemlyb/parsing/lammps.py | 203 ++++++++++++++++++-------------- 1 file changed, 112 insertions(+), 91 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index e65b8972..c12c01b6 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -28,6 +28,7 @@ from . import _init_attrs from ..postprocessors.units import R_kJmol, kJ2kcal + def _isfloat(x): try: float(x) @@ -35,6 +36,7 @@ def _isfloat(x): except ValueError: return False + def beta_from_units(T, units): """Output value of beta from temperature and units. @@ -49,35 +51,29 @@ def beta_from_units(T, units): ------- beta : float Value of beta used to scale the potential energy. - + Raises ------ ValueError If unit string is not recognized. - + .. versionadded:: 1.?? """ - if units == "real": # E in kcal/mol, T in K + if units == "real": # E in kcal/mol, T in K beta = 1 / (R_kJmol * kJ2kcal * T) - elif units == "lj": # Nondimensional E and T scaled by epsilon + elif units == "lj": # Nondimensional E and T scaled by epsilon beta = 1 / T - elif units == "metal": # E in eV, T in K - beta = 1 / (R_kJmol * kJ2kcal * T) # NoteHere!!!! - elif units == "si": # E in J, T in K - beta = 1 / ( - constants.R * T * - constants.physical_constants["electron volt"][0] - ) - elif units == "cgs": # E in ergs, T in K + elif units == "metal": # E in eV, T in K + beta = 1 / (R_kJmol * kJ2kcal * T) # NoteHere!!!! 
+ elif units == "si": # E in J, T in K + beta = 1 / (constants.R * T * constants.physical_constants["electron volt"][0]) + elif units == "cgs": # E in ergs, T in K beta = 1 / (constants.R * T * 1e-7) - elif units == "electron": # E in Hartrees, T in K - beta = 1 / ( - constants.R * T * - constants.physical_constants["Hartree energy"][0] - ) - elif units == "micro": # E in epicogram-micrometer^2/microsecond^2, T in K + elif units == "electron": # E in Hartrees, T in K + beta = 1 / (constants.R * T * constants.physical_constants["Hartree energy"][0]) + elif units == "micro": # E in epicogram-micrometer^2/microsecond^2, T in K beta = 1 / (constants.R * T * 1e-15) - elif units == "nano": # E in attogram-nanometer^2/nanosecond^2, T in K + elif units == "nano": # E in attogram-nanometer^2/nanosecond^2, T in K beta = 1 / (constants.R * T * 1e-21) else: raise ValueError( @@ -85,12 +81,12 @@ def beta_from_units(T, units): units ) ) - + return beta def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): - """ Pull a tuple representing the lambda values used, as defined by the filenames. + """Pull a tuple representing the lambda values used, as defined by the filenames. Parameters ---------- @@ -110,7 +106,7 @@ def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): .. versionadded:: 1.?? 
""" - + name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) if not _isfloat(name_array[indices[0]]): raise ValueError( @@ -120,11 +116,15 @@ def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): raise ValueError( f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}" ) - return (round(float(name_array[indices[0]]), prec), round(float(name_array[indices[1]]), prec)) + return ( + round(float(name_array[indices[0]]), prec), + round(float(name_array[indices[1]]), prec), + ) + def _lambda_from_filename(filename, separator="_", index=-1, prec=4): - """ Pull the :math:`\lambda'` value, as defined by the filenames. - + """Pull the :math:`\lambda'` value, as defined by the filenames. + Here :math:`\lambda'` is the scaling value applied to a configuration that is equilibrated to a different value of :math:`\lambda`. @@ -153,6 +153,7 @@ def _lambda_from_filename(filename, separator="_", index=-1, prec=4): ) return round(float(name_array[index]), prec) + def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): """Retrieves all lambda values from FEP filenames. @@ -178,10 +179,17 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): .. versionadded:: 1.?? 
""" - lambda_pairs = [_tuple_from_filename(y, indices=indices, prec=prec) for y in fep_files] + lambda_pairs = [ + _tuple_from_filename(y, indices=indices, prec=prec) for y in fep_files + ] if len(indices) == 3: lambda2 = list( - set([_lambda_from_filename(y, index=indices[2], prec=prec) for y in fep_files]) + set( + [ + _lambda_from_filename(y, index=indices[2], prec=prec) + for y in fep_files + ] + ) ) if len(lambda2) > 1: raise ValueError( @@ -262,16 +270,16 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): @_init_attrs def extract_u_nk_from_u_n( - fep_files, - T, - column_lambda, - column_u_cross, - dependence=lambda x : (x), - units="real", - index=-1, - prec=4, + fep_files, + T, + column_lambda, + column_u_cross, + dependence=lambda x: (x), + units="real", + index=-1, + prec=4, ): - """ Produce u_nk from files containing u_n given a separable dependence on lambda. + """Produce u_nk from files containing u_n given a separable dependence on lambda. Parameters ---------- @@ -283,7 +291,7 @@ def extract_u_nk_from_u_n( columns_lambda : int Indices for columns (file column number minus one) representing the lambda at which the system is equilibrated column_cross : int - Index for the column (file column number minus one) representing the potential energy of the cross interactions + Index for the column (file column number minus one) representing the potential energy of the cross interactions between the solute and solvent. dependence : func, default=`lambda x : (x)` Dependence of changing variable on the potential energy, which must be separable. @@ -295,7 +303,7 @@ def extract_u_nk_from_u_n( Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" prec : int, default=4 Number of decimal places defined used in ``round()`` function. - + Returns ------- u_nk_df : pandas.Dataframe @@ -306,7 +314,7 @@ def extract_u_nk_from_u_n( - temperature in K - energy unit in kT - + .. versionadded:: 1.?? 
""" # Collect Files @@ -326,8 +334,8 @@ def extract_u_nk_from_u_n( ) lambda_values = list( - set([_lambda_from_filename(y, index=index, prec=prec) for y in files]) - ) + set([_lambda_from_filename(y, index=index, prec=prec) for y in files]) + ) u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) lc = len(lambda_values) @@ -346,9 +354,7 @@ def extract_u_nk_from_u_n( data = data.iloc[:, col_indices] data.columns = ["time", "fep-lambda", "u_cross"] lambda1_col = "fep-lambda" - data[[lambda1_col]] = data[[lambda1_col]].apply( - lambda x: round(x, prec) - ) + data[[lambda1_col]] = data[[lambda1_col]].apply(lambda x: round(x, prec)) for lambda1 in list(data[lambda1_col].unique()): tmp_df = data.loc[data[lambda1_col] == lambda1] @@ -381,12 +387,19 @@ def extract_u_nk_from_u_n( ) u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12] = ( - beta * tmp_df["u_cross"] * (dependence(lambda12) / dependence(lambda1) - 1) + beta + * tmp_df["u_cross"] + * (dependence(lambda12) / dependence(lambda1) - 1) ) - if lambda1 == lambda12 and u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12][0] != 0: - raise ValueError(f"The difference in PE should be zero when lambda = lambda', {lambda1} = {lambda12}," \ - " Check that the 'column_u_n' was defined correctly.") + if ( + lambda1 == lambda12 + and u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12][0] != 0 + ): + raise ValueError( + f"The difference in PE should be zero when lambda = lambda', {lambda1} = {lambda12}," + " Check that the 'column_u_n' was defined correctly." 
+ ) u_nk.set_index(["time", "fep-lambda"], inplace=True) @@ -397,7 +410,7 @@ def extract_u_nk_from_u_n( def extract_u_nk( fep_files, T, - columns_lambda1=[1,2], + columns_lambda1=[1, 2], column_u_nk=3, column_lambda2=None, indices=[1, 2], @@ -409,11 +422,11 @@ def extract_u_nk( """This function will go into alchemlyb.parsing.lammps Each file is imported as a data frame where the columns kept are either:: - + [0, columns_lambda1[0] columns_lambda1[1], column_u_nk] - + or if columns_lambda2 is not None:: - + [0, columns_lambda1[0] columns_lambda1[1], column_lambda2, column_u_nk] Parameters @@ -443,7 +456,7 @@ def extract_u_nk( Number of decimal places defined used in ``round()`` function. force : bool, default=False If ``True`` the dataframe will be created, even if not all lambda and lambda prime combinations are available. - + Results ------- u_nk_df : pandas.Dataframe @@ -482,7 +495,9 @@ def extract_u_nk( f"Provided column for u_nk must be type int. column_u_nk: {column_u_nk}, type: {type(column_u_nk)}" ) - lambda_values, _, lambda2 = _get_bar_lambdas(files, indices=indices, prec=prec, force=force) + lambda_values, _, lambda2 = _get_bar_lambdas( + files, indices=indices, prec=prec, force=force + ) if column_lambda2 is None: u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) @@ -509,9 +524,9 @@ def extract_u_nk( lambda1_col, lambda1_2_col = "fep-lambda", "fep-lambda2" columns_a = ["time", "fep-lambda"] columns_b = lambda_values - data[[lambda1_col, lambda1_2_col]] = data[[lambda1_col, lambda1_2_col]].apply( - lambda x: round(x, prec) - ) + data[[lambda1_col, lambda1_2_col]] = data[ + [lambda1_col, lambda1_2_col] + ].apply(lambda x: round(x, prec)) else: columns_a = ["time", "coul-lambda", "vdw-lambda"] if vdw_lambda == 1: @@ -538,9 +553,9 @@ def extract_u_nk( raise ValueError( f"'vdw_lambda must be either 1 or 2, not: {vdw_lambda}'" ) - data[columns_a[1:]+[lambda1_2_col]] = data[columns_a[1:]+[lambda1_2_col]].apply( - lambda x: round(x, prec) - ) + 
data[columns_a[1:] + [lambda1_2_col]] = data[ + columns_a[1:] + [lambda1_2_col] + ].apply(lambda x: round(x, prec)) for lambda1 in list(data[lambda1_col].unique()): tmp_df = data.loc[data[lambda1_col] == lambda1] @@ -567,21 +582,27 @@ def extract_u_nk( axis=0, sort=False, ) - - column_list = [ii for ii, x in enumerate(lambda_values) if round(float(x), prec) == lambda12] + + column_list = [ + ii + for ii, x in enumerate(lambda_values) + if round(float(x), prec) == lambda12 + ] if not column_list: - raise ValueError("Lambda values found in files do not align with those in the filenames. " \ - "Check that 'columns_lambda' are defined correctly.") + raise ValueError( + "Lambda values found in files do not align with those in the filenames. " + "Check that 'columns_lambda' are defined correctly." + ) else: column_name = lambda_values[column_list[0]] - + if column_lambda2 is not None: column_name = ( (lambda2, column_name) if vdw_lambda == 1 else (column_name, lambda2) ) - + if u_nk.loc[u_nk[lambda1_col] == lambda1, column_name][0] != abs(0): raise ValueError( "Energy values already available for lambda, {}, lambda', {}. Check for a duplicate file.".format( @@ -606,9 +627,14 @@ def extract_u_nk( u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = ( beta * tmp_df2["u_nk"] ) - if lambda1 == lambda12 and u_nk.loc[u_nk[lambda1_col] == lambda1, column_name][0] != 0: - raise ValueError(f"The difference in PE should be zero when lambda = lambda', {lambda1} = {lambda12}," \ - " Check that 'column_u_nk' was defined correctly.") + if ( + lambda1 == lambda12 + and u_nk.loc[u_nk[lambda1_col] == lambda1, column_name][0] != 0 + ): + raise ValueError( + f"The difference in PE should be zero when lambda = lambda', {lambda1} = {lambda12}," + " Check that 'column_u_nk' was defined correctly." 
+ ) if column_lambda2 is None: u_nk.set_index(["time", "fep-lambda"], inplace=True) @@ -624,13 +650,13 @@ def extract_dHdl_from_u_n( T, column_lambda=None, column_u_cross=None, - dependence=lambda x : (1/x), + dependence=lambda x: (1 / x), units="real", ): """Produce dHdl dataframe from sparated contributions of the potential energy. Each file is imported as a data frame where the columns are:: - + [0, column_lambda, column_solvent, column_solute, column_cross] Parameters @@ -646,7 +672,7 @@ def extract_dHdl_from_u_n( Index for the column (file column number minus one) representing the potential energy of the system dependence : func, default=`lambda x : (1/x)` Transform of lambda needed to convert the potential energy into the derivative of the potential energy with respect to lambda, which must be separable. - For example, for the LJ potential U = eps * f(sig, r), dU/deps = f(sig, r), so we need a dependence function of 1/eps to convert the + For example, for the LJ potential U = eps * f(sig, r), dU/deps = f(sig, r), so we need a dependence function of 1/eps to convert the potential energy to the derivative with respect to eps. units : str, default="real" Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" @@ -654,7 +680,7 @@ def extract_dHdl_from_u_n( Results ------- dHdl : pandas.Dataframe - Dataframe of the derivative for the potential energy for each alchemical state (k) + Dataframe of the derivative for the potential energy for each alchemical state (k) for each frame (n). Note that the units for timestamps are not considered in the calculation. 
Attributes @@ -696,10 +722,10 @@ def extract_dHdl_from_u_n( ) data = data.iloc[:, col_indices] - + data.columns = ["time", "fep-lambda", "U"] data["fep"] = dependence(data.loc[:, "fep-lambda"]) * data.U - data.drop( columns=["U"], inplace=True) + data.drop(columns=["U"], inplace=True) dHdl = pd.concat([dHdl, data], axis=0, sort=False) @@ -724,11 +750,11 @@ def extract_dHdl( """This function will go into alchemlyb.parsing.lammps Each file is imported as a data frame where the columns kept are either:: - + [0, column_lambda, column_dlambda1, columns_derivative[0], columns_derivative[1]] - + or if columns_lambda2 is not None:: - + [ 0, column_lambda, column_dlambda1, column_lambda2, column_dlambda2, columns_derivative1[0], columns_derivative1[1], columns_derivative2[0], @@ -760,7 +786,7 @@ def extract_dHdl( Results ------- dHdl : pandas.Dataframe - Dataframe of the derivative for the potential energy for each alchemical state (k) + Dataframe of the derivative for the potential energy for each alchemical state (k) for each frame (n). Note that the units for timestamps are not considered in the calculation. 
Attributes @@ -909,11 +935,11 @@ def extract_H( """This function will go into alchemlyb.parsing.lammps Each file is imported as a data frame where the columns kept are either:: - + [0, column_lambda, column_dlambda1, columns_derivative[0], columns_derivative[1]] - + or if columns_lambda2 is not None:: - + [ 0, column_lambda, column_dlambda1, column_lambda2, column_dlambda2, columns_derivative1[0], columns_derivative1[1], columns_derivative2[0], columns_derivative2[1] @@ -965,9 +991,7 @@ def extract_H( ) if not isinstance(column_pe, int): raise ValueError( - "Provided column_pe must be type 'int', instead: {}".format( - type(column_pe) - ) + "Provided column_pe must be type 'int', instead: {}".format(type(column_pe)) ) if column_lambda2 is not None and not isinstance(column_lambda2, int): raise ValueError( @@ -980,11 +1004,9 @@ def extract_H( df_H = pd.DataFrame(columns=["time", "fep-lambda", "U"]) col_indices = [0, column_lambda1, column_pe] else: - df_H = pd.DataFrame( - columns=["time", "coul-lambda", "vdw-lambda", "U"] - ) + df_H = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda", "U"]) col_indices = [0, column_lambda2, column_lambda1, column_pe] - + for file in files: if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) @@ -1008,11 +1030,10 @@ def extract_H( ] df_H = pd.concat([df_H, data], axis=0, sort=False) - if column_lambda2 is None: df_H.set_index(["time", "fep-lambda"], inplace=True) else: df_H.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) df_H.mul({"U": beta}) - return df_H \ No newline at end of file + return df_H From 22d72ac2aa1afc22d0c48e0863a1ba92bfc1e9a4 Mon Sep 17 00:00:00 2001 From: jac16 Date: Mon, 10 Jun 2024 16:15:39 -0400 Subject: [PATCH 20/59] Sort lambda values for consistent output --- src/alchemlyb/parsing/lammps.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index c12c01b6..1b38c02b 100644 
--- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -336,6 +336,7 @@ def extract_u_nk_from_u_n( lambda_values = list( set([_lambda_from_filename(y, index=index, prec=prec) for y in files]) ) + lambda_values = sorted(lambda_values) u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) lc = len(lambda_values) @@ -653,9 +654,9 @@ def extract_dHdl_from_u_n( dependence=lambda x: (1 / x), units="real", ): - """Produce dHdl dataframe from sparated contributions of the potential energy. + """Produce dHdl dataframe from separated contributions of the potential energy. - Each file is imported as a data frame where the columns are:: + Each file is imported as a dataframe where the columns are: [0, column_lambda, column_solvent, column_solute, column_cross] @@ -669,7 +670,7 @@ def extract_dHdl_from_u_n( columns_lambda : int, default=None Indices for columns (file column number minus one) representing the lambda at which the system is equilibrated column_u : int, default=None - Index for the column (file column number minus one) representing the potential energy of the system + Index for the column (file column number minus one) representing the cross interaction potential energy of the system dependence : func, default=`lambda x : (1/x)` Transform of lambda needed to convert the potential energy into the derivative of the potential energy with respect to lambda, which must be separable. 
For example, for the LJ potential U = eps * f(sig, r), dU/deps = f(sig, r), so we need a dependence function of 1/eps to convert the From 5f8fb716d93efc7a7aa4794766f5aa76313aa551 Mon Sep 17 00:00:00 2001 From: jac16 Date: Tue, 9 Jul 2024 13:04:49 -0400 Subject: [PATCH 21/59] Update lammps.extract_dHdl defaults --- src/alchemlyb/parsing/lammps.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 1b38c02b..8265a2a6 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -744,8 +744,8 @@ def extract_dHdl( column_dlambda1=2, column_lambda2=None, column_dlambda2=None, - columns_derivative1=[10, 11], - columns_derivative2=[12, 13], + columns_derivative1=[11, 10], + columns_derivative2=[13, 12], units="real", ): """This function will go into alchemlyb.parsing.lammps @@ -778,9 +778,9 @@ def extract_dHdl( If this array is ``None`` then we do not expect two lambda values. column_dlambda2 : int, default=None Index for column (column number minus one) for the change in lambda2. - columns_derivative : list[int], default=[10,11] + columns_derivative : list[int], default=[11, 10] Indices for columns (column number minus one) representing the lambda at which to find the forward - and backward distance. + and backward distance respectively. units : str, default="real" Unit system used in LAMMPS calculation. 
Currently supported: "real" and "lj" From 23bc5c52a01fb419f1d0e8d111384dd7be4264af Mon Sep 17 00:00:00 2001 From: jac16 Date: Mon, 15 Jul 2024 08:59:22 -0400 Subject: [PATCH 22/59] Update error for lambda values --- src/alchemlyb/parsing/lammps.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 8265a2a6..76522b15 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -592,7 +592,8 @@ def extract_u_nk( if not column_list: raise ValueError( "Lambda values found in files do not align with those in the filenames. " - "Check that 'columns_lambda' are defined correctly." + "Check that 'columns_lambda' or 'prec' are defined correctly. lambda" + " file: {}; lambda columns: {}".format(lambda12, lambda_values) ) else: column_name = lambda_values[column_list[0]] From d8e7092b35973d7b09d39c353211864e2bc98ddb Mon Sep 17 00:00:00 2001 From: jac16 Date: Mon, 15 Jul 2024 09:57:39 -0400 Subject: [PATCH 23/59] Update extract_dHdl* to use prec --- src/alchemlyb/parsing/lammps.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 76522b15..aff21fba 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -654,6 +654,7 @@ def extract_dHdl_from_u_n( column_u_cross=None, dependence=lambda x: (1 / x), units="real", + prec=4, ): """Produce dHdl dataframe from separated contributions of the potential energy. @@ -678,6 +679,8 @@ def extract_dHdl_from_u_n( potential energy to the derivative with respect to eps. units : str, default="real" Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + prec : int, default=4 + Number of decimal places defined used in ``round()`` function. 
Results ------- @@ -726,6 +729,7 @@ def extract_dHdl_from_u_n( data = data.iloc[:, col_indices] data.columns = ["time", "fep-lambda", "U"] + data["fep-lambda"] = data["fep-lambda"].apply(lambda x: round(x, prec)) data["fep"] = dependence(data.loc[:, "fep-lambda"]) * data.U data.drop(columns=["U"], inplace=True) @@ -748,6 +752,7 @@ def extract_dHdl( columns_derivative1=[11, 10], columns_derivative2=[13, 12], units="real", + prec=4, ): """This function will go into alchemlyb.parsing.lammps @@ -784,6 +789,8 @@ def extract_dHdl( and backward distance respectively. units : str, default="real" Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + prec : int, default=4 + Number of decimal places defined used in ``round()`` function. Results ------- @@ -884,6 +891,7 @@ def extract_dHdl( if column_lambda2 is None: # dU_back: U(l-dl) - U(l); dU_forw: U(l+dl) - U(l) data.columns = ["time", "fep-lambda", "dlambda", "dU_back", "dU_forw"] + data["fep-lambda"] = data["fep-lambda"].apply(lambda x: round(x, prec)) data["fep"] = (data.dU_forw - data.dU_back) / (2 * data.dlambda) data.drop(columns=["dlambda", "dU_back", "dU_forw"], inplace=True) else: @@ -898,6 +906,7 @@ def extract_dHdl( "dU_back_coul", "dU_forw_coul", ] + data["vdw-lambda"] = data["vdw-lambda"].apply(lambda x: round(x, prec)) data["coul"] = (data.dU_forw_coul - data.dU_back_coul) / ( 2 * data.dlambda_coul ) From 809c6f0173f1632208ff167c758d6fe346e0acb0 Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 24 Jul 2024 15:28:43 -0400 Subject: [PATCH 24/59] update parsing.lammps.py docstrings --- src/alchemlyb/parsing/lammps.py | 97 ++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 39 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index aff21fba..5d17dd94 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -14,7 +14,7 @@ LAMMPS, `fix adapt/fep `_ changes :math:`\lambda` and `compute fep `_ changes 
:math:`\lambda'`. -.. versionadded:: 1.0.0 +.. versionadded:: 2.4.0 """ @@ -40,6 +40,8 @@ def _isfloat(x): def beta_from_units(T, units): """Output value of beta from temperature and units. + Supported types are: cgs, electron, lj. metal, micro, nano, real, si + Parameters ---------- T : float @@ -57,14 +59,14 @@ def beta_from_units(T, units): ValueError If unit string is not recognized. - .. versionadded:: 1.?? + .. versionadded:: 2.4.0 """ if units == "real": # E in kcal/mol, T in K beta = 1 / (R_kJmol * kJ2kcal * T) elif units == "lj": # Nondimensional E and T scaled by epsilon beta = 1 / T elif units == "metal": # E in eV, T in K - beta = 1 / (R_kJmol * kJ2kcal * T) # NoteHere!!!! + beta = 1 / (R_kJmol * kJ2kcal * T) elif units == "si": # E in J, T in K beta = 1 / (constants.R * T * constants.physical_constants["electron volt"][0]) elif units == "cgs": # E in ergs, T in K @@ -77,7 +79,8 @@ def beta_from_units(T, units): beta = 1 / (constants.R * T * 1e-21) else: raise ValueError( - "LAMMPS unit type, {}, is not supported. Supported types are: real and lj".format( + "LAMMPS unit type, {}, is not supported. Supported types are: cgs, electron," + " lj. metal, micro, nano, real, si".format( units ) ) @@ -104,18 +107,23 @@ def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): tuple[float] Tuple of lambda values - .. versionadded:: 1.?? + .. 
versionadded:: 2.4.0 """ name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) - if not _isfloat(name_array[indices[0]]): + try: + float(name_array[indices[0]]) + except ValueError: raise ValueError( - f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}" - ) - if not _isfloat(name_array[indices[1]]): + f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}" + ( + try: + float(name_array[indices[1]]) + except ValueError: raise ValueError( - f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}" + f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}" ) + return ( round(float(name_array[indices[0]]), prec), round(float(name_array[indices[1]]), prec), @@ -144,10 +152,12 @@ def _lambda_from_filename(filename, separator="_", index=-1, prec=4): float Lambda prime value - .. versionadded:: 1.?? + .. versionadded:: 2.4.0 """ name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) - if not _isfloat(name_array[index]): + try: + float(name_array[index]) + except: raise ValueError( f"Entry, {index} in filename cannot be converted to float: {name_array[index]}" ) @@ -176,7 +186,7 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): lambda_pairs : list List of tuples containing two floats, lambda and lambda'. - .. versionadded:: 1.?? + .. versionadded:: 2.4.0 """ lambda_pairs = [ @@ -275,22 +285,22 @@ def extract_u_nk_from_u_n( column_lambda, column_u_cross, dependence=lambda x: (x), - units="real", index=-1, + units="real", prec=4, ): """Produce u_nk from files containing u_n given a separable dependence on lambda. Parameters ---------- - filenames : str + fep_files : str Path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. For example, "/path/to/files/something_*.txt". 
- temperature : float + T : float Temperature in Kelvin at which the simulation was sampled. columns_lambda : int Indices for columns (file column number minus one) representing the lambda at which the system is equilibrated - column_cross : int + column_u_cross : int Index for the column (file column number minus one) representing the potential energy of the cross interactions between the solute and solvent. dependence : func, default=`lambda x : (x)` @@ -300,7 +310,8 @@ def extract_u_nk_from_u_n( containing the lambda information for :func:`alchemlyb.parsing._get_bar_lambdas`. If ``column_lambda2 != None`` this list should be of length three, where the last value represents the invariant lambda. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + "real", "si" prec : int, default=4 Number of decimal places defined used in ``round()`` function. @@ -315,7 +326,7 @@ def extract_u_nk_from_u_n( - temperature in K - energy unit in kT - .. versionadded:: 1.?? + .. versionadded:: 2.4.0 """ # Collect Files files = glob.glob(fep_files) @@ -420,7 +431,7 @@ def extract_u_nk( prec=4, force=False, ): - """This function will go into alchemlyb.parsing.lammps + """Return reduced potentials `u_nk` from LAMMPS dump file(s). Each file is imported as a data frame where the columns kept are either:: @@ -432,10 +443,10 @@ def extract_u_nk( Parameters ---------- - filenames : str + fep_files : str Path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. For example, "/path/to/files/something_*_*.txt". - temperature : float + T : float Temperature in Kelvin at which the simulation was sampled. 
columns_lambda1 : list[int], default=[1,2] Indices for columns (column number minus one) representing (1) the lambda at which the system is equilibrated and (2) the lambda used @@ -450,7 +461,8 @@ def extract_u_nk( containing the lambda information for :func:`alchemlyb.parsing._get_bar_lambdas`. If ``column_lambda2 != None`` this list should be of length three, where the last value represents the invariant lambda. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + "real", "si" vdw_lambda : int, default=1 In the case that ``column_lambda2 is not None``, this integer represents which lambda represents vdw interactions. prec : int, default=4 @@ -469,7 +481,7 @@ def extract_u_nk( - temperature in K - energy unit in kT - .. versionadded:: 1.?? + .. versionadded:: 2.4.0 """ # Collect Files @@ -664,21 +676,22 @@ def extract_dHdl_from_u_n( Parameters ---------- - filenames : str + fep_files : str Path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. For example, "/path/to/files/something_*.txt". T : float Temperature in Kelvin at which the simulation was sampled. columns_lambda : int, default=None Indices for columns (file column number minus one) representing the lambda at which the system is equilibrated - column_u : int, default=None + column_u_cross : int, default=None Index for the column (file column number minus one) representing the cross interaction potential energy of the system dependence : func, default=`lambda x : (1/x)` Transform of lambda needed to convert the potential energy into the derivative of the potential energy with respect to lambda, which must be separable. 
For example, for the LJ potential U = eps * f(sig, r), dU/deps = f(sig, r), so we need a dependence function of 1/eps to convert the potential energy to the derivative with respect to eps. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + "real", "si" prec : int, default=4 Number of decimal places defined used in ``round()`` function. @@ -693,7 +706,7 @@ def extract_dHdl_from_u_n( - temperature in K or dimensionless - energy unit in kT - .. versionadded:: 1.?? + .. versionadded:: 2.4.0 """ # Collect Files @@ -754,7 +767,7 @@ def extract_dHdl( units="real", prec=4, ): - """This function will go into alchemlyb.parsing.lammps + """Return reduced potentials `dHdl` from LAMMPS dump file(s). Each file is imported as a data frame where the columns kept are either:: @@ -770,10 +783,10 @@ def extract_dHdl( Parameters ---------- - filenames : str + fep_files : str Path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. For example, "/path/to/files/something_*_*.txt". - temperature : float + T : float Temperature in Kelvin at which the simulation was sampled. column_lambda1 : int, default=2 Index for column (column number minus one) representing the lambda at which the system is equilibrated. @@ -784,11 +797,15 @@ def extract_dHdl( If this array is ``None`` then we do not expect two lambda values. column_dlambda2 : int, default=None Index for column (column number minus one) for the change in lambda2. - columns_derivative : list[int], default=[11, 10] + columns_derivative1 : list[int], default=[11, 10] Indices for columns (column number minus one) representing the lambda at which to find the forward and backward distance respectively. 
+ columns_derivative2 : list[int], default=[13, 12] + Indices for columns (column number minus one) representing the second value of lambda at which to find the forward + and backward distance respectively. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + "real", "si" prec : int, default=4 Number of decimal places defined used in ``round()`` function. @@ -803,7 +820,7 @@ def extract_dHdl( - temperature in K or dimensionless - energy unit in kT - .. versionadded:: 1.?? + .. versionadded:: 2.4.0 """ # Collect Files @@ -943,7 +960,7 @@ def extract_H( column_lambda2=None, units="real", ): - """This function will go into alchemlyb.parsing.lammps + """Return reduced potentials Hamiltonian from LAMMPS dump file(s). Each file is imported as a data frame where the columns kept are either:: @@ -958,10 +975,10 @@ def extract_H( Parameters ---------- - filenames : str + fep_files : str Path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. For example, "/path/to/files/something_*_*.txt". - temperature : float + T : float Temperature in Kelvin at which the simulation was sampled. column_lambda1 : int, default=2 Index for column (column number minus one) representing the lambda at which the system is equilibrated. @@ -971,7 +988,8 @@ def extract_H( Index for column (column number minus one) for a second value of lambda. If this array is ``None`` then we do not expect two lambda values. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "real" and "lj" + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + "real", "si" Results ------- @@ -984,7 +1002,7 @@ def extract_H( - temperature in K or dimensionless - energy unit in kT - .. versionadded:: 1.?? + .. 
versionadded:: 2.4.0 """ # Collect Files @@ -1048,3 +1066,4 @@ def extract_H( df_H.mul({"U": beta}) return df_H + From bdb14c6aaf2e2d8d81f55d51750b9c2b2e0ca944 Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 24 Jul 2024 15:39:19 -0400 Subject: [PATCH 25/59] Run Black --- src/alchemlyb/parsing/lammps.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 5d17dd94..62b56490 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -80,9 +80,7 @@ def beta_from_units(T, units): else: raise ValueError( "LAMMPS unit type, {}, is not supported. Supported types are: cgs, electron," - " lj. metal, micro, nano, real, si".format( - units - ) + " lj. metal, micro, nano, real, si".format(units) ) return beta @@ -115,13 +113,14 @@ def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): float(name_array[indices[0]]) except ValueError: raise ValueError( - f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}" - ( + f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}" + ) + try: float(name_array[indices[1]]) except ValueError: raise ValueError( - f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}" + f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}" ) return ( @@ -310,7 +309,7 @@ def extract_u_nk_from_u_n( containing the lambda information for :func:`alchemlyb.parsing._get_bar_lambdas`. If ``column_lambda2 != None`` this list should be of length three, where the last value represents the invariant lambda. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". 
"metal", "micro", "nano", "real", "si" prec : int, default=4 Number of decimal places defined used in ``round()`` function. @@ -461,7 +460,7 @@ def extract_u_nk( containing the lambda information for :func:`alchemlyb.parsing._get_bar_lambdas`. If ``column_lambda2 != None`` this list should be of length three, where the last value represents the invariant lambda. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", "real", "si" vdw_lambda : int, default=1 In the case that ``column_lambda2 is not None``, this integer represents which lambda represents vdw interactions. @@ -690,7 +689,7 @@ def extract_dHdl_from_u_n( For example, for the LJ potential U = eps * f(sig, r), dU/deps = f(sig, r), so we need a dependence function of 1/eps to convert the potential energy to the derivative with respect to eps. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", "real", "si" prec : int, default=4 Number of decimal places defined used in ``round()`` function. @@ -804,7 +803,7 @@ def extract_dHdl( Indices for columns (column number minus one) representing the second value of lambda at which to find the forward and backward distance respectively. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", "real", "si" prec : int, default=4 Number of decimal places defined used in ``round()`` function. 
@@ -988,7 +987,7 @@ def extract_H( Index for column (column number minus one) for a second value of lambda. If this array is ``None`` then we do not expect two lambda values. units : str, default="real" - Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", + Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", "real", "si" Results @@ -1066,4 +1065,3 @@ def extract_H( df_H.mul({"U": beta}) return df_H - From fe94f2abf88ecac6129767d8de1a99284954ddee Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 21 Aug 2024 10:18:17 -0400 Subject: [PATCH 26/59] Add ensemble support --- src/alchemlyb/parsing/lammps.py | 234 ++++++++++++++++++++------------ 1 file changed, 147 insertions(+), 87 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 62b56490..36b48b92 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -282,11 +282,15 @@ def extract_u_nk_from_u_n( fep_files, T, column_lambda, - column_u_cross, + column_U, + column_U_cross, dependence=lambda x: (x), index=-1, units="real", prec=4, + ensemble="nvt", + pressure=None, + column_volume=4, ): """Produce u_nk from files containing u_n given a separable dependence on lambda. @@ -299,7 +303,9 @@ def extract_u_nk_from_u_n( Temperature in Kelvin at which the simulation was sampled. columns_lambda : int Indices for columns (file column number minus one) representing the lambda at which the system is equilibrated - column_u_cross : int + column_U : int + Index for the column (file column number minus one) representing the potential energy of the system. + column_U_cross : int Index for the column (file column number minus one) representing the potential energy of the cross interactions between the solute and solvent. 
dependence : func, default=`lambda x : (x)` @@ -313,6 +319,14 @@ def extract_u_nk_from_u_n( "real", "si" prec : int, default=4 Number of decimal places defined used in ``round()`` function. + ensemble : str, default="nvt" + Ensemble from which the given data was generated. Either "nvt" or "npt" is supported where values from NVT are + unaltered, while those from NPT are corrected + pressure : float, default=None + The pressure of the system in the NPT ensemble in units of energy / volume, where the units of energy and volume + are as recorded in the LAMMPS dump file. + column_volume : int, default=4 + The column for the volume in a LAMMPS dump file. Returns ------- @@ -331,6 +345,15 @@ def extract_u_nk_from_u_n( files = glob.glob(fep_files) if not files: raise ValueError(f"No files have been found that match: {fep_files}") + + if ensemble == "npt": + if pressure is None or not isinstance(pressure, float) or pressure < 0: + raise ValueError("In the npt ensemble, a pressure must be provided in the form of a positive float") + elif ensemble != "nvt": + raise ValueError("Only ensembles of nvt or npt are supported.") + else: + if pressure is not None: + raise ValueError("There is no volume correction in the nvt ensemble, the pressure value will not be used.") beta = beta_from_units(T, units) @@ -338,9 +361,13 @@ def extract_u_nk_from_u_n( raise ValueError( f"Provided column for lambda must be type int. column_u_lambda: {column_lambda}, type: {type(column_lambda)}" ) - if not isinstance(column_u_cross, int): + if not isinstance(column_U_cross, int): raise ValueError( - f"Provided column for u_cross must be type int. column_u_cross: {column_u_cross}, type: {type(column_u_cross)}" + f"Provided column for `U_cross` must be type int. column_U_cross: {column_U_cross}, type: {type(column_U_cross)}" + ) + if not isinstance(column_U, int): + raise ValueError( + f"Provided column for `U` must be type int. 
column_U: {column_U}, type: {type(column_U)}" ) lambda_values = list( @@ -350,25 +377,31 @@ def extract_u_nk_from_u_n( u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) lc = len(lambda_values) - col_indices = [0, column_lambda, column_u_cross] + col_indices = [0, column_lambda, column_U, column_U_cross] + if ensemble == "npt": + col_indices.append(column_volume) for file in files: if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) - data = pd.read_csv(file, sep=" ", comment="#", header=None) - lx = len(data.columns) - if [False for x in col_indices if x > lx]: + tmp_data = pd.read_csv(file, sep=" ", comment="#", header=None) + lx = len(tmp_data.columns) + if [False for x in col_indices if x >= lx]: raise ValueError( "Number of columns, {}, is less than index: {}".format(lx, col_indices) ) - data = data.iloc[:, col_indices] - data.columns = ["time", "fep-lambda", "u_cross"] + data = tmp_data.iloc[:, col_indices] + columns = ["time", "fep-lambda", "U", "U_cross"] + if ensemble == "npt": + columns.append("volume") + data.columns = columns lambda1_col = "fep-lambda" - data[[lambda1_col]] = data[[lambda1_col]].apply(lambda x: round(x, prec)) + data.loc[:, [lambda1_col]] = data[[lambda1_col]].apply(lambda x: round(x, prec)) for lambda1 in list(data[lambda1_col].unique()): tmp_df = data.loc[data[lambda1_col] == lambda1] + lr = tmp_df.shape[0] for lambda12 in lambda_values: if u_nk[u_nk[lambda1_col] == lambda1].shape[0] == 0: @@ -396,21 +429,17 @@ def extract_u_nk_from_u_n( lambda1, lambda12 ) ) - + u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12] = ( - beta - * tmp_df["u_cross"] - * (dependence(lambda12) / dependence(lambda1) - 1) - ) - - if ( - lambda1 == lambda12 - and u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12][0] != 0 - ): - raise ValueError( - f"The difference in PE should be zero when lambda = lambda', {lambda1} = {lambda12}," - " Check that the 'column_u_n' was defined correctly." 
+ beta * ( + tmp_df["U_cross"]* (dependence(lambda12) / dependence(lambda1) - 1) + + tmp_df["U"] ) + ) + if ensemble == "npt": + u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12] += ( + beta * pressure * tmp_df["volume"] + ) u_nk.set_index(["time", "fep-lambda"], inplace=True) @@ -422,23 +451,31 @@ def extract_u_nk( fep_files, T, columns_lambda1=[1, 2], - column_u_nk=3, + column_dU=4, + column_U=3, column_lambda2=None, indices=[1, 2], units="real", vdw_lambda=1, + ensemble="nvt", + pressure=None, + column_volume=6, prec=4, force=False, + ): """Return reduced potentials `u_nk` from LAMMPS dump file(s). Each file is imported as a data frame where the columns kept are either:: - [0, columns_lambda1[0] columns_lambda1[1], column_u_nk] + [0, columns_lambda1[0] columns_lambda1[1], column_U, column_dU] or if columns_lambda2 is not None:: - [0, columns_lambda1[0] columns_lambda1[1], column_lambda2, column_u_nk] + [0, columns_lambda1[0] columns_lambda1[1], column_lambda2, column_U, column_dU] + + If the simulation took place in the NPT ensemble, column_volume is appended to the end + of this list. Parameters ---------- @@ -450,7 +487,9 @@ def extract_u_nk( columns_lambda1 : list[int], default=[1,2] Indices for columns (column number minus one) representing (1) the lambda at which the system is equilibrated and (2) the lambda used in the computation of the potential energy. - column_u_nk : int, default=4 + column_dU : int, default=4 + Index for the column (column number minus one) representing the difference in potential energy between lambda states + column_U : int, default=4 Index for the column (column number minus one) representing the potential energy column_lambda2 : int Index for column (column number minus one) for the unchanging value of lambda for another potential. @@ -464,6 +503,14 @@ def extract_u_nk( "real", "si" vdw_lambda : int, default=1 In the case that ``column_lambda2 is not None``, this integer represents which lambda represents vdw interactions. 
+ ensemble : str, default="nvt" + Ensemble from which the given data was generated. Either "nvt" or "npt" is supported where values from NVT are + unaltered, while those from NPT are corrected + pressure : float, default=None + The pressure of the system in the NPT ensemble in units of energy / volume, where the units of energy and volume + are as recorded in the LAMMPS dump file. + column_volume : int, default=4 + The column for the volume in a LAMMPS dump file. prec : int, default=4 Number of decimal places defined used in ``round()`` function. force : bool, default=False @@ -487,6 +534,15 @@ def extract_u_nk( files = glob.glob(fep_files) if not files: raise ValueError(f"No files have been found that match: {fep_files}") + + if ensemble == "npt": + if pressure is None or not isinstance(pressure, float) or pressure < 0: + raise ValueError("In the npt ensemble, a pressure must be provided in the form of a positive float") + elif ensemble != "nvt": + raise ValueError("Only ensembles of nvt or npt are supported.") + else: + if pressure is not None: + raise ValueError("There is no volume correction in the nvt ensemble, the pressure value will not be used.") beta = beta_from_units(T, units) @@ -502,9 +558,13 @@ def extract_u_nk( raise ValueError( f"Provided column for lambda must be type int. column_lambda2: {column_lambda2}, type: {type(column_lambda2)}" ) - if not isinstance(column_u_nk, int): + if not isinstance(column_dU, int): raise ValueError( - f"Provided column for u_nk must be type int. column_u_nk: {column_u_nk}, type: {type(column_u_nk)}" + f"Provided column for dU_nk must be type int. column_dU: {column_dU}, type: {type(column_dU)}" + ) + if not isinstance(column_U, int): + raise ValueError( + f"Provided column for U must be type int. 
column_U: {column_U}, type: {type(column_U)}" ) lambda_values, _, lambda2 = _get_bar_lambdas( @@ -514,61 +574,75 @@ def extract_u_nk( if column_lambda2 is None: u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) lc = len(lambda_values) - col_indices = [0] + list(columns_lambda1) + [column_u_nk] + col_indices = [0] + list(columns_lambda1) + [column_U, column_dU] else: u_nk = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda"]) lc = len(lambda_values) ** 2 - col_indices = [0] + list(columns_lambda1) + [column_lambda2, column_u_nk] + col_indices = [0] + list(columns_lambda1) + [column_lambda2, column_U, column_dU] + if ensemble == "npt": + col_indices.append(column_volume) + for file in files: if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) - data = pd.read_csv(file, sep=" ", comment="#", header=None) - lx = len(data.columns) - if [False for x in col_indices if x > lx]: + tmp_data = pd.read_csv(file, sep=" ", comment="#", header=None) + lx = len(tmp_data.columns) + if [False for x in col_indices if x >= lx]: raise ValueError( "Number of columns, {}, is less than index: {}".format(lx, col_indices) ) - data = data.iloc[:, col_indices] + data = tmp_data.iloc[:, col_indices] if column_lambda2 is None: - data.columns = ["time", "fep-lambda", "fep-lambda2", "u_nk"] + columns = ["time", "fep-lambda", "fep-lambda2", "U", "dU_nk"] + if ensemble == "npt": + columns.append("volume") + data.columns = columns lambda1_col, lambda1_2_col = "fep-lambda", "fep-lambda2" columns_a = ["time", "fep-lambda"] columns_b = lambda_values - data[[lambda1_col, lambda1_2_col]] = data[ + data.loc[:, [lambda1_col, lambda1_2_col]] = data[ [lambda1_col, lambda1_2_col] ].apply(lambda x: round(x, prec)) else: columns_a = ["time", "coul-lambda", "vdw-lambda"] if vdw_lambda == 1: - data.columns = [ + columns = [ "time", "vdw-lambda", "vdw-lambda2", "coul-lambda", - "u_nk", + "U", + "dU_nk", ] + if ensemble == "npt": + columns.append("volume") + 
data.columns = columns lambda1_col, lambda1_2_col = "vdw-lambda", "vdw-lambda2" columns_b = [(lambda2, x) for x in lambda_values] elif vdw_lambda == 2: - data.columns = [ + columns = [ "time", "coul-lambda", "coul-lambda2", "vdw-lambda", - "u_nk", + "U", + "dU_nk", ] + if ensemble == "npt": + columns.append("volume") + data.columns = columns lambda1_col, lambda1_2_col = "coul-lambda", "coul-lambda2" columns_b = [(x, lambda2) for x in lambda_values] else: raise ValueError( f"'vdw_lambda must be either 1 or 2, not: {vdw_lambda}'" ) - data[columns_a[1:] + [lambda1_2_col]] = data[ + data.loc[:, columns_a[1:] + [lambda1_2_col]] = data[ columns_a[1:] + [lambda1_2_col] ].apply(lambda x: round(x, prec)) - + for lambda1 in list(data[lambda1_col].unique()): tmp_df = data.loc[data[lambda1_col] == lambda1] @@ -625,28 +699,32 @@ def extract_u_nk( if ( u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[0] - != tmp_df2["u_nk"].shape[0] + != tmp_df2["dU_nk"].shape[0] ): raise ValueError( "Number of energy values in file, {}, N={}, inconsistent with previous files of length, {}.".format( file, - tmp_df2["u_nk"].shape[0], + tmp_df2["dU_nk"].shape[0], u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[ 0 ], ) ) - - u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = ( - beta * tmp_df2["u_nk"] - ) if ( lambda1 == lambda12 - and u_nk.loc[u_nk[lambda1_col] == lambda1, column_name][0] != 0 + and not np.all(tmp_df2["dU_nk"][0] == 0) ): raise ValueError( - f"The difference in PE should be zero when lambda = lambda', {lambda1} = {lambda12}," - " Check that 'column_u_nk' was defined correctly." + f"The difference in dU should be zero when lambda = lambda', {lambda1} = {lambda12}," + " Check that 'column_dU' was defined correctly." 
+ ) + # calculate reduced potential u_k = dH + pV + U + u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = ( + beta * (tmp_df2["dU_nk"] + tmp_df2["U"]) + ) + if ensemble == "npt": + u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] += ( + beta * pressure * tmp_df2["volume"] ) if column_lambda2 is None: @@ -733,7 +811,7 @@ def extract_dHdl_from_u_n( data = pd.read_csv(file, sep=" ", comment="#", header=None) lx = len(data.columns) - if [False for x in col_indices if x > lx]: + if [False for x in col_indices if x >= lx]: raise ValueError( "Number of columns, {}, is less than index: {}".format(lx, col_indices) ) @@ -761,8 +839,7 @@ def extract_dHdl( column_dlambda1=2, column_lambda2=None, column_dlambda2=None, - columns_derivative1=[11, 10], - columns_derivative2=[13, 12], + columns_derivative=[8, 7], units="real", prec=4, ): @@ -776,8 +853,7 @@ def extract_dHdl( [ 0, column_lambda, column_dlambda1, column_lambda2, column_dlambda2, - columns_derivative1[0], columns_derivative1[1], columns_derivative2[0], - columns_derivative2[1] + columns_derivative[0], columns_derivative[1], ] Parameters @@ -796,12 +872,9 @@ def extract_dHdl( If this array is ``None`` then we do not expect two lambda values. column_dlambda2 : int, default=None Index for column (column number minus one) for the change in lambda2. - columns_derivative1 : list[int], default=[11, 10] - Indices for columns (column number minus one) representing the lambda at which to find the forward - and backward distance respectively. - columns_derivative2 : list[int], default=[13, 12] - Indices for columns (column number minus one) representing the second value of lambda at which to find the forward - and backward distance respectively. + columns_derivative : list[int], default=[8, 7] + Indices for columns (column number minus one) representing the the forward + and backward derivative respectively. units : str, default="real" Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". 
"metal", "micro", "nano", "real", "si" @@ -854,42 +927,29 @@ def extract_dHdl( ) ) - if len(columns_derivative1) != 2: - raise ValueError( - "Provided columns for derivative values must have a length of two, columns_derivative1: {}".format( - columns_derivative1 - ) - ) - if not np.all([isinstance(x, int) for x in columns_derivative1]): - raise ValueError( - "Provided column for columns_derivative1 must be type int. columns_derivative1: {}, type: {}".format( - columns_derivative1, type([type(x) for x in columns_derivative1]) - ) - ) - if len(columns_derivative2) != 2: + if len(columns_derivative) != 2: raise ValueError( - "Provided columns for derivative values must have a length of two, columns_derivative2: {}".format( - columns_derivative2 + "Provided columns for derivative values must have a length of two, columns_derivative: {}".format( + columns_derivative ) ) - if not np.all([isinstance(x, int) for x in columns_derivative2]): + if not np.all([isinstance(x, int) for x in columns_derivative]): raise ValueError( - "Provided column for columns_derivative1 must be type int. columns_derivative1: {}, type: {}".format( - columns_derivative2, type([type(x) for x in columns_derivative2]) + "Provided column for columns_derivative must be type int. 
columns_derivative: {}, type: {}".format( + columns_derivative, type([type(x) for x in columns_derivative]) ) ) if column_lambda2 is None: dHdl = pd.DataFrame(columns=["time", "fep-lambda", "fep"]) - col_indices = [0, column_lambda1, column_dlambda1] + list(columns_derivative1) + col_indices = [0, column_lambda1, column_dlambda1] + list(columns_derivative) else: dHdl = pd.DataFrame( columns=["time", "coul-lambda", "vdw-lambda", "coul", "vdw"] ) col_indices = ( [0, column_lambda2, column_lambda1, column_dlambda1, column_dlambda2] - + list(columns_derivative1) - + list(columns_derivative2) + + list(columns_derivative) ) for file in files: @@ -898,7 +958,7 @@ def extract_dHdl( data = pd.read_csv(file, sep=" ", comment="#", header=None) lx = len(data.columns) - if [False for x in col_indices if x > lx]: + if [False for x in col_indices if x >= lx]: raise ValueError( "Number of columns, {}, is less than index: {}".format(lx, col_indices) ) @@ -1041,7 +1101,7 @@ def extract_H( data = pd.read_csv(file, sep=" ", comment="#", header=None) lx = len(data.columns) - if [False for x in col_indices if x > lx]: + if [False for x in col_indices if x >= lx]: raise ValueError( "Number of columns, {}, is less than index: {}".format(lx, col_indices) ) From fa5a539acc0db32c7437b843273b7314d6b79468 Mon Sep 17 00:00:00 2001 From: jac16 Date: Fri, 23 Aug 2024 14:43:25 -0400 Subject: [PATCH 27/59] Bug fix, inputs for extract_dHdl --- src/alchemlyb/parsing/lammps.py | 46 ++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 36b48b92..a9e6fc89 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -645,7 +645,7 @@ def extract_u_nk( for lambda1 in list(data[lambda1_col].unique()): tmp_df = data.loc[data[lambda1_col] == lambda1] - + for lambda12 in list(tmp_df[lambda1_2_col].unique()): tmp_df2 = tmp_df.loc[tmp_df[lambda1_2_col] == lambda12] @@ 
-1014,10 +1014,13 @@ def extract_dHdl( def extract_H( fep_files, T, - column_lambda1=2, + column_lambda1=1, column_pe=5, column_lambda2=None, units="real", + ensemble="nvt", + pressure=None, + column_volume=6, ): """Return reduced potentials Hamiltonian from LAMMPS dump file(s). @@ -1029,7 +1032,7 @@ def extract_H( [ 0, column_lambda, column_dlambda1, column_lambda2, column_dlambda2, - columns_derivative1[0], columns_derivative1[1], columns_derivative2[0], columns_derivative2[1] + columns_derivative1[0], columns_derivative1[1] ] Parameters @@ -1049,6 +1052,14 @@ def extract_H( units : str, default="real" Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". "metal", "micro", "nano", "real", "si" + ensemble : str, default="nvt" + Ensemble from which the given data was generated. Either "nvt" or "npt" is supported where values from NVT are + unaltered, while those from NPT are corrected + pressure : float, default=None + The pressure of the system in the NPT ensemble in units of energy / volume, where the units of energy and volume + are as recorded in the LAMMPS dump file. + column_volume : int, default=4 + The column for the volume in a LAMMPS dump file. 
Results ------- @@ -1069,6 +1080,15 @@ def extract_H( if not files: raise ValueError("No files have been found that match: {}".format(fep_files)) + if ensemble == "npt": + if pressure is None or not isinstance(pressure, float) or pressure < 0: + raise ValueError("In the npt ensemble, a pressure must be provided in the form of a positive float") + elif ensemble != "nvt": + raise ValueError("Only ensembles of nvt or npt are supported.") + else: + if pressure is not None: + raise ValueError("There is no volume correction in the nvt ensemble, the pressure value will not be used.") + beta = beta_from_units(T, units) if not isinstance(column_lambda1, int): @@ -1089,11 +1109,15 @@ def extract_H( ) if column_lambda2 is None: - df_H = pd.DataFrame(columns=["time", "fep-lambda", "U"]) + columns = ["time", "fep-lambda", "u_n"] col_indices = [0, column_lambda1, column_pe] else: - df_H = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda", "U"]) + columns = ["time", "coul-lambda", "vdw-lambda", "u_n"] col_indices = [0, column_lambda2, column_lambda1, column_pe] + + if ensemble == "npt": + col_indices.append(column_volume) + df_H = pd.DataFrame(columns=columns) for file in files: if not os.path.isfile(file): @@ -1108,14 +1132,22 @@ def extract_H( data = data.iloc[:, col_indices] if column_lambda2 is None: - data.columns = ["time", "fep-lambda", "U"] + columns = ["time", "fep-lambda", "U"] else: - data.columns = [ + columns = [ "time", "coul-lambda", "vdw-lambda", "U", ] + if ensemble == "npt": + columns.append("volume") + data.columns = columns + data["u_n"] = beta * data["U"] + del data["U"] + if ensemble == "npt": + data["u_n"] += beta * pressure * data["volume"] + del data["volume"] df_H = pd.concat([df_H, data], axis=0, sort=False) if column_lambda2 is None: From 8da4b99e51643e5dbca9d57f7480e9ab587586ee Mon Sep 17 00:00:00 2001 From: jac16 Date: Fri, 30 Aug 2024 10:46:33 -0400 Subject: [PATCH 28/59] Add LAMMPS unit conversion for pV term in npt u_r --- 
src/alchemlyb/parsing/lammps.py | 62 +++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index a9e6fc89..2f103913 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -85,6 +85,54 @@ def beta_from_units(T, units): return beta +def energy_from_units(units): + """Output conversion factor for pressure * volume to LAMMPS energy units + + Supported types are: cgs, electron, lj. metal, micro, nano, real, si + + Parameters + ---------- + units : str + LAMMPS style unit + + Returns + ------- + conversion_factor : float + Conversion factor for pressure * volume to LAMMPS energy units + + Raises + ------ + ValueError + If unit string is not recognized. + + .. versionadded:: 2.4.0 + """ + if units == "real": # E in kcal/mol, Vol in Å^3, pressure in atm + beta = constants.atm * constants.angstrom**3 / 1e+3 * kJ2kcal * constants.N_A + elif units == "lj": # Nondimensional E scaled by epsilon + beta = 1 + elif units == "metal": # E in eV, vol in Å^3, pressure in bar + beta = constants.bar * constants.angstrom**3 / constants.eV + elif units == "si": # E in J, vol in m^3, pressure in Pa + beta = 1 + elif units == "cgs": # E in ergs, vol in cm^3, pressure in dyne/cm^2 + beta = 1 + elif units == "electron": # E in Hartrees, vol in Bohr^3, pressure in Pa + Hartree2J = 4.3597447222060e-8 + Bohr2m = 5.29177210544e+11 + beta = 1 / Hartree2J / Bohr2m**3 + elif units == "micro": # E in picogram-micrometer^2/microsecond^2, vol in um^3, pressure in picogram/(micrometer-microsecond^2) + beta = 1 + elif units == "nano": # E in attogram-nanometer^2/nanosecond^2, vol in nm^3, pressure in attogram/(nanometer-nanosecond^2) + beta = 1 + else: + raise ValueError( + "LAMMPS unit type, {}, is not supported. Supported types are: cgs, electron," + " lj. 
metal, micro, nano, real, si".format(units) + ) + + return beta + def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): """Pull a tuple representing the lambda values used, as defined by the filenames. @@ -218,12 +266,12 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): raise ValueError("Lambda values must be positive: {}".format(lambda_values)) # check that all needed lamba combinations are present - lamda_dict = {x: [y[1] for y in lambda_pairs if y[0] == x] for x in lambda_values} + lambda_dict = {x: [y[1] for y in lambda_pairs if y[0] == x] for x in lambda_values} # Check for MBAR content missing_combinations_mbar = [] missing_combinations_bar = [] - for lambda_value, lambda_array in lamda_dict.items(): + for lambda_value, lambda_array in lambda_dict.items(): missing_combinations_mbar.extend( [(lambda_value, x) for x in lambda_values if x not in lambda_array] ) @@ -241,10 +289,10 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): missing_combinations_bar = [] extra_combinations_bar = [] lambda_values.sort() - for ind, (lambda_value, lambda_array) in enumerate(lamda_dict.items()): + for ind, (lambda_value, lambda_array) in enumerate(lambda_dict.items()): if ind == 0: tmp_array = [lambda_values[ind], lambda_values[ind + 1]] - elif ind == len(lamda_dict) - 1: + elif ind == len(lambda_dict) - 1: tmp_array = [lambda_values[ind - 1], lambda_values[ind]] else: tmp_array = [ @@ -438,7 +486,7 @@ def extract_u_nk_from_u_n( ) if ensemble == "npt": u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12] += ( - beta * pressure * tmp_df["volume"] + beta * pressure * tmp_df["volume"] * energy_from_units(units) ) u_nk.set_index(["time", "fep-lambda"], inplace=True) @@ -724,7 +772,7 @@ def extract_u_nk( ) if ensemble == "npt": u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] += ( - beta * pressure * tmp_df2["volume"] + beta * pressure * tmp_df2["volume"] * energy_from_units(units) ) if column_lambda2 is None: @@ 
-1146,7 +1194,7 @@ def extract_H( data["u_n"] = beta * data["U"] del data["U"] if ensemble == "npt": - data["u_n"] += beta * pressure * data["volume"] + data["u_n"] += beta * pressure * data["volume"] * energy_from_units(units) del data["volume"] df_H = pd.concat([df_H, data], axis=0, sort=False) From 2d5fe486ca0881c5a5ff15b8dac56210a26c50c4 Mon Sep 17 00:00:00 2001 From: jac16 Date: Fri, 30 Aug 2024 13:28:35 -0400 Subject: [PATCH 29/59] Fixed kT multiplication issue in dHdl --- src/alchemlyb/parsing/lammps.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 2f103913..349d3965 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -874,7 +874,7 @@ def extract_dHdl_from_u_n( dHdl = pd.concat([dHdl, data], axis=0, sort=False) dHdl.set_index(["time", "fep-lambda"], inplace=True) - dHdl.mul({"fep": beta}) + dHdl = dHdl.mul({"fep": beta}) return dHdl @@ -1050,10 +1050,10 @@ def extract_dHdl( if column_lambda2 is None: dHdl.set_index(["time", "fep-lambda"], inplace=True) - dHdl.mul({"fep": beta}) + dHdl = dHdl.mul({"fep": beta}) else: dHdl.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) - dHdl.mul({"coul": beta, "vdw": beta}) + dHdl = dHdl.mul({"coul": beta, "vdw": beta}) return dHdl @@ -1202,6 +1202,6 @@ def extract_H( df_H.set_index(["time", "fep-lambda"], inplace=True) else: df_H.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) - df_H.mul({"U": beta}) + df_H = df_H.mul({"U": beta}) return df_H From bd2c23a399b5dec2aa96eb8d20cbe51da53904f9 Mon Sep 17 00:00:00 2001 From: jac16 Date: Tue, 3 Sep 2024 09:18:40 -0400 Subject: [PATCH 30/59] Resolve deprecation warnings --- src/alchemlyb/parsing/lammps.py | 35 ++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 349d3965..a888a219 100644 --- 
a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -699,23 +699,27 @@ def extract_u_nk( lr = tmp_df2.shape[0] if u_nk[u_nk[lambda1_col] == lambda1].shape[0] == 0: - u_nk = pd.concat( + tmp_df3 = pd.concat( [ - u_nk, - pd.concat( - [ - tmp_df2[columns_a], - pd.DataFrame( - np.zeros((lr, lc)), - columns=columns_b, - ), - ], - axis=1, + tmp_df2[columns_a], + pd.DataFrame( + np.zeros((lr, lc)), + columns=columns_b, ), ], - axis=0, - sort=False, + axis=1, ) + if len(u_nk) != 0: + u_nk = pd.concat( + [ + u_nk, + tmp_df3, + ], + axis=0, + sort=False, + ) + else: + u_nk = tmp_df3 column_list = [ ii @@ -1046,7 +1050,10 @@ def extract_dHdl( ], inplace=True, ) - dHdl = pd.concat([dHdl, data], axis=0, sort=False) + if len(dHdl) != 0: + dHdl = pd.concat([dHdl, data], axis=0, sort=False) + else: + dHdl = data if column_lambda2 is None: dHdl.set_index(["time", "fep-lambda"], inplace=True) From e1d6e87bd1985ae4366783b28d59d78d5bb5894f Mon Sep 17 00:00:00 2001 From: jac16 Date: Fri, 13 Sep 2024 09:26:09 -0400 Subject: [PATCH 31/59] Resolve pandas depreication warning --- src/alchemlyb/parsing/lammps.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index a888a219..cce131be 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -453,23 +453,20 @@ def extract_u_nk_from_u_n( lr = tmp_df.shape[0] for lambda12 in lambda_values: if u_nk[u_nk[lambda1_col] == lambda1].shape[0] == 0: - u_nk = pd.concat( + tmp_u_nk = pd.concat( [ - u_nk, - pd.concat( - [ - tmp_df[["time", "fep-lambda"]], - pd.DataFrame( - np.zeros((lr, lc)), - columns=lambda_values, - ), - ], - axis=1, + tmp_df[["time", "fep-lambda"]], + pd.DataFrame( + np.zeros((lr, lc)), + columns=lambda_values, ), ], - axis=0, - sort=False, + axis=1, ) + if u_nk.shape[0] == 0: + u_nk = tmp_u_nk + else: + u_nk = pd.concat( [u_nk, tmp_u_nk], axis=0, sort=False) if 
u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12][0] != 0: raise ValueError( From 9eb22a12bb49e87abf11dc9e71f95096b98af65f Mon Sep 17 00:00:00 2001 From: Jennifer A Clark Date: Sat, 14 Sep 2024 15:16:06 -0400 Subject: [PATCH 32/59] Moving average (#381) * Add moving_average function for visualization and convergence testing * Update versionadded * Run Black * Bug fix bar_.py states * Update Changelog * Update the docs * Add tests * Formatting to align with Black * Update tests * Refactor moving_average to align with forward_backward_convergence function * Update tests * Update test_convergence and lambda tests in convergence.moving_average * Adjust convergence.py and tests for codecoverage * black * Update moving_average to block_average for more accurate descriptive name * Address reviewer comments * Update test to align with changed handling of dfs of different length in block_average * Remove incorrect popagation of error in BAR * Add tests and error catch for ill constructed BAR input, u_nk * black * Updated version comments --------- Co-authored-by: Oliver Beckstein --- CHANGES | 17 ++- docs/convergence.rst | 25 ++++ .../alchemlyb.convergence.convergence.rst | 2 + docs/images/dF_t_block_average.png | Bin 0 -> 44332 bytes docs/visualisation.rst | 1 + ...emlyb.visualisation.plot_block_average.rst | 6 + src/alchemlyb/convergence/__init__.py | 2 +- src/alchemlyb/convergence/convergence.py | 139 ++++++++++++++++-- src/alchemlyb/estimators/bar_.py | 34 ++++- src/alchemlyb/estimators/mbar_.py | 21 ++- src/alchemlyb/tests/test_convergence.py | 132 ++++++++++++++++- src/alchemlyb/tests/test_visualisation.py | 45 +++++- src/alchemlyb/visualisation/__init__.py | 2 +- src/alchemlyb/visualisation/convergence.py | 122 ++++++++++++++- src/alchemlyb/visualisation/dF_state.py | 3 +- 15 files changed, 519 insertions(+), 32 deletions(-) create mode 100644 docs/images/dF_t_block_average.png create mode 100644 docs/visualisation/alchemlyb.visualisation.plot_block_average.rst diff 
--git a/CHANGES b/CHANGES index 914085c5..043b3cae 100644 --- a/CHANGES +++ b/CHANGES @@ -12,16 +12,27 @@ The rules for this file: * accompany each entry with github issue/PR number (Issue #xyz) * release numbers follow "Semantic Versioning" https://semver.org ------------------------------------------------------------------------------- + ??/??/2024 jaclark5 - * 2.4.0 + * 2.4.1 Enhancements - Add support for LAMMPS FEP files (Issue #349, PR #348) +09/17/2024 jaclark5, orbeckst + + * 2.4.0 + +Enhancements + - Addition of `block_average` function in both `convergence` and + `visualization` (Issue #380, PR #381) + - add CITATION.cff file with all authors from AUTHORS (issue #394, PR #395) + +Changes + - modernize build system: replaced setup.py,cfg with pyproject.toml (#385) -08/07/2024 xiki-tempula +08/24/2024 xiki-tempula * 2.3.2 diff --git a/docs/convergence.rst b/docs/convergence.rst index c71e65ee..a4a96056 100644 --- a/docs/convergence.rst +++ b/docs/convergence.rst @@ -75,6 +75,31 @@ is, where 0 fully-unequilibrated and 1.0 is fully-equilibrated. :: >>> value = A_c(dhdl_list, tol=2) 0.7085 +Block Average +-------------- +If one obtains suspicious results from the forward / backward convergence plot, +it may be useful to view the block averages of the change in free energy using +:func:`~alchemlyb.convergence.block_average` and +:func:`~alchemlyb.visualisation.plot_block_average` over the course of each +step in lambda individually, the following example is for :math:`\lambda` = 0 + + >>> from alchemtest.gmx import load_benzene + >>> from alchemlyb.parsing.gmx import extract_u_nk + >>> from alchemlyb.visualisation import plot_block_average + >>> from alchemlyb.convergence import block_average + + >>> bz = load_benzene().data + >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] + >>> df = block_average(data_list, 'mbar') + >>> ax = plot_block_average(df) + >>> ax.figure.savefig('dF_t_block_average.png') + +Will give a plot looks like this + +.. 
figure:: images/dF_t_block_average.png + + A plot of the free energy divided into block averages showing consistency/inconsistency + across the trajectory. Convergence functions --------------------- diff --git a/docs/convergence/alchemlyb.convergence.convergence.rst b/docs/convergence/alchemlyb.convergence.convergence.rst index e5512074..c40e3ad3 100644 --- a/docs/convergence/alchemlyb.convergence.convergence.rst +++ b/docs/convergence/alchemlyb.convergence.convergence.rst @@ -15,3 +15,5 @@ All convergence functions are located in this submodule but for convenience they .. autofunction:: alchemlyb.convergence.fwdrev_cumavg_Rc .. autofunction:: alchemlyb.convergence.A_c + +.. autofunction:: alchemlyb.convergence.block_average diff --git a/docs/images/dF_t_block_average.png b/docs/images/dF_t_block_average.png new file mode 100644 index 0000000000000000000000000000000000000000..039f4a7af3621e8792d2bb032fc5354e87304069 GIT binary patch literal 44332 zcmdSBbySsKw>G?KR8qP@6i~X{beE(^N-N#n-Q6G|f`F7XNVl}KlyrA@!@IV>=Q-z$ z^PcgJ?~iYM<9qiQ$gTUn?-g^cYhLr3a~b+pUK0H=@nZ-Cg8oKI>>UJxKna1stD_== ze@S)nvjM-j9mLfgl&p;$oIltZLgYR;*jQLQSeSmKbTYKFH?_86XX0XFXP`83aImrG zVP>}cj~ke*?Tneb*VUE4OCH%sy|;%zFh9V4;PM6YOd*h>ls96+$}VZUbIvYW<0*oN z2e_t2W9|nP^|NJ>jL`Zw>LFCPI8PB8d>MS-{#;Lrq?(E`j2C*Wm-?YikJ5*Vkoh^> z=p4!GPnLLv&V?5|zx!twC~!<2^702~O&5kvHp@5q>(q?9jyIbx(oUM$gs~8@uwkD! 
zLi@XLe=lP%^dKT)uut5F|F|G_fqkUd(1iZ)yO1`<^nvwySN|)8%PPTgwIn$LJp31S z@@M8;2>E5A>a5qSfhmAcTu8n?pZr+O1 zQsc32eSi=xfYx|dU!K9cy}uu%Q5+;o<1b4iDlQ&a8O%ZJezE_>1%1?WZ>~YjVYib& zmv2R%x%u*AUSo60GK~Xl`k^_K2n`ExQRJ!h8S*G;d`7n#RmingIA7mNu}iqSoD4uS z$IWQTTaJIxU)~)Qi)mUt=itC?Zf<^^QFE3S9gX?(=g(Ze3x^?I5(KQjFZ?!%P}+o; zkXGd(PlLeW)q03UiMqTIMGk*`?K89QkU?p1bG`@haX)Ax7~(rY({bP9yd8SD#l+n| zhA(n?xVzZkP%3vjGYg->c5a6RfEnyT-}CBbyX^Ueq*{t#cMMlY%0lS#``O|l+}EOx zvC>tBvK#Hipo2HO>vfmwpRM1jtB1tIVBIcZdbbMRA7Lu1sjaL;3wDjG>F(_=jgZ=4 zqJniF`?;VHo4Mim3~9fXr1pfI(mIy)5)$FP&d65%aA>99Vk?OWe^wQT;V$20Ecn`T z8nH?va^N#Ge45sMIh2+%z88y(4E>9|@kyE=B_%gE5nBq{!-?R7{?YAJrLirTIBFUi zWV0$_Q&ZF0i+T5vi3ytNZeja>4QYT>fYQrjhfK(6eyn?S74Gu^3-6HEO;q2!goMQ5 zgr+qd#H#6RB2>_jjp+XVq9wbkN@eWfX5m2q_c7A+^z>mbZN?wIgZsN1MO|IY6xkq( z+eMVD4X1Zy-W{IwNwWk zA|)f!K3!#=Il6V&MPeOtEh#Cf-yVbx(L0(oSQ#qTtigEs@+HpR`1tr`8>W{}c{!_= zO&b!I?K0x^%?&FDN5GK44H{TNE{AO>d$+HeFfV*y0Q5%B1Pg4GbHRgc$I2buG7LI; z&U@`rg!{|Q+wQ|2eIcR6+aY}`kCgGt-R@dV>n1!D4AR!~y_uuRjJy8jx@f_BLJE&V zI5&59Y*JDNH!lMN#Dk`b^zoDXedm_;;bE+-tgOQ+;|w{2__$t!+S}v4BQT44N6Vc~ zu9gdT`_7g|^=_vllC~RPoHac650~0QLPJxIXEkfAI&uBm@o?F#sx)O}Xtb^kSq}HpANh;mjxY=W`gE!Iy#m%9zRADE>ovLf(Q!>*Pc(C(_YKvj+oHE zKQZqs>zV<*pi3nyC@GOpP{4#NW`#3Va5K5%UEORKmQtenjhCX3yR@#`5@ekf&l+2SZOxn$31kthw6@k(pUt`> z{quu`==%El(DUd2D9Gw$wy{W(xlK6~=+bHM-Fw)oZa%_lAH<|Nd6z_i7l(U{5 zS6>3(nF-ZIPb?)pqhUS2|5o9zBLsbz5~m;xEHJ6ngt^F{v0d(SkMExD9eoh5oY zQ?W*o?36Iy@-zk*uxjI`%Hze?G>0csd@l|hqGY#AmH#gL0VkqA4n^aa$zfM)MZH^& zw9M_q4>uRh7yNXI%QuILii$Yig5LLy0|%Vr_8jd0c&FJAZ%CO-%9ic>M{hOwD)nW; z<%r54dcABbS6Xk}-FXFW4@DuZ0QQ3K@4Y_jc1Kg7KuCEUP&Wp@;CwOm2vvFio~Zf$ zdM8wnRuKALLxWgEL<9l>5MIUIT#%lf@{dnmHR@M7Xg)eV6_zqBFVEQI+u7q~a(7vY zgtEMT9kjTpr)59DiHeF^c2)JU^Ra=kF+y1yQg45M$FD?sI7n&B{dLE#q?eb#CnjyP zBToP>%{Oxg@DLrBB_s!DXU)@s{QTC2gJ$!|$(_i5**|4fS#O+pbY(p~8jiY1;0XQEzR1(MS@C%u z41VD~N@}?kA2S*d#pH)=l8Fgbi$7&>dGcT2^=2=Q){=CWf6Z3dKWfS0o8-dc)QF=c7e`%&hEe1Tr$Rn7*}30IAj|T3cINE_Y{>M@r**ppRc-CIO>bOXxb>-P&&51Wr$gcux_hP#vreuNBY#N({ 
z4?af-q=jo5Ja*0bUAyFQ<~FJ2v?fpPb@|~I28lUII$?kh{G4jHm1*Rmfw3|BH!r(N zWuOpm59Qlg&IQigX~!xHwf+32CNkz0i`m|XVr$R7-fUZ4EkALXeW-4WaS^qgsQja+ zT&}nBj{fLJXI50$eN<{tE$oz}@C-QHnanOOMvsk+&H5S4!J7t01)4d=g7a<7NVh+y zo7Hr5FJ5nCS_y8GVghz9S|JM>-ud!{4eZbj|k2=%~-yr z!~N6ZEtr~`Iy^P?2&|;BF=Z{+wZz(o`?1diti5kEVSSQ}CKuPyNf&=eNWit2tH*;Jf<6C-uN070S=k6r6q&!xLuOr&4cMA= zqQCGuf3&f|LPA11;hAi0g~Y|j$Hv6?6qYs<1KdchXlc{)(a8(e$;+6$cmboI$H-Q^ z2=EZ&p>%O!VK~60u&}Uv<{F%-p-|_+jlRLbt|&6zGO!s>&(1#VOcbkE8llT)2t-Ci zpcQuiP^-xqLBar=zOTp|as(V$z7PElN?8Jo>MW;aidUTwY&)Z1Y2c_^kCM zt?ZPxbWId%9xaD6Pp{qD)sfPx772+5?zM_AB_Y@ev7kC#rT?9c^nU7dp9ma8IE~_o zk_~zW1~ChZqPf90urF5!W}T2%GlR*0*0kJNKBu9{YH6Y1b>8ooZ*ng;9wLXdeqz)d z9vcgo)b;jUP6k7PbVaW3?d{PqFa(sA9(2SNFWjy})vL_#*x1;HCntmRGQ5Zb0|Q<5 z>o%Tyd;Zgb9OVgerG=H_;oO>u|H$jAtL@OO?(x%Y#YmdjFLToV(_WX}}VtKoV8kIVD$ z0!tC!(vRg#Ad)u2{p&VDLZ1H<3H*O^k?n8}fvxGr5fD!HFn3r+^C2T5@)+}pz59pK z0{R{F561qF7oUeiL;7v>>A;v){_%VNt=~N|=olK+U-&T4Fr)b6*wRD?po zMzpwLeLFfkBHFcb>ed|{3mX>#fbhoFeP^Gz%H`@JtrfPyG!G#r6g1?oKTx?mAawMs zXY|}pcD)E{m)w6xrNElT0Zn77sZpn29iAasH6ip2;#|b{`8JAomHz&?7@6cdqPSuY zfap-24Om2B&mYPlw#z&yQcc8g|>y01N)bLZ|)p>DnW~a134yrf%?3&NP%U`2 z$DI{Ib*%R)aqFO6v`e|6GINq7`ZHI?w7?2z_Sr!vB%sGXFd*fg3AKxdqEb=AfX`~# zBlEw$+G6RW5O=|Jw5#;|7HP_uZUIL+%t9stV3WDBJb>tqZFHsLseWu*3LPWzFKM~x zBnI~XatqhVSc7#VpC=ZZ&LeGy^s{razul%?JJk;9`SKzUzHkB}iAh8e9R)v=0mUa` zP&byXqE8}$<|n0bGBu>~oxni~C*Z+UV}1ec@?pVNhRqz!*U}+Ey$E`GPOn}AFtT>h zW%P}B!wqS6`TtxRxQ`dBd42my`!*2a2#J4?c|clq7vQhm$e;qSo5*28N`MG!rI+-J zn6zhBz(h{XtT#N*V8F6oix}P`^P>iSEl261ZoQ%PIMF9V@Nim2XvB2Xel4ffx6~(` z$$pnB6ijm^*u6b6q}+5~MMsspmAfMP-?dZFYLDC|s;B`M(L(4$DGVdfG5=F-t~ zz`eWoAFVAyGFEFfZ5>l~?39j7wYf%iSkW zBqR_f=jp6Sh!LHd2ge%0yhN$0AvE3*A9frt{@uITm9yj~IZ zokU;^hAa?CjF1_8X7=34FZ6!H^6pZpgl_ z7S%G+6R0;~^z=eYjQ*`8t-K1fePQeiVj-x9!Zhm;F06OTrmVwrFRV1#!v{91zZpZ4 ztgvdTYTziEp%3;r019oHyH`^%b#)h`8hm*V;EwTAKE3N(>*{u%N4Wi$ghY%pQu_?H zbGOT;)=vVSQ?gD8Y|I}rf7;-2Qg0N5MG))jWmAuXG!GyZC%FPNw3vT9f~Q>1H4k|w92*aEDvP7IK%Tha%{!cDro3TP;i7Pd`fB( 
zKnJVA^w&mJ#5HbnyHU)^W4d|l=|khSmXJ8W(z?mr(O`j1C=RX7R!-8#m99uSCZ_6X z-NNx_Y-~?41#emKnRP6xhGMBD%S=aUVDutSA){-3;Ir+h9CYbVxqg(#OED)-p2r@Z z%Ck-%d|kBjn;3LWiT)_7Ky*Eg9u5HPuA;lq>@6hxTaa<-^rtV8wV8perka#g7#S@Ce#8h54{vk2saSZY^zL1mrGc?jgIWpzZcd_;{%1}p4*L!`Pr6(Tci1$$N-7?BYUCkk6bo_KA7?Os>W zdqW=_Y}8WpzLa1gM7dy(oypy92eJx;7ZCpYNay_2cAg9T59VubYAWpD@FSm*hldz2 zvDeY2rB;%?NjY?a>Z}_FG6u|NRYFSswFBe@&Ux|mklJ0%#fSUt%L6$u$!|n{ z%6bzZ>VJuzqGMzm--c&Udsz!#gCSH=Q2`TNK@h&&EqKpr)v)(EAt6E5dlA>b$kY$0 zQYN1n*chMIC9PB1TXr=zGsp5LTMJ;Ke|BAuxT;^C5wiPUyVs$pkBk8WsFc8PxV3NZ zmeyNl^4RN8%io#uWfa=(KsqKT37+6Hv>vbZ9RU%PJe+_T#+^4jnv;_W!>6LEt6zt| zkL_Kxq+tGH9=V<;hX?^i&Q35P_zKd8 z?^HiBF!AB!bS`t;%6uXk+Ay2tfw0n|!vBbO-tT`0!U%yI*|g@3Q3b)y?eW45UXEwM zyk25y)e4y$MlEsgTo7&UXl!SCSd>nl0vrx<-JXN$qzj24T9;Fqi%|BplTeop-?QI3<-9(`J=o5JiYP*i@N*>Qyl~qHNlI4 zKnpdE9Zvg%~eef1tx6JZ{(3wi-F$HCO}SxR4r)Dz@}a{m~5uT1iim! z<~G-IGSu;L`K#v~XAXtTAX63>7yoH=Wl#P58K|le07^a#rSsP=htWd_pI}Z*PKHOl zuXot$SWrmk3$2-Vd(zw6n_pav1S!&KB8CMb%-&Zrx7%thB5rOx^R63YMOt<5DkjA_ z2sWgBtSSxV2OD>6ftSzK;LBAIgvU34ducjI0;of514u&*Pa31KbR{Mdw-~VLbZU7*%BdMajoLc=^=w78m=l@e zF~NNLt|AZ)8WklQ)_&}4pDovu4#vb8)vbOn9K95l1`cIZi7uk8!=Z{VL0a!hda?Nd zW%iJN5<>t2O$RzhHZ@AeXelvznM2)47yFc<8lV_LmQ@9NtG4>K9ZyubthK;9_Ot5k zw$0WjJthe~YCl%fYl7_1YFC$pZ)tScj4Cdqxp|N%y6$MKab2cle&LaW6PKYBc+4ti z^K@*8xna1;57P$_M@@P@(>mlIdSA|(2cP0&Xn9xGS{iiJQizp0|B3+vo%L&D@q{c# z0XA1xE_%}$&2I10L$?uO_(~>;Brlk@uny?OAm zjbnJR1M(wlu?ec#4&)Y(KYEss=kOJJ+% zOmKM5JMrWtj7Jqt;D2pWnf){XQbB2Jm)Jsi?EI>9zE9M6t@$c#dyy#_MQ5G15f2A! 
zVc}Is4;~&~7zIi&#P(>AF!Zh*BLw9)hS`T6T^$GVs zU%5&kLxj>OoWFS!K7M5MtocTpD1Gkx&z_J1jpdRr@E@~&pgwWFR}NucAhA=pP06fV z972~{JB2PUoJ~#fL|{a<-5Ph1u|kemriB@QVXAPs6RrwH7Q=1s%VSq{qD*8SZypv6 zvS+D4da$d;(~1H$UdUO^CO+{mJJ|Ua+MS)v)>uC0V_jP8bp4LaxCOfEb;&Mj-%(Cqmc`S|8_i8ADcljv;W_t?ekeVtEjT#5IcN@Sj)rZ+}Ve`NbWmFC2PdrK!?laIN334m0$ zQzo(t1Uz!U(L_g*3fZ*g$4ltzvJsV?`$^<`IAoo#80(Kf7k9oXg0a3aw~&oG*@Me^ zt)S_J1emtn6S6rohtm+Kk#U|Ak10pgQt3#h-kbP*xjZ$!hhD@@bgTNlbijS^Q>Wqe zSdy8B4B6lwvEG5+>cbvKFsxe2>b_4^d@klbkMhn=OOP$Tv+oy=`|Vqor~=o}ke-wA z6vmb?ps$xkZzQUNto!P+galr`DH?v2A&s-UGEZ~wrwu4Iolsh8P5v|ZfxY|JQItTO z^>%AQlQSwf=eNDWYXt*bQyC8y5;WyeWn!nKo??HUp}CTcTjup1VYQ;y*#jV+qy(P) zUo55ObiUM=gAEI&7^Jg)k6W{f?Uk~5EvPAp;Y zCnjEkx)=Z)g;y11T5(izd{VqT1cO4%=Bx2HN}x)c9UQlZsCv9lU(M_*g$x3mDe|SF z;7GNU=#$^`>knu9eO=(FZW)`Z7IjC9lR4==LLw|CMn2$irD8IS>UYb3Apyf00z(Eq zllq=>#qHSC;gugpTr%%01S{Tka4OWhadLISf}T}4#5Oj_(dwES1U!)RZL;sJBJ=!C z8D#;9fb_wvZxde&QkySzR`v2PsWfYSGZ53;S`g2n;=WkerjJ{!YyDeZiO4Zth8GKjb?kLo59_ zejsKxe){{s-6{HIR-P`Snaug&zeZwTLZfy1M}a@6gK$W$6G{U1f{`ip*elx)xuxMB zM@I81Y;ggao3rV9<)}uwir0Grll5o z5&o`^nvbz}uW}bwfTcmWq9twUNq34o>Mc{(@6*3(MH|(_`9chna>nAlhI0n;LV^UZ zDS#%iN_LJcv4*KQbA=%<&Oxfg;qER6F30-E-BB5$`bxY!yf-XtY++?(LIJ@@A4Yz1 z6xFkkl$S4!W<>PJpu)dZrJwgm$DKrp)G%H_J*eg@MPeko&#J0?p@JO}PAcSa>VqRG z0V+uNPo^*_a>!X@{pE=xf9w+bkn$-9v^3r!oqHT}O8n@G;Gd`%jZk1TGAWGVP-=pn?K|K;hTBPkE*{+c{*cUJ?r^Bg*KGgh1;?u< z97##g66XozIaMyj<2vVWjz44|$Gyh1hcn&tBCg`ph;VOTt@RAaw8eEk!6L&88;*hG zk2i1b6A39O2>zlcw%vBX`}K?Ds3+Tc(K)&$a-c>l3WcJU9DAv4)LZl!rn1o|Hz6n?WMWYdC$=x8amfgrl(Ecd7rqYbx!_6UkKy}tsg8d6)-^$yR`^I%p4b?0i=H!% zYGe^C*_PEcJA~!zM&ql)MrdXRuUa(9K?_`7ix)Ptzy+k)b1jSri2@Rn`qV#T-kJa0 z`v}o#@0OWCCpjqYcl{McXwP24WRS4WmVb|AfuzRAQXI)u0RuL@v)?rND*9RHIGwDU zMrJy3Ax_F4MpVbK{IH7+d9h=!jtgt^;V$XWfr(Go`4Nr>$}Le*hx^?r%_mp&_fkF3 z(9ni%w8ZwsN|Q3P7!$P-5=A7SW3^;&qFCDZ=`d@NAnRv z<*Pv(+0~>SGa5Ebd8EB=&$$H5Wb-?u2Zso)RZW*RvyrKPnL7pT@FaNrPIJxi!{zqQ zb7U$pok4(z4wsZ;I-lxBs2qi8%y|}kv%957W`769#wyGHoLN<#F`xhSnumvw;hG# 
z1t^n9{1g7OI;Z>vYWHAQrrwcjsHB`&Nvj@&xIb>tMd`fI7UPK!Ps8LCAm;%vr6r>%r-Vglv%+k(dZ$q>`xh@Gux}k;YRAY?Ii}=BIKt#6jGVQd4 zL?V|1*a5}g^^I6llg^nUP1f5d32M(UD0vlT6_`+t@rhcb!+oJT$P$rj=Gf^7@O<9kSjK^b zh{!kb7C#m4;e1cqLz{ByAmP>D--$&SUuPOVcjmPfOBbatf(M=#4XRIQ#yE zfz+xnD31t2jH2Z6 z=y{WUWFccejfKx}&-bbjUDDJ3Yb13@5y@zZm*IELQ;8R)(1HSUCIre^@?$p5yQEc}V8+?N9|8S-UOup-emak1s1dzjtLFS{vh27Te@cr(oEDfz6R@fyowO!cl zly=;JZQ`9R4{Vr*@*f^8eR(U{C$%l{&Z0EGxQ-w4LIp5(_ITVGl)Bkd@3TYSDKc+^ zciwfgi{s9My;8hpDUXW3Qw0$H=8IW7E)*!$xmt`lLwOtsZ{1vt1RM79g8*N|hR&tU za>m<1hjh8Y#^nz@ zN_=STb(ZucT^)IN?x)1xoY+2)uE!gn50ZIMiAn>Qcobe)^SMxisMLuDW6e>CAd7== zHQx+XTt$?QExw`5Zp0P8z)>IY#aFsyS?wraIEW_~56P12xO;mB-Z-7sP}#X^H<|r# z3{_IHKNocav!b3n;#-NAyPWTd@G6ZiOuQ}bM4ptW&cCPZg8_}RoR(13e@nkGbI1%5 zCuz&2UqO8O6PUz;j!UX*SV3sgEH9!TZg@ukN^YnuT~P6Fe*fC}$?{EQIS%g(O=LE~ z6%vOvcX0ObNfu7@d91xrB?a+#@}xo=PhZ@wYMS+hq;d31S~FK|!sV>%SkjFqQ*2Ws zRVJQjs^WJJRPR;IZd12Ko@Ig@G)T5pkYJ`B|n>!74i5sun{slx0y`J z8A+8@4IsI+1!0-NjvGWo)c4g;GM*;W%8l5cE@Ad6TkxlEjbQU;nt28H=rSEsRx0Hm^wLIg&f$FZIXM zEDTcH>$@qRO-&v>HEdRXbqWZm&2osAe>;54_wAF*m5hbXB=;vHlY)-EoS;r~z22FW z6ZctOVG(AaymkFH@b3mR_E(AQNJtSM*RqUAc@kMha>e!S_h+>t)|(p1!~Rb}|+~#rC5Yj{3PIE3+F31wSOv z?*^UhsZSbXNm~{4me}8)8VgaV_*mG*1#8ugc@Qe0BZI{N&7kc(Z&S#&{~cnY<@OOX zRRcjh#RO2x*Ejb1ba1W7@^~`qbm)o1J zHzgm$3Wen2x7cauov#=5wdB)ny+3N3WQ#r%lEtrhG+f&I%9KWuKJ~fX zFRx)`VexY`KRMbgwU(53ilc5ukJU7>x~R$bN*zGsViKl7VAA>%5cElCvv_u3z3Xj7 zZsTjTX~m<`p+KTNd^jnIcbKboi(-nqVw$2dSoJr`{azd5+n@c#Dh>Z}Opdvb4!xM{ zW^UA!mvc^}RNT@^nm(HgIv`x8O1(jY1CCHPgTcS0g}Bu6f&2DuEb0bW&sl>o&$&%R?Tbk=X4Qq)yDKcA7mJb1K}=_fgfX!ke=lD?RE3U@f4jG-uMkVLnySAZ!ujyzhlS(j?F%DA~$lphSn3}m%0mo?1vtoTIz zDQywvrsXdY@FBpru_2X`nCzUkhrG|_L#F(QkphPuFy%>v#?T{@o>Qn6Sj_~qHYXUB z5}g1w8UqP!kTlMj8*Cz;faH)V!^*y)O)P$LG9`6}(dR=6iOGw-_5Z{m_1i8^0l-Z{2~~UQf%JBK{jeTNKaAINE(W3 zr=wT<)SN{01b^Vx#R}~V0~wA>iTs7x6HFozt+Cd$!5WrqZCoETbI-TqSU-P)+t;#A zHg^@XDhlWam-yT>{8ux{U7hN=`B!aEgDa+UD`LPA0bAs^%|({d?=@c3m-oHx=~4CW%w zJ5SqmVQ|}n4;IH3D!fguEZ+?_F-Js4@)cuV|Int8jAU!a%Dph&_7Doyi*(^4+Coz_ 
z;f1fYLY1;KLRh&`M*awrL>{%+=6$8=9)IFQ5n?9qkBQP49jC9^!aJOtoIsxvBPB2ilS>6HZ2MtJThv1Jw;c5 zAt|pxd-15ww;4`1dq!5x&;d%MayZ?(bb6J zC^zUZ`y!7Nv^oUo88bb@*1*jNrhezBVy@P+zK0Y~`DG1Ccda zB=AxS<9~?hQZpgzH5ct#@3biVZmTvwCukP}A@|6Bc2>SbrZj0uw$6}v<8l+uL=q4G zuH9b}Nc15S@=C8RfU5fK+PE6=Xuxp$wEJJaKF#(S}`+-4N7khhHQqk7qEel zg0)!M#_#NjGNXHo31ZWv8i90RP0pQR9A`(*O*RK!?@7tqI z{u;#+ptKxT?mcqC>$K}5OH+ACOhlA&cG+_S)R)kS<5zfY-)PUdtc5tJ?YM1v!`uvV z)tRzW&GhLY&%4iUEvr%3_V2KCS&*;HW1lXV*BnqK!MPea3IlR5JfBkzEKqn{|2Dix zN~S_HuaRW{`k88HPc6Hga~QjwnwN-FH7k2YVXSLSt(tpP&Di!0V_bZeZLfHE-mWwF zGZv^rm7P_U_vDWrJ#yJC@3vX)c$CIv3!kLp_7ZlQV)^4vqNkaena#M0+K0J%M=r;0 zTynQfs#)g+&p%b>N%^{<h!eRyD_z?}?W}pe zpcxMs2?8FDO~QTld-gIPE&wsi+6!Mz;*Y0?f#5nlrALArQ(L;9C_GlN6Ixr_{irh{ z7GZu{zSH^DNon0Wv6kbwN>6{kn2`~U*UikrGfGMbD2T0^eh4Jc5qkITX}a6?OHd^a zJLJ&XayQemq~LuWpP%thaqMQe3D4PpFX6;>$aF;=>(riD5XN5tYp^pBe~59&ybEMw zvZ^z&DR9+}vRdQSKi{Dql89@FM9;M(iBM5G`?}zN%Y1>#@A-;EGNyrBjl}4v;e+Ym zSS5vz?Uix*)R2UEo}h7$R`S(-bVst+#6v0=NzCOXY8sI821`JR1P_n9qUwat<>l2K z{N&ErpE`S5WE2#|w{P|SDdPChj;uJ7sHOudKIHW*`f(xXXlSsrQNWh9*V~1*+hYZ; zk&%s}?;~rEU6n5>PrMpf6HC{+hEF@lfup0EXbch+M+}lwA01cv;c%HHs4Du7;|9rax6gSvY-=c!u@19^+)z&pn(cP zYj4fWFs`JUj>Fc5qqac}s$gi8{EoTN46oY}>@Op%WX)9{I@6kmz=17nEG$T(bYR1N z{gVdg0|_gulJRCxClL}AgO5g1Fc~q9Q7>#AR z*LirL^~mqxR;fUP^z?eVyRcW8qzl_K$?z7^)Z^w}udLwN!EtXbzmKV3rv#$@G?!mm zPWg+sKTwpZpp;BzUT1!MUqr2D=A4JBE?Y1$mxg0MGHAL&OX}(&DMXJ#B2XpVC7FT5 zd{omy(H7!_c4WI$d$AvBe=YF-{d-UWEC$X4Iqgjc&R8{|gHw5xL;*eh>wJOU`wV2( zx}YRjwOE7m=r)zp9Gu@GSWGcaZv#aTmS<^u<9jBB)kXFq^X-j?Uo|*`zH!Z89V&ll zqgIa@^Z(^Gnw}9>|JESS(Qcw|pvv(&^f4lQSC(&-jznzyL@2x`?f=O2gi_T6n&Go&C)l(*2dXo#N=NlD%tc zO14rE;8#Ml4{nK_?+obWGYYzNo%aZeok@r2DjjTjs`=doMMO6~O||_~Is$<(=r)t; zdf)v$Z9t&wF_L91D~qO~(Lu3pX=(ZPRU#@TX3ZuhNplk*ECH8RS_FJ!E zC&6RcY<)Xr&VZ0<5Sq_@oXgY5>9!;Z2YKb1AI}@`U@6K-T^5m`_)gZ^#;=3D)Y(^D zmalIz6(gyr3ucxU)QW#oQx}kI&@iqsls;+C>US!1eAaW8s4On~m^i|S{f2dLvfHO9 zfeBY7)$29=j%eBQLOfzl@7G$s^0bxtOYLiFqxOG(uKgHO_qdMkE_+yf4-N|xtBh~3 za&f?W3v&J*UAF1%3@^7D5NYsI5XSmCuF-<<9#sCf-0fLsRVxVI;r1ml2S-Pr6<7&l 
z!HRYQpIAmK{ZnjSdyhmrr3)fr2e(mXq7>P()WJl)@D_>SMkF#WZ6gi0@iF|6+-gX4 z=?Y7h%HkJ(?yPFLg;$O@Pu9+@&C!`P+2DV{L;1w^+~#>^aDs*B`62BQxaOc5TbQuMCNecPT3&IS;FB|CixukP_hQ~ zmFBJ-eu>|iWa_xCI!EV#EdD>MaVLY=OlmtXfm42+{+I$Gya$aql2N2>uv3EPGu8@e zC(xEFbg--gLBT&b0@`c}wqdIKTW+TU0=;aZy)ya>-Y<=m3$)kQf8P?JmC<2=-N>$=L=@vZKLb6mx2RvrdVdD0a&8ia4*rz4XYGOGhCiVT z9{MNA|E}pT>JpwWEK&ue7x$~-2o-aD`dH-eZ5xM+6!;OP97U1R$Hj0*qHCssIai$C!FQ`HkGBF-HGTkE-NyzYjG7IRUAVeH^4;ohsNp(tCF5 zVJ4|MGQolp-5u_pgg*Td+VC+qUe~if#2s_lXF%#a1vxkoUp%-%iD%>P!DlvH3z?zM zylq4B9nEx5l>l8y#NxCkbYw2CLR}>G^ZeEih}{Q?20924`H`@rJ}O%rSqoA6y0Oil z?vGt&!tbDdw~~8y_qP!kiV!xg-SFv?HYu4hoqZqDp<5oxX9gjCWyU>Ba6QwUSC!3& zzazJ7Z^ejC-4K|kd-jya0z>V*=<$%h(VhX;waRg%h!7IJEjjuPyLyGLq&nfV!1 zPv8bn=$iaKsvIEY0jC#c_5@EwjFP|LvwOJKY)XX`AzJPhYTc}lc*?oVCNk4XoDn7h8@`Dyf|%5A`x$zsA$Abvwq?OQ9{oCX+NS~`J)%$Tfyo1O;+_Az8e7Q zTc;0wPuQo$;iMM6K1Lr9mU0@s7D-R#wybCt)>Tg4d-x^0FX6M097%1YK!b2h;GlOf z`3A=l&~}cR8)~MoKJT1~`C{xBRB*srK?d|F=*S38_m0euCl;a~E{6z!7Vo=T^xVSx zNbvBg+dIaLQ0zo?fz;qAb0y+zd6fsGMe$_}UhRkuj~7-04hp}r8@x499{nyl?FYsT ziqvx^GsuUwWj{wy=0a;`61&5#gm0?&$UTz`1UxUg&Z&lV8#9~!6fIJ{frWBhdWdbl z=kKjH&`HUCQ2l&b_NSvc2?Tvm?pIvL#%rk9so|I{Z(g2zozrT0j!R2o`u3Fcv!{s# zq7QPn@VZ95m;xS!W?>|8NlX!qnOIwS;VZLQs)(57RDt%}D;{qeB`^u>9R#Ic9vVmG zO7dzJ2*!<&Oe;rumh!3}p32tsvuRFWoeRzH#w3rgsaf%eZZTk>3&B#gJ4|;k_wTkU z^Gi&H-84w6`RysrJ@1V@Hn!mN;u& zNFLEfh7@gWt7J}C&@j?Ajb;kUNC*1ta@b}C03{JrS^VMh+a2&YZ`Wo2m55boFbP>b zpdf4SRJ%nJtiO4akO+PDlJ&7>MlDJ3xj-OEsV*jWyh=O?2}ykRFN;)u>XfUiUai!)ZD8tx;0+n-5Eg>>L} zUpkXcwctS)Tv!n>k5N@3sfM@Ykb(`cVtQw94QLq4yFVh>F4dVtL><`=(2KQY43|7 zv@|VNDM5B%m}`qb<&6p=s6JRK{V=@E&`u3JQ<(L0gXfM74sBAp6bKbcytdWP%(9B> zsR(5%W-cpN#frapWi4m>*bYTS2XGp|-1O5)`d{m+8e~C@rx;A=PLwBWYN4!m3{QDb zoE`~1U8b%*kK{`Q`5a1b#*ftRFGDRf(`Wp0Ct5=r;@$j77ZSu^xgFZ*es??-2~cx& zE=q&Lz?M{(m39iKOiGs()+$&P-C?)*fA2{Tf6A{~*p+6d^vYT|82F8m>#yIb5K~@i zk?*3mQ$PLzIPo+HP^@-=JkNLC$S*It8W$9li0{k?21@8Kn&5{Z8JjX2SfQbGs4++A zH+zZ^5X_<(HYV}oarjg;Qs{ZH@*W*=BKecJ2F2G~5>l>~bofTqqI)|$DGiO{Ai=7^ z)3H@WU|20$kl-~z#bIZW$ 
zq29fDz`>~fbZlO29(%=)pK6I}xp%tJ>DvKfxU|CZTW``;!R>F@13F(H2;Z%^i(77d zZ)eS2v9x;0S%vhqNwX0VD3G6(m_=5ep*ZJt;s+Not=X5K6NgOk?EuV{V(V_EGZ@*5yi8OLR`~S{2?CFARI8i&)uWQtygMO3 ze}y4LxoE%(@iLOfQ$mZTl0jo?elNJ1NsvTxSc{_yS;JsBJbK@GIb?RG5g{f+@bu|m zM$J%HA*fyWSu%&RYsd7=okgLEgovlZERTc!aiGr=yUxx>H^?eM?xDwr~_(;?5 zmcf5ku_*wi6L>JG5c~yO#*zr1`PTPXFM%qjG3KovH;$}E=lBUoEwWK(OvMnJgqAq6zHe)bh<%CDnh}Vvr5M z&&(ezC9Q>&8`$$M_XR14&PdDN#u-<9Q9BC?U$9N#3~OFVSRE*M$4i|XpZ0w2gV@)j zeZ@8T&_;JpD|F2tA}ogO0=daE6;G*`Gg2Q_ZD7sm`~1n_{ea} zPlxmCMzSD1y%gUfkL*JVIA}gG$bOHDp6IqF<2UP{y?rNiL(Z#Y)z;aC-MmcFxs z^bPe(UO&E3=V>SU^%O{W?ru9!3#zrk;sVgyuF| z%@7$Liy7i;c)sCTQ+=Mknwj;1@#@{8vlI-67e8AGcCUoS^Kx|9I(Hn1u;$}}gHDv3 z4cCE0M-lt*nW?ryDNaoZtHSs4Rh?`VMbB9~?gmCD2H3gjlW?lHUr?#{u1>sUY1UltYhz6M9hMHf_89{oS%m=JL;8xo zp+&QzA;GYZ&pRHFnw5PWZp#yT>Tua_@`>$Ylu>pl}elL<4@h!(P&_ZS6}fi!YzKW{k+YSM>4&o{bo^%f^^Z5-sh#O zd=8*CAkqW#Wep|1lpAEd-r%n?a(*lGp@U?)c30mUHM!6Gebm^#i|6>9^~Lyr0gZw9 zPxZN8$^h@pvd-*Hm*|s@2OGte9$v*lfqT^JQg)wH&Vob8rQTq1I_N}wM=QJM$r@Ke zzyC;`Dv+T>rc}{M*JIx(#jXDv)&SM$nfX=@pu- zG6(eT?+Fh~3AlpamHdMve+N~8Ig+Srrq=iv(A=A9odyAjd$7I_t*S&ZIeFU!ghS4wk_jS z06UNaN0YPLrEnC#!8L}-?xkypo}vOmHBMAAxw_SNbA}%M{4H2zuqPU`zHL_wq6;OmpPY{(KbFb8D7!> z)X?};_&vpym#yro($HzCW!9v^UuVSA#_ z*T%gb3Ed?2U0m)wqTj_Y<{VdpV1H$W&<7K67G9$qh^qZog{YF1>8;Z+pAy$!h1L7> zx>{(Lm1OR7nk%s?JiZOcK1kK!UXit^8FR14dQO26@6>aoo&P(1Li6O$qX!?J8Z&?vIeTtiAr~ z`c_ad&-=kylsKfz|F)}qRx=>~< z8dX{HrdllV4Ry6QYPVQD~a{Bx- zeyzWq&oaa5alL*;8$s}rECGAGKC|orQ$wU8&Ry-qkCWKH$)+vDwXR;NO?&=1XQ1zD z@Gpso4>r;9>u20#q2WPz z6{mtm3GA2=7$s}bM&gP6LA}0Tnbq$M z^`p7`RQ=4t$U*G|YMUqCq-0#95FN9sv}3UIDRznqty>jc^H&Nlp;OH)pLs!et;KCq zdn3S<=wNQd(z9;jFZkM;cUzum^{PD-hR z%7JsWO~I|#oA=`gGvZ^em7Jl>HB>YkyQDXri<(iikx)S-tv_EhtvT5DneCy|3n1LM zzVGSY1R0m4TRWn9FVD4`xBUWBuJOF5Q#F;_K{`3Io!0V0{NU`CXn3MMeQGMYpyzp? 
zghs+x73IVlJrhi#&`-AZ!fI zpcH}XPURspNz}8lI}ytdFYVxVPodnR)J8?3(Vo9&zlz!TO!!(n)#vLaUZG4AkIbV; zw^dCQciET>X3F;bc~ZMFwLN)D^o9K)_IoQOS^9i;#M65%-6!0tzN9r%oP&c@TH5y1L=p6JD9TQ^o-BYEiQ>?mKTq4@9vif%^CyL-+Ys__X}5- z`Let!`9VLTenD;ldJp2#%%VElI^~r|8W_QRZkBKyQ1KPJ3J9n3iJ^*>d;hl zD}#(p-xQr;Stn_bjqSm8Y~B?15C=ecMIPm;yqLZY*^(4Kax$hEtzQ_cEBhrQnu@LI zSsiOi${jtJw-N+t0hD-^8(*{_*8cutHYIJ(Yar-wKH0V#A8xFO7 zdMrUs>9R>ZzTn+cfn{hes~dbnv}{n`j?n_+FoHqw$?48W-&OxOJUuB?DaD}0mS+-X zlCsDsVurP6=2^LK86C^ouSXf+zG&z|JsvuyIUs3`6=L@oGTR6lZUfM=XezP&5~*qF z?Twpt1jk7&mrxL9-&#uOynphr&#VUp11K&#Y;z&q4p-JJa!>7Os$D3|H0f*$T5Xma z!J~O?c*5>G$cUL^zPcrtdkOPeD>uS{7@pn1Vb*nKYEwq^)7J<=$+84GO*6#ie3Ku7CC_&%cyGOGd+UyW=b8JG1es8q<~~K2}zO=kt>KkViS~tk4* z{rKo-R8yDmf-jVU2&n1P5>MbH%yl|QHme%|v`nLqICXsC@lQf z#aPdUfA94(2u|*R)#bSJGw>U`5J$o8loo+Bf-BBpJ>yD$I*LI7AYQzIhem|N3z3Z(gefAAUq1QsoWRTL_8rBDMts|H zH-|anDmi^=1TuhHdd1OaUgI$qx!ws)5vd^n5*yh~MNtzBl=(=21i;!FGRMadOhy9E zw)9y-98GF|u#~(b8NX$(J|}PD(G-4JAs(bvB!o$jnV^SXRMafA-5-w<__k=-Z0$7R z-bV;Ib87EuZsgu}Njf;Aj=;*9w=lzy4um0qh|=MA`#UpO4%h6_rBSj3++)Or796c0 zW?Da6S9U$f*T2Aoa0z0ZpIf#sI-+A8%{P&Z9&YMS2XaZCF%1kD((s`Feh9t!KZftB z`{Do9bs^*Tch?1Z%`zr=)g_ zlB%|upY*HOKkcDw$_*epdA}HbtGZ7I)kCGx_uBUF7O1I)zj7N+6_S3@;!WS*jLrM zPmVMIi)-R!Q@#em7j@0fu$ZU?&J|fV8*+aiQ#7i<8!GBlOZ{akUCsv|xw)@Rk+mS2 zw^JyW1O%VYB`!-g$i<7VbUlp~!IhW7UbINxHoQDeBjWV?v3VJVJ@~cR#9_spku`))GnRm74n0A#+fC$#OSyw+pnQ zpo!C*FZX%T_1wwa0|JbrNX8ovJd2)ali<}(d0c&xz*YZyIGm9^!QzPK{%?e_fjznC zpD$#?)2Gvw20tbbH;n8u^D5T6kAvzhaG!5zaWuKCW-!Tsds4Muh(Mvzl1gQXz$${8hdWi(CXg1%Ogx z$KiM9g2+7xr$MI``EO|E*8fNJkzk|V{L+-ESoCe~|hmzMTR}if>gTLdG5tw>88_-aEncRJC&9c>@)h#}r}fEuLE4 z?H{hj?qz^cjB6c|dbWEQGDd1vyO98Vkq@pU6*2(L{9kxI)f)!_4XPTH3sa> z5su{_Fzb)I@z}M`H!By*r;QGjme4tY zJMVk=!CO@aDyD6$(*)=5@m)Z-4t* z{&@8|y`S?_HnOG%1&DV*Wyv!HaVP2Xs(9lvo{i&cP^`ngL`~>bK>S#qp69(!Xx2nJ zAmhmgv4rFG?>0f9tzm}y{5&%2aIN4>$g>pPmtKnC1+)O_o^j>cpOsmuer6k{oepjP z#FM%z7^3Z`^~W%}6D5LDDp9;I_fXX_~XO-T9EeEiUxOc39cgc7CY2m8|} z6krnLpgEGcD!b4s0FV{nlmuhd_dBBjm>nJ0w-&uNb;Z}@$IeUx;k#?Dwhgz)04ztk 
z(D%_lq`drrPw8_D0t$ik=4j!btzcA}CvzrlOH{u*93UsXz*R-WG^UD;Ixz8+ssXEC?T{G92^iB_UbmtlU)tmjf3mO8H|TdOD3 z-?8k@safy`$}8POgt9T&cLZE{Kpi_zzJ?PLqth&Z8PXwot(+BZK?>nN82R|-wx`Ac zJe5{Ur=eRO-iUR#Ro`eV;pX2YSLpqL)g8-Bj&hwl+Fb*mzHwMmSC(Xc!zM=`EWBPd zVNmoSb%_O>?Z%gL{8>;K6t^8L0_-d&Q7>S;@tG2n!}A}uV=N?@jOE(l$!xwTgm5Go z{As{|voJ?Rfj_(6g%0hgdj0-BRr^nMpB?!atd&$FeTT?FqWd>*lq;*P9{iuJF%9hG z3%`*KoKfskvN~FHz6E-*G4jUHDs)fndq@c%G=G0p-k>NE?O}=0GHtfp+%M65HTR2+ zN~T}(`uU&F*Q&dj;*&-dmSbAfBI&s zy8I>@2G#|xlgxz$K*x-&59@eBqQ$^EfDt{pK6SANd_SZ;w35Fd4uDYLWN^(y=n;#z zIAO-^JHh)kCZ_ufYevuqUCHZd;)NZkAZ8_}2phTOvX$re24V$>-E4AfylrvLPwH)N z&kl1LEbG&@mQ8D+m5q_H)_-i^qH`lrBG1yu1jiR=2&I&^9R!Z+xuigJ*&s| zQ-()~T~DNxzd9BUxu}?-OI2N^nGMh@P|7k9!UVsvr~ze>JdSHT)j7w}or{$VnJuof zqbI7mhrHzy4uV5@Jwf!?BR(_i-p7B+8GKe#E6-j@&iU2T1WNb%*cV4$^xGu+2K^gp zE)Q%+18xcLRjv7*tQcEV)A!Iy`kjx@JId^v;^qr^n{1(sWbe{AHF>E*!86O(9q8;` zyLsz+1&ol3VAJ0&F3-d{(6`MR?67aO7GM?eBq)lmIT9xFrlc9@8sLrp;qRv8awk{iJuAJ>hq|e?gb$&cUPkB`U5~WHV!29OvntmyuVLlgX9C!=~L% z#mRpoS&hP0f5~fo+*V}adp|})>-&S5?GlfdX4O`We-?9+WcI@b4B`CD-Nf26OSPdO zM!uHXxVl&7BtpQWcLtD3=@)cnlGM^4iD6Jfn(lVsS|~UHLld*aR&ib*l885++Ir&%_^wTOCcK2?^X29tZO4 zC#SuKp1t#j{WKa3yu7lX?3DFnII%djR3p2HJ5AJ(1kz7A4gph+gk)(>l;bAE-e6MXKp3LA%~)- zW4%^XKaX%|x9>0i1PSlfURAXb0>;}ddx9wLwO@OyKr$f!T?cZB zGYF`w5~qq6q-vMtv0TnFnyY>tR_r=^oq67o>d}_S`$uoP*co2jr0Qo=u`KuJ5-uPE zl%l|^w~2ScH*QDOAIeLqW_t{U5u$e@{7dM1kffaLe(XKAe7VNfN}X$;I$2tu@e(IL z)_xGadfS}9O!6HuF?Z;K5Zvq!!oJl>CTBGV6aSX0bA06nIrMDb zoSUCf@AWrCvl}F93Ec=H?%72Pf68V;T;X^LBVGXAV_~;O!`x<9h?gVoSuBEV=H|sw@yK1dAy>aA!TeHMg$Hs;FBi zl<|P;cJ$Hv*%b-@Ap6m9KL4O_dd`QsQTdg;O^e>PU8(t>+4VZ;hrHk5pp1T;JEntl zAvw8YBs$sr&t#4g{UU+z9iXCM{LOtYpa_yI&_4KP_{&hzIWp!*APLuL?$US=o6ewT z%g4Pi_E7^;V=AWmPPOzOlUI{B*hR~5?*-hrN`juKV{&h8<3Minh3hDJyuL6Q@MtjN zze(x~+ahql>24~%9^YY&g zJ0=LblA$Xw@5!6Es2BB1;uf^DLD)YI+&3z!tW#O9qX)W6KjFPjp1N^wCx5VWlI4+h zs@r}pIi&6dpynY<%f@1e9m2De7}1nk=Q3`9=t=7KZiey#bQQM}Udsk>G&)xB!EC}G zk1F3m6IBBA!N>1pY#TB!Lriw>#WeZDId?gm>|3*a)voVLp~dUjK6u?-e09;g7YpID 
zi5o{|bsXjet^gXkr2uz{8RR8r12-*V%-dN@Z}a3FvYaIj{=4ocUaFG>jFz21*pn`A zc7`a%-D&b?(q{hDbxxxjakS*5Pxp6kC}rEl;>ExU*EatA(E>->qoVc3d=VtgDFlKO zu=NNkUjt4$OvzG?q#(w<>@zzKs?zhRSe(w?L1Uj;2BzQ$YJxIAi#5z-diWzc&*%Uk z5w-Jkcy(ew-C(@jMzhqM5Xhtnh>87ueNjD6x7!}kme@^8uOP!R_p1l!Hc?PnmWKKL? zDjh$7ZxuWEKl|eT_=61&VcFt4XPNu~9myB#Y(6iA`qrq_GBQSQtzC|QKG2OVvNE2N zSS#TAyT-tP5-GkuZ~z}e-8 zEwKfOH}W%>RrnvhS0N`L>*%G@c#GtN`?8c|B^&7jeE^0qxI*r`*MI;1&3;_J)r5QJ z-o2(r%HkM^_KuE50Ml>fNS<8+24``^%)b#8A`s=)qrP4z`?JXZ9Jj0A7$E}^5MXhA zay|D_TSJt^W()8GK=adWhWMp#89+;MjEQWr6EoVs{a3BHhc@uZ(RGZ4cg~@lhgL~& zR9BO{Jn!k?|1p{+e>+UbzDK7ICAKh3Lvfd$H*s5B(w^5LGrx8mAM)s&@pm$(*3Foa zj7!poE1nMpAm}4-~fwsR7T~1n}_s+*ibV`H3<>8;jxwbuU=m@Gl`5M^5M**80>JU zyPqPF)LEz-q=Q9|BmkF^La4Ek{Q9!Sxjt1(1uIhhq`b70UPSw}k>od zOM`tZlz|EK?eJ>Gu*1d;!m9FxN(tE{l0?xbq&M#J6deQlemCqUA=lg#emt~!Gx25X zd{>8R$t>Y2ZXe*=Gd|w08HQ2|TAf77SyYlPUZ=PWhxE#-(>ZO11HhUy8Dn`98>nbG zoAc44^77b$)o*U4KkINi(kC2=y=P`-7C7nb@*ceONB6T()ucAvPI<>z?zrl2lN;ag z)w>p%2?+1Xv!DShRM;4l5J^vX*YAHWK47|L@Bf@>Qr6ZTYk_xAu5OB6!J=&Ud9`vX zQSaN2kB!>zEv^TZ7+-_di8kvhQ$>xSof(r||NZBpGJkV|v+8^Y}#Mzg+oAl5+;Xd;&@Ms5tX|zDGoX{XDCj3`yf>{R0eo4@$VR z&?IzwH~A&*LC&sP*5=2RMo4>~?;F-i2eztjTT|G%R+0j{Bxoui^3|>Os*na}f7I;X zVeGwcyg-h|X2;{(qT}^9IM?3{C@T;VcDEEykhU+r7k!D@=+JU0!b1{@`qC)c*V+!C zeL^>yXYbRhp^s9Ilw1C@lOQ-#0jvZx0L2dIcbQqs2IorNm5M+Jz-wu7(3W8P+c~iT z(cA)rroa1cM9M-1Y|0S8+Jn>n+f~jFfZ$cJ)voV5J7tkFWEQlCOFt|4`$LZN2!;6&c+A6!okv z%Eo26_?IX57`4jkF>nxxxHm69dDD+4Y$iEz5BMKmcXQ6%`_z;%EP75?{E-8(@nlXL zFRXa%tqQocqx_cQq}TR?!OzZ4r739@YjHJR;XeH?LRH&AF(H7ku(4?YcGYMgweRGt zz6@;ZU(>`r>|01QK;bMo8Ki_H`z3GPBBArzCEWit8OE#_H`A{`*8*K1;?cfj2pI1O zpyZzeDWw)91H4fiUd>LAsAiR8_Cg^%Kr|FfrA^AcPu2`iNLfqark0C_EJFDY&s=^g9kPuZ@CHq;rO^*Jy4wy=GQ+LRI zcK81qH9Sbv5Y~a3$BoG9*+44L(Gl!2GnbX+1h=yxGtb5I39)GZQ2$2I*pBp_h>BjW zHNd@>CQeuNSO-nm*m=2C0P9?{Jl@-?$x<>TOoig^B;JXjjrk?noYC_PNEyHWQIN3; zCn)2Ma(o~(BeDTXsrCnJj^FAZdBoE}S9~UT2n9j)n80(V20ehNXDug7bvHta#)~O; zx-@|G?Oh_rF#SPspEaqmK|7iAvY*Xv8lCj#yc5EOLODvaKHUd(TFl2tG=zZ2(z2n* 
zQfHwIssaHqw`G=~FfP(v;>wP~)r=cwv!o*bE%%Wol&IY0Sj(8>TkpzDs+Axa5l=Aq zQXGjTB!n(cz6chS-x$es>3Y2`(DSzHEfO3aa3ypsl#OP`iHAh`^JS^m^Fk-Hp@-86 z!R=wt(9FQ5>p5%xo*jp2?8s!FZyGYWk;o1e00BdKi05p>c+!h9EOc-Oh?N6O6OlI; z)wFKGd2@Pw8M;IxSVaHmD&6={lBWepBs0%EQceAN;tr!e3;ztvY2famb9AhdyF6>p8Wt4Dlu$|6=DuS2a+@~?< z8Wb;Z3IC_LrWy_c1^WhUcPyn6bqEjce4g)Zc?SCy^;8~uW*J8hauVl3l_@$t44bCB zUFk6ZPo`d0k+AZopQ+@wMkRJ7T}hK+p(?~R900zba?S{Vpexu#%|1&6NDg$f0#eCE zSW-9;kU@zX7#6G8QVJDb-khdQVu-t;8#|S6e>_4u#*DgAQ2M*hf-L>LO5Af3@%V0T zO`?wu{o5~IljMru(>CM+Q<7Pp8G47R)-0!3qS=t5dw{w(`D{zp;!4O%NZyG^)#J|qA_;ISXdXMp0 z0DlF@?olB_(_MFpo`i01*}yA|68FBCi19xML`=%F?d3aXfOojkV2bSbt60sjh5r~EN&zlr1kgc;NkhBXkjFThT zsUCl#M-`eu=x`+{d(t=aL&8{&?xYk04SkE!|!J+BuyWrd$j{*dsrcI{rdy3M3YC?ufU$Vx-tC00;h2x zMF23C_Z{9j8;QV!am>TvBxSt-Ts`e1e?b!>Ub8*o)*dAYMWX7F-~{34x0&vhJ`XXo zsyh8vb|I+10_-LX*QIn*wjg`>Y>=3B9fA;>7?@(#{EJh0%Xrix6?l3)fe11@u4T|D5j zAS^>h-=&s6!?2*m;1HFbHKK$r?ZHN_LZDV)2}Dbmdy8Lntt3ns9;th(NoF=x1y0i( zS+%mW4W;K^@_VqHa7z`o{9M*a;{JFGU||^kb=n(5*+l?x~+y#7y98aC{owdBuez^zN%h{o3sDNnATS&~whclSXBLn0vZ_a~^Xw+Kav0BB06IZ#%y2VcstBv{#qn z=I4PRF)hd(Fs`ZGH9_AUyv9X-5!UP8WUeF2kk*E_R?(0Ua#`14S2|@?xrOjY9?A+i z@!*~5{`opytY5X}OgFgYP99s2lW4_j(8qU7Cn$t4h^*vr*1|bO%X4APysKU2PCX)c zL^koA>)-(@R%rSis1Pi8YauI#?=WldCSIL`e# zySaJzg{eTQAnNv_Wc^%vPZOdUt!fQFD@(H)B?s@j6p{Jp16dY#@xCeBGhlSmv)((As>lF=21y{97;Jas? 
zCf*BmaU=+RPt0czlE}q9H3Mq0AJGZI{VXodEf12m6AfD-451 z|3;%v!9|KPpY9eGzHw0uoapzH^2&erm4&sl9dnMT#pa~`^ZN6doAbSG#$*MUP0Q+E zqjV=iQ5$;oC(1W>j#5j$ppx_E-fBlQdtI1r+#vj5x@mW;s8qo{p1&#CN~Y`P8HZV7 zD9of;tV&+@{R~s4zt{Fj1vTEMv&e{Pd3z6INcWnps59!yYTAi#ZyKrOgFC1)UzfZk zUXxzzEQ;#zw!AFD1UYD^C#KX=0jHdPg^f``ULeed){Lm*W62YN5^2gD4HLc{)rCw zAA)+6@lxT1!vf^tk^1gKho>taRudjuy?jS}H|YL@CU|lkV5L8>QxceD4?emdHQ_0h z@p|8$L4p(qH=e;fu{l z9AIYuU1e2=d-}7Yp}WGdH%QQE4mByPj4aH~dgWO|hSwlnn-=c@AX0;aO}nP2i)}^& zYK)M>?nFe5%C5s;D_WzUqSZa2`%pZ_YW70qwip*yYgOfnF#w|>;|#P?r3y>d zbJQpWQwsHZO*GK+>Se{81Y(c1XA&hT;!mGcguKiZy;vc#F7a;+Prp1v+iFv2>3y>E z#At~B7Y$>BX0YR*_@VS<%hf3mF=F;f5qFYAx)=}vCRDz7Ujiy6@9s>*#aeac#l_f7 zJhSB{$5p45Fi_eNE4Wa*ozJ7HLZY+bwLt8A3lwqc>KgK&BSzikyd8z3D%uZkrPaBK z`Hl7a!R2+oElXV31R%+t+d|nkvT6*fN)a>Ip)p+^wVQT`Ho9;_7#TbMyOIcHufngF ze~Clg{RP@Zi#z|JUDPgqvESj(_nsx5OT_CuzuOzo ztMLYrW22T(670*DW!2P(3u^bZ-^9k=_n2_%6%T^uc5{A8(5yiUWsr&2L;j)Nm^Z92 zI+{=cIO>tF!VHi)Te5@(^vdNlkIx{2y7Ej!>~!nv$mgeM_wL=B+1#|-ORyO)e+qOs zt{_W;{0gK^Azkcc`k6q0+=K0z5ADUzV8&t%*?Ce=c?YTmBTn;7pFXoncOy!HMnmyri zDXt1cfDuxfnhD(!pyy}EayzSk=5TwSoQf*hoQj-W%F&Vc>eZ|Edp|Vhzkhrr&};E` zVsdr)2nRWl!D8tO==8qyGavxc3^L5o>FIVQ(@4^lcztgQP~Y>X$3{vv>bWhFghxbV zfaWP8Dm68NM$~2IE52kiM$kScq@oJ@14AljU;w=LjCr(7@ts95Psx1rK+h3{<}00WV8zgejg5Z3n^K|x zZ11wNFQ~X0ucD(M?|Ta}ZS$R6^|F?hKf@#Uwzs8aWKcmj8!5sA(vf52cSa5+K;tkt zBBE>cgCH_C=tdQOt3PccCnQbt}L^Z599Yj;pN3)EbC#oKgNkT)s2vj|#; zpviPg>)62z5<%P#A3kI`1{KN~ z1<7MV5TGRhEtYJ>c*8&EXO&PepgDVue2QSPG*E2uYU0zsFN_f-m<-Y$z~*IOWyPY5zyxk1q^dK=e4v5%OaSI8V@BDKc$re&%HS>R= zSN5=m9l3fKBUy;hrS13W*E2vk8T7(OD(vWn3ia*EdO<;|$h4dC6iPoS<{mRIv5*93 zU}OmpNC`9~B@L7X(pGlz?YwX^GBR9GHY#kqeI`HPWqb67j5%`l1P;W9v|c<#Yt_c( z;Qu&ING;2>zk~3*{pq0t*nSl5o)+c6bn!mKfRw`Fk z%u7p4HO0+5oJH@|vCADb;+)hX4K|eg7YWz=ynn}}&vYg75p{mBKKfDU9S|2snx|cc z<4aEuGGxG?C3?OUBnh(5O1m)~#+ma-wJ<6&C@}@U!CsMfzASU`nD1xz`REU(Jh+ew z>Yi{e-nnxpp}0^!tMO)Gjc-N<1IWx~*45G8xN!sNG|0-LBj{OJS`EuVce>k9-8e;; zGf3v+3z?6fkHs#mUm}!q7_m_=?haJ7+Th#_`TqSo1>M^GydQ`j!;ywmkJr%my0zdq 
zuLJaEFM2@&?E9a;6~Hz`yf2>7ZlPT9Gsxu9*6a=G9Krw+kdq7XSbHgu)DuM2U5ZE*4FFI!@z6A*Pl~WMX|THM-;i|E`ENxENQeo z*Mb!00_hCjuR-`g=x&mEvEH1PAJhQ-8nS-xvOe+>JRPi?8U=5DFJ26X+qKA#Xmi#V z;UrQsqayV}qnnZO;QyUMEp?HLAxCnNt=h7GG*Xe0@b2x|EFc(6{=suuYF$?HeT8|h zknZZgJ~ZNV-u&A){cm=Qi70O)?Y$Ns`DJn%N|Lf}lSg>Muk>;hV2v3Ubi-`g@7wEQ zPLquvPK~r4w2IVmm2cM;MQldcc-rMTWgi42_od$9jK59u7TR9Sad@k?C4&YufTkv| zz<)CgzGOY`VMO!K-wV_U4pp6;G%;&l>&`7;P!@?ZE ze8Kc#@FreX7To)dUe0YESL-~pb*@d}edJ#lsNMsEbjz=NX~4rOa})CP&z-X|8r7r5jhGr8N{(M!5Jw3pjzJqc7qGX4cFP(S^Z@A`*-Yv$>-@?V^)Qi6yw+l zf5D~>lYmJe^WHB>zhqZ`R!%@f#DpCAS6+m8yyFK;;G&Yz`kXqIkRY*Eljh ze7N)B<43|N*ZC`ry}in=+bv|Q$4E=rbh25`1#*nov9Ym8KnXLLHl%#t!&>(0gbfeA zf}9+MJBo@#0@fq+Fr*`v>0@Ks3=$H0u$)pqJIu?U;7>K0o=hRHg^I|?%Tod}vb4j2 z)n`OrrP}$QJ&zn`Kjju&#iJFw4ptZk3y+!r2NxG^(1j;HJ$ch}S+oh<gjpVf1<9Yb`9=m7M$SN8IsX9>*;t{OZ`^1=H|fV+vKnR=XjXN>-=|o zJvLkHJ=dLX!GNSB%GnrY2|`07qW~;gSE)=56S@*FJ3MM3de}fnMRyrz=Q8`51{4M^ zF6GMoF)P;NQ?QL>b8~b4KBx0^GLIj}Zoy8(g`;v;wCjf#Fse1d3S|e>k|Kg0kRl%6 z&VT&);hUbGZvW@(02eviid?nV4uJ?0!j>u7bg-?sH~}(;lbhbGZ{p%`>Q5#&(X$`k zpcZ%k-dJdAO8z2B=L;++B%7onbm66PoX(NeC+v@eaeisHMDS`kIu?WfvEydfy12B2 zQ-9QLXDv}Uxa!}~AT?Vyte3nalx-S$u=r)IfXI8xk4|aSY;_>J34|0og(Ld4mX4G5!VIpqroai4Z4-D-lZ&pzQYZevUFqHDjF0$&P_8=EvIfEUSVYbSAw zh*;?z|J31B?Zfa^O-#IPltiOY^>pCdqbXny+OxiT-2GkZtq7n;C>Vwir zScpy8@c^4XfI|+)Y8be~UNG~+B98Pq&5DYPW3lu&Ud;*c_xBHJD=I2#0&nW(&k|TZ zv!K;kmnzSZpNLK``RAxN0L+i-ICA^pLXec?>IpKhm!^-0DrCJ1YJT6Fg|%NkuDR@| z__=)}{MD)AJos^aq1-M?4n>>mp1)JRv|Al_~#C9=oqsz!!A*7%Pfk-yU zg{tt)(vDNfv(<;qt*vv#ot$661z>jn1*=0AR6vm@Jw(1tl9F`D8Gk#xwsB5LZNj

NV*TYV8`>#iZjOelR?g11ke0JTJ4qn^z;y5D4|8klW?=b4mnMJ~)lhJOly|?B zX>&kYVnqmSxgQq(*dMvdU?zoEnl+ISJ~JcnZ!iHjPAb1SzaAJxqfg3z^$Krg&mHbN zY%D7)s~^-@-lPAe@=Ih?#B9Gqi|nN4#RspOn(MIGAeBOe<+2Y ztq>p|zNtyZ17C~R$;}Qn?KX16HOs9jO0_wV_Da`f)C33(H)Rd9pFD}+sz1Gobnv$U zbdr&oi3(0V{xcC8u<+-dvxS|sOTVYoG&JUC8vRsqe~i?62#Sb^*n{QG%FR_Q#W$YV zo^MlCQ}f>#uMl0hCDi08>6e_GybX$}^Z6B1SIee7ce>5(?Ajn}!hnqm7xo5&Fi%#0 z81STR^A6k~65j$;g+!3aX=-oBhHupEq;4XKPZL+17GpK$LRixpj`!B0)6!l+ssfeH zbL*uJ#%{?E=mXsI`o8*XWo1Rm+B&aWqQEh=8R0(>r^HflegoTAF1aFU&0`0WCiBq> zw2go4HtVu7MaCpw_>hqj5E9(@!hVN~R83ZnUD8VLmV5-^=aNCm`?G@xSp-idwh%OA~KFKI8BSDnkg&KV_^?j)x0tE=|8sSD@C@~^H5 zKC_;N_Vzo5X&@TQ1VqCO3=D?Ksgz&FN;|ug1yzCmGdLLA)x+cK*chcz+bfdyR#sL_ zY-~7?7pZjLV_WJ;#Z^>P+}#-0AbhNnkW`MGU%*rbnRLcA4h^Z#6@mXTGdufI++)81 z3>U-VV-~IBh}2Y?_4Rds$?D2Vdhm55BqW*;?~04B%FD|m=S%JJfwZ^`5sS%P-Gt0|ZW@cvBu3aRnmOFwLrpkfEeOLqq%U=~JlO_UP!SVgGsk{U>8f?a@rg)7pN0gy-Y5HJlFyuxBeB z7jZxX{`;#wf*ANg4zAP)dg?RfY=K<1v0oRQ3K7!F| zoS2w+2CObPcg=ZPB?z@iP2SdqMMtFQE)aUp9PKQV3s@)E*MIwV4}pm9l+)Fv&dbXq z*1UNXHN2Swe~H`{P3dl8v=Ga@1rs+h^!`2-Qu!K0pj9+93O!~?@&s(Y&Tls??*uer zsXVr8B_9|VFfcM=n$UwF7bX!Xa&>t~RHDA@QnZpIojhe_(2Id*mWxj*x0W4$a>j~nZnt#GFrk41B;JX>B~TasIag4xgALTUKOLo4-5Pg?RVFP@nKfD zZ{2#VzV}mB?~Db+$;D`u51qhMg6t&y`gjg5^f1KGGRpAtzqjT5Xl zoWF@OI5KK}>z$rXZ)tA+Oy_<0(!#>xlE#(o{e9E4v^2&OS$X+l*!D{c3(c@w+ZdUc zv_SWpySB7cEg`6F9uUq;MezCl?&_oLq5J}4n`Cfj_&DP`J-xi-;(WV0JBgOGAeeZh zp<#w}U^^bEsJz`h{t=hAQS5c%vcH_}Qw&D{DIHxeERfD;&z_Am1>gp_f$8k__4Ct$ zfe4F;FrgB(HP+>f-xw`XSl!DF8`D+(aaRW;_e<*+oVYxj62Muz{QQ0cc@eYXVDzgEzlMoVe$;sK-1SXfA zpa0G1$jIj#kKPBlaRbSWU;60N58JxZ9(nqoU?!S(mjTjv-i1ATTcY6msI&$UBQmNNBg6 zsBAnw*z9FzXP5YL^~r4*8v2?)hz0?hv1SvIO)xt<2M1(2fr9Mp z;U7PJDlW4cE*=@tM4tEBf$;=xAhBm+YHF(Vi+QI>t-=J+8my!Y#Zb!MN_a!h?jWy_+k=rjz#t&Ub1f#gOdGF!4uJ;Qj16L zv$DQY)6gWQr_<$r_&~6vRcX&qQeJL)KZ*aTySsZ=OG^L-CMH|SJx8XsJ+(9V z@^a*a!-!nn57Dc2FWOn|4Wr=`6H5^m73I5f<%+3=MF&tOGyg7#YLyxCCZeM1#Kfa+ z2fgw{n22Q2=bSe=IY~Q+V7d^WAzf~wCe2`M6AzKx`;Q+xTsEgTwp}4dXN`1X>r5;y zznOL=^nb7|#(>2KSmR_?uQrX9T72c_=TE4sli1naHC|m^ 
z{r2gVE_U2z-2QXTht=R~aw{k(ye}x|>FVwtg}|cul?NDNQ!}$R*e?mSwYBYjQOB)M z^P)YUD_}GD6Ubm&?3&~K^^QT-;yU8u{}}wQU%{Y9ehUB0&OrSAE|Z`5#b5s?^Cl4W z?{~oiDu|2k$zZEe;z<2{se89kuo)Kg+diILCQqznV7^sd7nVtIg0Ck(-MiHI-I6+7 zX=3KPSD@~zliFby+tcV2XzTCgF}BwbDlPf2Id^^X zO{z3$p-At9CPF}@gY-^{|iL7H-H@&fSvb!#C255ba%lFyn&UhQneU=U_njJ_-M zL&e^w;$m6sG@K05RPRVbpswX0{5Zr5^FY zYIbzgJt1q4dS)iw7P?>}I2Y^;0#(+D8+b9^X0N_J{C!L-}qn4-3i=BuL(mm z)YXYVWSCu}VQ$?FAos%Mpb~Cw`%9~~K`4b$OJ|!0S|SfVMZqRHs;+*&bCF?A@bxnm zWo;K^sK9VYd&y@hAeHOKJjJ-&Y@Gw#iwq(iE!y|^;8JWs%5tO`3T;~3m*lnP!RKGE zcaZ(+KDYA%0SE}K?MALhcSUi6J)P1ULRYU9kyAz%T(3**?vkA$Z*{e~6vr*!0k)j* z+7CT=QbpX3{o2$u`g?IKHdIIV?^Kx6FOH2AY)>otA{8J(ne%%(u>stJC5PnP2 zL)f7V&mdVX&7;Fih)81BU0b6imT_a@)mcyi_C8Yc*+_K(EAxzT$6PICDA{%-;6d+Q zfj!zW}$h{8&AEk+iQ-F_FQg=>S{NPq6=f+^WbGWEARt>2bNCrB%Gm*i_OIr zIa@^1xWI$-bcSW~V7A!IhJd-tjXP}FU~+QF!+&f3DpyDTjmDa3vq$6T8x9AMn4Cp9 zb)Df5)w>=wFeo_A^`@BpTakpKGq$1WVuh^_=t4%e3p%vsBsXzfH~}bFtvehe@O?5& zT*AT<75dn$l{Mo0oFnyXZFm0dAS^1s^X0I+;y(X}f3jiW?N)pHj{0OEq@$Hb`B2pfglk%@uM;uzx^e34;Y@Z^ZQ4+|1}6;lTVIdg#Osm^GY^9_2f3Ai_wN_VK+t(5Cq?}I3$JPv z2;5s_(|h@nBu)e7DuEFLbxed(3a0=*$sZn}-8+47M~1b50e~Uyf9-B4c84Pfx!zr6 zBa{93_|$+O8=Nj@PRvTeXOG)MRvRCFme=$N(9KI>hpcdqjW<@-OQ>9awF121zB~t* zU7GdH?0OpVUi)*F@{W~Ssye+Rv4c=BO3?=)g)84X^&qfXdD+uDs+r!Am7W|GkbM;` zeWFrXD;Yo%7Sw=$D2VT2bDpQ^gyHzAe6L-Hd*BKLCG$aj?VLvdeHPqZad5)Gz}+_~ z74&WRzm2MnoT=@5+aBy$yf)(x8WMRF+o_a8FX*=z8_8nzykmtHLv6aK$7ic2VhYYwn5DyM(6`=%2rHWScx{yMn@oUjVfNu!tPQ z<#F~WmP{Zp5^6;Z3=Iqjj$H+)T+eP}9JJrqTZP+bh*ZLcz2ze`d#bvkRwrd@-?MK@ zC6~*CAP`3&8lEayfrZVRg3ferH@La7JkPw{wfyePa&R=v7{*d#mTo0x{p#ipcu|CtCyN;gyiYb~(iE9%hpUJrY z+PEVDCOI@)QY$xGxy2_SAYc*(RD1k8PEJmTe%gGtjlpF#y%O^A=q!MntB`c`-`^N$GA`@-1_<-!VS7z%ns3># zIM0OyRZT?IiS2qlTE{?Fx09iC+af4T=uIfxP|68uZ*2|9Vo~%~Zm=-%p+NtE7Iyp4 z(|xH#?2cYG|IBXn*5lIpNS%W8IX<=mM&M@KqG}P+>{o<`Ece-0^Q?!D?jrImE*4S{ z%Xb@zaGu=&mM&3IQH0$Da3A!zxy~Bz;Gc-KA#neZjQ44*+2UFyiiyR`^KW3eEd1vAYYXbK0<*6}gp_`34>< zB_&8_v;zavCP_UiLyD|d-uLahR@Kv}6kuK!RJwk7S;4n&fhwfEc@d#Z|J1k_kI;c8 
zne~PC?kwXfug|Q!y}9cgvu+6tv>SfgUVa+|*Z~3|%;S~4 zt-cYE|A66c7xXVEnrYfJHQU+Y{&}lAgzMR=5kbJ@p1dq+Hs!xi{egBmV@K4`1dUuB ziXg13a|p6QVyQBSX6pAp>afS!`{$GsC(KXmO(>|TCEh?$6?;`{maE}e z)P4Uvw{2s>;y%Y`>>rb|>W#W#vAWbir74D2T;3r81u3EfS(=A90PI(ss3gmKeU?-1 z3v)P-i&-2veqr`9GAYR+KmY9t0CIvB9#rT_tsWOf-fzdd8+=j-3w?cBM~o@wN{?nV z^N5}UYdiu$XHqYvbUyk%Ki^te=NA>kJK^Y2NmLWA2e&gAGWpmemZhBFael=Zy~d>D z*@t!HRa|2HKdb^o*7&eV!Ea9l~GxRPS#q zeO$NAvd3@dTyhqcy<-95tbBUf)-R2MAL)wPa{0~Sx0R=U+`Zq zf5Qq=>`pYwHn%9o95OqJPyMr=xY=IuwWi4swX2b`<&KD%F;nx9insTzXjETLON-Lm>_DUy$Ul~P{;y!eosv&0 zd+f?x$DL1e@!EQN{G}&j*0o>LlexFNc}TdER7Ps@;X{E<27XLe>)PWsMlW7)WZV}) z%^qtK4?NS3i5iNP+QirfIrC6_RBw|;(R_& z%^VSVP2XV_NjSMDA)NDu+fn($EvL%ilne)CIRqPLdI)M84ZcFE>_NN5o!1n@u+Q+MHa8Aj%#qwDb6ixH{ z?}G2_3Ky%#zT^ncHO*+i(=s@wX496#rt=nCvC>afx%t*IvXKmlc$A5&E9&OY8FMv{ zJ)P~SUpWQigeNB|;SG(n$#jt{QqR<(s)@gEvy-Hxn;(55fx(~=D=P&t#`Je zF~j<^1w!TX3xe{#={!fpm_+N`Ay?bV|6x zvunwooGf2cCDDU!QDTO$K1hMCuhD*TOC8y1*6(T++&)matO1}IcGaYr#wUIi(Q0Nr1u1+j zo!>ma7!+B&8!k$><*s)t0^r)zI-b}sKnMt8m(S9|Y9m8Tz7wD+kxJUIY~te3$&kN>u_qRSj{z;B#x^(TFxm|0ci1)7O(ju8PQQKy2cBmbv-#`UAYG2lK=FJ=lN0N0IL z9NMwu<3cg%dEfCufBv}Xn!O~+v2R@DPYfF?#&}3VXGEDHcOn6uW@*e9>`->%bx=sm zUo4vnInc~_bT@RYCWDEkRCsbE+!Eq)vh90b^M(?4!OF#qthJiv0(6~tpP|#us2F1j zA#LXQb`X@ethZmnQpQ(`dBULgx6=fT_esdL6GTQnD=5RLsVl1ja@14)wzP)~O^-~l zvq!wk*@d}4`p$f0WVv3mw7D>>yB(tkJU($*$BQ$|z-a-}-wiK!gt6<&6L+(nBWy4W4}iv!&^7k}njV9(e(Wj^eKf%yaU#iwRc z7YCLg*tszgE<|`9aWb8}S!)gJEL?{ZprFgkqdAD75P)}kMc*TytneAQh&YJdajyiJ zUhuqEI8j_yZcEFJLY`1JS)6ntI8W`k4)9z8=`J=-VPV>B;BeNr@;u{k43D}QZ-U<= zMykNR>%pWp220yV>gOUHV4y5PuIa=?r|0RZ!J`pVkO7FLy@n1Z< z2UAcA^sV%~;x;2|iopjz 1: + ind = [j for j in range(len(lambda_values[0])) if len(list(set([x[j] for x in lambda_values]))) > 1][0] + raise ValueError( + "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format(i, ind) + ) + logger.info("Begin forward analysis") forward_list = [] forward_error_list = [] @@ -170,14 +185,9 @@ def _forward_backward_convergence_estimate( 
my_estimator.initial_f_k = result.delta_f_.iloc[0, :] mean = result.delta_f_.iloc[0, -1] if estimator.lower() == "bar": - error = np.sqrt( - sum( - [ - result.d_delta_f_.iloc[i, i + 1] ** 2 - for i in range(len(result.d_delta_f_) - 1) - ] - ) - ) + # See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742 + # Error estimate generated by BAR ARE correlated + error = np.nan else: error = result.d_delta_f_.iloc[0, -1] if estimator.lower() == "mbar" and error > error_tol: @@ -262,7 +272,7 @@ def fwdrev_cumavg_Rc(series, precision=0.01, tol=2): float Convergence time fraction :math:`R_c` [Fan2021]_ :class:`pandas.DataFrame` - The DataFrame with moving average. :: + The DataFrame with block average. :: Forward Backward data_fraction 0 3.016442 3.065176 0.1 @@ -389,3 +399,106 @@ def A_c(series_list, precision=0.01, tol=2): d_R_c = sorted_array[-i] - sorted_array[-i - 1] result += d_R_c * sum(R_c_list <= element) / n_R_c return result + + +def block_average(df_list, estimator="MBAR", num=10, **kwargs): + """Free energy estimate for portions of the trajectory. + + Generate the free energy estimate for a series of blocks in time, + with the specified number of equally spaced points. + For example, setting `num` to 10 would give the block averages + which is the free energy estimate from the first 10% alone, then the + next 10% ... of the data. + + Parameters + ---------- + df_list : list + List of DataFrame of either dHdl or u_nk, where each represents a + different value of lambda. + estimator : {'MBAR', 'BAR', 'TI'} + Name of the estimators. + num : int + The number of blocks used to divide *each* DataFrame. Note that + if the DataFrames are different lengths, the number of samples + contributed to each block will be different. + kwargs : dict + Keyword arguments to be passed to the estimator. + + Returns + ------- + :class:`pandas.DataFrame` + The DataFrame with estimate data. 
:: + + FE FE_Error + 0 3.016442 0.052748 + 1 3.078106 0.037170 + 2 3.072561 0.030186 + 3 3.048325 0.026070 + 4 3.049769 0.023359 + 5 3.034078 0.021260 + 6 3.043274 0.019642 + 7 3.035460 0.018340 + 8 3.042032 0.017319 + 9 3.044149 0.016405 + + + .. versionadded:: 2.4.0 + + """ + logger.info("Start block averaging analysis.") + logger.info("Check data availability.") + if estimator not in (FEP_ESTIMATORS + TI_ESTIMATORS): + msg = f"Estimator {estimator} is not available in {FEP_ESTIMATORS + TI_ESTIMATORS}." + logger.error(msg) + raise ValueError(msg) + else: + # select estimator class by name + estimator_fit = estimators_dispatch[estimator](**kwargs).fit + logger.info(f"Use {estimator} estimator for convergence analysis.") + + # Check that each df in the list has only one value of lambda + for i, df in enumerate(df_list): + lambda_values = list(set([x[1:] for x in df.index.to_numpy()])) + if len(lambda_values) > 1: + ind = [j for j in range(len(lambda_values[0])) if len(list(set([x[j] for x in lambda_values]))) > 1][0] + raise ValueError( + "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format(i, ind) + ) + + if estimator in ["BAR"] and len(df_list) > 2: + raise ValueError( + "Restrict to two DataFrames, one with a fep-lambda value and one its forward adjacent state for a " + "meaningful result." 
+ ) + + logger.info("Begin Moving Average Analysis") + average_list = [] + average_error_list = [] + for i in range(1, num): + logger.info("Moving Average Analysis: {:.2f}%".format(100 * i / num)) + sample = [] + for data in df_list: + ind1, ind2 = len(data) // num * (i - 1), len(data) // num * i + sample.append(data[ind1:ind2]) + sample = concat(sample) + result = estimator_fit(sample) + + average_list.append(result.delta_f_.iloc[0, -1]) + if estimator.lower() == "bar": + # See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742 + # Error estimate generated by BAR ARE correlated + average_error_list.append(np.nan) + else: + average_error_list.append(result.d_delta_f_.iloc[0, -1]) + logger.info( + "{:.2f} +/- {:.2f} kT".format(average_list[-1], average_error_list[-1]) + ) + + convergence = pd.DataFrame( + { + "FE": average_list, + "FE_Error": average_error_list, + } + ) + convergence.attrs = df_list[0].attrs + return convergence diff --git a/src/alchemlyb/estimators/bar_.py b/src/alchemlyb/estimators/bar_.py index bb0afd4d..52353064 100644 --- a/src/alchemlyb/estimators/bar_.py +++ b/src/alchemlyb/estimators/bar_.py @@ -54,6 +54,9 @@ class BAR(BaseEstimator, _EstimatorMixOut): .. versionchanged:: 1.0.0 `delta_f_`, `d_delta_f_`, `states_` are view of the original object. + .. versionchanged:: 2.4.0 + Added assessment of lambda states represented in the indices of u_nk + to provide meaningful errors to ensure proper use. 
""" @@ -88,7 +91,7 @@ def fit(self, u_nk): # sort by state so that rows from same state are in contiguous blocks u_nk = u_nk.sort_index(level=u_nk.index.names[1:]) - # get a list of the lambda states + # get a list of the lambda states that are sampled self._states_ = u_nk.columns.values.tolist() # group u_nk by lambda states @@ -97,11 +100,25 @@ def fit(self, u_nk): (len(groups.get_group(i)) if i in groups.groups else 0) for i in u_nk.columns ] - + + # Pull lambda states from indices + states = list(set( x[1:] for x in u_nk.index)) + for state in states: + if len(state) == 1: + state = state[0] + if state not in self._states_: + raise ValueError( + f"Indexed lambda state, {state}, is not represented in u_nk columns:" + f" {self._states_}" + ) + # Now get free energy differences and their uncertainties for each step deltas = np.array([]) d_deltas = np.array([]) for k in range(len(N_k) - 1): + if N_k[k] == 0 or N_k[k + 1] == 0: + continue + # get us from lambda step k uk = groups.get_group(self._states_[k]) # get w_F @@ -126,6 +143,13 @@ def fit(self, u_nk): deltas = np.append(deltas, df) d_deltas = np.append(d_deltas, ddf**2) + if len(deltas) == 0 and len(states) > 1: + raise ValueError( + "u_nk does not contain energies computed between any adjacent states.\n" + "To compute the free energy with BAR, ensure that values in u_nk exist" + f" for the columns:\n{states}." 
+ ) + # build matrix of deltas between each state adelta = np.zeros((len(deltas) + 1, len(deltas) + 1)) ad_delta = np.zeros_like(adelta) @@ -150,13 +174,11 @@ def fit(self, u_nk): ad_delta += np.diagflat(np.array(dout), k=j + 1) # yield standard delta_f_ free energies between each state - self._delta_f_ = pd.DataFrame( - adelta - adelta.T, columns=self._states_, index=self._states_ - ) + self._delta_f_ = pd.DataFrame(adelta - adelta.T, columns=states, index=states) # yield standard deviation d_delta_f_ between each state self._d_delta_f_ = pd.DataFrame( - np.sqrt(ad_delta + ad_delta.T), columns=self._states_, index=self._states_ + np.sqrt(ad_delta + ad_delta.T), columns=states, index=states ) self._delta_f_.attrs = u_nk.attrs self._d_delta_f_.attrs = u_nk.attrs diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index e0ab594a..fa399cb1 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -33,7 +33,7 @@ class MBAR(BaseEstimator, _EstimatorMixOut): .. versionchanged:: 2.3.0 The new default is now "BAR" as it provides a substantial speedup over the previous default `None`. - + method : str, optional, default="robust" The optimization routine to use. This can be any of the methods @@ -79,6 +79,11 @@ class MBAR(BaseEstimator, _EstimatorMixOut): default value for `method` was changed from "hybr" to "robust" .. versionchanged:: 2.1.0 `n_bootstraps` option added. + .. versionchanged:: 2.4.0 + Handle initial estimate, initial_f_k, from bar in the instance + that not all lambda states represented as column headers are + represented in the indices of u_nk. 
+ """ def __init__( @@ -135,6 +140,20 @@ def fit(self, u_nk): ) bar.fit(u_nk) initial_f_k = bar.delta_f_.iloc[0, :] + if len(bar.delta_f_.iloc[0, :]) != len(self._states_): + states = [ + x + for i, x in enumerate(self._states_[:-1]) + if N_k[i] > 0 and N_k[i + 1] > 0 + ] + initial_f_k = pd.Series( + [ + initial_f_k.loc(x) if x in states else np.nan + for x in self._states_ + ], + index=self._states_, + dtype=float, + ) else: initial_f_k = self.initial_f_k diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 2bde94b3..7852217b 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -2,7 +2,13 @@ import pandas as pd import pytest -from alchemlyb.convergence import forward_backward_convergence, fwdrev_cumavg_Rc, A_c +from alchemlyb import concat +from alchemlyb.convergence import ( + forward_backward_convergence, + fwdrev_cumavg_Rc, + A_c, + block_average, +) from alchemlyb.convergence.convergence import _cummean @@ -26,6 +32,113 @@ def test_convergence_fep(gmx_benzene_Coulomb_u_nk, estimator): assert convergence.loc[9, "Backward"] == pytest.approx(3.04, 0.01) +def test_block_average_ti(gmx_benzene_Coulomb_dHdl): + df_avg = block_average(gmx_benzene_Coulomb_dHdl, "TI") + assert df_avg.shape == (9, 2) + assert df_avg.loc[1, "FE"] == pytest.approx(3.18, 0.01) + assert df_avg.loc[1, "FE_Error"] == pytest.approx(0.07, 0.1) + assert df_avg.loc[8, "FE"] == pytest.approx(3.15, 0.01) + assert df_avg.loc[8, "FE_Error"] == pytest.approx(0.07, 0.1) + + +@pytest.mark.parametrize("estimator", ["DUMMY"]) +def test_block_average_error_1(gmx_ABFE_complex_u_nk, estimator): + with pytest.raises(ValueError, match=r"Estimator DUMMY is not available .*"): + _ = block_average(gmx_ABFE_complex_u_nk, estimator) + + +@pytest.mark.parametrize("estimator", ["MBAR"]) +def test_block_average_error_2_mbar(gmx_ABFE_complex_u_nk, estimator): + df_list = gmx_ABFE_complex_u_nk[10:15] + with pytest.raises( + 
ValueError, + match=r"Provided DataFrame, df_list\[0\] has more than one lambda value in df.index\[0\]", + ): + _ = block_average([concat(df_list)], estimator) + + df_list = gmx_ABFE_complex_u_nk[14:17] + with pytest.raises( + ValueError, + match=r"Provided DataFrame, df_list\[0\] has more than one lambda value in df.index\[1\]", + ): + _ = block_average([concat(df_list)], estimator) + + +@pytest.mark.parametrize("estimator", ["BAR"]) +def test_block_average_error_2_bar(gmx_ABFE_complex_u_nk, estimator): + df_list = gmx_ABFE_complex_u_nk[10:13] + with pytest.raises( + ValueError, + match=r"Restrict to two DataFrames, one with a fep-lambda value .*", + ): + _ = block_average(df_list, estimator) + + df_list = gmx_ABFE_complex_u_nk[14:17] + with pytest.raises( + ValueError, + match=r"Restrict to two DataFrames, one with a fep-lambda value .*", + ): + _ = block_average(df_list, estimator) + + +@pytest.mark.parametrize("estimator", ["BAR"]) +def test_block_average_error_3_bar(gmx_ABFE_complex_u_nk, estimator): + # Test if lambda state column representing one of the two lambda + # states in the df indices is missing from *both* dfs. + df_list = gmx_ABFE_complex_u_nk[10:12] + state1 = list(set(x[1:] for x in df_list[0].index))[0] + df_list[0] = df_list[0].drop(state1, axis=1) + df_list[1] = df_list[1].drop(state1, axis=1) + with pytest.raises( + ValueError, + match=r"Indexed lambda state, .*", + ): + _ = block_average(df_list, estimator) + + +@pytest.mark.parametrize("estimator", ["BAR"]) +def test_block_average_error_4_bar(gmx_ABFE_complex_u_nk, estimator): + # Test if lambda state column representing one of the two lambda + # states in the df indices is missing from *one* dfs. 
+ df_list = gmx_ABFE_complex_u_nk[10:12] + state1 = list(set(x[1:] for x in df_list[0].index))[0] + df_list[0] = df_list[0].drop(state1, axis=1) + with pytest.raises( + ValueError, + match=r"u_nk does not contain energies computed between any adjacent .*", + ): + _ = block_average(df_list, estimator) + + +@pytest.mark.parametrize("estimator", ["BAR"]) +def test_block_average_bar(gmx_ABFE_complex_u_nk, estimator): + df_avg = block_average(gmx_ABFE_complex_u_nk[10:12], estimator) + assert df_avg.shape == (9, 2) + assert df_avg.loc[0, "FE"] == pytest.approx(3.701, 0.01) + assert np.isnan(df_avg.loc[0, "FE_Error"]) + assert df_avg.loc[8, "FE"] == pytest.approx(3.603, 0.01) + assert np.isnan(df_avg.loc[8, "FE_Error"]) + + df_list = gmx_ABFE_complex_u_nk[14:16] + df_list[-1] = df_list[-1].iloc[:-2] + df_avg = block_average(df_list, estimator) + assert df_avg.shape == (9, 2) + assert df_avg.loc[0, "FE"] == pytest.approx(0.651, 0.01) + assert np.isnan(df_avg.loc[0, "FE_Error"]) + assert df_avg.loc[8, "FE"] == pytest.approx(0.901, 0.01) + assert np.isnan(df_avg.loc[8, "FE_Error"]) + + +@pytest.mark.parametrize("estimator", ["MBAR"]) +def test_block_average_mbar(gmx_benzene_Coulomb_u_nk, estimator): + df_avg = block_average([gmx_benzene_Coulomb_u_nk[0]], estimator) + assert df_avg.shape == (9, 2) + assert df_avg.loc[0, "FE"] == pytest.approx(3.41, 0.01) + assert df_avg.loc[0, "FE_Error"] == pytest.approx(0.22, 0.01) + assert df_avg.loc[8, "FE"] == pytest.approx(2.83, 0.01) + assert df_avg.loc[8, "FE_Error"] == pytest.approx(0.33, 0.01) + + def test_convergence_wrong_estimator(gmx_benzene_Coulomb_dHdl): with pytest.raises(ValueError, match="is not available in"): forward_backward_convergence(gmx_benzene_Coulomb_dHdl, "WWW") @@ -52,6 +165,23 @@ def test_convergence_method(gmx_benzene_Coulomb_u_nk): assert len(convergence) == 2 +@pytest.mark.parametrize("estimator", ["MBAR"]) +def test_forward_backward_convergence_mbar(gmx_ABFE_complex_u_nk, estimator): + df_list = 
gmx_ABFE_complex_u_nk[10:15] + with pytest.raises( + ValueError, + match=r"Provided DataFrame, df_list\[0\] has more than one lambda value in df.index\[0\]", + ): + _ = forward_backward_convergence([concat(df_list)], estimator) + + df_list = gmx_ABFE_complex_u_nk[14:17] + with pytest.raises( + ValueError, + match=r"Provided DataFrame, df_list\[0\] has more than one lambda value in df.index\[1\]", + ): + _ = forward_backward_convergence([concat(df_list)], estimator) + + def test_cummean_short(): """Test the case where the input is shorter than the expected output""" value = _cummean(np.empty(10), 100) diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py index 8be24867..f2535f8e 100644 --- a/src/alchemlyb/tests/test_visualisation.py +++ b/src/alchemlyb/tests/test_visualisation.py @@ -8,7 +8,7 @@ import alchemlyb from alchemlyb.convergence import forward_backward_convergence from alchemlyb.estimators import MBAR, TI, BAR -from alchemlyb.visualisation import plot_convergence +from alchemlyb.visualisation import plot_convergence, plot_block_average from alchemlyb.visualisation.dF_state import plot_dF_state from alchemlyb.visualisation.mbar_matrix import plot_mbar_overlap_matrix from alchemlyb.visualisation.ti_dhdl import plot_ti_dhdl @@ -147,7 +147,7 @@ def test_plot_dF_state( def test_plot_convergence_dataframe(gmx_benzene_Coulomb_u_nk): - df = forward_backward_convergence(gmx_benzene_Coulomb_u_nk, "MBAR") + df = forward_backward_convergence([gmx_benzene_Coulomb_u_nk[0]], "MBAR") ax = plot_convergence(df) assert isinstance(ax, matplotlib.axes.Axes) plt.close(ax.figure) @@ -218,6 +218,47 @@ def test_plot_convergence_final_nan(): plt.close(ax.figure) +def test_plot_block_average(gmx_benzene_Coulomb_u_nk): + data_list = gmx_benzene_Coulomb_u_nk + fe = [] + fe_error = [] + num_points = 10 + for i in range(1, num_points + 1): + slice = int(len(data_list[0]) / num_points * i) + u_nk_coul = alchemlyb.concat([data[:slice] for data 
in data_list]) + estimate = MBAR().fit(u_nk_coul) + fe.append(estimate.delta_f_.loc[0, 1]) + fe_error.append(estimate.d_delta_f_.loc[0, 1]) + + df = pd.DataFrame( + data={ + "FE": fe, + "FE_Error": fe_error, + } + ) + df.attrs = estimate.delta_f_.attrs + ax = plot_block_average(df) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + ax = plot_block_average(df, units="kJ/mol") + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + df = df.drop("FE_Error", axis=1) + ax = plot_block_average(df) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + ax = plot_block_average(df, final_error=1) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + ax = plot_block_average(df, final_error=np.inf) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + class Test_Units: @staticmethod @pytest.fixture() diff --git a/src/alchemlyb/visualisation/__init__.py b/src/alchemlyb/visualisation/__init__.py index 6955dcaf..6aa4c358 100644 --- a/src/alchemlyb/visualisation/__init__.py +++ b/src/alchemlyb/visualisation/__init__.py @@ -1,4 +1,4 @@ -from .convergence import plot_convergence +from .convergence import plot_convergence, plot_block_average from .dF_state import plot_dF_state from .mbar_matrix import plot_mbar_overlap_matrix from .ti_dhdl import plot_ti_dhdl diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index b74ad336..bf232b5d 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -56,11 +56,10 @@ def plot_convergence(dataframe, units=None, final_error=None, ax=None): Keyword arg final_error for plotting a horizontal error bar. The array input has been deprecated. The units default to `None` which uses the units in the input. - .. versionchanged:: 0.6.0 data now takes in dataframe - .. 
versionadded:: 0.4.0 + """ if units is not None: dataframe = get_unit_converter(units)(dataframe) @@ -136,7 +135,7 @@ def plot_convergence(dataframe, units=None, final_error=None, ax=None): ax.legend( (line1[0], line2[0]), ("Forward", "Reverse"), - loc=9, + loc="best", prop=FP(size=18), frameon=False, ) @@ -144,4 +143,121 @@ def plot_convergence(dataframe, units=None, final_error=None, ax=None): ax.set_ylabel(r"$\Delta G$ ({})".format(units), fontsize=16, color="#151B54") plt.tick_params(axis="x", color="#D2B9D3") plt.tick_params(axis="y", color="#D2B9D3") + plt.tight_layout() + return ax + + +def plot_block_average(dataframe, units=None, final_error=None, ax=None): + """Plot the forward and backward convergence. + + The input could be the result from + :func:`~alchemlyb.convergence.forward_backward_convergence` or + :func:`~alchemlyb.convergence.fwdrev_cumavg_Rc`. The input should be a + :class:`pandas.DataFrame` which has column `FE` and + :attr:`pandas.DataFrame.attrs` should compile with :ref:`note-on-units`. + The errorbar will be plotted if column `FE_Error` and `Backward_Error` + is present. + + `FE`: A column of free energy estimate from some X% block of the data, + where optional `FE_Error` column is the corresponding error. + + `final_error` is the error of the final value and is shown as the error band around the + final value. It can be provided in case an estimate is available that is more appropriate + than the default, which is the error of the last value in `Backward`. + + Parameters + ---------- + dataframe : Dataframe + Output Dataframe has column `Forward`, `Backward` or optionally + `Forward_Error`, `Backward_Error` see :ref:`plot_convergence `. + units : str + The unit of the estimate. The default is `None`, which is to use the + unit in the input. Setting this will change the output unit. + final_error : float + The error (standard deviation) of the final value in ``units``. 
If not given, takes the + overall error of the time blocks, unless these were not provided, it which case it + equals 1 kT. + ax : matplotlib.axes.Axes + Matplotlib axes object where the plot will be drawn on. If ``ax=None``, + a new axes will be generated. + + Returns + ------- + matplotlib.axes.Axes + An axes with the forward and backward convergence drawn. + + Note + ---- + The code is taken and modified from + `Alchemical Analysis `_. + + .. versionadded:: 2.4.0 + + """ + if units is not None: + dataframe = get_unit_converter(units)(dataframe) + df_avg = dataframe["FE"].to_numpy() + if "FE_Error" in dataframe: + df_avg_error = dataframe["FE_Error"].to_numpy() + else: + df_avg_error = np.zeros(len(df_avg)) + + if ax is None: # pragma: no cover + fig, ax = plt.subplots(figsize=(8, 6)) + + plt.setp(ax.spines["bottom"], color="#D2B9D3", lw=3, zorder=-2) + plt.setp(ax.spines["left"], color="#D2B9D3", lw=3, zorder=-2) + + for dire in ["top", "right"]: + ax.spines[dire].set_color("none") + + ax.xaxis.set_ticks_position("bottom") + ax.yaxis.set_ticks_position("left") + + f_ts = np.linspace(0, 1, len(df_avg) + 1)[1:] + + if final_error is None: + if np.sum(df_avg_error) != 0: + final_error = np.std(df_avg) + else: + final_error = 1.0 + + if np.isfinite(final_error): + line0 = ax.fill_between( + [0, 1], + np.mean(df_avg) - final_error, + np.mean(df_avg) + final_error, + color="#D2B9D3", + zorder=1, + ) + line1 = ax.errorbar( + f_ts, + df_avg, + yerr=df_avg_error, + color="#736AFF", + lw=3, + zorder=2, + marker="o", + mfc="w", + mew=2.5, + mec="#736AFF", + ms=12, + label="Avg FE", + ) + + xticks_spacing = len(f_ts) // 10 or 1 + xticks = f_ts[::xticks_spacing] + plt.xticks(xticks, [f"{i:.2f}" for i in xticks], fontsize=10) + plt.yticks(fontsize=10) + + ax.legend( + loc="best", + prop=FP(size=18), + frameon=False, + ) + ax.set_xlabel(r"Fraction of the Simulation Time", fontsize=16, color="#151B54") + ax.set_ylabel(r"$\Delta G$ ({})".format(units), fontsize=16, 
color="#151B54") + plt.tick_params(axis="x", color="#D2B9D3") + plt.tick_params(axis="y", color="#D2B9D3") + plt.tight_layout() return ax diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py index 09d685a9..c050cfe5 100644 --- a/src/alchemlyb/visualisation/dF_state.py +++ b/src/alchemlyb/visualisation/dF_state.py @@ -1,7 +1,7 @@ """Functions for Plotting the dF states. To assess the quality of the free energy estimation, The dF between adjacent -lambda states can be ploted to assess the quality of the estimation. +lambda states can be plotted to assess the quality of the estimation. The code for producing the dF states plot is modified based on `Alchemical Analysis `_. @@ -266,3 +266,4 @@ def plot_dF_state( leg.get_frame().set_alpha(0.5) return fig + From ac8c4796296cb7e7450e2a078496639a0af62688 Mon Sep 17 00:00:00 2001 From: jac16 Date: Wed, 13 Mar 2024 10:21:39 -0400 Subject: [PATCH 33/59] Added to changelog --- CHANGES | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CHANGES b/CHANGES index 043b3cae..91b1d5bb 100644 --- a/CHANGES +++ b/CHANGES @@ -27,10 +27,8 @@ Enhancements Enhancements - Addition of `block_average` function in both `convergence` and `visualization` (Issue #380, PR #381) - - add CITATION.cff file with all authors from AUTHORS (issue #394, PR #395) - -Changes - - modernize build system: replaced setup.py,cfg with pyproject.toml (#385) + - Add support for LAMMPS FEP files (Issue #349, PR #348) + 08/24/2024 xiki-tempula From 7331f281e71b290ac1b15ab254829f39a4c9eac3 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 11 Sep 2024 14:29:40 -0700 Subject: [PATCH 34/59] initial CITATION.cff file - generated with https://bit.ly/cffinit - authors and order from AUTHORS - affiliations and ORCID from JOSS paper - emails from paper or individuals Co-authored-by: David L. 
Dotson --- CITATION.cff | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..33ac0025 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +# This CITATION.cff file was initially generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: alchemlyb +message: >- + If you use this software, please cite it using the + metadata from this file. +type: software +authors: + - email: david@datryllic.com + given-names: David L. + family-names: Dotson + orcid: 'https://orcid.org/0000-0001-5879-2942' + affiliation: Datryllic LLC + - given-names: Ian M. + family-names: Kenney + orcid: 'https://orcid.org/0000-0002-9749-8866' + affiliation: Kenney Consulting, LLC + - given-names: Oliver + family-names: Beckstein + email: obeckste@asu.edu + affiliation: Arizona State University + orcid: 'https://orcid.org/0000-0003-1340-0831' + - given-names: Shuai + family-names: Liu + orcid: 'https://orcid.org/0000-0002-8632-633X' + affiliation: Silicon Therapeutics LLC + - given-names: Travis + family-names: Jensen + - given-names: Bryce + family-names: Allen + orcid: 'https://orcid.org/0000-0002-0804-8127' + affiliation: Differentiated Therapeutics + - given-names: Dominik + family-names: Wille + affiliation: Freie Universität Berlin + - given-names: Victoria + family-names: Lim + orcid: 'https://orcid.org/0000-0003-4030-9312' + affiliation: University of California Irvine + - given-names: Hyungro + family-names: Lee + orcid: 'https://orcid.org/0000-0002-4221-7094' + affiliation: Pacific Northwest National Laboratory + - given-names: Mohammad Soroush + family-names: Barhaghi + orcid: 'https://orcid.org/0000-0001-8226-7347' + affiliation: Wayne State University + - given-names: Zhiyi + family-names: Wu + affiliation: Exscientia plc + orcid: 
'https://orcid.org/0000-0002-7615-7851' + email: william@zhiyiwu.me + - given-names: Alexander + family-names: Schlaich + orcid: 'https://orcid.org/0000-0002-4250-363X' + affiliation: University of Stuttgart + - given-names: Jérôme + family-names: Hénin + orcid: 'https://orcid.org/0000-0003-2540-4098' + affiliation: CNRS + - given-names: Thomas T. + family-names: Joseph + orcid: 'https://orcid.org/0000-0003-1323-3244' + affiliation: University of Pennsylvania + - given-names: Irfan + family-names: Alibay + orcid: 'https://orcid.org/0000-0001-5787-9130' + affiliation: Open Molecular Software Foundation + - given-names: Pascal + family-names: Merz + affiliation: PM Scientific Consulting + orcid: 'https://orcid.org/0000-0002-7045-8725' + - given-names: Domenico + family-names: Marson + orcid: 'https://orcid.org/0000-0003-1839-9868' + affiliation: University of Trieste + - given-names: Haoxi + family-names: Li + orcid: 'https://orcid.org/0009-0004-8369-1042' + affiliation: University of North Carolina +identifiers: + - type: swh + value: 'swh:1:dir:4cd266754be413312ee7ef3f1f7fbe05b38192cf' + description: >- + The Software Heritage identifier for the top level + directory of the repository. +repository-code: 'https://github.com/alchemistry/alchemlyb' +url: 'https://alchemlyb.readthedocs.io' +abstract: >- + alchemlyb is an open-source Python software package for + the analysis of alchemical free energy calculations. Its + functionality contains individual composable building + blocks for all aspects of a full typical free energy + analysis workflow, starting with the extraction of raw + data from the output of diverse molecular simulation + packages, moving on to data preprocessing tasks such as + decorrelation of time series, using various estimators to + derive free energy estimates from simulation samples, and + finally providing quality analysis tools for data + convergence checking and visualization. 
alchemlyb also + contains high-level end-to-end workflows that combine + multiple building blocks into a user-friendly analysis + pipeline from the initial data input stage to the final + results. +keywords: + - Python + - free energy + - molecular dynamics + - alchemistry +license: BSD-3-Clause From ab91db51a4dadef0b8f0b69ee8cd540d2795a17c Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 11 Sep 2024 15:24:15 -0700 Subject: [PATCH 35/59] add JOSS paper as preferred citation to CITATION - inferred DOI from proof - assume final acceptance in 2024... --- CITATION.cff | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index 33ac0025..5482ecd3 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -69,7 +69,7 @@ authors: family-names: Alibay orcid: 'https://orcid.org/0000-0001-5787-9130' affiliation: Open Molecular Software Foundation - - given-names: Pascal + - given-names: Pascal T. family-names: Merz affiliation: PM Scientific Consulting orcid: 'https://orcid.org/0000-0002-7045-8725' @@ -111,3 +111,85 @@ keywords: - molecular dynamics - alchemistry license: BSD-3-Clause +preferred-citation: + title: 'alchemlyb: the simple alchemistry library' + authors: + - given-names: Zhiyi + family-names: Wu + affiliation: Exscientia plc + orcid: 'https://orcid.org/0000-0002-7615-7851' + email: william@zhiyiwu.me + - given-names: David L. 
+ family-names: Dotson + orcid: 'https://orcid.org/0000-0001-5879-2942' + affiliation: Arizona State University + - given-names: Irfan + family-names: Alibay + orcid: 'https://orcid.org/0000-0001-5787-9130' + affiliation: Open Molecular Software Foundation + - given-names: Bryce + family-names: Allen + orcid: 'https://orcid.org/0000-0002-0804-8127' + affiliation: Differentiated Therapeutics + - given-names: Mohammad Soroush + family-names: Barhaghi + orcid: 'https://orcid.org/0000-0001-8226-7347' + affiliation: Wayne State University + - given-names: Jérôme + family-names: Hénin + orcid: 'https://orcid.org/0000-0003-2540-4098' + affiliation: CNRS + - given-names: Thomas T. + family-names: Joseph + orcid: 'https://orcid.org/0000-0003-1323-3244' + affiliation: University of Pennsylvania + - given-names: Ian M. + family-names: Kenney + orcid: 'https://orcid.org/0000-0002-9749-8866' + affiliation: Arizona State University + - given-names: Hyungro + family-names: Lee + orcid: 'https://orcid.org/0000-0002-4221-7094' + affiliation: Pacific Northwest National Laboratory + - given-names: Haoxi + family-names: Li + orcid: 'https://orcid.org/0009-0004-8369-1042' + affiliation: University of North Carolina + - given-names: Victoria + family-names: Lim + orcid: 'https://orcid.org/0000-0003-4030-9312' + affiliation: University of California Irvine + - given-names: Shuai + family-names: Liu + orcid: 'https://orcid.org/0000-0002-8632-633X' + affiliation: Silicon Therapeutics LLC + - given-names: Domenico + family-names: Marson + orcid: 'https://orcid.org/0000-0003-1839-9868' + affiliation: University of Trieste + - given-names: Pascal T. 
+ family-names: Merz + affiliation: PM Scientific Consulting + orcid: 'https://orcid.org/0000-0002-7045-8725' + - given-names: Alexander + family-names: Schlaich + orcid: 'https://orcid.org/0000-0002-4250-363X' + affiliation: University of Stuttgart + - given-name: David + family-name: Mobley + orcid: 'https://orcid.org/0000-0002-1083-5533' + affiliation: University of California Irvine + - given-names: Michael R. + family-names: Shirts + orcid: 'https://orcid.org/0000-0003-3249-1097' + affiliation: University of Colorado Boulder + - given-names: Oliver + family-names: Beckstein + email: obeckste@asu.edu + affiliation: Arizona State University + orcid: 'https://orcid.org/0000-0003-1340-0831' + type: "Article" + year: 2024 + journal: "Journal of Open Source Software" + doi: 10.21105/joss.06934 + From d12d662a57cdf826f178f743186b528682757d84 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 11 Sep 2024 15:39:15 -0700 Subject: [PATCH 36/59] added references for MBAR and equilibration detection --- CITATION.cff | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 5482ecd3..15a6e9fd 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -6,7 +6,7 @@ cff-version: 1.2.0 title: alchemlyb message: >- If you use this software, please cite it using the - metadata from this file. + preferred citation together with any other references. type: software authors: - email: david@datryllic.com @@ -175,8 +175,8 @@ preferred-citation: family-names: Schlaich orcid: 'https://orcid.org/0000-0002-4250-363X' affiliation: University of Stuttgart - - given-name: David - family-name: Mobley + - given-names: David + family-names: Mobley orcid: 'https://orcid.org/0000-0002-1083-5533' affiliation: University of California Irvine - given-names: Michael R. 
@@ -188,8 +188,29 @@ preferred-citation: email: obeckste@asu.edu affiliation: Arizona State University orcid: 'https://orcid.org/0000-0003-1340-0831' - type: "Article" + type: 'article' year: 2024 - journal: "Journal of Open Source Software" - doi: 10.21105/joss.06934 - + journal: 'Journal of Open Source Software' + doi: '10.21105/joss.06934' +references: + - title: Statistically optimal analysis of samples from multiple equilibrium states + authors: + - family-names: Shirts + given-names: Michael R + - family-names: Chodera + given-names: John D + type: 'article' + pages: '124105' + year: '2008' + journal: Journal of Chemical Physics + doi: '10.1063/1.2978177' + - title: A Simple Method for Automated Equilibration Detection in Molecular Simulations + authors: + - family-names: Chodera + given-names: John D. + type: 'article' + pages: '1799–1805' + year: '2016' + journal: 'Journal of Chemical Theory and Computation' + doi: '10.1021/acs.jctc.5b00784' + From 5969444d28780a07659e86ed3179fbb76b1a93b2 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 11 Sep 2024 16:22:29 -0700 Subject: [PATCH 37/59] inserted additional JOSS paper authors in software author list - added David Mobley @davidlmobley (original conception and design of alchemlyb) - added Michael Shirts @mrshirts (original conception and design of alchemlyb) --- CITATION.cff | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CITATION.cff b/CITATION.cff index 15a6e9fd..54f10405 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -81,6 +81,14 @@ authors: family-names: Li orcid: 'https://orcid.org/0009-0004-8369-1042' affiliation: University of North Carolina + - given-names: David + family-names: Mobley + orcid: 'https://orcid.org/0000-0002-1083-5533' + affiliation: University of California Irvine + - given-names: Michael R. 
+ family-names: Shirts + orcid: 'https://orcid.org/0000-0003-3249-1097' + affiliation: University of Colorado Boulder identifiers: - type: swh value: 'swh:1:dir:4cd266754be413312ee7ef3f1f7fbe05b38192cf' From 7e96f64ad8911f1a4db06380b6be642a60b0a662 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 11 Sep 2024 16:38:41 -0700 Subject: [PATCH 38/59] updated CHANGES and AUTHORS for 2.3.3 - CHANGES: bump to 2.3.3 - add reminder to AUTHORS to also update CITATION.cff --- AUTHORS | 3 +++ CHANGES | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/AUTHORS b/AUTHORS index 3e41aa22..fda5162e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -16,6 +16,9 @@ GitHub handle are preferred. The repository history at https://github.com/alchemistry/alchemlyb and the CHANGES file show individual code contributions. +New contributors should add themselves to the end of this file AND to +the file CITATION.cff at the end of the top-level authors list. + Chronological list of authors ----------------------------- diff --git a/CHANGES b/CHANGES index 91b1d5bb..96bb24ba 100644 --- a/CHANGES +++ b/CHANGES @@ -12,6 +12,7 @@ The rules for this file: * accompany each entry with github issue/PR number (Issue #xyz) * release numbers follow "Semantic Versioning" https://semver.org +-------------------------------------------------------------------------------- ??/??/2024 jaclark5 @@ -29,6 +30,13 @@ Enhancements `visualization` (Issue #380, PR #381) - Add support for LAMMPS FEP files (Issue #349, PR #348) +09/??/2024 orbeckst + + * 2.3.3 + +Enhancements: + - add CITATION.cff file with all authors from AUTHORS (issue #394, PR #395) + 08/24/2024 xiki-tempula From a241777066386b9567182d479cfe04c190301ce1 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 11 Sep 2024 17:09:08 -0700 Subject: [PATCH 39/59] add authors to CITATION who had NOT been in AUTHORS Added all authors that were mentioned in the Acknowledgements of the JOSS paper but were not in AUTHORS - Wei-Tse Hsu @wehs7661 (2020) 
for code clean-up in a732380cc6b9571eca7eb694e3acd8adeda4ed78 - Jan Janssen @jan-janssen (2022) for creating the conda-forge package - Shujie Fan @VOD555 (2022) for initial code for fractional equilibration time - Helmut Carter @helmutcarter (2024) for doc fix in #356 --- CITATION.cff | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CITATION.cff b/CITATION.cff index 54f10405..d25b1e2c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -53,6 +53,10 @@ authors: affiliation: Exscientia plc orcid: 'https://orcid.org/0000-0002-7615-7851' email: william@zhiyiwu.me + - given-names: Wei-Tse + family-names: Hsu + orcid: https://orcid.org/0000-0001-6167-5480 + affiliation: University of Colorado Boulder - given-names: Alexander family-names: Schlaich orcid: 'https://orcid.org/0000-0002-4250-363X' @@ -77,10 +81,22 @@ authors: family-names: Marson orcid: 'https://orcid.org/0000-0003-1839-9868' affiliation: University of Trieste + - given-names: Shujie + family-names: Fan + orcid: 'https://orcid.org/0000-0002-0271-2760' + affiliation: Arizona State University + - given-names: Jan + family-names: Janssen + affiliation: Max-Planck-Institut for Sustainable Materials + orcid: 'https://orcid.org/0000-0001-9948-7119' - given-names: Haoxi family-names: Li orcid: 'https://orcid.org/0009-0004-8369-1042' affiliation: University of North Carolina + - given-names: Helmut + family-names: Carter + orcid: https://orcid.org/0000-0003-0273-4107 + affiliation: City University of New York - given-names: David family-names: Mobley orcid: 'https://orcid.org/0000-0002-1083-5533' From d97d6823c028475b4ffe567050f5af971912e51b Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Mon, 16 Sep 2024 13:00:26 -0700 Subject: [PATCH 40/59] add @jaclark5 to AUTHORS and CITATION.cff AUTHORS entry had been forgotten in PR #381 --- AUTHORS | 1 + CITATION.cff | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/AUTHORS b/AUTHORS index fda5162e..53219da2 100644 --- a/AUTHORS +++ b/AUTHORS @@ 
-56,5 +56,6 @@ Chronological list of authors 2023 - Haoxi Li (@hl2500) + 2024 - Jennifer A. Clark (@jaclark5) diff --git a/CITATION.cff b/CITATION.cff index d25b1e2c..f9030c7b 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -105,6 +105,10 @@ authors: family-names: Shirts orcid: 'https://orcid.org/0000-0003-3249-1097' affiliation: University of Colorado Boulder + - given-names: Jennifer A. + family-names: Clark + orcid: https://orcid.org/0000-0003-4897-5651 + affiliation: National Institute of Standards and Technology identifiers: - type: swh value: 'swh:1:dir:4cd266754be413312ee7ef3f1f7fbe05b38192cf' From cf4a307fd7b8d75f923eb2e48632112415a2c1a7 Mon Sep 17 00:00:00 2001 From: Jennifer A Clark Date: Mon, 16 Sep 2024 20:14:25 -0400 Subject: [PATCH 41/59] Resolve bug in bar_model.delta_f_ creation (#397) * fix bug introduced in PR #381: there was a change to creating the delta_f_ matrix, which resulted in the columns and indices being tuples that were in the wrong order for single lambda computations. 
* ensure that columns are in the correct order by explicitly sorting * add a test for the delta_f_ columns --- src/alchemlyb/estimators/bar_.py | 13 +++---- src/alchemlyb/tests/test_fep_estimators.py | 44 ++++++++++++++++++++++ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/alchemlyb/estimators/bar_.py b/src/alchemlyb/estimators/bar_.py index 52353064..22bf958f 100644 --- a/src/alchemlyb/estimators/bar_.py +++ b/src/alchemlyb/estimators/bar_.py @@ -100,25 +100,24 @@ def fit(self, u_nk): (len(groups.get_group(i)) if i in groups.groups else 0) for i in u_nk.columns ] - + # Pull lambda states from indices - states = list(set( x[1:] for x in u_nk.index)) + states = list(set(x[1:] if len(x[1:]) > 1 else x[1] for x in u_nk.index)) for state in states: - if len(state) == 1: - state = state[0] if state not in self._states_: raise ValueError( f"Indexed lambda state, {state}, is not represented in u_nk columns:" f" {self._states_}" ) - + states.sort(key=lambda x: self._states_.index(x)) + # Now get free energy differences and their uncertainties for each step deltas = np.array([]) d_deltas = np.array([]) for k in range(len(N_k) - 1): if N_k[k] == 0 or N_k[k + 1] == 0: continue - + # get us from lambda step k uk = groups.get_group(self._states_[k]) # get w_F @@ -149,7 +148,7 @@ def fit(self, u_nk): "To compute the free energy with BAR, ensure that values in u_nk exist" f" for the columns:\n{states}." 
) - + # build matrix of deltas between each state adelta = np.zeros((len(deltas) + 1, len(deltas) + 1)) ad_delta = np.zeros_like(adelta) diff --git a/src/alchemlyb/tests/test_fep_estimators.py b/src/alchemlyb/tests/test_fep_estimators.py index 46487c18..074e3d56 100644 --- a/src/alchemlyb/tests/test_fep_estimators.py +++ b/src/alchemlyb/tests/test_fep_estimators.py @@ -138,6 +138,50 @@ def test_states_(self, estimator): _estimator.states_ = 1 +def test_delta_f_columns( + gmx_benzene_Coulomb_u_nk, + gmx_expanded_ensemble_case_1, +): + """Ensure columns are tuples when appropriate.""" + + bar_1lambda = BAR().fit(alchemlyb.concat(gmx_benzene_Coulomb_u_nk)) + assert set(bar_1lambda.delta_f_.columns) == set([0.0, 0.25, 0.5, 0.75, 1.0]) + + bar_4lambda = BAR().fit(alchemlyb.concat(gmx_expanded_ensemble_case_1)) + assert set(bar_4lambda.delta_f_.columns) == set( + [ + (0.0, 0.1, 0.0, 0.0), + (0.0, 0.4, 0.0, 0.0), + (0.0, 1.0, 0.4, 0.002), + (0.0, 1.0, 0.0, 0.0001), + (0.0, 1.0, 0.1, 0.0002), + (0.0, 0.84, 0.0, 0.0), + (0.0, 0.68, 0.0, 0.0), + (0.0, 1.0, 0.84, 0.2), + (0.0, 1.0, 0.3, 0.001), + (0.0, 1.0, 0.2, 0.0004), + (0.0, 0.16, 0.0, 0.0), + (0.0, 1.0, 0.52, 0.01), + (0.0, 1.0, 0.92, 0.4), + (0.0, 0.76, 0.0, 0.0), + (0.0, 0.46, 0.0, 0.0), + (0.0, 1.0, 0.6, 0.02), + (0.0, 0.92, 0.0, 0.0), + (0.0, 0.6, 0.0, 0.0), + (0.0, 0.34, 0.0, 0.0), + (0.0, 1.0, 0.76, 0.1), + (0.0, 1.0, 1.0, 1.0), + (0.0, 0.05, 0.0, 0.0), + (0.0, 1.0, 0.48, 0.004), + (0.0, 0.0, 0.0, 0.0), + (0.0, 0.22, 0.0, 0.0), + (0.0, 0.52, 0.0, 0.0), + (0.0, 1.0, 0.68, 0.04), + (0.0, 0.28, 0.0, 0.0), + ] + ) + + def test_bootstrap(gmx_benzene_Coulomb_u_nk): u_nk = alchemlyb.concat(gmx_benzene_Coulomb_u_nk) mbar = MBAR(n_bootstraps=2) From b16f177d55c11743ed031122514df7a1148062ca Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Tue, 17 Sep 2024 09:41:31 -0700 Subject: [PATCH 42/59] black-formatted files --- src/alchemlyb/convergence/__init__.py | 7 +- src/alchemlyb/convergence/convergence.py | 30 +- 
src/alchemlyb/estimators/mbar_.py | 4 +- .../estimators/ti_gaussian_quadrature_.py | 378 +++++++++++++++--- src/alchemlyb/parsing/gmx.py | 1 + src/alchemlyb/parsing/gomc.py | 1 + src/alchemlyb/parsing/namd.py | 1 + src/alchemlyb/parsing/util.py | 1 + src/alchemlyb/preprocessing/subsampling.py | 1 + src/alchemlyb/visualisation/convergence.py | 4 +- src/alchemlyb/visualisation/dF_state.py | 1 - 11 files changed, 359 insertions(+), 70 deletions(-) diff --git a/src/alchemlyb/convergence/__init__.py b/src/alchemlyb/convergence/__init__.py index b031527e..c0ef2580 100644 --- a/src/alchemlyb/convergence/__init__.py +++ b/src/alchemlyb/convergence/__init__.py @@ -1 +1,6 @@ -from .convergence import forward_backward_convergence, fwdrev_cumavg_Rc, A_c, block_average +from .convergence import ( + forward_backward_convergence, + fwdrev_cumavg_Rc, + A_c, + block_average, +) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index d613f22e..5d9d4827 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -39,7 +39,7 @@ def forward_backward_convergence( Lower case input is also accepted until release 2.0.0. num : int The number of blocks used to divide *each* DataFrame and progressively add - to assess convergence. Note that if the DataFrames are different lengths, + to assess convergence. Note that if the DataFrames are different lengths, the number of samples contributed with each block will be different. error_tol : float The maximum error tolerated for analytic error. If the analytic error is @@ -48,7 +48,7 @@ def forward_backward_convergence( .. versionadded:: 2.3.0 .. versionchanged:: 2.4.0 Clarified docstring, removed incorrect estimation of std for cumulative - result in bar and added check that only a single lambda state is + result in bar and added check that only a single lambda state is represented in the indices of each df in df_list. 
kwargs : dict @@ -100,16 +100,22 @@ def forward_backward_convergence( # select estimator class by name my_estimator = estimators_dispatch[estimator](**kwargs) logger.info(f"Use {estimator} estimator for convergence analysis.") - + # Check that each df in the list has only one value of lambda for i, df in enumerate(df_list): lambda_values = list(set([x[1:] for x in df.index.to_numpy()])) if len(lambda_values) > 1: - ind = [j for j in range(len(lambda_values[0])) if len(list(set([x[j] for x in lambda_values]))) > 1][0] + ind = [ + j + for j in range(len(lambda_values[0])) + if len(list(set([x[j] for x in lambda_values]))) > 1 + ][0] raise ValueError( - "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format(i, ind) + "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format( + i, ind + ) ) - + logger.info("Begin forward analysis") forward_list = [] forward_error_list = [] @@ -460,9 +466,15 @@ def block_average(df_list, estimator="MBAR", num=10, **kwargs): for i, df in enumerate(df_list): lambda_values = list(set([x[1:] for x in df.index.to_numpy()])) if len(lambda_values) > 1: - ind = [j for j in range(len(lambda_values[0])) if len(list(set([x[j] for x in lambda_values]))) > 1][0] + ind = [ + j + for j in range(len(lambda_values[0])) + if len(list(set([x[j] for x in lambda_values]))) > 1 + ][0] raise ValueError( - "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format(i, ind) + "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format( + i, ind + ) ) if estimator in ["BAR"] and len(df_list) > 2: @@ -470,7 +482,7 @@ def block_average(df_list, estimator="MBAR", num=10, **kwargs): "Restrict to two DataFrames, one with a fep-lambda value and one its forward adjacent state for a " "meaningful result." 
) - + logger.info("Begin Moving Average Analysis") average_list = [] average_error_list = [] diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index fa399cb1..ab35df14 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -81,9 +81,9 @@ class MBAR(BaseEstimator, _EstimatorMixOut): `n_bootstraps` option added. .. versionchanged:: 2.4.0 Handle initial estimate, initial_f_k, from bar in the instance - that not all lambda states represented as column headers are + that not all lambda states represented as column headers are represented in the indices of u_nk. - + """ def __init__( diff --git a/src/alchemlyb/estimators/ti_gaussian_quadrature_.py b/src/alchemlyb/estimators/ti_gaussian_quadrature_.py index e4856046..62e8d66e 100644 --- a/src/alchemlyb/estimators/ti_gaussian_quadrature_.py +++ b/src/alchemlyb/estimators/ti_gaussian_quadrature_.py @@ -35,39 +35,297 @@ class TI_GQ(BaseEstimator, _EstimatorMixOut): """ - special_points = {1: {'lambdas': [0.5], - 'weights': [1.0]}, - 2: {'lambdas': [0.21132, 0.78867], - 'weights': [0.5, 0.5]}, - 3: {'lambdas': [0.1127, 0.5, 0.88729], - 'weights': [0.27777, 0.44444, 0.27777]}, - 4: {'lambdas': [0.06943, 0.33001, 0.66999, 0.93057], - 'weights': [0.17393, 0.32607, 0.32607, 0.17393]}, - 5: {'lambdas': [0.04691, 0.23076, 0.5, 0.76923, 0.95308], - 'weights': [0.11846, 0.23931, 0.28444, 0.23931, 0.11846]}, - 6: {'lambdas': [0.03377, 0.1694 , 0.38069, 0.61931, 0.8306 , 0.96623], - 'weights': [0.08566, 0.18038, 0.23396, 0.23396, 0.18038, 0.08566]}, - 7: {'lambdas': [0.02544, 0.12923, 0.29707, 0.5, 0.70292, 0.87076, 0.97455], - 'weights': [0.06474, 0.13985, 0.19091, 0.20897, 0.19091, 0.13985, 0.06474]}, - 8: {'lambdas': [0.01986, 0.10167, 0.23723, 0.40828, 0.59172, 0.76277, 0.89833, 0.98014], - 'weights': [0.05061, 0.11119, 0.15685, 0.18134, 0.18134, 0.15685, 0.11119, 0.05061]}, - 9: {'lambdas': [0.01592, 0.08198, 0.19331, 0.33787, 0.5, 0.66213, 0.80669, 0.91802, 
0.98408], - 'weights': [0.04064, 0.09032, 0.13031, 0.15617, 0.16512, 0.15617, 0.13031, 0.09032, 0.04064]}, - 10: {'lambdas': [0.01305, 0.06747, 0.1603, 0.2833, 0.42556, 0.57444, 0.7167, 0.8397, 0.93253, 0.98695], - 'weights': [0.03334, 0.07473, 0.10954, 0.13463, 0.14776, 0.14776, 0.13463, 0.10954, 0.07473, 0.03334]}, - 11: {'lambdas': [0.01089, 0.05647, 0.13492, 0.24045, 0.36523, 0.5, 0.63477, 0.75955, 0.86508, 0.94353, 0.98911], - 'weights': [0.02783, 0.06279, 0.09315, 0.1166, 0.1314, 0.13646, 0.1314, 0.1166, 0.09315, 0.06279, 0.02783]}, - 12: {'lambdas': [0.00922, 0.04794, 0.11505, 0.20634, 0.31608, 0.43738, 0.56262, 0.68392, 0.79366, 0.88495, 0.95206, 0.99078], - 'weights': [0.02359, 0.05347, 0.08004, 0.10158, 0.11675, 0.12457, 0.12457, 0.11675, 0.10158, 0.08004, 0.05347, 0.02359]}, - 13: {'lambdas': [0.00791, 0.0412, 0.09921, 0.17883, 0.27575, 0.38477, 0.5, 0.61523, 0.72425, 0.82117, 0.90079, 0.9588, 0.99209], - 'weights': [0.02024, 0.04606, 0.06944, 0.08907, 0.10391, 0.11314, 0.11628, 0.11314, 0.10391, 0.08907, 0.06944, 0.04606, 0.02024]}, - 14: {'lambdas': [0.00686, 0.03578, 0.0864, 0.15635, 0.24238, 0.34044, 0.44597, 0.55403, 0.65956, 0.75762, 0.84365, 0.9136, 0.96422, 0.99314], - 'weights': [0.01756, 0.04008, 0.06076, 0.0786, 0.09277, 0.1026, 0.10763, 0.10763, 0.1026, 0.09277, 0.0786, 0.06076, 0.04008, 0.01756]}, - 15: {'lambdas': [0.006, 0.03136, 0.0759, 0.13779, 0.21451, 0.30292, 0.3994 , 0.5, 0.6006, 0.69708, 0.78549, 0.86221, 0.9241, 0.96864, 0.994], - 'weights': [0.01538, 0.03518, 0.05358, 0.06979, 0.08313, 0.09308, 0.09922, 0.10129, 0.09922, 0.09308, 0.08313, 0.06979, 0.05358, 0.03518, 0.01538]}, - 16: {'lambdas': [0.0053, 0.02771, 0.06718, 0.1223, 0.19106, 0.27099, 0.3592, 0.45249, 0.54751, 0.6408, 0.72901, 0.80894, 0.8777, 0.93282, 0.97229, 0.9947], - 'weights': [0.01358, 0.03113, 0.04758, 0.06231, 0.0748, 0.08458, 0.0913 , 0.09473, 0.09473, 0.0913, 0.08458, 0.0748, 0.06231, 0.04758, 0.03113, 0.01358]} - } + special_points = { + 1: {"lambdas": 
[0.5], "weights": [1.0]}, + 2: {"lambdas": [0.21132, 0.78867], "weights": [0.5, 0.5]}, + 3: {"lambdas": [0.1127, 0.5, 0.88729], "weights": [0.27777, 0.44444, 0.27777]}, + 4: { + "lambdas": [0.06943, 0.33001, 0.66999, 0.93057], + "weights": [0.17393, 0.32607, 0.32607, 0.17393], + }, + 5: { + "lambdas": [0.04691, 0.23076, 0.5, 0.76923, 0.95308], + "weights": [0.11846, 0.23931, 0.28444, 0.23931, 0.11846], + }, + 6: { + "lambdas": [0.03377, 0.1694, 0.38069, 0.61931, 0.8306, 0.96623], + "weights": [0.08566, 0.18038, 0.23396, 0.23396, 0.18038, 0.08566], + }, + 7: { + "lambdas": [0.02544, 0.12923, 0.29707, 0.5, 0.70292, 0.87076, 0.97455], + "weights": [0.06474, 0.13985, 0.19091, 0.20897, 0.19091, 0.13985, 0.06474], + }, + 8: { + "lambdas": [ + 0.01986, + 0.10167, + 0.23723, + 0.40828, + 0.59172, + 0.76277, + 0.89833, + 0.98014, + ], + "weights": [ + 0.05061, + 0.11119, + 0.15685, + 0.18134, + 0.18134, + 0.15685, + 0.11119, + 0.05061, + ], + }, + 9: { + "lambdas": [ + 0.01592, + 0.08198, + 0.19331, + 0.33787, + 0.5, + 0.66213, + 0.80669, + 0.91802, + 0.98408, + ], + "weights": [ + 0.04064, + 0.09032, + 0.13031, + 0.15617, + 0.16512, + 0.15617, + 0.13031, + 0.09032, + 0.04064, + ], + }, + 10: { + "lambdas": [ + 0.01305, + 0.06747, + 0.1603, + 0.2833, + 0.42556, + 0.57444, + 0.7167, + 0.8397, + 0.93253, + 0.98695, + ], + "weights": [ + 0.03334, + 0.07473, + 0.10954, + 0.13463, + 0.14776, + 0.14776, + 0.13463, + 0.10954, + 0.07473, + 0.03334, + ], + }, + 11: { + "lambdas": [ + 0.01089, + 0.05647, + 0.13492, + 0.24045, + 0.36523, + 0.5, + 0.63477, + 0.75955, + 0.86508, + 0.94353, + 0.98911, + ], + "weights": [ + 0.02783, + 0.06279, + 0.09315, + 0.1166, + 0.1314, + 0.13646, + 0.1314, + 0.1166, + 0.09315, + 0.06279, + 0.02783, + ], + }, + 12: { + "lambdas": [ + 0.00922, + 0.04794, + 0.11505, + 0.20634, + 0.31608, + 0.43738, + 0.56262, + 0.68392, + 0.79366, + 0.88495, + 0.95206, + 0.99078, + ], + "weights": [ + 0.02359, + 0.05347, + 0.08004, + 0.10158, + 0.11675, + 0.12457, + 
0.12457, + 0.11675, + 0.10158, + 0.08004, + 0.05347, + 0.02359, + ], + }, + 13: { + "lambdas": [ + 0.00791, + 0.0412, + 0.09921, + 0.17883, + 0.27575, + 0.38477, + 0.5, + 0.61523, + 0.72425, + 0.82117, + 0.90079, + 0.9588, + 0.99209, + ], + "weights": [ + 0.02024, + 0.04606, + 0.06944, + 0.08907, + 0.10391, + 0.11314, + 0.11628, + 0.11314, + 0.10391, + 0.08907, + 0.06944, + 0.04606, + 0.02024, + ], + }, + 14: { + "lambdas": [ + 0.00686, + 0.03578, + 0.0864, + 0.15635, + 0.24238, + 0.34044, + 0.44597, + 0.55403, + 0.65956, + 0.75762, + 0.84365, + 0.9136, + 0.96422, + 0.99314, + ], + "weights": [ + 0.01756, + 0.04008, + 0.06076, + 0.0786, + 0.09277, + 0.1026, + 0.10763, + 0.10763, + 0.1026, + 0.09277, + 0.0786, + 0.06076, + 0.04008, + 0.01756, + ], + }, + 15: { + "lambdas": [ + 0.006, + 0.03136, + 0.0759, + 0.13779, + 0.21451, + 0.30292, + 0.3994, + 0.5, + 0.6006, + 0.69708, + 0.78549, + 0.86221, + 0.9241, + 0.96864, + 0.994, + ], + "weights": [ + 0.01538, + 0.03518, + 0.05358, + 0.06979, + 0.08313, + 0.09308, + 0.09922, + 0.10129, + 0.09922, + 0.09308, + 0.08313, + 0.06979, + 0.05358, + 0.03518, + 0.01538, + ], + }, + 16: { + "lambdas": [ + 0.0053, + 0.02771, + 0.06718, + 0.1223, + 0.19106, + 0.27099, + 0.3592, + 0.45249, + 0.54751, + 0.6408, + 0.72901, + 0.80894, + 0.8777, + 0.93282, + 0.97229, + 0.9947, + ], + "weights": [ + 0.01358, + 0.03113, + 0.04758, + 0.06231, + 0.0748, + 0.08458, + 0.0913, + 0.09473, + 0.09473, + 0.0913, + 0.08458, + 0.0748, + 0.06231, + 0.04758, + 0.03113, + 0.01358, + ], + }, + } def __init__(self, verbose=False): self.verbose = verbose @@ -94,26 +352,32 @@ def fit(self, dHdl): # used to calculate mean means = dHdl.groupby(level=dHdl.index.names[1:]).mean() variances = np.square(dHdl.groupby(level=dHdl.index.names[1:]).sem()) - + weights = [] # check if the lambdas in the simulations match the suggested values - lambda_list, means_list, variances_list, index_list = self.separate_mean_variance(means, variances) + lambda_list, means_list, 
variances_list, index_list = ( + self.separate_mean_variance(means, variances) + ) for lambdas in lambda_list: num_lambdas = len(lambdas) if num_lambdas not in self.special_points: - raise ValueError(f'TI_GQ only supports a set number of lambda windows ({list(self.special_points.keys())}) currently, \ - but {num_lambdas} lambda windows are given.') - suggested_lambdas = self.special_points[num_lambdas]['lambdas'] + raise ValueError( + f"TI_GQ only supports a set number of lambda windows ({list(self.special_points.keys())}) currently, \ + but {num_lambdas} lambda windows are given." + ) + suggested_lambdas = self.special_points[num_lambdas]["lambdas"] if not np.allclose(lambdas, suggested_lambdas, rtol=0.1): - raise ValueError(f'lambda values, {suggested_lambdas}, are expected, but {lambdas} are given. Please use trapezoidal rule instead.') - weights.extend(self.special_points[num_lambdas]['weights']) + raise ValueError( + f"lambda values, {suggested_lambdas}, are expected, but {lambdas} are given. Please use trapezoidal rule instead." 
+ ) + weights.extend(self.special_points[num_lambdas]["weights"]) # means_new and variances_new are similar to means and variances, but with only values relevant to each lambda type (for multi-lambda situation) means_new = concat(means_list) mean_values = means_new.to_numpy() variances_new = concat(variances_list) variance_values = variances_new.to_numpy() - # apply gaussian quadrature multiplication at each lambda state + # apply gaussian quadrature multiplication at each lambda state deltas = weights * mean_values deltas = np.insert(deltas, 0, [0.0], axis=0) deltas = np.append(deltas, [0.0], axis=0) @@ -131,25 +395,25 @@ def fit(self, dHdl): # Append cumulative free energy value from state i to i+j out.append(deltas[i] + deltas[i + 1 : i + j + 1].sum()) # Append cumulative squared deviation of free energy from state i to i+j - dout.append(d_deltas_squared[i] + d_deltas_squared[i + 1 : i + j + 1].sum()) - + dout.append( + d_deltas_squared[i] + d_deltas_squared[i + 1 : i + j + 1].sum() + ) + adelta += np.diagflat(np.array(out), k=j) ad_delta += np.diagflat(np.array(dout), k=j) - adelta = (adelta - adelta.T) + adelta = adelta - adelta.T ad_delta = (ad_delta + ad_delta.T) - 2 * np.diagflat(d_deltas_squared) # yield standard delta_f_ cumulative free energies from one state to another - self._delta_f_ = pd.DataFrame( - adelta, columns=index_list, index=index_list - ) - + self._delta_f_ = pd.DataFrame(adelta, columns=index_list, index=index_list) + # yield standard deviation d_delta_f_ between each state self._d_delta_f_ = pd.DataFrame( np.sqrt(ad_delta), columns=index_list, index=index_list, ) - + self.dhdl = means self.dhdl.attrs = dHdl.attrs self._states_ = means_new.index.values.tolist() @@ -164,8 +428,8 @@ def separate_mean_variance(means, variances): For transitions with multiple lambda, the attr:`dhdl` would return a :class:`~pandas.DataFrame` which gives the dHdl for all the lambda states, regardless of whether it is perturbed or not. 
This function - creates 3 lists of :class:`numpy.array`, :class:`pandas.Series` and - :class:`pandas.Series` for each lambda, where the lists describe + creates 3 lists of :class:`numpy.array`, :class:`pandas.Series` and + :class:`pandas.Series` for each lambda, where the lists describe the lambda values, potential energy gradient and variance values for the lambdas state that is perturbed. @@ -190,7 +454,7 @@ def separate_mean_variance(means, variances): variance of the potential energy gradient with respect to lambda for each configuration that lambda k is perturbed. index_list : list - A list of :class:`float` or :class:`tuple` such that each :class:`float` + A list of :class:`float` or :class:`tuple` such that each :class:`float` or :class:`tuple` is the index of the final `delta_f_` and `d_delta_f_` """ lambda_list = [] @@ -220,11 +484,15 @@ def separate_mean_variance(means, variances): lambda_list.append(new_means.index) dhdl_list.append(new_means) variance_list.append(new_variances) - + # add two end states at all lambda zeros and ones if len(l_types) == 1: index_list = [0.0] + index_list + [1.0] else: - index_list = [tuple([0.0]*len(l_types))] + index_list + [tuple([1.0]*len(l_types))] + index_list = ( + [tuple([0.0] * len(l_types))] + + index_list + + [tuple([1.0] * len(l_types))] + ) - return lambda_list, dhdl_list, variance_list, index_list + return lambda_list, dhdl_list, variance_list, index_list diff --git a/src/alchemlyb/parsing/gmx.py b/src/alchemlyb/parsing/gmx.py index a9f83498..5df9ab6c 100644 --- a/src/alchemlyb/parsing/gmx.py +++ b/src/alchemlyb/parsing/gmx.py @@ -1,6 +1,7 @@ """Parsers for extracting alchemical data from `Gromacs `_ output files. 
""" + import numpy as np import pandas as pd diff --git a/src/alchemlyb/parsing/gomc.py b/src/alchemlyb/parsing/gomc.py index 90124687..a05cf200 100644 --- a/src/alchemlyb/parsing/gomc.py +++ b/src/alchemlyb/parsing/gomc.py @@ -1,6 +1,7 @@ """Parsers for extracting alchemical data from `GOMC `_ output files. """ + import pandas as pd from . import _init_attrs diff --git a/src/alchemlyb/parsing/namd.py b/src/alchemlyb/parsing/namd.py index 5eacf8c4..76657a8f 100644 --- a/src/alchemlyb/parsing/namd.py +++ b/src/alchemlyb/parsing/namd.py @@ -1,6 +1,7 @@ """Parsers for extracting alchemical data from `NAMD `_ output files. """ + from os.path import basename from re import split diff --git a/src/alchemlyb/parsing/util.py b/src/alchemlyb/parsing/util.py index 28e5a568..9feddc9e 100644 --- a/src/alchemlyb/parsing/util.py +++ b/src/alchemlyb/parsing/util.py @@ -1,6 +1,7 @@ """Collection of utilities used by many parsers. """ + import bz2 import gzip import os diff --git a/src/alchemlyb/preprocessing/subsampling.py b/src/alchemlyb/preprocessing/subsampling.py index 92813375..d2f34476 100644 --- a/src/alchemlyb/preprocessing/subsampling.py +++ b/src/alchemlyb/preprocessing/subsampling.py @@ -1,6 +1,7 @@ """Functions for subsampling datasets. """ + import warnings import pandas as pd diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index bf232b5d..f250495b 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -59,7 +59,7 @@ def plot_convergence(dataframe, units=None, final_error=None, ax=None): .. versionchanged:: 0.6.0 data now takes in dataframe .. versionadded:: 0.4.0 - + """ if units is not None: dataframe = get_unit_converter(units)(dataframe) @@ -192,7 +192,7 @@ def plot_block_average(dataframe, units=None, final_error=None, ax=None): `Alchemical Analysis `_. .. 
versionadded:: 2.4.0 - + """ if units is not None: dataframe = get_unit_converter(units)(dataframe) diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py index c050cfe5..a7380741 100644 --- a/src/alchemlyb/visualisation/dF_state.py +++ b/src/alchemlyb/visualisation/dF_state.py @@ -266,4 +266,3 @@ def plot_dF_state( leg.get_frame().set_alpha(0.5) return fig - From a3b2c08a3be487c92bf8b916a08f3969c8e8e1cb Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Tue, 17 Sep 2024 09:42:47 -0700 Subject: [PATCH 43/59] replace setup.py,cfg with pyproject.toml - fix #385 - use pyproject.toml instead of setup.py (note: may need to change README for PyPi to exclude banners) - remove versioneer and use versioningit (use alchemlyb.__version__ directly where the version is needed, e.g., for sphinx docs) - updated CHANGES for 2.4.0 --- CHANGES | 9 +- docs/conf.py | 7 +- pyproject.toml | 84 + setup.cfg | 7 - setup.py | 63 - src/alchemlyb/__init__.py | 5 +- src/alchemlyb/_version.py | 683 -------- src/alchemlyb/tests/test_version.py | 6 +- versioneer.py | 2277 --------------------------- 9 files changed, 92 insertions(+), 3049 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100755 setup.py delete mode 100644 src/alchemlyb/_version.py delete mode 100644 versioneer.py diff --git a/CHANGES b/CHANGES index 96bb24ba..9649341a 100644 --- a/CHANGES +++ b/CHANGES @@ -28,15 +28,10 @@ Enhancements Enhancements - Addition of `block_average` function in both `convergence` and `visualization` (Issue #380, PR #381) - - Add support for LAMMPS FEP files (Issue #349, PR #348) - -09/??/2024 orbeckst - - * 2.3.3 - -Enhancements: - add CITATION.cff file with all authors from AUTHORS (issue #394, PR #395) +Changes + - modernize build system: replaced setup.py,cfg with pyproject.toml (#385) 08/24/2024 xiki-tempula diff --git a/docs/conf.py b/docs/conf.py index 5d78c185..8eb645fd 100644 --- a/docs/conf.py +++ b/docs/conf.py 
@@ -67,12 +67,11 @@ # We always display the full release string; if something else is # desired, see the commented out code -from alchemlyb._version import get_versions - -release = get_versions()["version"] -del get_versions +import alchemlyb +release = alchemlyb.__version__ version = release + # version = release.split("+")[0] # only major.minor.patch # version = "."join(release.split(".")[:2]) # only major.minor diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..7088f10a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,84 @@ +# alchemlyb + +[build-system] +requires = ["setuptools", "wheel", "versioningit"] +build-backend = "setuptools.build_meta" + +[project] +name = "alchemlyb" +description = "the simple alchemistry library" +authors = [ + { name = "Zhiyi Wu", email = "william@zhiyiwu.me" }, + { name = "David Dotson", email = "dotsdl@gmail.com" } +] +maintainers = [ + { name = "Zhiyi Wu", email = "william@zhiyiwu.me" }, + { name = "Oliver Beckstein", email = "orbeckst@gmail.com" } +] +dynamic = ["version"] + +readme = { file = "README.md", content-type = "text/markdown" } +license = { text = "BSD" } +keywords = ["free energy", "MBAR", "thermodynamic integration", + "free energy perturbation", "FEP", "alchemistry", "analysis", + "GROMACS", "NAMD", "AMBER", "molecular dynamics"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: POSIX", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Chemistry", + "Topic :: Software Development :: Libraries 
:: Python Modules" +] +requires-python = ">=3.10" + +dependencies = [ + "numpy", + "pandas>=2.1", + "pymbar>=4", + "scipy", + "scikit-learn", + "matplotlib>=3.7", + "loguru", + "pyarrow", +] + + +[project.optional-dependencies] +tests = [ + "pytest", + "alchemtest" +] + +[project.urls] +Homepage = "https://github.com/alchemistry/alchemlyb" +Documentation = "https://alchemlyb.readthedocs.io/" +Repository = "https://github.com/alchemistry/alchemlyb" +Issues = "https://github.com/alchemistry/alchemlyb/issues" +Changelog = "https://github.com/alchemistry/alchemlyb/blob/master/CHANGES" +Discussions = "https://github.com/alchemistry/alchemlyb/discussions" + + +[tool.setuptools.packages.find] +where = ["src"] + + +[tool.versioningit] + +[tool.versioningit.vcs] +default-tag = "0.0.0" +match = ["*"] + +[tool.versioningit.write] +file = "src/alchemlyb/_version.py" + + diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 85363710..00000000 --- a/setup.cfg +++ /dev/null @@ -1,7 +0,0 @@ -[versioneer] -VCS = git -style = pep440 -versionfile_source = src/alchemlyb/_version.py -versionfile_build = alchemlyb/_version.py -tag_prefix = -parentdir_prefix = alchemlyb- diff --git a/setup.py b/setup.py deleted file mode 100755 index fa612485..00000000 --- a/setup.py +++ /dev/null @@ -1,63 +0,0 @@ -#! /usr/bin/python -"""Setuptools-based setup script for alchemlyb. 
- -For a basic installation just type the command:: - - python setup.py install - -""" - -from setuptools import setup, find_packages - -import versioneer - -setup( - name="alchemlyb", - version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), - description="the simple alchemistry library", - author="David Dotson", - author_email="dotsdl@gmail.com", - maintainer="Oliver Beckstein", - maintainer_email="orbeckst@gmail.com", - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Operating System :: POSIX", - "Operating System :: MacOS :: MacOS X", - "Operating System :: Microsoft :: Windows ", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Bio-Informatics", - "Topic :: Scientific/Engineering :: Chemistry", - "Topic :: Software Development :: Libraries :: Python Modules", - ], - packages=find_packages("src"), - package_dir={"": "src"}, - license="BSD", - # Remove the badges as pypi didn't like them. 
https://github.com/alchemistry/alchemlyb/pull/390 - long_description=open("README.md") - .read() - .replace( - "[![Zenodo DOI](https://zenodo.org/badge/68669096.svg)](https://zenodo.org/badge/latestdoi/68669096) [![Documentation](https://readthedocs.org/projects/alchemlyb/badge/?version=latest)](http://alchemlyb.readthedocs.io/en/latest/) [![Build Status](https://github.com/alchemistry/alchemlyb/actions/workflows/ci.yaml/badge.svg?branch=master)](https://github.com/alchemistry/alchemlyb/actions/workflows/ci.yaml) [![Code coverage](https://codecov.io/gh/alchemistry/alchemlyb/branch/master/graph/badge.svg)](https://codecov.io/gh/alchemistry/alchemlyb) [![anaconda package](https://anaconda.org/conda-forge/alchemlyb/badges/version.svg)](https://anaconda.org/conda-forge/alchemlyb)", - "\n", - ), - long_description_content_type="text/markdown", - python_requires=">=3.10", - tests_require=["pytest", "alchemtest"], - install_requires=[ - "numpy", - "pandas>=2.1", - "pymbar>=4", - "scipy", - "scikit-learn", - "matplotlib>=3.7", - "loguru", - "pyarrow", - ], -) diff --git a/src/alchemlyb/__init__.py b/src/alchemlyb/__init__.py index 8417bcb2..91ee5884 100644 --- a/src/alchemlyb/__init__.py +++ b/src/alchemlyb/__init__.py @@ -2,10 +2,7 @@ import pandas as pd -from ._version import get_versions - -__version__ = get_versions()["version"] -del get_versions +from ._version import __version__ def pass_attrs(func): diff --git a/src/alchemlyb/_version.py b/src/alchemlyb/_version.py deleted file mode 100644 index 8fbc073f..00000000 --- a/src/alchemlyb/_version.py +++ /dev/null @@ -1,683 +0,0 @@ - -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. 
- -# This file is released into the public domain. -# Generated by versioneer-0.29 -# https://github.com/python-versioneer/python-versioneer - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Any, Callable, Dict, List, Optional, Tuple -import functools - - -def get_keywords() -> Dict[str, str]: - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - VCS: str - style: str - tag_prefix: str - parentdir_prefix: str - versionfile_source: str - verbose: bool - - -def get_config() -> VersioneerConfig: - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "" - cfg.parentdir_prefix = "alchemlyb-" - cfg.versionfile_source = "src/alchemlyb/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f: Callable) -> Callable: - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - 
-def run_command( - commands: List[str], - args: List[str], - cwd: Optional[str] = None, - verbose: bool = False, - hide_stderr: bool = False, - env: Optional[Dict[str, str]] = None, -) -> Tuple[Optional[str], Optional[int]]: - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs: Dict[str, Any] = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError as e: - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir( - parentdir_prefix: str, - root: str, - verbose: bool, -) -> Dict[str, Any]: - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords: Dict[str, str] = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords( - keywords: Dict[str, str], - tag_prefix: str, - verbose: bool, -) -> Dict[str, Any]: - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. 
- date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs( - tag_prefix: str, - root: str, - verbose: bool, - runner: Callable = run_command -) -> Dict[str, Any]: - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. 
- env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces: Dict[str, Any] = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. 
- branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. 
- date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces: Dict[str, Any]) -> str: - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces: Dict[str, Any]) -> str: - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces: Dict[str, Any]) -> str: - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces: Dict[str, Any]) -> str: - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces: Dict[str, Any]) -> str: - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces: Dict[str, Any]) -> str: - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions() -> 
Dict[str, Any]: - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} diff --git a/src/alchemlyb/tests/test_version.py b/src/alchemlyb/tests/test_version.py index ddab2ab6..381690d4 100644 --- a/src/alchemlyb/tests/test_version.py +++ b/src/alchemlyb/tests/test_version.py @@ -10,9 +10,7 @@ def test_version(): assert len(version) > 0 -def test_version_get_versions(): +def test_version__version__(): import alchemlyb._version - version = alchemlyb._version.get_versions() - - assert alchemlyb.__version__ == version["version"] + assert alchemlyb.__version__ == alchemlyb._version.__version__ diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index 1e3753e6..00000000 
--- a/versioneer.py +++ /dev/null @@ -1,2277 +0,0 @@ - -# Version: 0.29 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/python-versioneer/python-versioneer -* Brian Warner -* License: Public Domain (Unlicense) -* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 -* [![Latest Version][pypi-image]][pypi-url] -* [![Build Status][travis-image]][travis-url] - -This is a tool for managing a recorded version number in setuptools-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -Versioneer provides two installation modes. The "classic" vendored mode installs -a copy of versioneer into your repository. The experimental build-time dependency mode -is intended to allow you to skip this step and simplify the process of upgrading. 
- -### Vendored mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) - * Note that you will need to add `tomli; python_version < "3.11"` to your - build-time dependencies if you use `pyproject.toml` -* run `versioneer install --vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -### Build-time dependency mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) -* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) - to the `requires` key of the `build-system` table in `pyproject.toml`: - ```toml - [build-system] - requires = ["setuptools", "versioneer[toml]"] - build-backend = "setuptools.build_meta" - ``` -* run `versioneer install --no-vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, 
e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes). - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. 
- -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. 
- creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). 
- -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/python-versioneer/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other languages) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. 
- -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg` and `pyproject.toml`, if necessary, - to include any new configuration settings indicated by the release notes. - See [UPGRADING](./UPGRADING.md) for details. 
-* re-run `versioneer install --[no-]vendor` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - -## Similar projects - -* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time - dependency -* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of - versioneer -* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools - plugin - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the "Unlicense", as described in -https://unlicense.org/. 
- -[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg -[pypi-url]: https://pypi.python.org/pypi/versioneer/ -[travis-image]: -https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg -[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer - -""" -# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring -# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements -# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error -# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with -# pylint:disable=attribute-defined-outside-init,too-many-arguments - -import configparser -import errno -import json -import os -import re -import subprocess -import sys -from pathlib import Path -from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union -from typing import NoReturn -import functools - -have_tomllib = True -if sys.version_info >= (3, 11): - import tomllib -else: - try: - import tomli as tomllib - except ImportError: - have_tomllib = False - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - VCS: str - style: str - tag_prefix: str - versionfile_source: str - versionfile_build: Optional[str] - parentdir_prefix: Optional[str] - verbose: Optional[bool] - - -def get_root() -> str: - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . 
- """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - pyproject_toml = os.path.join(root, "pyproject.toml") - versioneer_py = os.path.join(root, "versioneer.py") - if not ( - os.path.exists(setup_py) - or os.path.exists(pyproject_toml) - or os.path.exists(versioneer_py) - ): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - pyproject_toml = os.path.join(root, "pyproject.toml") - versioneer_py = os.path.join(root, "versioneer.py") - if not ( - os.path.exists(setup_py) - or os.path.exists(pyproject_toml) - or os.path.exists(versioneer_py) - ): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. 
- my_path = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(my_path)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root: str) -> VersioneerConfig: - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise OSError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . - root_pth = Path(root) - pyproject_toml = root_pth / "pyproject.toml" - setup_cfg = root_pth / "setup.cfg" - section: Union[Dict[str, Any], configparser.SectionProxy, None] = None - if pyproject_toml.exists() and have_tomllib: - try: - with open(pyproject_toml, 'rb') as fobj: - pp = tomllib.load(fobj) - section = pp['tool']['versioneer'] - except (tomllib.TOMLDecodeError, KeyError) as e: - print(f"Failed to load config from {pyproject_toml}: {e}") - print("Try to load it from setup.cfg") - if not section: - parser = configparser.ConfigParser() - with open(setup_cfg) as cfg_file: - parser.read_file(cfg_file) - parser.get("versioneer", "VCS") # raise error if missing - - section = parser["versioneer"] - - # `cast`` really shouldn't be used, but its simplest for the - # common VersioneerConfig users at the moment. 
We verify against - # `None` values elsewhere where it matters - - cfg = VersioneerConfig() - cfg.VCS = section['VCS'] - cfg.style = section.get("style", "") - cfg.versionfile_source = cast(str, section.get("versionfile_source")) - cfg.versionfile_build = section.get("versionfile_build") - cfg.tag_prefix = cast(str, section.get("tag_prefix")) - if cfg.tag_prefix in ("''", '""', None): - cfg.tag_prefix = "" - cfg.parentdir_prefix = section.get("parentdir_prefix") - if isinstance(section, configparser.SectionProxy): - # Make sure configparser translates to bool - cfg.verbose = section.getboolean("verbose") - else: - cfg.verbose = section.get("verbose") - - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f: Callable) -> Callable: - """Store f in HANDLERS[vcs][method].""" - HANDLERS.setdefault(vcs, {})[method] = f - return f - return decorate - - -def run_command( - commands: List[str], - args: List[str], - cwd: Optional[str] = None, - verbose: bool = False, - hide_stderr: bool = False, - env: Optional[Dict[str, str]] = None, -) -> Tuple[Optional[str], Optional[int]]: - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs: Dict[str, Any] = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, 
cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError as e: - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -LONG_VERSION_PY['git'] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. -# Generated by versioneer-0.29 -# https://github.com/python-versioneer/python-versioneer - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Any, Callable, Dict, List, Optional, Tuple -import functools - - -def get_keywords() -> Dict[str, str]: - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - VCS: str - style: str - tag_prefix: str - parentdir_prefix: str - versionfile_source: str - verbose: bool - - -def get_config() -> VersioneerConfig: - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f: Callable) -> Callable: - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command( - commands: List[str], - args: List[str], - cwd: Optional[str] = None, - verbose: bool = False, - hide_stderr: bool = False, - env: Optional[Dict[str, str]] = None, -) -> Tuple[Optional[str], Optional[int]]: - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs: Dict[str, Any] = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for 
command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError as e: - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir( - parentdir_prefix: str, - root: str, - verbose: bool, -) -> Dict[str, Any]: - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. 
When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords: Dict[str, str] = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords( - keywords: Dict[str, str], - tag_prefix: str, - verbose: bool, -) -> Dict[str, Any]: - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
- TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs( - tag_prefix: str, - root: str, - verbose: bool, - runner: Callable = run_command -) -> Dict[str, Any]: - """Get version from 'git describe' in the root of the source tree. 
- - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces: Dict[str, Any] = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. 
If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces: Dict[str, Any]) -> str: - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces: Dict[str, Any]) -> str: - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces: Dict[str, Any]) -> str: - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces: Dict[str, Any]) -> str: - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 
0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%%d" %% (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces: Dict[str, Any]) -> str: - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces: Dict[str, Any]) -> str: - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions() -> Dict[str, Any]: - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. 
- - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords: Dict[str, str] = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords( - keywords: Dict[str, str], - tag_prefix: str, - verbose: bool, -) -> Dict[str, Any]: - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. 
We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs( - tag_prefix: str, - root: str, - verbose: bool, - runner: Callable = run_command -) -> Dict[str, Any]: - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. 
- # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces: Dict[str, Any] = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. 
- branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [versionfile_source] - if ipy: - files.append(ipy) - if "VERSIONEER_PEP518" not in globals(): - try: - my_path = __file__ - if my_path.endswith((".pyc", ".pyo")): - my_path = os.path.splitext(my_path)[0] + ".py" - versioneer_file = os.path.relpath(my_path) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - with open(".gitattributes", "r") as fobj: - for line in fobj: - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - break - except OSError: - pass - if not present: - with open(".gitattributes", "a+") as fobj: - fobj.write(f"{versionfile_source} export-subst\n") - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir( - parentdir_prefix: str, - root: str, - verbose: bool, -) -> Dict[str, Any]: - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.29) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. 
Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename: str) -> Dict[str, Any]: - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except OSError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: - """Write the given version number to the given _version.py file.""" - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces: Dict[str, Any]) -> str: - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces: Dict[str, Any]) -> str: - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces: Dict[str, Any]) -> str: - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces: Dict[str, Any]) -> str: - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 
0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces: Dict[str, Any]) -> str: - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces: Dict[str, Any]) -> str: - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose: bool = False) -> Dict[str, Any]: - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. 
- """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None` - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. 
- - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version() -> str: - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None): - """Get the custom setuptools subclasses used by Versioneer. - - If the package uses a different cmdclass (e.g. one from numpy), it - should be provide as an argument. - """ - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. 
A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - - cmds = {} if cmdclass is None else cmdclass.copy() - - # we add "version" to setuptools - from setuptools import Command - - class cmd_version(Command): - description = "report generated version string" - user_options: List[Tuple[str, str, str]] = [] - boolean_options: List[str] = [] - - def initialize_options(self) -> None: - pass - - def finalize_options(self) -> None: - pass - - def run(self) -> None: - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # pip install -e . and setuptool/editable_wheel will invoke build_py - # but the build_py command is not expected to copy any files. 
- - # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py: Any = cmds['build_py'] - else: - from setuptools.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - if getattr(self, "editable_mode", False): - # During editable installs `.py` and data files are - # not copied to build_lib - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if 'build_ext' in cmds: - _build_ext: Any = cmds['build_ext'] - else: - from setuptools.command.build_ext import build_ext as _build_ext - - class cmd_build_ext(_build_ext): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_ext.run(self) - if self.inplace: - # build_ext --inplace will only build extensions in - # build/lib<..> dir with no _version.py to write to. - # As in place builds will already have a _version.py - # in the module dir, we do not need to write one. - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if not cfg.versionfile_build: - return - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - if not os.path.exists(target_versionfile): - print(f"Warning: {target_versionfile} does not exist, skipping " - "version update. 
This can happen if you are running build_ext " - "without first running build_py.") - return - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_ext"] = cmd_build_ext - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe # type: ignore - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? 
- try: - from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore - except ImportError: - from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore - - class cmd_py2exe(_py2exe): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # sdist farms its file list building out to egg_info - if 'egg_info' in cmds: - _egg_info: Any = cmds['egg_info'] - else: - from setuptools.command.egg_info import egg_info as _egg_info - - class cmd_egg_info(_egg_info): - def find_sources(self) -> None: - # egg_info.find_sources builds the manifest list and writes it - # in one shot - super().find_sources() - - # Modify the filelist and normalize it - root = get_root() - cfg = get_config_from_root(root) - self.filelist.append('versioneer.py') - if cfg.versionfile_source: - # There are rare cases where versionfile_source might not be - # included by default, so we must be explicit - self.filelist.append(cfg.versionfile_source) - self.filelist.sort() - self.filelist.remove_duplicates() - - # The write method is hidden in the manifest_maker instance that - # generated the filelist and was thrown away - # We will instead replicate their final normalization (to unicode, - # and POSIX-style paths) - from setuptools import unicode_utils - normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') - for f in self.filelist.files] - - manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') - with 
open(manifest_filename, 'w') as fobj: - fobj.write('\n'.join(normalized)) - - cmds['egg_info'] = cmd_egg_info - - # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist: Any = cmds['sdist'] - else: - from setuptools.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self) -> None: - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir: str, files: List[str]) -> None: - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. 
- -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -OLD_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - -INIT_PY_SNIPPET = """ -from . import {0} -__version__ = {0}.get_versions()['version'] -""" - - -def do_setup() -> int: - """Do main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (OSError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - maybe_ipy: Optional[str] = ipy - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except OSError: - old = "" - module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] - snippet = INIT_PY_SNIPPET.format(module) - if OLD_SNIPPET in old: - print(" replacing boilerplate in %s" % ipy) - with open(ipy, "w") as f: - f.write(old.replace(OLD_SNIPPET, snippet)) - elif snippet not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(snippet) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - maybe_ipy = None - - # Make VCS-specific changes. 
For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. - do_vcs_install(cfg.versionfile_source, maybe_ipy) - return 0 - - -def scan_setup_py() -> int: - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . 
This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -def setup_command() -> NoReturn: - """Set up Versioneer and exit with appropriate error code.""" - errors = do_setup() - errors += scan_setup_py() - sys.exit(1 if errors else 0) - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - setup_command() From 4379343b0ffdf2051071d26bfd06fb18dc0c1a96 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Tue, 17 Sep 2024 09:37:14 -0700 Subject: [PATCH 44/59] house-keeping - gitignore _version - ignore commits for blame that contain black-reformatting (in particular we forgot to include #280) --- .git-blame-ignore-revs | 6 ++++++ .gitignore | 4 ++++ 2 files changed, 10 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..a8f4d24a --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,6 @@ +# initial blackify #280 +0095be256b369673526dff7ed754cc874f4128a2 + +# ensure all recent changes were black-formatted (prep for 2.4.0) +d7058a5a8e489a9213ed499e999bb226397bc708 + diff --git a/.gitignore b/.gitignore index 7b0ba78a..bd2d735a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,7 @@ __pycache__ *~ dist/ +# auto-generated files +_version.py + + From 263a4314eec7b8a6cd3153ed4dbd069a8b720022 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Thu, 19 Sep 2024 01:00:10 -0700 Subject: [PATCH 45/59] make repo title identical to JOSS paper title (#404) - close #402 - use same title in CITATION.cff for paper and project (required for the JOSS paper publication #71) - added DOI for preferred citation in note because that is the only hint that shows up on zenodo) --- CITATION.cff | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index f9030c7b..5105ba83 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -3,10 +3,11 @@ # Visit 
https://bit.ly/cffinit to generate yours today! cff-version: 1.2.0 -title: alchemlyb +title: 'alchemlyb: the simple alchemistry library' message: >- If you use this software, please cite it using the - preferred citation together with any other references. + preferred citation (JOSS DOI 10.21105/joss.06934) together + with any other references. type: software authors: - email: david@datryllic.com From f6ad696686bdb988396de7c61dcbb41fec907100 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Thu, 19 Sep 2024 01:05:56 -0700 Subject: [PATCH 46/59] [doc] tutorial: use alchemlyb.concat (#399) replace `pandas.concat()` with `alchemlyb.concat()` in the tutorial (given that we explicitly tell users to use it) --- docs/tutorial.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 1a044935..a60c3ba7 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -76,7 +76,7 @@ method. More estimators are available in the section on >>> import pandas as pd >>> mbar = MBAR() - >>> mbar.fit(pd.concat(decorrelated_u_nk_list)) + >>> mbar.fit(alchemlyb.concat(decorrelated_u_nk_list)) >>> mbar.delta_f_ 0.00 0.25 0.50 0.75 1.00 0.00 0.000000 1.613595 2.553407 2.983336 3.039517 @@ -152,4 +152,4 @@ also perform a set of analysis that allows the user to examine the quality of the estimation. -.. _alchemtest: https://github.com/alchemistry/alchemtest \ No newline at end of file +.. 
_alchemtest: https://github.com/alchemistry/alchemtest From 752cd640f7bfb6fbc2fedff9655626249ba5aa73 Mon Sep 17 00:00:00 2001 From: jac16 Date: Thu, 19 Sep 2024 11:58:11 -0400 Subject: [PATCH 47/59] Update version --- src/alchemlyb/parsing/lammps.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index cce131be..4af59ee6 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -14,7 +14,7 @@ LAMMPS, `fix adapt/fep `_ changes :math:`\lambda` and `compute fep `_ changes :math:`\lambda'`. -.. versionadded:: 2.4.0 +.. versionadded:: 2.4.1 """ @@ -59,7 +59,8 @@ def beta_from_units(T, units): ValueError If unit string is not recognized. - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.1 + """ if units == "real": # E in kcal/mol, T in K beta = 1 / (R_kJmol * kJ2kcal * T) @@ -105,7 +106,8 @@ def energy_from_units(units): ValueError If unit string is not recognized. - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.1 + """ if units == "real": # E in kcal/mol, Vol in Å^3, pressure in atm beta = constants.atm * constants.angstrom**3 / 1e+3 * kJ2kcal * constants.N_A @@ -153,7 +155,8 @@ def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): tuple[float] Tuple of lambda values - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.1 + """ name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) @@ -199,7 +202,8 @@ def _lambda_from_filename(filename, separator="_", index=-1, prec=4): float Lambda prime value - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.1 + """ name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) try: @@ -233,7 +237,8 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): lambda_pairs : list List of tuples containing two floats, lambda and lambda'. - .. versionadded:: 2.4.0 + .. 
versionadded:: 2.4.1 + """ lambda_pairs = [ @@ -387,7 +392,8 @@ def extract_u_nk_from_u_n( - temperature in K - energy unit in kT - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.1 + """ # Collect Files files = glob.glob(fep_files) @@ -572,7 +578,8 @@ def extract_u_nk( - temperature in K - energy unit in kT - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.1 + """ # Collect Files @@ -832,7 +839,8 @@ def extract_dHdl_from_u_n( - temperature in K or dimensionless - energy unit in kT - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.1 + """ # Collect Files @@ -941,7 +949,8 @@ def extract_dHdl( - temperature in K or dimensionless - energy unit in kT - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.1 + """ # Collect Files @@ -1124,7 +1133,8 @@ def extract_H( - temperature in K or dimensionless - energy unit in kT - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.1 + """ # Collect Files From 35c6de5c2ef5ca3005ddca6349c121852eebdd54 Mon Sep 17 00:00:00 2001 From: jac16 Date: Thu, 19 Sep 2024 12:01:54 -0400 Subject: [PATCH 48/59] Formatting --- CHANGES | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGES b/CHANGES index 9649341a..e0712674 100644 --- a/CHANGES +++ b/CHANGES @@ -12,8 +12,6 @@ The rules for this file: * accompany each entry with github issue/PR number (Issue #xyz) * release numbers follow "Semantic Versioning" https://semver.org --------------------------------------------------------------------------------- - ??/??/2024 jaclark5 * 2.4.1 From b8a8c9edfc8455c0bcae614b83e394e7fdce0280 Mon Sep 17 00:00:00 2001 From: jac16 Date: Thu, 19 Sep 2024 17:33:02 -0400 Subject: [PATCH 49/59] FutureWarning --- src/alchemlyb/parsing/lammps.py | 149 +++++++++++++++++--------------- 1 file changed, 79 insertions(+), 70 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 4af59ee6..3de4ffcb 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -86,6 +86,7 @@ def beta_from_units(T, units): 
return beta + def energy_from_units(units): """Output conversion factor for pressure * volume to LAMMPS energy units @@ -109,8 +110,8 @@ def energy_from_units(units): .. versionadded:: 2.4.1 """ - if units == "real": # E in kcal/mol, Vol in Å^3, pressure in atm - beta = constants.atm * constants.angstrom**3 / 1e+3 * kJ2kcal * constants.N_A + if units == "real": # E in kcal/mol, Vol in Å^3, pressure in atm + beta = constants.atm * constants.angstrom**3 / 1e3 * kJ2kcal * constants.N_A elif units == "lj": # Nondimensional E scaled by epsilon beta = 1 elif units == "metal": # E in eV, vol in Å^3, pressure in bar @@ -121,11 +122,15 @@ def energy_from_units(units): beta = 1 elif units == "electron": # E in Hartrees, vol in Bohr^3, pressure in Pa Hartree2J = 4.3597447222060e-8 - Bohr2m = 5.29177210544e+11 + Bohr2m = 5.29177210544e11 beta = 1 / Hartree2J / Bohr2m**3 - elif units == "micro": # E in picogram-micrometer^2/microsecond^2, vol in um^3, pressure in picogram/(micrometer-microsecond^2) + elif ( + units == "micro" + ): # E in picogram-micrometer^2/microsecond^2, vol in um^3, pressure in picogram/(micrometer-microsecond^2) beta = 1 - elif units == "nano": # E in attogram-nanometer^2/nanosecond^2, vol in nm^3, pressure in attogram/(nanometer-nanosecond^2) + elif ( + units == "nano" + ): # E in attogram-nanometer^2/nanosecond^2, vol in nm^3, pressure in attogram/(nanometer-nanosecond^2) beta = 1 else: raise ValueError( @@ -374,12 +379,12 @@ def extract_u_nk_from_u_n( Number of decimal places defined used in ``round()`` function. ensemble : str, default="nvt" Ensemble from which the given data was generated. Either "nvt" or "npt" is supported where values from NVT are - unaltered, while those from NPT are corrected + unaltered, while those from NPT are corrected pressure : float, default=None The pressure of the system in the NPT ensemble in units of energy / volume, where the units of energy and volume are as recorded in the LAMMPS dump file. 
column_volume : int, default=4 - The column for the volume in a LAMMPS dump file. + The column for the volume in a LAMMPS dump file. Returns ------- @@ -399,15 +404,19 @@ def extract_u_nk_from_u_n( files = glob.glob(fep_files) if not files: raise ValueError(f"No files have been found that match: {fep_files}") - + if ensemble == "npt": if pressure is None or not isinstance(pressure, float) or pressure < 0: - raise ValueError("In the npt ensemble, a pressure must be provided in the form of a positive float") + raise ValueError( + "In the npt ensemble, a pressure must be provided in the form of a positive float" + ) elif ensemble != "nvt": raise ValueError("Only ensembles of nvt or npt are supported.") else: if pressure is not None: - raise ValueError("There is no volume correction in the nvt ensemble, the pressure value will not be used.") + raise ValueError( + "There is no volume correction in the nvt ensemble, the pressure value will not be used." + ) beta = beta_from_units(T, units) @@ -451,11 +460,11 @@ def extract_u_nk_from_u_n( columns.append("volume") data.columns = columns lambda1_col = "fep-lambda" - data.loc[:, [lambda1_col]] = data[[lambda1_col]].apply(lambda x: round(x, prec)) + data.loc[:, [lambda1_col]] = data[[lambda1_col]].apply(lambda x: round(x, prec)) for lambda1 in list(data[lambda1_col].unique()): tmp_df = data.loc[data[lambda1_col] == lambda1] - + lr = tmp_df.shape[0] for lambda12 in lambda_values: if u_nk[u_nk[lambda1_col] == lambda1].shape[0] == 0: @@ -469,10 +478,11 @@ def extract_u_nk_from_u_n( ], axis=1, ) - if u_nk.shape[0] == 0: - u_nk = tmp_u_nk - else: - u_nk = pd.concat( [u_nk, tmp_u_nk], axis=0, sort=False) + u_nk = ( + pd.concat([u_nk, tmp_u_nk], axis=0, sort=False) + if len(u_nk) != 0 + else tmp_u_nk + ) if u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12][0] != 0: raise ValueError( @@ -480,17 +490,15 @@ def extract_u_nk_from_u_n( lambda1, lambda12 ) ) - - u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12] = ( - beta * ( - 
tmp_df["U_cross"]* (dependence(lambda12) / dependence(lambda1) - 1) - + tmp_df["U"] - ) - ) + + u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12] = beta * ( + tmp_df["U_cross"] * (dependence(lambda12) / dependence(lambda1) - 1) + + tmp_df["U"] + ) if ensemble == "npt": u_nk.loc[u_nk[lambda1_col] == lambda1, lambda12] += ( beta * pressure * tmp_df["volume"] * energy_from_units(units) - ) + ) u_nk.set_index(["time", "fep-lambda"], inplace=True) @@ -513,7 +521,6 @@ def extract_u_nk( column_volume=6, prec=4, force=False, - ): """Return reduced potentials `u_nk` from LAMMPS dump file(s). @@ -524,7 +531,7 @@ def extract_u_nk( or if columns_lambda2 is not None:: [0, columns_lambda1[0] columns_lambda1[1], column_lambda2, column_U, column_dU] - + If the simulation took place in the NPT ensemble, column_volume is appended to the end of this list. @@ -556,12 +563,12 @@ def extract_u_nk( In the case that ``column_lambda2 is not None``, this integer represents which lambda represents vdw interactions. ensemble : str, default="nvt" Ensemble from which the given data was generated. Either "nvt" or "npt" is supported where values from NVT are - unaltered, while those from NPT are corrected + unaltered, while those from NPT are corrected pressure : float, default=None The pressure of the system in the NPT ensemble in units of energy / volume, where the units of energy and volume are as recorded in the LAMMPS dump file. column_volume : int, default=4 - The column for the volume in a LAMMPS dump file. + The column for the volume in a LAMMPS dump file. prec : int, default=4 Number of decimal places defined used in ``round()`` function. 
force : bool, default=False @@ -586,15 +593,19 @@ def extract_u_nk( files = glob.glob(fep_files) if not files: raise ValueError(f"No files have been found that match: {fep_files}") - + if ensemble == "npt": if pressure is None or not isinstance(pressure, float) or pressure < 0: - raise ValueError("In the npt ensemble, a pressure must be provided in the form of a positive float") + raise ValueError( + "In the npt ensemble, a pressure must be provided in the form of a positive float" + ) elif ensemble != "nvt": raise ValueError("Only ensembles of nvt or npt are supported.") else: if pressure is not None: - raise ValueError("There is no volume correction in the nvt ensemble, the pressure value will not be used.") + raise ValueError( + "There is no volume correction in the nvt ensemble, the pressure value will not be used." + ) beta = beta_from_units(T, units) @@ -630,11 +641,13 @@ def extract_u_nk( else: u_nk = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda"]) lc = len(lambda_values) ** 2 - col_indices = [0] + list(columns_lambda1) + [column_lambda2, column_U, column_dU] + col_indices = ( + [0] + list(columns_lambda1) + [column_lambda2, column_U, column_dU] + ) if ensemble == "npt": col_indices.append(column_volume) - + for file in files: if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) @@ -665,7 +678,7 @@ def extract_u_nk( "vdw-lambda", "vdw-lambda2", "coul-lambda", - "U", + "U", "dU_nk", ] if ensemble == "npt": @@ -679,7 +692,7 @@ def extract_u_nk( "coul-lambda", "coul-lambda2", "vdw-lambda", - "U", + "U", "dU_nk", ] if ensemble == "npt": @@ -694,10 +707,10 @@ def extract_u_nk( data.loc[:, columns_a[1:] + [lambda1_2_col]] = data[ columns_a[1:] + [lambda1_2_col] ].apply(lambda x: round(x, prec)) - + for lambda1 in list(data[lambda1_col].unique()): tmp_df = data.loc[data[lambda1_col] == lambda1] - + for lambda12 in list(tmp_df[lambda1_2_col].unique()): tmp_df2 = tmp_df.loc[tmp_df[lambda1_2_col] == lambda12] @@ -713,17 +726,11 
@@ def extract_u_nk( ], axis=1, ) - if len(u_nk) != 0: - u_nk = pd.concat( - [ - u_nk, - tmp_df3, - ], - axis=0, - sort=False, - ) - else: - u_nk = tmp_df3 + u_nk = ( + pd.concat([u_nk, tmp_df3], axis=0, sort=False) + if len(u_nk) != 0 + else tmp_df3 + ) column_list = [ ii @@ -766,17 +773,14 @@ def extract_u_nk( ], ) ) - if ( - lambda1 == lambda12 - and not np.all(tmp_df2["dU_nk"][0] == 0) - ): + if lambda1 == lambda12 and not np.all(tmp_df2["dU_nk"][0] == 0): raise ValueError( f"The difference in dU should be zero when lambda = lambda', {lambda1} = {lambda12}," " Check that 'column_dU' was defined correctly." ) # calculate reduced potential u_k = dH + pV + U - u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = ( - beta * (tmp_df2["dU_nk"] + tmp_df2["U"]) + u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = beta * ( + tmp_df2["dU_nk"] + tmp_df2["U"] ) if ensemble == "npt": u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] += ( @@ -880,7 +884,7 @@ def extract_dHdl_from_u_n( data["fep"] = dependence(data.loc[:, "fep-lambda"]) * data.U data.drop(columns=["U"], inplace=True) - dHdl = pd.concat([dHdl, data], axis=0, sort=False) + dHdl = pd.concat([dHdl, data], axis=0, sort=False) if len(dHdl) != 0 else data dHdl.set_index(["time", "fep-lambda"], inplace=True) dHdl = dHdl.mul({"fep": beta}) @@ -1005,10 +1009,13 @@ def extract_dHdl( dHdl = pd.DataFrame( columns=["time", "coul-lambda", "vdw-lambda", "coul", "vdw"] ) - col_indices = ( - [0, column_lambda2, column_lambda1, column_dlambda1, column_dlambda2] - + list(columns_derivative) - ) + col_indices = [ + 0, + column_lambda2, + column_lambda1, + column_dlambda1, + column_dlambda2, + ] + list(columns_derivative) for file in files: if not os.path.isfile(file): @@ -1056,10 +1063,7 @@ def extract_dHdl( ], inplace=True, ) - if len(dHdl) != 0: - dHdl = pd.concat([dHdl, data], axis=0, sort=False) - else: - dHdl = data + dHdl = pd.concat([dHdl, data], axis=0, sort=False) if len(dHdl) != 0 else data if column_lambda2 is 
None: dHdl.set_index(["time", "fep-lambda"], inplace=True) @@ -1115,12 +1119,12 @@ def extract_H( "real", "si" ensemble : str, default="nvt" Ensemble from which the given data was generated. Either "nvt" or "npt" is supported where values from NVT are - unaltered, while those from NPT are corrected + unaltered, while those from NPT are corrected pressure : float, default=None The pressure of the system in the NPT ensemble in units of energy / volume, where the units of energy and volume are as recorded in the LAMMPS dump file. column_volume : int, default=4 - The column for the volume in a LAMMPS dump file. + The column for the volume in a LAMMPS dump file. Results ------- @@ -1144,13 +1148,17 @@ def extract_H( if ensemble == "npt": if pressure is None or not isinstance(pressure, float) or pressure < 0: - raise ValueError("In the npt ensemble, a pressure must be provided in the form of a positive float") + raise ValueError( + "In the npt ensemble, a pressure must be provided in the form of a positive float" + ) elif ensemble != "nvt": raise ValueError("Only ensembles of nvt or npt are supported.") else: if pressure is not None: - raise ValueError("There is no volume correction in the nvt ensemble, the pressure value will not be used.") - + raise ValueError( + "There is no volume correction in the nvt ensemble, the pressure value will not be used." 
+ ) + beta = beta_from_units(T, units) if not isinstance(column_lambda1, int): @@ -1176,7 +1184,7 @@ def extract_H( else: columns = ["time", "coul-lambda", "vdw-lambda", "u_n"] col_indices = [0, column_lambda2, column_lambda1, column_pe] - + if ensemble == "npt": col_indices.append(column_volume) df_H = pd.DataFrame(columns=columns) @@ -1210,7 +1218,8 @@ def extract_H( if ensemble == "npt": data["u_n"] += beta * pressure * data["volume"] * energy_from_units(units) del data["volume"] - df_H = pd.concat([df_H, data], axis=0, sort=False) + + df_H = pd.concat([df_H, data], axis=0, sort=False) if len(df_H) != 0 else data if column_lambda2 is None: df_H.set_index(["time", "fep-lambda"], inplace=True) From 98fd1971c93174aa58c933693dce33860dca479b Mon Sep 17 00:00:00 2001 From: jac16 Date: Tue, 24 Sep 2024 10:26:28 -0400 Subject: [PATCH 50/59] Added commenting extract_u_nk --- src/alchemlyb/parsing/lammps.py | 130 +++++++++++++++++--------------- 1 file changed, 68 insertions(+), 62 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 3de4ffcb..be6e94fc 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -112,7 +112,9 @@ def energy_from_units(units): """ if units == "real": # E in kcal/mol, Vol in Å^3, pressure in atm beta = constants.atm * constants.angstrom**3 / 1e3 * kJ2kcal * constants.N_A - elif units == "lj": # Nondimensional E scaled by epsilon + elif ( + units == "lj" + ): # Nondimensional E scaled by epsilon, vol in sigma^3, pressure in epsilon / sigma^3 beta = 1 elif units == "metal": # E in eV, vol in Å^3, pressure in bar beta = constants.bar * constants.angstrom**3 / constants.eV @@ -124,13 +126,11 @@ def energy_from_units(units): Hartree2J = 4.3597447222060e-8 Bohr2m = 5.29177210544e11 beta = 1 / Hartree2J / Bohr2m**3 - elif ( - units == "micro" - ): # E in picogram-micrometer^2/microsecond^2, vol in um^3, pressure in picogram/(micrometer-microsecond^2) + elif units == "micro": + # 
E in picogram-micrometer^2/microsecond^2, vol in um^3, pressure in picogram/(micrometer-microsecond^2) beta = 1 - elif ( - units == "nano" - ): # E in attogram-nanometer^2/nanosecond^2, vol in nm^3, pressure in attogram/(nanometer-nanosecond^2) + elif units == "nano": + # E in attogram-nanometer^2/nanosecond^2, vol in nm^3, pressure in attogram/(nanometer-nanosecond^2) beta = 1 else: raise ValueError( @@ -441,23 +441,24 @@ def extract_u_nk_from_u_n( u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) lc = len(lambda_values) col_indices = [0, column_lambda, column_U, column_U_cross] + columns = ["time", "fep-lambda", "U", "U_cross"] if ensemble == "npt": col_indices.append(column_volume) + columns.append("volume") for file in files: if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) tmp_data = pd.read_csv(file, sep=" ", comment="#", header=None) - lx = len(tmp_data.columns) - if [False for x in col_indices if x >= lx]: + ind = [x for x in col_indices if x > len(tmp_data.columns)] + if len(ind) > 0: raise ValueError( - "Number of columns, {}, is less than index: {}".format(lx, col_indices) + "Number of columns, {}, is less than indices: {}".format( + len(tmp_data.columns), ind + ) ) data = tmp_data.iloc[:, col_indices] - columns = ["time", "fep-lambda", "U", "U_cross"] - if ensemble == "npt": - columns.append("volume") data.columns = columns lambda1_col = "fep-lambda" data.loc[:, [lambda1_col]] = data[[lambda1_col]].apply(lambda x: round(x, prec)) @@ -634,88 +635,93 @@ def extract_u_nk( files, indices=indices, prec=prec, force=force ) - if column_lambda2 is None: + if column_lambda2 is None: # No second lambda state value u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) lc = len(lambda_values) + # columns to pull from lammps dump file col_indices = [0] + list(columns_lambda1) + [column_U, column_dU] - else: + # column names from lammps dump file + columns = ["time", "fep-lambda", "fep-lambda2", "U", 
"dU_nk"] + columns_a = ["time", "fep-lambda"] # u_nk cols 0, 1 + lambda1_col, lambda1_2_col = ( + "fep-lambda", + "fep-lambda2", + ) # cols for lambda, lambda' + columns_b = lambda_values # u_nk cols > 1 + else: # There is a frozen, second lambda state u_nk = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda"]) lc = len(lambda_values) ** 2 col_indices = ( [0] + list(columns_lambda1) + [column_lambda2, column_U, column_dU] - ) + ) # columns to pull from lammps dump file + if vdw_lambda == 1: + # column names from lammps dump file + columns = ["time", "vdw-lambda", "vdw-lambda2", "coul-lambda", "U", "dU_nk"] + lambda1_col, lambda1_2_col = ( + "vdw-lambda", + "vdw-lambda2", + ) # cols for lambda, lambda' + columns_b = [(lambda2, x) for x in lambda_values] # u_nk cols > 2 + elif vdw_lambda == 2: + # column names from lammps dump file + columns = [ + "time", + "coul-lambda", + "coul-lambda2", + "vdw-lambda", + "U", + "dU_nk", + ] + lambda1_col, lambda1_2_col = ( + "coul-lambda", + "coul-lambda2", + ) # cols for lambda, lambda' + columns_b = [(x, lambda2) for x in lambda_values] # u_nk cols > 2 + else: + raise ValueError(f"'vdw_lambda must be either 1 or 2, not: {vdw_lambda}'") + columns_a = ["time", "coul-lambda", "vdw-lambda"] # u_nk cols 0, 1, 2 if ensemble == "npt": col_indices.append(column_volume) + columns.append("volume") for file in files: if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) tmp_data = pd.read_csv(file, sep=" ", comment="#", header=None) - lx = len(tmp_data.columns) - if [False for x in col_indices if x >= lx]: + ind = [x for x in col_indices if x > len(tmp_data.columns)] + if len(ind) > 0: raise ValueError( - "Number of columns, {}, is less than index: {}".format(lx, col_indices) + "Number of columns, {}, is less than indices: {}".format( + len(tmp_data.columns), ind + ) ) data = tmp_data.iloc[:, col_indices] + data.columns = columns + + # Round values of lambda according to ``prec`` variable if column_lambda2 
is None: - columns = ["time", "fep-lambda", "fep-lambda2", "U", "dU_nk"] - if ensemble == "npt": - columns.append("volume") - data.columns = columns - lambda1_col, lambda1_2_col = "fep-lambda", "fep-lambda2" - columns_a = ["time", "fep-lambda"] - columns_b = lambda_values data.loc[:, [lambda1_col, lambda1_2_col]] = data[ [lambda1_col, lambda1_2_col] ].apply(lambda x: round(x, prec)) else: - columns_a = ["time", "coul-lambda", "vdw-lambda"] - if vdw_lambda == 1: - columns = [ - "time", - "vdw-lambda", - "vdw-lambda2", - "coul-lambda", - "U", - "dU_nk", - ] - if ensemble == "npt": - columns.append("volume") - data.columns = columns - lambda1_col, lambda1_2_col = "vdw-lambda", "vdw-lambda2" - columns_b = [(lambda2, x) for x in lambda_values] - elif vdw_lambda == 2: - columns = [ - "time", - "coul-lambda", - "coul-lambda2", - "vdw-lambda", - "U", - "dU_nk", - ] - if ensemble == "npt": - columns.append("volume") - data.columns = columns - lambda1_col, lambda1_2_col = "coul-lambda", "coul-lambda2" - columns_b = [(x, lambda2) for x in lambda_values] - else: - raise ValueError( - f"'vdw_lambda must be either 1 or 2, not: {vdw_lambda}'" - ) data.loc[:, columns_a[1:] + [lambda1_2_col]] = data[ columns_a[1:] + [lambda1_2_col] ].apply(lambda x: round(x, prec)) + # Iterate over lambda states (configurations equilibrated at certain lambda value) for lambda1 in list(data[lambda1_col].unique()): tmp_df = data.loc[data[lambda1_col] == lambda1] - + # Iterate over evaluated lambda' values at specific lambda state for lambda12 in list(tmp_df[lambda1_2_col].unique()): tmp_df2 = tmp_df.loc[tmp_df[lambda1_2_col] == lambda12] lr = tmp_df2.shape[0] if u_nk[u_nk[lambda1_col] == lambda1].shape[0] == 0: + # If u_nk doesn't contain rows for this lambda state, + # Create rows with values of zero to populate energies + # from lambda' values tmp_df3 = pd.concat( [ tmp_df2[columns_a], @@ -726,7 +732,7 @@ def extract_u_nk( ], axis=1, ) - u_nk = ( + u_nk = ( # If u_nk is empty, use this df, else 
concat pd.concat([u_nk, tmp_df3], axis=0, sort=False) if len(u_nk) != 0 else tmp_df3 @@ -759,7 +765,6 @@ def extract_u_nk( lambda1, lambda12 ) ) - if ( u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[0] != tmp_df2["dU_nk"].shape[0] @@ -778,6 +783,7 @@ def extract_u_nk( f"The difference in dU should be zero when lambda = lambda', {lambda1} = {lambda12}," " Check that 'column_dU' was defined correctly." ) + # calculate reduced potential u_k = dH + pV + U u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = beta * ( tmp_df2["dU_nk"] + tmp_df2["U"] From 1dd796b1ef930dcf03d44403cb6a1e33ff1872cf Mon Sep 17 00:00:00 2001 From: jac16 Date: Tue, 22 Oct 2024 14:34:30 -0400 Subject: [PATCH 51/59] Add names, bug fix dHdl --- src/alchemlyb/parsing/lammps.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index be6e94fc..fbe4fee1 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -502,6 +502,7 @@ def extract_u_nk_from_u_n( ) u_nk.set_index(["time", "fep-lambda"], inplace=True) + u_nk.name = "u_nk" return u_nk @@ -548,7 +549,7 @@ def extract_u_nk( in the computation of the potential energy. column_dU : int, default=4 Index for the column (column number minus one) representing the difference in potential energy between lambda states - column_U : int, default=4 + column_U : int, default=3 Index for the column (column number minus one) representing the potential energy column_lambda2 : int Index for column (column number minus one) for the unchanging value of lambda for another potential. 
@@ -635,6 +636,7 @@ def extract_u_nk( files, indices=indices, prec=prec, force=force ) + # Set-up u_nk and column names / indices if column_lambda2 is None: # No second lambda state value u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) lc = len(lambda_values) @@ -685,6 +687,7 @@ def extract_u_nk( col_indices.append(column_volume) columns.append("volume") + # Parse Files for file in files: if not os.path.isfile(file): raise ValueError("File not found: {}".format(file)) @@ -797,6 +800,7 @@ def extract_u_nk( u_nk.set_index(["time", "fep-lambda"], inplace=True) else: u_nk.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) + u_nk.name = "u_nk" return u_nk @@ -894,6 +898,7 @@ def extract_dHdl_from_u_n( dHdl.set_index(["time", "fep-lambda"], inplace=True) dHdl = dHdl.mul({"fep": beta}) + dHdl.name = "dH_dl" return dHdl @@ -1037,7 +1042,7 @@ def extract_dHdl( data = data.iloc[:, col_indices] if column_lambda2 is None: # dU_back: U(l-dl) - U(l); dU_forw: U(l+dl) - U(l) - data.columns = ["time", "fep-lambda", "dlambda", "dU_back", "dU_forw"] + data.columns = ["time", "fep-lambda", "dlambda", "dU_forw", "dU_back"] data["fep-lambda"] = data["fep-lambda"].apply(lambda x: round(x, prec)) data["fep"] = (data.dU_forw - data.dU_back) / (2 * data.dlambda) data.drop(columns=["dlambda", "dU_back", "dU_forw"], inplace=True) @@ -1050,8 +1055,8 @@ def extract_dHdl( "dlambda_coul", "dU_back_vdw", "dU_forw_vdw", - "dU_back_coul", "dU_forw_coul", + "dU_back_coul", ] data["vdw-lambda"] = data["vdw-lambda"].apply(lambda x: round(x, prec)) data["coul"] = (data.dU_forw_coul - data.dU_back_coul) / ( @@ -1077,6 +1082,7 @@ def extract_dHdl( else: dHdl.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) dHdl = dHdl.mul({"coul": beta, "vdw": beta}) + dHdl.name = "dH_dl" return dHdl From 0e2a312829c71bf3ea831c21bde6a748191108fd Mon Sep 17 00:00:00 2001 From: jac16 Date: Tue, 12 Nov 2024 14:28:50 -0500 Subject: [PATCH 52/59] Bug fix columns, add name 
attribute --- src/alchemlyb/parsing/lammps.py | 36 +++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index fbe4fee1..7cded23b 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -768,24 +768,42 @@ def extract_u_nk( lambda1, lambda12 ) ) + if lambda1 == lambda12 and not np.all(tmp_df2["dU_nk"][0] == 0): + raise ValueError( + f"The difference in dU should be zero when lambda = lambda', {lambda1} = {lambda12}," + " Check that 'column_dU' was defined correctly." + ) + if ( u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[0] != tmp_df2["dU_nk"].shape[0] ): - raise ValueError( - "Number of energy values in file, {}, N={}, inconsistent with previous files of length, {}.".format( + old_length = tmp_df2["dU_nk"].shape[0] + start = u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[0] + stop = u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[-1] + stepsize = ( + u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[1] + - u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[0] + ) + # Fill in gaps where 'time' is NaN + nan_index = np.unique(np.where(tmp_df2['time'].isnull())[0]) + for index in nan_index: + tmp_df2.loc[index, "time"] = tmp_df2.loc[index-1, "time"] + stepsize + # Add rows of NaN for timesteps that are missing + new_index = pd.Index(np.arange(start, stop+stepsize, stepsize), name="time") + tmp_df2 = tmp_df2.set_index("time").reindex(new_index).reset_index() + warnings.warn( + "Number of energy values in file, {}, N={}, inconsistent with previous".format( file, - tmp_df2["dU_nk"].shape[0], + old_length, + ) + + " files of length, {}. 
Adding NaN to row: {}".format( u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[ 0 ], + np.unique(np.where(tmp_df2.isna())[0]), ) ) - if lambda1 == lambda12 and not np.all(tmp_df2["dU_nk"][0] == 0): - raise ValueError( - f"The difference in dU should be zero when lambda = lambda', {lambda1} = {lambda12}," - " Check that 'column_dU' was defined correctly." - ) # calculate reduced potential u_k = dH + pV + U u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = beta * ( @@ -802,6 +820,8 @@ def extract_u_nk( u_nk.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) u_nk.name = "u_nk" + u_nk = u_nk.dropna() + return u_nk From ffe2d7727a8a4935a4dcd3a442331b5aba44a014 Mon Sep 17 00:00:00 2001 From: jac16 Date: Sun, 17 Nov 2024 09:27:46 -0500 Subject: [PATCH 53/59] Add testing --- .gitignore | 2 +- src/alchemlyb/parsing/lammps.py | 267 ++++--- src/alchemlyb/tests/parsing/test_lammps.py | 860 +++++++++++++++++++++ 3 files changed, 1012 insertions(+), 117 deletions(-) create mode 100644 src/alchemlyb/tests/parsing/test_lammps.py diff --git a/.gitignore b/.gitignore index bd2d735a..a86cc435 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ build docs/_build/ src/alchemlyb.egg-info/ __pycache__ -.coverage.* +.coverage* *~ dist/ diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 7cded23b..6560f945 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -67,17 +67,17 @@ def beta_from_units(T, units): elif units == "lj": # Nondimensional E and T scaled by epsilon beta = 1 / T elif units == "metal": # E in eV, T in K - beta = 1 / (R_kJmol * kJ2kcal * T) + beta = 1 / (constants.R * T / constants.eV / constants.Avogadro) elif units == "si": # E in J, T in K - beta = 1 / (constants.R * T * constants.physical_constants["electron volt"][0]) + beta = 1 / (constants.R * T / constants.Avogadro) elif units == "cgs": # E in ergs, T in K - beta = 1 / (constants.R * T * 1e-7) + beta = 1 / (constants.R * 
T / constants.Avogadro * 1e+7) elif units == "electron": # E in Hartrees, T in K - beta = 1 / (constants.R * T * constants.physical_constants["Hartree energy"][0]) - elif units == "micro": # E in epicogram-micrometer^2/microsecond^2, T in K - beta = 1 / (constants.R * T * 1e-15) + beta = 1 / (constants.R * T / constants.Avogadro / constants.physical_constants["Hartree energy"][0]) + elif units == "micro": # E in picogram-micrometer^2/microsecond^2, T in K + beta = 1 / (constants.R * T / constants.Avogadro * 1e+15) elif units == "nano": # E in attogram-nanometer^2/nanosecond^2, T in K - beta = 1 / (constants.R * T * 1e-21) + beta = 1 / (constants.R * T / constants.Avogadro * 1e+21) else: raise ValueError( "LAMMPS unit type, {}, is not supported. Supported types are: cgs, electron," @@ -111,34 +111,34 @@ def energy_from_units(units): """ if units == "real": # E in kcal/mol, Vol in Å^3, pressure in atm - beta = constants.atm * constants.angstrom**3 / 1e3 * kJ2kcal * constants.N_A + scaling_factor = constants.atm * constants.angstrom**3 / 1e3 * kJ2kcal * constants.N_A elif ( units == "lj" ): # Nondimensional E scaled by epsilon, vol in sigma^3, pressure in epsilon / sigma^3 - beta = 1 + scaling_factor = 1 elif units == "metal": # E in eV, vol in Å^3, pressure in bar - beta = constants.bar * constants.angstrom**3 / constants.eV + scaling_factor = constants.bar * constants.angstrom**3 / constants.eV elif units == "si": # E in J, vol in m^3, pressure in Pa - beta = 1 + scaling_factor = 1 elif units == "cgs": # E in ergs, vol in cm^3, pressure in dyne/cm^2 - beta = 1 + scaling_factor = 1 elif units == "electron": # E in Hartrees, vol in Bohr^3, pressure in Pa - Hartree2J = 4.3597447222060e-8 - Bohr2m = 5.29177210544e11 - beta = 1 / Hartree2J / Bohr2m**3 + Hartree2J = constants.physical_constants["Hartree energy"][0] + Bohr2m = constants.physical_constants["Bohr radius"][0] + scaling_factor = Bohr2m**3 / Hartree2J elif units == "micro": # E in 
picogram-micrometer^2/microsecond^2, vol in um^3, pressure in picogram/(micrometer-microsecond^2) - beta = 1 + scaling_factor = 1 elif units == "nano": # E in attogram-nanometer^2/nanosecond^2, vol in nm^3, pressure in attogram/(nanometer-nanosecond^2) - beta = 1 + scaling_factor= 1 else: raise ValueError( "LAMMPS unit type, {}, is not supported. Supported types are: cgs, electron," " lj. metal, micro, nano, real, si".format(units) ) - return beta + return scaling_factor def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): @@ -229,7 +229,7 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): Path(s) to fepout files to extract data from. indices : list[int], default=[1,2] In provided file names, using underscore as a separator, these indices mark the part of the filename - containing the lambda information. + containing the lambda information. If three values, implies a value of lambda2 is present. prec : int, default=4 Number of decimal places defined used in ``round()`` function. force : bool, default=False @@ -241,8 +241,10 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): List of tuples lambda values contained in the file. lambda_pairs : list List of tuples containing two floats, lambda and lambda'. + lambda2 : float + Value of lambda2 that is held constant. - .. versionadded:: 2.4.1 + .. versionadded:: 2.5.0 """ @@ -263,15 +265,13 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): "More than one value of lambda2 is present in the provided files." 
f" Restrict filename input to one of: {lambda2}" ) + else: + lambda2 = lambda2[0] else: lambda2 = None lambda_values = sorted(list(set([x for y in lambda_pairs for x in y]))) - check_float = [x for x in lambda_values if not _isfloat(x)] - if check_float: - raise ValueError( - "Lambda values must be convertible to floats: {}".format(check_float) - ) + if [x for x in lambda_values if round(float(x), prec) < 0]: raise ValueError("Lambda values must be positive: {}".format(lambda_values)) @@ -354,8 +354,8 @@ def extract_u_nk_from_u_n( Parameters ---------- - fep_files : str - Path to fepout file(s) to extract data from. Filenames and paths are + fep_files : str or list + If not a list, a str representing the path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. For example, "/path/to/files/something_*.txt". T : float Temperature in Kelvin at which the simulation was sampled. @@ -370,7 +370,7 @@ def extract_u_nk_from_u_n( Dependence of changing variable on the potential energy, which must be separable. index : int, default=-1 In provided file names, using underscore as a separator, these indices mark the part of the filename - containing the lambda information for :func:`alchemlyb.parsing._get_bar_lambdas`. If ``column_lambda2 != None`` + containing the lambda information for :func:`alchemlyb.parsing._lambda_from_filename`. If ``column_lambda2 != None`` this list should be of length three, where the last value represents the invariant lambda. units : str, default="real" Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". 
"metal", "micro", "nano", @@ -401,7 +401,10 @@ def extract_u_nk_from_u_n( """ # Collect Files - files = glob.glob(fep_files) + if isinstance(fep_files, list): + files = fep_files + else: + files = glob.glob(fep_files) if not files: raise ValueError(f"No files have been found that match: {fep_files}") @@ -539,8 +542,8 @@ def extract_u_nk( Parameters ---------- - fep_files : str - Path to fepout file(s) to extract data from. Filenames and paths are + fep_files : str or list + If not a list of filenames, represents the path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. For example, "/path/to/files/something_*_*.txt". T : float Temperature in Kelvin at which the simulation was sampled. @@ -592,7 +595,11 @@ def extract_u_nk( """ # Collect Files - files = glob.glob(fep_files) + if isinstance(fep_files, list): + files = fep_files + else: + files = glob.glob(fep_files) + if not files: raise ValueError(f"No files have been found that match: {fep_files}") @@ -603,11 +610,10 @@ def extract_u_nk( ) elif ensemble != "nvt": raise ValueError("Only ensembles of nvt or npt are supported.") - else: - if pressure is not None: - raise ValueError( - "There is no volume correction in the nvt ensemble, the pressure value will not be used." - ) + elif pressure is not None: + raise ValueError( + "There is no volume correction in the nvt ensemble, the pressure value will not be used." 
+ ) beta = beta_from_units(T, units) @@ -635,7 +641,11 @@ def extract_u_nk( lambda_values, _, lambda2 = _get_bar_lambdas( files, indices=indices, prec=prec, force=force ) - + + if column_lambda2 is not None and lambda2 is None: + raise ValueError("If column_lambda2 is defined, the length of `indices` should be 3 indicating the value of the " + "second value of lambda held constant.") + # Set-up u_nk and column names / indices if column_lambda2 is None: # No second lambda state value u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) @@ -652,7 +662,7 @@ def extract_u_nk( columns_b = lambda_values # u_nk cols > 1 else: # There is a frozen, second lambda state u_nk = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda"]) - lc = len(lambda_values) ** 2 + lc = len(lambda_values) col_indices = ( [0] + list(columns_lambda1) + [column_lambda2, column_U, column_dU] ) # columns to pull from lammps dump file @@ -693,10 +703,10 @@ def extract_u_nk( raise ValueError("File not found: {}".format(file)) tmp_data = pd.read_csv(file, sep=" ", comment="#", header=None) - ind = [x for x in col_indices if x > len(tmp_data.columns)] + ind = [x for x in col_indices if x >= len(tmp_data.columns)] if len(ind) > 0: raise ValueError( - "Number of columns, {}, is less than indices: {}".format( + "Number of columns, {}, is less than necessary for indices: {}".format( len(tmp_data.columns), ind ) ) @@ -715,9 +725,29 @@ def extract_u_nk( # Iterate over lambda states (configurations equilibrated at certain lambda value) for lambda1 in list(data[lambda1_col].unique()): + if not np.isnan(lambda1) and lambda1 not in lambda_values: + raise ValueError( + "Lambda value found in a file does not align with those in the filenames." + " Check that 'columns_lambda1[0]' or 'prec' are defined correctly. 
lambda" + " file: {}; lambda columns: {}".format(lambda1, lambda_values) + ) tmp_df = data.loc[data[lambda1_col] == lambda1] # Iterate over evaluated lambda' values at specific lambda state for lambda12 in list(tmp_df[lambda1_2_col].unique()): + column_list = [ + ii + for ii, x in enumerate(lambda_values) + if round(float(x), prec) == lambda12 + ] + if not column_list: + raise ValueError( + "Lambda value found in a file does not align with those in the filenames. " + "Check that 'columns_lambda1[1]' or 'prec' are defined correctly. lambda" + " file: {}; lambda columns: {}".format(lambda12, lambda_values) + ) + else: + column_name = lambda_values[column_list[0]] + tmp_df2 = tmp_df.loc[tmp_df[lambda1_2_col] == lambda12] lr = tmp_df2.shape[0] @@ -741,20 +771,6 @@ def extract_u_nk( else tmp_df3 ) - column_list = [ - ii - for ii, x in enumerate(lambda_values) - if round(float(x), prec) == lambda12 - ] - if not column_list: - raise ValueError( - "Lambda values found in files do not align with those in the filenames. " - "Check that 'columns_lambda' or 'prec' are defined correctly. lambda" - " file: {}; lambda columns: {}".format(lambda12, lambda_values) - ) - else: - column_name = lambda_values[column_list[0]] - if column_lambda2 is not None: column_name = ( (lambda2, column_name) @@ -762,7 +778,10 @@ def extract_u_nk( else (column_name, lambda2) ) - if u_nk.loc[u_nk[lambda1_col] == lambda1, column_name][0] != abs(0): + column_index = list(u_nk.columns).index(column_name) + row_indices = np.where(u_nk[lambda1_col] == lambda1)[0] + + if u_nk.iloc[row_indices, column_index][0] != 0: raise ValueError( "Energy values already available for lambda, {}, lambda', {}. Check for a duplicate file.".format( lambda1, lambda12 @@ -773,14 +792,12 @@ def extract_u_nk( f"The difference in dU should be zero when lambda = lambda', {lambda1} = {lambda12}," " Check that 'column_dU' was defined correctly." 
) - + if ( - u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[0] + u_nk.iloc[row_indices, column_index].shape[0] != tmp_df2["dU_nk"].shape[0] ): old_length = tmp_df2["dU_nk"].shape[0] - start = u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[0] - stop = u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[-1] stepsize = ( u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[1] - u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[0] @@ -789,16 +806,18 @@ def extract_u_nk( nan_index = np.unique(np.where(tmp_df2['time'].isnull())[0]) for index in nan_index: tmp_df2.loc[index, "time"] = tmp_df2.loc[index-1, "time"] + stepsize + # Add rows of NaN for timesteps that are missing - new_index = pd.Index(np.arange(start, stop+stepsize, stepsize), name="time") + new_index = pd.Index(list(u_nk["time"].iloc[row_indices]), name="time") tmp_df2 = tmp_df2.set_index("time").reindex(new_index).reset_index() + warnings.warn( "Number of energy values in file, {}, N={}, inconsistent with previous".format( file, old_length, ) + " files of length, {}. Adding NaN to row: {}".format( - u_nk.loc[u_nk[lambda1_col] == lambda1, column_name].shape[ + u_nk.iloc[row_indices, column_index].shape[ 0 ], np.unique(np.where(tmp_df2.isna())[0]), @@ -806,11 +825,11 @@ def extract_u_nk( ) # calculate reduced potential u_k = dH + pV + U - u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] = beta * ( + u_nk.iloc[row_indices, column_index] = beta * ( tmp_df2["dU_nk"] + tmp_df2["U"] ) if ensemble == "npt": - u_nk.loc[u_nk[lambda1_col] == lambda1, column_name] += ( + u_nk.iloc[row_indices, column_index] += ( beta * pressure * tmp_df2["volume"] * energy_from_units(units) ) @@ -843,8 +862,8 @@ def extract_dHdl_from_u_n( Parameters ---------- - fep_files : str - Path to fepout file(s) to extract data from. Filenames and paths are + fep_files : str or list + If not a list, represents a path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. 
For example, "/path/to/files/something_*.txt". T : float Temperature in Kelvin at which the simulation was sampled. @@ -878,7 +897,10 @@ def extract_dHdl_from_u_n( """ # Collect Files - files = glob.glob(fep_files) + if isinstance(fep_files, list): + files = fep_files + else: + files = glob.glob(fep_files) if not files: raise ValueError(f"No files have been found that match: {fep_files}") @@ -930,8 +952,8 @@ def extract_dHdl( column_lambda1=1, column_dlambda1=2, column_lambda2=None, - column_dlambda2=None, columns_derivative=[8, 7], + vdw_lambda=1, units="real", prec=4, ): @@ -950,8 +972,8 @@ def extract_dHdl( Parameters ---------- - fep_files : str - Path to fepout file(s) to extract data from. Filenames and paths are + fep_files : str or list + If not a list, represents the path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. For example, "/path/to/files/something_*_*.txt". T : float Temperature in Kelvin at which the simulation was sampled. @@ -962,11 +984,11 @@ def extract_dHdl( column_lambda2 : int, default=None Index for column (column number minus one) for a second value of lambda. If this array is ``None`` then we do not expect two lambda values. - column_dlambda2 : int, default=None - Index for column (column number minus one) for the change in lambda2. columns_derivative : list[int], default=[8, 7] Indices for columns (column number minus one) representing the the forward and backward derivative respectively. + vdw_lambda : int, default=1 + In the case that ``column_lambda2 is not None``, this integer represents which lambda represents vdw interactions. units : str, default="real" Unit system used in LAMMPS calculation. Currently supported: "cgs", "electron", "lj". 
"metal", "micro", "nano", "real", "si" @@ -989,7 +1011,10 @@ def extract_dHdl( """ # Collect Files - files = glob.glob(fep_files) + if isinstance(fep_files, list): + files = fep_files + else: + files = glob.glob(fep_files) if not files: raise ValueError("No files have been found that match: {}".format(fep_files)) @@ -997,28 +1022,22 @@ def extract_dHdl( if not isinstance(column_lambda1, int): raise ValueError( - "Provided column_lambda1 must be type 'int', instead: {}".format( + "Provided column_lambda1 must be type 'int', instead of {}".format( type(column_lambda1) ) ) if column_lambda2 is not None and not isinstance(column_lambda2, int): raise ValueError( - "Provided column_lambda2 must be type 'int', instead: {}".format( + "Provided column_lambda2 must be type 'int', instead of {}".format( type(column_lambda2) ) ) if not isinstance(column_dlambda1, int): raise ValueError( - "Provided column_dlambda1 must be type 'int', instead: {}".format( + "Provided column_dlambda1 must be type 'int', instead of {}".format( type(column_dlambda1) ) ) - if column_dlambda2 is not None and not isinstance(column_dlambda2, int): - raise ValueError( - "Provided column_dlambda2 must be type 'int', instead: {}".format( - type(column_dlambda2) - ) - ) if len(columns_derivative) != 2: raise ValueError( @@ -1037,15 +1056,19 @@ def extract_dHdl( dHdl = pd.DataFrame(columns=["time", "fep-lambda", "fep"]) col_indices = [0, column_lambda1, column_dlambda1] + list(columns_derivative) else: - dHdl = pd.DataFrame( - columns=["time", "coul-lambda", "vdw-lambda", "coul", "vdw"] - ) + if vdw_lambda == 1: + dHdl = pd.DataFrame( + columns=["time", "vdw-lambda", "coul-lambda", "vdw"] + ) + else: + dHdl = pd.DataFrame( + columns=["time", "coul-lambda", "vdw-lambda", "coul"] + ) col_indices = [ 0, - column_lambda2, column_lambda1, + column_lambda2, column_dlambda1, - column_dlambda2, ] + list(columns_derivative) for file in files: @@ -1067,31 +1090,35 @@ def extract_dHdl( data["fep"] = (data.dU_forw - 
data.dU_back) / (2 * data.dlambda) data.drop(columns=["dlambda", "dU_back", "dU_forw"], inplace=True) else: - data.columns = [ - "time", - "coul-lambda", - "vdw-lambda", - "dlambda_vdw", - "dlambda_coul", - "dU_back_vdw", - "dU_forw_vdw", - "dU_forw_coul", - "dU_back_coul", - ] - data["vdw-lambda"] = data["vdw-lambda"].apply(lambda x: round(x, prec)) - data["coul"] = (data.dU_forw_coul - data.dU_back_coul) / ( - 2 * data.dlambda_coul - ) - data["vdw"] = (data.dU_forw_vdw - data.dU_back_vdw) / (2 * data.dlambda_vdw) - data.drop( - columns=[ + if vdw_lambda == 1: + columns = [ + "time", + "vdw-lambda", + "coul-lambda", "dlambda_vdw", + "dU_back_vdw", + "dU_forw_vdw", + ] + data.columns = columns + data["vdw"] = (data.dU_forw_vdw - data.dU_back_vdw) / (2 * data.dlambda_vdw) + elif vdw_lambda == 2: + columns = [ + "time", + "coul-lambda", + "vdw-lambda", "dlambda_coul", "dU_back_coul", "dU_forw_coul", - "dU_back_vdw", - "dU_forw_vdw", - ], + ] + data.columns = columns + data["coul"] = (data.dU_forw_coul - data.dU_back_coul) / ( + 2 * data.dlambda_coul + ) + data["vdw-lambda"] = data["vdw-lambda"].apply(lambda x: round(x, prec)) + data["coul-lambda"] = data["coul-lambda"].apply(lambda x: round(x, prec)) + + data.drop( + columns=columns[3:], inplace=True, ) dHdl = pd.concat([dHdl, data], axis=0, sort=False) if len(dHdl) != 0 else data @@ -1101,7 +1128,11 @@ def extract_dHdl( dHdl = dHdl.mul({"fep": beta}) else: dHdl.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) - dHdl = dHdl.mul({"coul": beta, "vdw": beta}) + if vdw_lambda == 1: + dHdl = dHdl.mul({"vdw": beta}) + elif vdw_lambda == 2: + dHdl = dHdl.mul({"coul": beta}) + dHdl.name = "dH_dl" return dHdl @@ -1134,12 +1165,12 @@ def extract_H( Parameters ---------- - fep_files : str - Path to fepout file(s) to extract data from. Filenames and paths are + fep_files : str or list + If not a list, represents the path to fepout file(s) to extract data from. Filenames and paths are aggregated using `glob `_. 
For example, "/path/to/files/something_*_*.txt". T : float Temperature in Kelvin at which the simulation was sampled. - column_lambda1 : int, default=2 + column_lambda1 : int, default=1 Index for column (column number minus one) representing the lambda at which the system is equilibrated. column_pe : int, default=5 Index for column (column number minus one) representing the potential energy of the system. @@ -1174,7 +1205,10 @@ def extract_H( """ # Collect Files - files = glob.glob(fep_files) + if isinstance(fep_files, list): + files = fep_files + else: + files = glob.glob(fep_files) if not files: raise ValueError("No files have been found that match: {}".format(fep_files)) @@ -1195,17 +1229,17 @@ def extract_H( if not isinstance(column_lambda1, int): raise ValueError( - "Provided column_lambda1 must be type 'int', instead: {}".format( + "Provided column_lambda1 must be type 'int', instead of {}".format( type(column_lambda1) ) ) if not isinstance(column_pe, int): raise ValueError( - "Provided column_pe must be type 'int', instead: {}".format(type(column_pe)) + "Provided column_pe must be type 'int', instead of {}".format(type(column_pe)) ) if column_lambda2 is not None and not isinstance(column_lambda2, int): raise ValueError( - "Provided column_lambda2 must be type 'int', instead: {}".format( + "Provided column_lambda2 must be type 'int', instead of {}".format( type(column_lambda2) ) ) @@ -1219,6 +1253,7 @@ def extract_H( if ensemble == "npt": col_indices.append(column_volume) + df_H = pd.DataFrame(columns=columns) for file in files: diff --git a/src/alchemlyb/tests/parsing/test_lammps.py b/src/alchemlyb/tests/parsing/test_lammps.py new file mode 100644 index 00000000..7244a62b --- /dev/null +++ b/src/alchemlyb/tests/parsing/test_lammps.py @@ -0,0 +1,860 @@ +"""LAMMPS parser tests. 
+ +""" + +import copy +import pytest +from numpy.testing import assert_almost_equal + +from alchemlyb.parsing import lammps as lmp +from alchemtest.lammps import load_benzene, load_lj_dimer + +T_K = 300 +pressure = 1.01325 +kwargs_ti = {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [8,7]} +kwargs_mbar = { + "1_coul-off": {"indices": [2,3], "ensemble": "npt", "prec": 3, "pressure": pressure,}, + "2_vdw": {"column_dU": 5, "column_U": 4, "indices": [2,3], "ensemble": "npt", "prec": 3, + "pressure": pressure, "column_volume": 7,}, + "3_coul-on": { "column_dU": 4, "column_U": 3, "indices": [2,3], "prec": 3,}, +} +kwargs_ti = { + "1_coul-off": {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [8,7]}, + "2_vdw": {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [9,8]}, + "3_coul-on": {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [8,7]}, +} + +T_lj = 0.7 +P_lj = 0.01 + + +def test_beta_from_units(): + """Test value of beta in different units. + """ + + assert_almost_equal(lmp.beta_from_units(T_K, "real"), 1.6774, decimal=4) + assert_almost_equal(lmp.beta_from_units(T_lj, "lj"), 1.4286, decimal=4) + assert_almost_equal(lmp.beta_from_units(T_K, "metal"), 38.6817, decimal=4) + assert_almost_equal(lmp.beta_from_units(T_K, "si"), 2.414323505391137e+20, decimal=4) + assert_almost_equal(lmp.beta_from_units(T_K, "cgs"), 24143235053911.37, decimal=4) + assert_almost_equal(lmp.beta_from_units(T_K, "electron"), 1052.5834, decimal=4) + assert_almost_equal(lmp.beta_from_units(T_K, "micro"), 241432.3505, decimal=4) + assert_almost_equal(lmp.beta_from_units(T_K, "nano"), 0.24143, decimal=4) + +def test_energy_from_units(): + """Test value of beta in different units. 
+ """ + + assert_almost_equal(lmp.energy_from_units("real"), 1.4584e-05, decimal=4) + assert_almost_equal(lmp.energy_from_units("lj"), 1, decimal=4) + assert_almost_equal(lmp.energy_from_units("metal"), 6.2415e-07, decimal=4) + assert_almost_equal(lmp.energy_from_units("si"), 1, decimal=4) + assert_almost_equal(lmp.energy_from_units("cgs"), 1, decimal=4) + assert_almost_equal(lmp.energy_from_units("electron"), 3.3989309217431655e-14, decimal=4) + assert_almost_equal(lmp.energy_from_units("micro"), 1, decimal=4) + assert_almost_equal(lmp.energy_from_units("nano"), 1, decimal=4) + + +def test_u_nk(): + """Test that u_nk has the correct form when extracted from files.""" + dataset = load_benzene() + + for leg, filenames in dataset["data"]["mbar"].items(): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs_mbar[leg]) + + assert u_nk.index.names == ["time", "fep-lambda"] + if leg == "1_coul-off": + assert u_nk.shape == (30006, 6) + elif leg == "2_vdw": + assert u_nk.shape == (78681, 16) + elif leg == "3_coul-on": + assert u_nk.shape == (30006, 6) + assert u_nk.attrs["temperature"] == 300 + assert u_nk.attrs["energy_unit"] == "kT" + + +def test_u_nk_glob_error(): + """Test if files are not found. + """ + with pytest.raises( + ValueError, + match=r"No files have been found that match: test_\*.txt", + ): + u_nk = lmp.extract_u_nk( + "test_*.txt", T=300 + ) + + +def test_dHdl(): + """Test that dHdl has the correct form when extracted from files.""" + dataset = load_benzene() + + leg = "1_coul-off" + dHdl = lmp.extract_dHdl(dataset["data"]["ti"][leg], T=300, **kwargs_ti[leg]) + + assert dHdl.index.names == ["time", "fep-lambda"] + assert dHdl.shape == (30006, 1) + assert dHdl.attrs["temperature"] == 300 + assert dHdl.attrs["energy_unit"] == "kT" + + +def test_dHdl_glob_error(): + """Test if files are not found. 
+ """ + with pytest.raises( + ValueError, + match=r"No files have been found that match: test_\*.txt", + ): + u_nk = lmp.extract_dHdl( + "test_*.txt", T=300 + ) + + +class TestLammpsMbar: + + @staticmethod + @pytest.fixture(scope="class") + def data(): + dataset = load_benzene() + leg = "2_vdw" + filenames = dataset["data"]["mbar"][leg] + kwargs = kwargs_mbar[leg] + filenames2 = load_benzene()["data"]["mbar"]["1_coul-off"] + return filenames, kwargs, filenames2 + + + def test_u_nk_npt_error(self, data,): + """Test that initializing u_nk from NPT fails without pressure + """ + filenames, kwargs, _ = copy.deepcopy(data) + del kwargs["pressure"] + + with pytest.raises( + ValueError, + match=r"In the npt ensemble, a pressure must be provided in the form of a positive float", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + + def test_u_nk_unknown_ensemble(self, data,): + """Test that initializing u_nk that only known ensembles are accepted + """ + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["ensemble"] = "test" + with pytest.raises( + ValueError, + match=r"Only ensembles of nvt or npt are supported.", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + + def test_u_nk_nvt_with_pressure(self, data,): + """Test that initializing u_nk that only known ensembles are accepted + """ + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["ensemble"] = "nvt" + with pytest.raises( + ValueError, + match=r"There is no volume correction in the nvt ensemble, the pressure value will not be used.", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_wrong_cols(self, data,): + """Test length of columns + """ + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["columns_lambda1"] = [1, 2, 2] + with pytest.raises( + ValueError, + match=r"Provided columns for lambda1 must have a length of two, columns_lambda1: \[1, 2, 2\]", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_wrong_col_type(self, data,): + """Test 
columns_lambda type error + """ + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["columns_lambda1"] = ['test', 2] + with pytest.raises( + ValueError, + match=r"Provided column for columns_lambda1 must be type int. columns_lambda1: \['test', 2\]", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_col2_type_error(self, data,): + """Test column_lambda2 type error + """ + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["column_lambda2"] = 'test' + with pytest.raises( + ValueError, + match=r"Provided column for lambda must be type int. column_lambda2: test, type: ", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_col_dU_type_error(self, data,): + """Test column_dU type error + """ + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["column_dU"] = 'test' + with pytest.raises( + ValueError, + match=r"Provided column for dU_nk must be type int. column_dU: test, type: ", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_col_U_type_error(self, data,): + """Test column_U type error + """ + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["column_U"] = 'test' + with pytest.raises( + ValueError, + match=r"Provided column for U must be type int. 
column_U: test, type: ", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_col_lambda2_error(self, data,): + """Test that initializing u_nk that only known ensembles are accepted + """ + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["column_lambda2"] = 3 + + with pytest.raises( + ValueError, + match=r"If column_lambda2 is defined, the length of `indices` should be 3 indicating the value of the second value of lambda held constant.", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_col_lambda2(self, data,): + """Test that initializing u_nk that only known ensembles are accepted + """ + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["column_lambda2"] = 3 + kwargs["indices"].append(-1) + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + assert u_nk.index.names == ["time", "coul-lambda", "vdw-lambda"] + + kwargs["vdw_lambda"] = 2 + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + assert u_nk.index.names == ["time", "coul-lambda", "vdw-lambda"] + + with pytest.raises( + ValueError, + match=r"vdw_lambda must be either 1 or 2, not: 3", + ): + kwargs["vdw_lambda"] = 3 + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_error_no_file(self, data,): + """Test error no file + """ + filenames, kwargs, _ = copy.deepcopy(data) + filenames.append("test_test_1_1_test_1.txt") + + with pytest.raises( + ValueError, + match=r"File not found: test_test_1_1_test_1.txt", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_inconsistent_lambda(self, data): + """Test error no file + """ + + filenames, kwargs, filenames2 = copy.deepcopy(data) + filenames[:4] = filenames2[:4] + + with pytest.raises( + ValueError, + match=r"BAR calculation cannot be performed without the following lambda-lambda prime combinations: \[\(0.6, 0.5\)\]", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_error_nonfloat_lambda(self, data): + """Test nonfloat lambda + """ + + filenames, 
kwargs, filenames2 = copy.deepcopy(data) + kwargs["indices"] = [1,2] + + with pytest.raises( + ValueError, + match=r"Entry, 1 in filename cannot be converted to float: ", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + kwargs["indices"] = [2,1] + + with pytest.raises( + ValueError, + match=r"Entry, 1 in filename cannot be converted to float: ", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + kwargs["indices"] = [2,3,1] + + with pytest.raises( + ValueError, + match=r"Entry, 1 in filename cannot be converted to float: ", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_error_multiple_values_lambda2(self, data): + """Test multiple values of lambda + """ + + filenames, kwargs, filenames2 = copy.deepcopy(data) + kwargs["indices"] = [2,3,2] + + with pytest.raises( + ValueError, + match=r"More than one value of lambda2 is present in the provided files.", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_error_num_cols(self, data): + """Test error no file + """ + + filenames, kwargs, filenames2 = copy.deepcopy(data) + ind1 = [i for i, x in enumerate(filenames) if "_1_1" in x][0] + ind2 = [i for i, x in enumerate(filenames2) if "_1_1" in x][0] + filenames[ind1] = filenames2[ind2] + with pytest.raises( + ValueError, + match=r"Number of columns, 7, is less than necessary for indices: \[7\]", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + + def test_u_nk_error_prec_1(self, data): + """Test error no file + """ + + filenames, kwargs, filenames2 = copy.deepcopy(data) + kwargs["columns_lambda1"] = [1, 4] + with pytest.raises( + ValueError, + match=r"Lambda value found in a file does not align with those in the filenames. 
Check that 'columns_lambda1\[1\]'", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + + def test_u_nk_error_prec_2(self, data): + """Test error no file + """ + + filenames, kwargs, filenames2 = copy.deepcopy(data) + kwargs["columns_lambda1"] = [4, 1] + with pytest.raises( + ValueError, + match=r"Lambda value found in a file does not align with those in the filenames. Check that 'columns_lambda1\[0\]'", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + + def test_u_nk_error_duplicate_files(self, data): + """Test error when two files for the same data is present. + """ + + filenames, kwargs, _ = copy.deepcopy(data) + filenames.append(filenames[2]) + with pytest.raises( + ValueError, + match=r"Energy values already available for lambda,", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_error_dU(self, data): + """Test error when two files for the same data is present. + """ + + filenames, kwargs, _ = copy.deepcopy(data) + kwargs["column_dU"] = 1 + with pytest.raises( + ValueError, + match=r"The difference in dU should be zero when lambda = lambda'", + ): + u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + +class TestLammpsTI: + + @staticmethod + @pytest.fixture(scope="class") + def data(): + dataset = load_benzene() + leg = "2_vdw" + filenames = dataset["data"]["ti"][leg] + kwargs = kwargs_ti[leg] + return filenames, kwargs + + def test_dHdl_error_col_lam1(self, data): + """Test error when two files for the same data is present. + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_lambda1"] = "test" + with pytest.raises( + ValueError, + match=r"Provided column_lambda1 must be type 'int', instead of ", + ): + dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + + def test_dHdl_error_col_lam2(self, data): + """Test error when two files for the same data is present. 
+ """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_lambda2"] = "test" + with pytest.raises( + ValueError, + match=r"Provided column_lambda2 must be type 'int', instead of ", + ): + dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + + + def test_dHdl_error_col_dlam1(self, data): + """Test error when two files for the same data is present. + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_dlambda1"] = "test" + with pytest.raises( + ValueError, + match=r"Provided column_dlambda1 must be type 'int', instead of ", + ): + dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + + + def test_dHdl_error_col_dU(self, data): + """Test error when two files for the same data is present. + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["columns_derivative"] = [1.1] + with pytest.raises( + ValueError, + match=r"Provided columns for derivative values must have a length of two,", + ): + dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + + kwargs["columns_derivative"] = [1.1, 1] + with pytest.raises( + ValueError, + match=r"Provided column for columns_derivative must be type int. 
columns_derivative:", + ): + dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + + def test_lam2(self, data): + """Test two lambda values + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_lambda2"] = 3 + + dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + assert dHdl.index.names == ["time", "coul-lambda", "vdw-lambda"] + + kwargs["vdw_lambda"] = 2 + dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + assert dHdl.index.names == ["time", "coul-lambda", "vdw-lambda"] + + + def test_dHdl_error_no_file(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + filenames.append("test_test_1_1_test_1.txt") + + with pytest.raises( + ValueError, + match=r"File not found: test_test_1_1_test_1.txt", + ): + dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + + def test_dHdl_error_num_cols(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + filenames2 = load_benzene()["data"]["ti"]["1_coul-off"] + ind1 = [i for i, x in enumerate(filenames) if "_1" in x][0] + ind2 = [i for i, x in enumerate(filenames2) if "_1" in x][0] + filenames[ind1] = filenames2[ind2] + filenames.append("test_test_1_1_test_1.txt") + + with pytest.raises( + ValueError, + match=r"Number of columns, 9, is less than index: \[0, 1, 2, 9, 8\]", + ): + dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + +class TestLammpsH: + + @staticmethod + @pytest.fixture(scope="class") + def data(): + dataset = load_benzene() + leg = "2_vdw" + filenames = dataset["data"]["ti"][leg] + kwargs = {"column_lambda1": 1, "column_pe": 5, "ensemble": "npt", "pressure": pressure} + return filenames, kwargs + + + def test_H_error_no_glob(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + filenames = "test_test_1_1_test_1.txt" + + with pytest.raises( + ValueError, + match=r"No files have been found that match: test_test_1_1_test_1.txt", + ): + H = lmp.extract_H(filenames, 300, **kwargs) + 
+ + def test_H_error_no_file(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + filenames.append("test_test_1_1_test_1.txt") + + with pytest.raises( + ValueError, + match=r"File not found: test_test_1_1_test_1.txt", + ): + H = lmp.extract_H(filenames, 300, **kwargs) + + + def test_H_npt_nvt(self, data): + """Test ensembles + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + + H = lmp.extract_H(filenames, 300, **kwargs) + assert H.index.names == ["time", "fep-lambda"] + + kwargs["ensemble"] = "nvt" + del kwargs["pressure"] + H = lmp.extract_H(filenames, 300, **kwargs) + assert H.index.names == ["time", "fep-lambda"] + + kwargs["ensemble"] = "test" + with pytest.raises( + ValueError, + match=r"Only ensembles of nvt or npt are supported.", + ): + H = lmp.extract_H(filenames, 300, **kwargs) + + + def test_H_error_npt_nvt_pressure(self, data,): + """Test ensembles with incorrect pressure + """ + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + + kwargs["ensemble"] = "nvt" + with pytest.raises( + ValueError, + match=r"There is no volume correction in the nvt ensemble, the pressure value will not be used.", + ): + H = lmp.extract_H(filenames, 300, **kwargs) + + kwargs["ensemble"] = "npt" + del kwargs["pressure"] + with pytest.raises( + ValueError, + match=r"In the npt ensemble, a pressure must be provided in the form of a positive float", + ): + H = lmp.extract_H(filenames, 300, **kwargs) + + + def test_H_error_col_lam1(self, data): + """Test type col lambda 1 + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_lambda1"] = "test" + with pytest.raises( + ValueError, + match=r"Provided column_lambda1 must be type 'int', instead of", + ): + H = lmp.extract_H(filenames, 300, **kwargs) + + + def test_u_nk_error_col_lam2(self, data): + """Test error type col lambda 2 + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = 
copy.deepcopy(kwargs) + kwargs["column_lambda2"] = "test" + with pytest.raises( + ValueError, + match=r"Provided column_lambda2 must be type 'int', instead of ", + ): + H = lmp.extract_H(filenames, 300, **kwargs) + + + def test_H_error_col_pe(self, data): + """Test error col pe + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_pe"] = "test" + with pytest.raises( + ValueError, + match=r"Provided column_pe must be type 'int', instead of ", + ): + H = lmp.extract_H(filenames, 300, **kwargs) + + + def test_H_error_num_cols(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_volume"] = 10 + + with pytest.raises( + ValueError, + match=r"Number of columns, 10, is less than index: \[0, 1, 5, 10\]", + ): + H = lmp.extract_H(filenames, 300, **kwargs) + + +class TestLammpsLJDimer_TI: + + @staticmethod + @pytest.fixture(scope="class") + def data(): + dataset = load_lj_dimer() + filenames = dataset["data"] + kwargs = {"column_lambda": 1, "column_u_cross": 10, "units": "lj", "prec": 1} + return filenames, kwargs + + + def test_H_error_no_glob(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + filenames = "test_test_1_1_test_1.txt" + + with pytest.raises( + ValueError, + match=r"No files have been found that match: test_test_1_1_test_1.txt", + ): + H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) + + + def test_H_error_no_file(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + filenames = copy.deepcopy(filenames) + filenames.append("test_test_1_1_test_1.txt") + + with pytest.raises( + ValueError, + match=r"File not found: test_test_1_1_test_1.txt", + ): + H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) + + + def test_H_error_col_lam1(self, data): + """Test type col lambda 1 + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + 
kwargs["column_lambda"] = "test" + with pytest.raises( + ValueError, + match=r"Provided column for lambda must be type int. column_lambda:", + ): + H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) + + + def test_H_error_col_u_cross(self, data): + """Test error col u_cross + """ + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_u_cross"] = "test" + with pytest.raises( + ValueError, + match=r"Provided column for u_cross must be type int. column_u_cross:", + ): + H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) + + + def test_H_error_num_cols(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_u_cross"] = 12 + + with pytest.raises( + ValueError, + match=r"Number of columns, 11, is less than index: \[0, 1, 12\]", + ): + H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) + +class TestLammpsLJDimer_MBAR: + + @staticmethod + @pytest.fixture(scope="class") + def data(): + dataset = load_lj_dimer() + filenames = dataset["data"] + kwargs = {"column_lambda": 1, "column_U_cross": 10, "units": "lj", "prec": 1, + "pressure": P_lj, "ensemble": "npt", "column_U": 5} + return filenames, kwargs + + + def test_u_nk_npt_error(self, data,): + """Test that initializing u_nk from NPT fails without pressure + """ + filenames, kwargs = copy.deepcopy(data) + del kwargs["pressure"] + + with pytest.raises( + ValueError, + match=r"In the npt ensemble, a pressure must be provided in the form of a positive float", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + + def test_u_nk_unknown_ensemble(self, data,): + """Test that initializing u_nk that only known ensembles are accepted + """ + filenames, kwargs = copy.deepcopy(data) + kwargs["ensemble"] = "test" + with pytest.raises( + ValueError, + match=r"Only ensembles of nvt or npt are supported.", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + + def 
test_u_nk_nvt_with_pressure(self, data,): + """Test that initializing u_nk that only known ensembles are accepted + """ + filenames, kwargs = copy.deepcopy(data) + kwargs["ensemble"] = "nvt" + with pytest.raises( + ValueError, + match=r"There is no volume correction in the nvt ensemble, the pressure value will not be used.", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + + def test_u_nk_error_no_file(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + filenames = copy.deepcopy(filenames) + filenames.append("test_test_1_1_test_1.txt") + + with pytest.raises( + ValueError, + match=r"File not found: test_test_1_1_test_1.txt", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + + def test_u_nk_error_no_path(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + filenames = "test_test_1_1_test_1.txt" + + with pytest.raises( + ValueError, + match=r"No files have been found that match: test_test_1_1_test_1.txt", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + + def test_u_nk_col_type_error(self, data,): + """Test columns_lambda type error + """ + filenames, kwargs = copy.deepcopy(data) + kwargs["column_lambda"] = 'test' + with pytest.raises( + ValueError, + match=r"Provided column for lambda must be type int. column_u_lambda:", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + def test_u_nk_col_Ucross_type_error(self, data,): + """Test column_U_cross type error + """ + filenames, kwargs = copy.deepcopy(data) + kwargs["column_U_cross"] = 'test' + with pytest.raises( + ValueError, + match=r"Provided column for `U_cross` must be type int. 
column_U_cross:", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + def test_u_nk_col_U_type_error(self, data,): + """Test column_dU type error + """ + filenames, kwargs = copy.deepcopy(data) + kwargs["column_dU"] = 'test' + with pytest.raises( + ValueError, + match=r"Provided column for `U` must be type int. column_U:", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + def test_u_nk_col_U_type_error(self, data,): + """Test column_U type error + """ + filenames, kwargs = copy.deepcopy(data) + kwargs["column_U"] = 'test' + with pytest.raises( + ValueError, + match=r"Provided column for `U` must be type int. column_U: test, type: ", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + + def test_u_nk_error_duplicate_files(self, data): + """Test error when two files for the same data is present. + """ + + filenames, kwargs = copy.deepcopy(data) + filenames.append(filenames[2]) + with pytest.raises( + ValueError, + match=r"Energy values already available for lambda,", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + + def test_u_nk_error_num_cols(self, data,): + """Test error no file + """ + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_U_cross"] = 12 + + with pytest.raises( + ValueError, + match=r"Number of columns, 11, is less than indices: \[12\]", + ): + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) From a5db56199733eaa36033f0bd3e3d41dbf7307ae1 Mon Sep 17 00:00:00 2001 From: "Jennifer A. 
Clark" Date: Thu, 2 Jan 2025 08:58:07 -0500 Subject: [PATCH 54/59] Black, add handling for compressed files --- src/alchemlyb/tests/parsing/test_lammps.py | 505 +++++++++++---------- 1 file changed, 275 insertions(+), 230 deletions(-) diff --git a/src/alchemlyb/tests/parsing/test_lammps.py b/src/alchemlyb/tests/parsing/test_lammps.py index 7244a62b..425ae73b 100644 --- a/src/alchemlyb/tests/parsing/test_lammps.py +++ b/src/alchemlyb/tests/parsing/test_lammps.py @@ -11,17 +11,42 @@ T_K = 300 pressure = 1.01325 -kwargs_ti = {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [8,7]} +kwargs_ti = {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [8, 7]} kwargs_mbar = { - "1_coul-off": {"indices": [2,3], "ensemble": "npt", "prec": 3, "pressure": pressure,}, - "2_vdw": {"column_dU": 5, "column_U": 4, "indices": [2,3], "ensemble": "npt", "prec": 3, - "pressure": pressure, "column_volume": 7,}, - "3_coul-on": { "column_dU": 4, "column_U": 3, "indices": [2,3], "prec": 3,}, + "1_coul-off": { + "indices": [2, 3], + "ensemble": "npt", + "prec": 3, + "pressure": pressure, + }, + "2_vdw": { + "column_dU": 5, + "column_U": 4, + "indices": [2, 3], + "ensemble": "npt", + "prec": 3, + "pressure": pressure, + "column_volume": 7, + }, + "3_coul-on": { + "column_dU": 4, + "column_U": 3, + "indices": [2, 3], + "prec": 3, + }, } kwargs_ti = { - "1_coul-off": {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [8,7]}, - "2_vdw": {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [9,8]}, - "3_coul-on": {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [8,7]}, + "1_coul-off": { + "column_lambda1": 1, + "column_dlambda1": 2, + "columns_derivative": [8, 7], + }, + "2_vdw": {"column_lambda1": 1, "column_dlambda1": 2, "columns_derivative": [9, 8]}, + "3_coul-on": { + "column_lambda1": 1, + "column_dlambda1": 2, + "columns_derivative": [8, 7], + }, } T_lj = 0.7 @@ -29,28 +54,29 @@ def test_beta_from_units(): - """Test 
value of beta in different units. - """ + """Test value of beta in different units.""" assert_almost_equal(lmp.beta_from_units(T_K, "real"), 1.6774, decimal=4) assert_almost_equal(lmp.beta_from_units(T_lj, "lj"), 1.4286, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "metal"), 38.6817, decimal=4) - assert_almost_equal(lmp.beta_from_units(T_K, "si"), 2.414323505391137e+20, decimal=4) + assert_almost_equal(lmp.beta_from_units(T_K, "si"), 2.414323505391137e20, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "cgs"), 24143235053911.37, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "electron"), 1052.5834, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "micro"), 241432.3505, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "nano"), 0.24143, decimal=4) + def test_energy_from_units(): - """Test value of beta in different units. - """ + """Test value of beta in different units.""" assert_almost_equal(lmp.energy_from_units("real"), 1.4584e-05, decimal=4) assert_almost_equal(lmp.energy_from_units("lj"), 1, decimal=4) assert_almost_equal(lmp.energy_from_units("metal"), 6.2415e-07, decimal=4) assert_almost_equal(lmp.energy_from_units("si"), 1, decimal=4) assert_almost_equal(lmp.energy_from_units("cgs"), 1, decimal=4) - assert_almost_equal(lmp.energy_from_units("electron"), 3.3989309217431655e-14, decimal=4) + assert_almost_equal( + lmp.energy_from_units("electron"), 3.3989309217431655e-14, decimal=4 + ) assert_almost_equal(lmp.energy_from_units("micro"), 1, decimal=4) assert_almost_equal(lmp.energy_from_units("nano"), 1, decimal=4) @@ -74,15 +100,12 @@ def test_u_nk(): def test_u_nk_glob_error(): - """Test if files are not found. 
- """ + """Test if files are not found.""" with pytest.raises( ValueError, match=r"No files have been found that match: test_\*.txt", ): - u_nk = lmp.extract_u_nk( - "test_*.txt", T=300 - ) + u_nk = lmp.extract_u_nk("test_*.txt", T=300) def test_dHdl(): @@ -99,15 +122,12 @@ def test_dHdl(): def test_dHdl_glob_error(): - """Test if files are not found. - """ + """Test if files are not found.""" with pytest.raises( ValueError, match=r"No files have been found that match: test_\*.txt", ): - u_nk = lmp.extract_dHdl( - "test_*.txt", T=300 - ) + u_nk = lmp.extract_dHdl("test_*.txt", T=300) class TestLammpsMbar: @@ -122,23 +142,25 @@ def data(): filenames2 = load_benzene()["data"]["mbar"]["1_coul-off"] return filenames, kwargs, filenames2 - - def test_u_nk_npt_error(self, data,): - """Test that initializing u_nk from NPT fails without pressure - """ + def test_u_nk_npt_error( + self, + data, + ): + """Test that initializing u_nk from NPT fails without pressure""" filenames, kwargs, _ = copy.deepcopy(data) del kwargs["pressure"] - + with pytest.raises( ValueError, match=r"In the npt ensemble, a pressure must be provided in the form of a positive float", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - - - def test_u_nk_unknown_ensemble(self, data,): - """Test that initializing u_nk that only known ensembles are accepted - """ + + def test_u_nk_unknown_ensemble( + self, + data, + ): + """Test that initializing u_nk that only known ensembles are accepted""" filenames, kwargs, _ = copy.deepcopy(data) kwargs["ensemble"] = "test" with pytest.raises( @@ -146,11 +168,12 @@ def test_u_nk_unknown_ensemble(self, data,): match=r"Only ensembles of nvt or npt are supported.", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - - - def test_u_nk_nvt_with_pressure(self, data,): - """Test that initializing u_nk that only known ensembles are accepted - """ + + def test_u_nk_nvt_with_pressure( + self, + data, + ): + """Test that initializing u_nk that only known ensembles are 
accepted""" filenames, kwargs, _ = copy.deepcopy(data) kwargs["ensemble"] = "nvt" with pytest.raises( @@ -158,10 +181,12 @@ def test_u_nk_nvt_with_pressure(self, data,): match=r"There is no volume correction in the nvt ensemble, the pressure value will not be used.", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - - def test_u_nk_wrong_cols(self, data,): - """Test length of columns - """ + + def test_u_nk_wrong_cols( + self, + data, + ): + """Test length of columns""" filenames, kwargs, _ = copy.deepcopy(data) kwargs["columns_lambda1"] = [1, 2, 2] with pytest.raises( @@ -169,54 +194,64 @@ def test_u_nk_wrong_cols(self, data,): match=r"Provided columns for lambda1 must have a length of two, columns_lambda1: \[1, 2, 2\]", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - - def test_u_nk_wrong_col_type(self, data,): - """Test columns_lambda type error - """ + + def test_u_nk_wrong_col_type( + self, + data, + ): + """Test columns_lambda type error""" filenames, kwargs, _ = copy.deepcopy(data) - kwargs["columns_lambda1"] = ['test', 2] + kwargs["columns_lambda1"] = ["test", 2] with pytest.raises( ValueError, match=r"Provided column for columns_lambda1 must be type int. columns_lambda1: \['test', 2\]", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - - def test_u_nk_col2_type_error(self, data,): - """Test column_lambda2 type error - """ + + def test_u_nk_col2_type_error( + self, + data, + ): + """Test column_lambda2 type error""" filenames, kwargs, _ = copy.deepcopy(data) - kwargs["column_lambda2"] = 'test' + kwargs["column_lambda2"] = "test" with pytest.raises( ValueError, match=r"Provided column for lambda must be type int. 
column_lambda2: test, type: ", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - def test_u_nk_col_dU_type_error(self, data,): - """Test column_dU type error - """ + def test_u_nk_col_dU_type_error( + self, + data, + ): + """Test column_dU type error""" filenames, kwargs, _ = copy.deepcopy(data) - kwargs["column_dU"] = 'test' + kwargs["column_dU"] = "test" with pytest.raises( ValueError, match=r"Provided column for dU_nk must be type int. column_dU: test, type: ", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - def test_u_nk_col_U_type_error(self, data,): - """Test column_U type error - """ + def test_u_nk_col_U_type_error( + self, + data, + ): + """Test column_U type error""" filenames, kwargs, _ = copy.deepcopy(data) - kwargs["column_U"] = 'test' + kwargs["column_U"] = "test" with pytest.raises( ValueError, match=r"Provided column for U must be type int. column_U: test, type: ", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - - def test_u_nk_col_lambda2_error(self, data,): - """Test that initializing u_nk that only known ensembles are accepted - """ + + def test_u_nk_col_lambda2_error( + self, + data, + ): + """Test that initializing u_nk that only known ensembles are accepted""" filenames, kwargs, _ = copy.deepcopy(data) kwargs["column_lambda2"] = 3 @@ -225,20 +260,22 @@ def test_u_nk_col_lambda2_error(self, data,): match=r"If column_lambda2 is defined, the length of `indices` should be 3 indicating the value of the second value of lambda held constant.", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - - def test_u_nk_col_lambda2(self, data,): - """Test that initializing u_nk that only known ensembles are accepted - """ + + def test_u_nk_col_lambda2( + self, + data, + ): + """Test that initializing u_nk that only known ensembles are accepted""" filenames, kwargs, _ = copy.deepcopy(data) kwargs["column_lambda2"] = 3 kwargs["indices"].append(-1) u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) assert u_nk.index.names == ["time", 
"coul-lambda", "vdw-lambda"] - + kwargs["vdw_lambda"] = 2 u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) assert u_nk.index.names == ["time", "coul-lambda", "vdw-lambda"] - + with pytest.raises( ValueError, match=r"vdw_lambda must be either 1 or 2, not: 3", @@ -246,9 +283,11 @@ def test_u_nk_col_lambda2(self, data,): kwargs["vdw_lambda"] = 3 u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - def test_u_nk_error_no_file(self, data,): - """Test error no file - """ + def test_u_nk_error_no_file( + self, + data, + ): + """Test error no file""" filenames, kwargs, _ = copy.deepcopy(data) filenames.append("test_test_1_1_test_1.txt") @@ -257,55 +296,52 @@ def test_u_nk_error_no_file(self, data,): match=r"File not found: test_test_1_1_test_1.txt", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - + def test_u_nk_inconsistent_lambda(self, data): - """Test error no file - """ + """Test error no file""" filenames, kwargs, filenames2 = copy.deepcopy(data) filenames[:4] = filenames2[:4] with pytest.raises( ValueError, - match=r"BAR calculation cannot be performed without the following lambda-lambda prime combinations: \[\(0.6, 0.5\)\]", + match=r"BAR calculation cannot be performed without the following lambda-lambda prime combinations: \[\(0.95, 1.0\)\]", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - + def test_u_nk_error_nonfloat_lambda(self, data): - """Test nonfloat lambda - """ + """Test nonfloat lambda""" filenames, kwargs, filenames2 = copy.deepcopy(data) - kwargs["indices"] = [1,2] + kwargs["indices"] = [1, 2] with pytest.raises( ValueError, match=r"Entry, 1 in filename cannot be converted to float: ", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - - kwargs["indices"] = [2,1] + + kwargs["indices"] = [2, 1] with pytest.raises( ValueError, match=r"Entry, 1 in filename cannot be converted to float: ", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - - kwargs["indices"] = [2,3,1] + + kwargs["indices"] = [2, 3, 1] with pytest.raises( ValueError, 
match=r"Entry, 1 in filename cannot be converted to float: ", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - + def test_u_nk_error_multiple_values_lambda2(self, data): - """Test multiple values of lambda - """ + """Test multiple values of lambda""" filenames, kwargs, filenames2 = copy.deepcopy(data) - kwargs["indices"] = [2,3,2] + kwargs["indices"] = [2, 3, 2] with pytest.raises( ValueError, @@ -314,8 +350,7 @@ def test_u_nk_error_multiple_values_lambda2(self, data): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) def test_u_nk_error_num_cols(self, data): - """Test error no file - """ + """Test error no file""" filenames, kwargs, filenames2 = copy.deepcopy(data) ind1 = [i for i, x in enumerate(filenames) if "_1_1" in x][0] @@ -327,10 +362,8 @@ def test_u_nk_error_num_cols(self, data): ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - def test_u_nk_error_prec_1(self, data): - """Test error no file - """ + """Test error no file""" filenames, kwargs, filenames2 = copy.deepcopy(data) kwargs["columns_lambda1"] = [1, 4] @@ -340,10 +373,8 @@ def test_u_nk_error_prec_1(self, data): ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - def test_u_nk_error_prec_2(self, data): - """Test error no file - """ + """Test error no file""" filenames, kwargs, filenames2 = copy.deepcopy(data) kwargs["columns_lambda1"] = [4, 1] @@ -353,10 +384,8 @@ def test_u_nk_error_prec_2(self, data): ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - def test_u_nk_error_duplicate_files(self, data): - """Test error when two files for the same data is present. - """ + """Test error when two files for the same data is present.""" filenames, kwargs, _ = copy.deepcopy(data) filenames.append(filenames[2]) @@ -365,10 +394,9 @@ def test_u_nk_error_duplicate_files(self, data): match=r"Energy values already available for lambda,", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - + def test_u_nk_error_dU(self, data): - """Test error when two files for the same data is present. 
- """ + """Test error when two files for the same data is present.""" filenames, kwargs, _ = copy.deepcopy(data) kwargs["column_dU"] = 1 @@ -377,7 +405,8 @@ def test_u_nk_error_dU(self, data): match=r"The difference in dU should be zero when lambda = lambda'", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - + + class TestLammpsTI: @staticmethod @@ -390,8 +419,7 @@ def data(): return filenames, kwargs def test_dHdl_error_col_lam1(self, data): - """Test error when two files for the same data is present. - """ + """Test error when two files for the same data is present.""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -401,10 +429,9 @@ def test_dHdl_error_col_lam1(self, data): match=r"Provided column_lambda1 must be type 'int', instead of ", ): dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) - + def test_dHdl_error_col_lam2(self, data): - """Test error when two files for the same data is present. - """ + """Test error when two files for the same data is present.""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -415,10 +442,8 @@ def test_dHdl_error_col_lam2(self, data): ): dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) - def test_dHdl_error_col_dlam1(self, data): - """Test error when two files for the same data is present. - """ + """Test error when two files for the same data is present.""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -429,10 +454,8 @@ def test_dHdl_error_col_dlam1(self, data): ): dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) - def test_dHdl_error_col_dU(self, data): - """Test error when two files for the same data is present. 
- """ + """Test error when two files for the same data is present.""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -442,17 +465,16 @@ def test_dHdl_error_col_dU(self, data): match=r"Provided columns for derivative values must have a length of two,", ): dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) - - kwargs["columns_derivative"] = [1.1, 1] + + kwargs["columns_derivative"] = [1.1, 1] with pytest.raises( ValueError, match=r"Provided column for columns_derivative must be type int. columns_derivative:", ): dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) - + def test_lam2(self, data): - """Test two lambda values - """ + """Test two lambda values""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -460,15 +482,16 @@ def test_lam2(self, data): dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) assert dHdl.index.names == ["time", "coul-lambda", "vdw-lambda"] - + kwargs["vdw_lambda"] = 2 dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) assert dHdl.index.names == ["time", "coul-lambda", "vdw-lambda"] - - def test_dHdl_error_no_file(self, data,): - """Test error no file - """ + def test_dHdl_error_no_file( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) filenames.append("test_test_1_1_test_1.txt") @@ -478,9 +501,11 @@ def test_dHdl_error_no_file(self, data,): ): dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) - def test_dHdl_error_num_cols(self, data,): - """Test error no file - """ + def test_dHdl_error_num_cols( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) filenames2 = load_benzene()["data"]["ti"]["1_coul-off"] ind1 = [i for i, x in enumerate(filenames) if "_1" in x][0] @@ -494,6 +519,7 @@ def test_dHdl_error_num_cols(self, data,): ): dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) + class TestLammpsH: @staticmethod @@ -502,13 +528,19 @@ def data(): dataset = load_benzene() leg = "2_vdw" filenames = dataset["data"]["ti"][leg] - 
kwargs = {"column_lambda1": 1, "column_pe": 5, "ensemble": "npt", "pressure": pressure} + kwargs = { + "column_lambda1": 1, + "column_pe": 5, + "ensemble": "npt", + "pressure": pressure, + } return filenames, kwargs - - def test_H_error_no_glob(self, data,): - """Test error no file - """ + def test_H_error_no_glob( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) filenames = "test_test_1_1_test_1.txt" @@ -518,10 +550,11 @@ def test_H_error_no_glob(self, data,): ): H = lmp.extract_H(filenames, 300, **kwargs) - - def test_H_error_no_file(self, data,): - """Test error no file - """ + def test_H_error_no_file( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) filenames.append("test_test_1_1_test_1.txt") @@ -531,22 +564,20 @@ def test_H_error_no_file(self, data,): ): H = lmp.extract_H(filenames, 300, **kwargs) - def test_H_npt_nvt(self, data): - """Test ensembles - """ + """Test ensembles""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) - + H = lmp.extract_H(filenames, 300, **kwargs) assert H.index.names == ["time", "fep-lambda"] - + kwargs["ensemble"] = "nvt" del kwargs["pressure"] H = lmp.extract_H(filenames, 300, **kwargs) assert H.index.names == ["time", "fep-lambda"] - + kwargs["ensemble"] = "test" with pytest.raises( ValueError, @@ -554,10 +585,11 @@ def test_H_npt_nvt(self, data): ): H = lmp.extract_H(filenames, 300, **kwargs) - - def test_H_error_npt_nvt_pressure(self, data,): - """Test ensembles with incorrect pressure - """ + def test_H_error_npt_nvt_pressure( + self, + data, + ): + """Test ensembles with incorrect pressure""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -576,10 +608,8 @@ def test_H_error_npt_nvt_pressure(self, data,): ): H = lmp.extract_H(filenames, 300, **kwargs) - def test_H_error_col_lam1(self, data): - """Test type col lambda 1 - """ + """Test type col lambda 1""" filenames, kwargs = copy.deepcopy(data) kwargs 
= copy.deepcopy(kwargs) @@ -590,10 +620,8 @@ def test_H_error_col_lam1(self, data): ): H = lmp.extract_H(filenames, 300, **kwargs) - def test_u_nk_error_col_lam2(self, data): - """Test error type col lambda 2 - """ + """Test error type col lambda 2""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -604,10 +632,8 @@ def test_u_nk_error_col_lam2(self, data): ): H = lmp.extract_H(filenames, 300, **kwargs) - def test_H_error_col_pe(self, data): - """Test error col pe - """ + """Test error col pe""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -617,11 +643,12 @@ def test_H_error_col_pe(self, data): match=r"Provided column_pe must be type 'int', instead of ", ): H = lmp.extract_H(filenames, 300, **kwargs) - - def test_H_error_num_cols(self, data,): - """Test error no file - """ + def test_H_error_num_cols( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) kwargs["column_volume"] = 10 @@ -643,10 +670,11 @@ def data(): kwargs = {"column_lambda": 1, "column_u_cross": 10, "units": "lj", "prec": 1} return filenames, kwargs - - def test_H_error_no_glob(self, data,): - """Test error no file - """ + def test_H_error_no_glob( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) filenames = "test_test_1_1_test_1.txt" @@ -656,10 +684,11 @@ def test_H_error_no_glob(self, data,): ): H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) - - def test_H_error_no_file(self, data,): - """Test error no file - """ + def test_H_error_no_file( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) filenames = copy.deepcopy(filenames) filenames.append("test_test_1_1_test_1.txt") @@ -670,10 +699,8 @@ def test_H_error_no_file(self, data,): ): H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) - def test_H_error_col_lam1(self, data): - """Test type col lambda 1 - """ + """Test type col lambda 1""" 
filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -684,10 +711,8 @@ def test_H_error_col_lam1(self, data): ): H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) - def test_H_error_col_u_cross(self, data): - """Test error col u_cross - """ + """Test error col u_cross""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) @@ -697,11 +722,12 @@ def test_H_error_col_u_cross(self, data): match=r"Provided column for u_cross must be type int. column_u_cross:", ): H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) - - def test_H_error_num_cols(self, data,): - """Test error no file - """ + def test_H_error_num_cols( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) kwargs["column_u_cross"] = 12 @@ -711,7 +737,8 @@ def test_H_error_num_cols(self, data,): match=r"Number of columns, 11, is less than index: \[0, 1, 12\]", ): H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) - + + class TestLammpsLJDimer_MBAR: @staticmethod @@ -719,27 +746,36 @@ class TestLammpsLJDimer_MBAR: def data(): dataset = load_lj_dimer() filenames = dataset["data"] - kwargs = {"column_lambda": 1, "column_U_cross": 10, "units": "lj", "prec": 1, - "pressure": P_lj, "ensemble": "npt", "column_U": 5} + kwargs = { + "column_lambda": 1, + "column_U_cross": 10, + "units": "lj", + "prec": 1, + "pressure": P_lj, + "ensemble": "npt", + "column_U": 5, + } return filenames, kwargs - - def test_u_nk_npt_error(self, data,): - """Test that initializing u_nk from NPT fails without pressure - """ + def test_u_nk_npt_error( + self, + data, + ): + """Test that initializing u_nk from NPT fails without pressure""" filenames, kwargs = copy.deepcopy(data) del kwargs["pressure"] - + with pytest.raises( ValueError, match=r"In the npt ensemble, a pressure must be provided in the form of a positive float", ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - - - def 
test_u_nk_unknown_ensemble(self, data,): - """Test that initializing u_nk that only known ensembles are accepted - """ + + def test_u_nk_unknown_ensemble( + self, + data, + ): + """Test that initializing u_nk that only known ensembles are accepted""" filenames, kwargs = copy.deepcopy(data) kwargs["ensemble"] = "test" with pytest.raises( @@ -747,11 +783,12 @@ def test_u_nk_unknown_ensemble(self, data,): match=r"Only ensembles of nvt or npt are supported.", ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - - - def test_u_nk_nvt_with_pressure(self, data,): - """Test that initializing u_nk that only known ensembles are accepted - """ + + def test_u_nk_nvt_with_pressure( + self, + data, + ): + """Test that initializing u_nk that only known ensembles are accepted""" filenames, kwargs = copy.deepcopy(data) kwargs["ensemble"] = "nvt" with pytest.raises( @@ -760,10 +797,11 @@ def test_u_nk_nvt_with_pressure(self, data,): ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - - def test_u_nk_error_no_file(self, data,): - """Test error no file - """ + def test_u_nk_error_no_file( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) filenames = copy.deepcopy(filenames) filenames.append("test_test_1_1_test_1.txt") @@ -774,10 +812,11 @@ def test_u_nk_error_no_file(self, data,): ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - - def test_u_nk_error_no_path(self, data,): - """Test error no file - """ + def test_u_nk_error_no_path( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) filenames = "test_test_1_1_test_1.txt" @@ -787,55 +826,60 @@ def test_u_nk_error_no_path(self, data,): ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - - def test_u_nk_col_type_error(self, data,): - """Test columns_lambda type error - """ + def test_u_nk_col_type_error( + self, + data, + ): + """Test columns_lambda type error""" filenames, kwargs = copy.deepcopy(data) - 
kwargs["column_lambda"] = 'test' + kwargs["column_lambda"] = "test" with pytest.raises( ValueError, match=r"Provided column for lambda must be type int. column_u_lambda:", ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - - def test_u_nk_col_Ucross_type_error(self, data,): - """Test column_U_cross type error - """ + + def test_u_nk_col_Ucross_type_error( + self, + data, + ): + """Test column_U_cross type error""" filenames, kwargs = copy.deepcopy(data) - kwargs["column_U_cross"] = 'test' + kwargs["column_U_cross"] = "test" with pytest.raises( ValueError, match=r"Provided column for `U_cross` must be type int. column_U_cross:", ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - def test_u_nk_col_U_type_error(self, data,): - """Test column_dU type error - """ + def test_u_nk_col_U_type_error( + self, + data, + ): + """Test column_dU type error""" filenames, kwargs = copy.deepcopy(data) - kwargs["column_dU"] = 'test' + kwargs["column_dU"] = "test" with pytest.raises( ValueError, match=r"Provided column for `U` must be type int. column_U:", ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - def test_u_nk_col_U_type_error(self, data,): - """Test column_U type error - """ + def test_u_nk_col_U_type_error( + self, + data, + ): + """Test column_U type error""" filenames, kwargs = copy.deepcopy(data) - kwargs["column_U"] = 'test' + kwargs["column_U"] = "test" with pytest.raises( ValueError, match=r"Provided column for `U` must be type int. column_U: test, type: ", ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - def test_u_nk_error_duplicate_files(self, data): - """Test error when two files for the same data is present. 
- """ + """Test error when two files for the same data is present.""" filenames, kwargs = copy.deepcopy(data) filenames.append(filenames[2]) @@ -845,10 +889,11 @@ def test_u_nk_error_duplicate_files(self, data): ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) - - def test_u_nk_error_num_cols(self, data,): - """Test error no file - """ + def test_u_nk_error_num_cols( + self, + data, + ): + """Test error no file""" filenames, kwargs = copy.deepcopy(data) kwargs = copy.deepcopy(kwargs) kwargs["column_U_cross"] = 12 From 925f6b24c6f02ba723e1f0d0a93ecce67dceeb0c Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 2 Jan 2025 08:58:56 -0500 Subject: [PATCH 55/59] Black for lammps parsing and handling compressed files --- src/alchemlyb/parsing/lammps.py | 98 ++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 43 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index 6560f945..f3b8b63d 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -1,4 +1,4 @@ -""" Parsers for extracting alchemical data from `LAMMPS `_ output files. +r""" Parsers for extracting alchemical data from `LAMMPS `_ output files. For clarity, we would like to distinguish the difference between :math:`\lambda` and :math:`\lambda'`. 
We refer to :math:`\lambda` as the potential scaling of the equilibrated system, so that when this value is changed, the system undergoes another equilibration @@ -71,13 +71,18 @@ def beta_from_units(T, units): elif units == "si": # E in J, T in K beta = 1 / (constants.R * T / constants.Avogadro) elif units == "cgs": # E in ergs, T in K - beta = 1 / (constants.R * T / constants.Avogadro * 1e+7) + beta = 1 / (constants.R * T / constants.Avogadro * 1e7) elif units == "electron": # E in Hartrees, T in K - beta = 1 / (constants.R * T / constants.Avogadro / constants.physical_constants["Hartree energy"][0]) + beta = 1 / ( + constants.R + * T + / constants.Avogadro + / constants.physical_constants["Hartree energy"][0] + ) elif units == "micro": # E in picogram-micrometer^2/microsecond^2, T in K - beta = 1 / (constants.R * T / constants.Avogadro * 1e+15) + beta = 1 / (constants.R * T / constants.Avogadro * 1e15) elif units == "nano": # E in attogram-nanometer^2/nanosecond^2, T in K - beta = 1 / (constants.R * T / constants.Avogadro * 1e+21) + beta = 1 / (constants.R * T / constants.Avogadro * 1e21) else: raise ValueError( "LAMMPS unit type, {}, is not supported. Supported types are: cgs, electron," @@ -111,7 +116,9 @@ def energy_from_units(units): """ if units == "real": # E in kcal/mol, Vol in Å^3, pressure in atm - scaling_factor = constants.atm * constants.angstrom**3 / 1e3 * kJ2kcal * constants.N_A + scaling_factor = ( + constants.atm * constants.angstrom**3 / 1e3 * kJ2kcal * constants.N_A + ) elif ( units == "lj" ): # Nondimensional E scaled by epsilon, vol in sigma^3, pressure in epsilon / sigma^3 @@ -131,7 +138,7 @@ def energy_from_units(units): scaling_factor = 1 elif units == "nano": # E in attogram-nanometer^2/nanosecond^2, vol in nm^3, pressure in attogram/(nanometer-nanosecond^2) - scaling_factor= 1 + scaling_factor = 1 else: raise ValueError( "LAMMPS unit type, {}, is not supported. 
Supported types are: cgs, electron," @@ -142,7 +149,7 @@ def energy_from_units(units): def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): - """Pull a tuple representing the lambda values used, as defined by the filenames. + r"""Pull a tuple representing the lambda values used, as defined by the filenames. Parameters ---------- @@ -163,30 +170,30 @@ def _tuple_from_filename(filename, separator="_", indices=[2, 3], prec=4): .. versionadded:: 2.4.1 """ - + filename = filename.replace(".bz2", "").replace(".gz", "") name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) try: - float(name_array[indices[0]]) + value1 = float(name_array[indices[0]]) except ValueError: raise ValueError( f"Entry, {indices[0]} in filename cannot be converted to float: {name_array[indices[0]]}" ) try: - float(name_array[indices[1]]) + value2 = float(name_array[indices[1]]) except ValueError: raise ValueError( f"Entry, {indices[1]} in filename cannot be converted to float: {name_array[indices[1]]}" ) return ( - round(float(name_array[indices[0]]), prec), - round(float(name_array[indices[1]]), prec), + round(value1, prec), + round(value2, prec), ) def _lambda_from_filename(filename, separator="_", index=-1, prec=4): - """Pull the :math:`\lambda'` value, as defined by the filenames. + r"""Pull the :math:`\lambda'` value, as defined by the filenames. Here :math:`\lambda'` is the scaling value applied to a configuration that is equilibrated to a different value of :math:`\lambda`. @@ -210,14 +217,15 @@ def _lambda_from_filename(filename, separator="_", index=-1, prec=4): .. 
versionadded:: 2.4.1 """ + filename = filename.replace(".bz2", "").replace(".gz", "") name_array = ".".join(os.path.split(filename)[-1].split(".")[:-1]).split(separator) try: - float(name_array[index]) + value = float(name_array[index]) except: raise ValueError( f"Entry, {index} in filename cannot be converted to float: {name_array[index]}" ) - return round(float(name_array[index]), prec) + return round(value, prec) def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): @@ -599,7 +607,7 @@ def extract_u_nk( files = fep_files else: files = glob.glob(fep_files) - + if not files: raise ValueError(f"No files have been found that match: {fep_files}") @@ -641,11 +649,13 @@ def extract_u_nk( lambda_values, _, lambda2 = _get_bar_lambdas( files, indices=indices, prec=prec, force=force ) - + if column_lambda2 is not None and lambda2 is None: - raise ValueError("If column_lambda2 is defined, the length of `indices` should be 3 indicating the value of the " - "second value of lambda held constant.") - + raise ValueError( + "If column_lambda2 is defined, the length of `indices` should be 3 indicating the value of the " + "second value of lambda held constant." + ) + # Set-up u_nk and column names / indices if column_lambda2 is None: # No second lambda state value u_nk = pd.DataFrame(columns=["time", "fep-lambda"] + lambda_values) @@ -730,7 +740,7 @@ def extract_u_nk( "Lambda value found in a file does not align with those in the filenames." " Check that 'columns_lambda1[0]' or 'prec' are defined correctly. 
lambda" " file: {}; lambda columns: {}".format(lambda1, lambda_values) - ) + ) tmp_df = data.loc[data[lambda1_col] == lambda1] # Iterate over evaluated lambda' values at specific lambda state for lambda12 in list(tmp_df[lambda1_2_col].unique()): @@ -747,7 +757,7 @@ def extract_u_nk( ) else: column_name = lambda_values[column_list[0]] - + tmp_df2 = tmp_df.loc[tmp_df[lambda1_2_col] == lambda12] lr = tmp_df2.shape[0] @@ -780,7 +790,7 @@ def extract_u_nk( column_index = list(u_nk.columns).index(column_name) row_indices = np.where(u_nk[lambda1_col] == lambda1)[0] - + if u_nk.iloc[row_indices, column_index][0] != 0: raise ValueError( "Energy values already available for lambda, {}, lambda', {}. Check for a duplicate file.".format( @@ -792,34 +802,36 @@ def extract_u_nk( f"The difference in dU should be zero when lambda = lambda', {lambda1} = {lambda12}," " Check that 'column_dU' was defined correctly." ) - + if ( u_nk.iloc[row_indices, column_index].shape[0] != tmp_df2["dU_nk"].shape[0] ): old_length = tmp_df2["dU_nk"].shape[0] stepsize = ( - u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[1] + u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[1] - u_nk.loc[u_nk[lambda1_col] == lambda1, "time"].iloc[0] ) # Fill in gaps where 'time' is NaN - nan_index = np.unique(np.where(tmp_df2['time'].isnull())[0]) + nan_index = np.unique(np.where(tmp_df2["time"].isnull())[0]) for index in nan_index: - tmp_df2.loc[index, "time"] = tmp_df2.loc[index-1, "time"] + stepsize - + tmp_df2.loc[index, "time"] = ( + tmp_df2.loc[index - 1, "time"] + stepsize + ) + # Add rows of NaN for timesteps that are missing - new_index = pd.Index(list(u_nk["time"].iloc[row_indices]), name="time") + new_index = pd.Index( + list(u_nk["time"].iloc[row_indices]), name="time" + ) tmp_df2 = tmp_df2.set_index("time").reindex(new_index).reset_index() - + warnings.warn( "Number of energy values in file, {}, N={}, inconsistent with previous".format( file, old_length, ) + " files of length, {}. 
Adding NaN to row: {}".format( - u_nk.iloc[row_indices, column_index].shape[ - 0 - ], + u_nk.iloc[row_indices, column_index].shape[0], np.unique(np.where(tmp_df2.isna())[0]), ) ) @@ -840,7 +852,7 @@ def extract_u_nk( u_nk.name = "u_nk" u_nk = u_nk.dropna() - + return u_nk @@ -1057,13 +1069,9 @@ def extract_dHdl( col_indices = [0, column_lambda1, column_dlambda1] + list(columns_derivative) else: if vdw_lambda == 1: - dHdl = pd.DataFrame( - columns=["time", "vdw-lambda", "coul-lambda", "vdw"] - ) + dHdl = pd.DataFrame(columns=["time", "vdw-lambda", "coul-lambda", "vdw"]) else: - dHdl = pd.DataFrame( - columns=["time", "coul-lambda", "vdw-lambda", "coul"] - ) + dHdl = pd.DataFrame(columns=["time", "coul-lambda", "vdw-lambda", "coul"]) col_indices = [ 0, column_lambda1, @@ -1100,7 +1108,9 @@ def extract_dHdl( "dU_forw_vdw", ] data.columns = columns - data["vdw"] = (data.dU_forw_vdw - data.dU_back_vdw) / (2 * data.dlambda_vdw) + data["vdw"] = (data.dU_forw_vdw - data.dU_back_vdw) / ( + 2 * data.dlambda_vdw + ) elif vdw_lambda == 2: columns = [ "time", @@ -1235,7 +1245,9 @@ def extract_H( ) if not isinstance(column_pe, int): raise ValueError( - "Provided column_pe must be type 'int', instead of {}".format(type(column_pe)) + "Provided column_pe must be type 'int', instead of {}".format( + type(column_pe) + ) ) if column_lambda2 is not None and not isinstance(column_lambda2, int): raise ValueError( From 15316f2f6fc726ea7560dbf302b99e6c90d91f8d Mon Sep 17 00:00:00 2001 From: "Jennifer A. 
Clark" Date: Sat, 4 Jan 2025 22:51:16 -0500 Subject: [PATCH 56/59] Address code coverage --- src/alchemlyb/parsing/lammps.py | 40 +++------ src/alchemlyb/tests/parsing/test_lammps.py | 99 +++++++++++++++++++++- 2 files changed, 108 insertions(+), 31 deletions(-) diff --git a/src/alchemlyb/parsing/lammps.py b/src/alchemlyb/parsing/lammps.py index f3b8b63d..b9201282 100644 --- a/src/alchemlyb/parsing/lammps.py +++ b/src/alchemlyb/parsing/lammps.py @@ -29,14 +29,6 @@ from ..postprocessors.units import R_kJmol, kJ2kcal -def _isfloat(x): - try: - float(x) - return True - except ValueError: - return False - - def beta_from_units(T, units): """Output value of beta from temperature and units. @@ -86,7 +78,7 @@ def beta_from_units(T, units): else: raise ValueError( "LAMMPS unit type, {}, is not supported. Supported types are: cgs, electron," - " lj. metal, micro, nano, real, si".format(units) + " lj, metal, micro, nano, real, si".format(units) ) return beta @@ -142,7 +134,7 @@ def energy_from_units(units): else: raise ValueError( "LAMMPS unit type, {}, is not supported. Supported types are: cgs, electron," - " lj. 
metal, micro, nano, real, si".format(units) + " lj, metal, micro, nano, real, si".format(units) ) return scaling_factor @@ -288,7 +280,6 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): # Check for MBAR content missing_combinations_mbar = [] - missing_combinations_bar = [] for lambda_value, lambda_array in lambda_dict.items(): missing_combinations_mbar.extend( [(lambda_value, x) for x in lambda_values if x not in lambda_array] @@ -305,7 +296,6 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): # Check for BAR content missing_combinations_bar = [] - extra_combinations_bar = [] lambda_values.sort() for ind, (lambda_value, lambda_array) in enumerate(lambda_dict.items()): if ind == 0: @@ -322,9 +312,6 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): missing_combinations_bar.extend( [(lambda_value, x) for x in tmp_array if x not in lambda_array] ) - extra_combinations_bar.extend( - [(lambda_value, x) for x in lambda_array if x not in tmp_array] - ) if missing_combinations_bar and not force: raise ValueError( @@ -332,13 +319,6 @@ def _get_bar_lambdas(fep_files, indices=[2, 3], prec=4, force=False): missing_combinations_bar ) ) - if extra_combinations_bar and not force: - warnings.warn( - "The following combinations of lambda and lambda prime are extra and being discarded for BAR analysis: {}".format( - extra_combinations_bar - ) - ) - lambda_pairs = [x for x in lambda_pairs if x not in extra_combinations_bar] return lambda_values, lambda_pairs, lambda2 @@ -618,10 +598,11 @@ def extract_u_nk( ) elif ensemble != "nvt": raise ValueError("Only ensembles of nvt or npt are supported.") - elif pressure is not None: - raise ValueError( - "There is no volume correction in the nvt ensemble, the pressure value will not be used." - ) + else: + if pressure is not None: + raise ValueError( + "There is no volume correction in the nvt ensemble, the pressure value will not be used." 
+ ) beta = beta_from_units(T, units) @@ -1124,6 +1105,11 @@ def extract_dHdl( data["coul"] = (data.dU_forw_coul - data.dU_back_coul) / ( 2 * data.dlambda_coul ) + else: + raise ValueError( + f"'vdw_lambda must be either 1 or 2, not: {vdw_lambda}'" + ) + data["vdw-lambda"] = data["vdw-lambda"].apply(lambda x: round(x, prec)) data["coul-lambda"] = data["coul-lambda"].apply(lambda x: round(x, prec)) @@ -1140,7 +1126,7 @@ def extract_dHdl( dHdl.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True) if vdw_lambda == 1: dHdl = dHdl.mul({"vdw": beta}) - elif vdw_lambda == 2: + else: dHdl = dHdl.mul({"coul": beta}) dHdl.name = "dH_dl" diff --git a/src/alchemlyb/tests/parsing/test_lammps.py b/src/alchemlyb/tests/parsing/test_lammps.py index 425ae73b..37c65520 100644 --- a/src/alchemlyb/tests/parsing/test_lammps.py +++ b/src/alchemlyb/tests/parsing/test_lammps.py @@ -64,6 +64,11 @@ def test_beta_from_units(): assert_almost_equal(lmp.beta_from_units(T_K, "electron"), 1052.5834, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "micro"), 241432.3505, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "nano"), 0.24143, decimal=4) + with pytest.raises( + ValueError, + match=r"Supported types are: cgs, electron,", + ): + _ = lmp.beta_from_units(T_K, "not a unit") def test_energy_from_units(): @@ -79,6 +84,11 @@ def test_energy_from_units(): ) assert_almost_equal(lmp.energy_from_units("micro"), 1, decimal=4) assert_almost_equal(lmp.energy_from_units("nano"), 1, decimal=4) + with pytest.raises( + ValueError, + match=r"Supported types are: cgs, electron,", + ): + _ = lmp.energy_from_units("not a unit") def test_u_nk(): @@ -297,8 +307,22 @@ def test_u_nk_error_no_file( ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) - def test_u_nk_inconsistent_lambda(self, data): - """Test error no file""" + def test_u_nk_error_neg_lambda( + self, + data, + ): + """Test error lambda is negative""" + filenames, kwargs, _ = copy.deepcopy(data) + 
filenames.append("test_test_-1_1_test_1.txt") + + with pytest.raises( + ValueError, + match=r"Lambda values must be positive:", + ): + _ = lmp.extract_u_nk(filenames, 300, **kwargs) + + def test_u_nk_error_inconsistent_lambda(self, data): + """Test error inconsistent lambda values in filenames""" filenames, kwargs, filenames2 = copy.deepcopy(data) filenames[:4] = filenames2[:4] @@ -309,6 +333,18 @@ def test_u_nk_inconsistent_lambda(self, data): ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) + def test_u_nk_force_inconsistent_lambda_missing(self, data): + """Test warning force inconsistent lambda values in filenames""" + + filenames, kwargs, _ = copy.deepcopy(data) + filenames = filenames[:4] + + with pytest.warns( + UserWarning, + match="The following combinations of lambda and lambda prime are missing", + ): + _ = lmp.extract_u_nk(filenames, 300, force=True, **kwargs) + def test_u_nk_error_nonfloat_lambda(self, data): """Test nonfloat lambda""" @@ -487,6 +523,19 @@ def test_lam2(self, data): dHdl = lmp.extract_dHdl(filenames, 300, **kwargs) assert dHdl.index.names == ["time", "coul-lambda", "vdw-lambda"] + def test_vdw_lambda_over_2(self, data): + """Test when vdw_lambda is not a valid input""" + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_lambda2"] = 3 + kwargs["vdw_lambda"] = 3 + with pytest.raises( + ValueError, + match=r"vdw_lambda must be either 1 or 2, not: 3", + ): + _ = lmp.extract_dHdl(filenames, 300, **kwargs) + def test_dHdl_error_no_file( self, data, @@ -620,7 +669,7 @@ def test_H_error_col_lam1(self, data): ): H = lmp.extract_H(filenames, 300, **kwargs) - def test_u_nk_error_col_lam2(self, data): + def test_H_error_col_lam2(self, data): """Test error type col lambda 2""" filenames, kwargs = copy.deepcopy(data) @@ -632,6 +681,24 @@ def test_u_nk_error_col_lam2(self, data): ): H = lmp.extract_H(filenames, 300, **kwargs) + def test_H(self, data): + """Test error type col lambda 2""" + + filenames, 
kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + + H = lmp.extract_H(filenames, 300, **kwargs) + + def test_H_lam2(self, data): + """Test two lambda values""" + + filenames, kwargs = copy.deepcopy(data) + kwargs = copy.deepcopy(kwargs) + kwargs["column_lambda2"] = 3 + + dHdl = lmp.extract_H(filenames, 300, **kwargs) + assert dHdl.index.names == ["time", "coul-lambda", "vdw-lambda"] + def test_H_error_col_pe(self, data): """Test error col pe""" @@ -738,6 +805,17 @@ def test_H_error_num_cols( ): H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) + def test_H( + self, + data, + ): + """Test full run through""" + filenames, kwargs = copy.deepcopy(data) + + H = lmp.extract_dHdl_from_u_n(filenames, T_lj, **kwargs) + + assert H.shape == (11011, 1) + class TestLammpsLJDimer_MBAR: @@ -788,7 +866,7 @@ def test_u_nk_nvt_with_pressure( self, data, ): - """Test that initializing u_nk that only known ensembles are accepted""" + """Test that initializing u_nk with nvt and pressure""" filenames, kwargs = copy.deepcopy(data) kwargs["ensemble"] = "nvt" with pytest.raises( @@ -797,6 +875,19 @@ def test_u_nk_nvt_with_pressure( ): u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + def test_u_nk_nvt( + self, + data, + ): + """Test that initializing u_nk""" + filenames, kwargs = copy.deepcopy(data) + kwargs["ensemble"] = "nvt" + del kwargs["pressure"] + + u_nk = lmp.extract_u_nk_from_u_n(filenames, T_lj, **kwargs) + + assert u_nk.shape == (11011, 11) + def test_u_nk_error_no_file( self, data, From 13f9215ae24b8b21f8e9841156d12fe89f6d51ca Mon Sep 17 00:00:00 2001 From: "Jennifer A. 
Clark" Date: Sat, 4 Jan 2025 23:30:22 -0500 Subject: [PATCH 57/59] Address test issues --- src/alchemlyb/tests/parsing/test_lammps.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/tests/parsing/test_lammps.py b/src/alchemlyb/tests/parsing/test_lammps.py index 37c65520..a768e54f 100644 --- a/src/alchemlyb/tests/parsing/test_lammps.py +++ b/src/alchemlyb/tests/parsing/test_lammps.py @@ -4,7 +4,7 @@ import copy import pytest -from numpy.testing import assert_almost_equal +from numpy.testing import assert_almost_equal, assert_approx_equal from alchemlyb.parsing import lammps as lmp from alchemtest.lammps import load_benzene, load_lj_dimer @@ -60,7 +60,7 @@ def test_beta_from_units(): assert_almost_equal(lmp.beta_from_units(T_lj, "lj"), 1.4286, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "metal"), 38.6817, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "si"), 2.414323505391137e20, decimal=4) - assert_almost_equal(lmp.beta_from_units(T_K, "cgs"), 24143235053911.37, decimal=4) + assert_approx_equal(lmp.beta_from_units(T_K, "cgs"), 24143235053911.37, significant=7) assert_almost_equal(lmp.beta_from_units(T_K, "electron"), 1052.5834, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "micro"), 241432.3505, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "nano"), 0.24143, decimal=4) @@ -325,11 +325,11 @@ def test_u_nk_error_inconsistent_lambda(self, data): """Test error inconsistent lambda values in filenames""" filenames, kwargs, filenames2 = copy.deepcopy(data) - filenames[:4] = filenames2[:4] + filenames = filenames[:244] with pytest.raises( ValueError, - match=r"BAR calculation cannot be performed without the following lambda-lambda prime combinations: \[\(0.95, 1.0\)\]", + match=r"BAR calculation cannot be performed without the following lambda-lambda prime combinations: \[\(0.75, 0.7\)\]", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs) From 52c047a5232757f932fd8cef1b7483c5db00e290 Mon Sep 
17 00:00:00 2001 From: "Jennifer A. Clark" Date: Sat, 4 Jan 2025 23:39:43 -0500 Subject: [PATCH 58/59] Address test issues --- src/alchemlyb/tests/parsing/test_lammps.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/tests/parsing/test_lammps.py b/src/alchemlyb/tests/parsing/test_lammps.py index a768e54f..3f696fec 100644 --- a/src/alchemlyb/tests/parsing/test_lammps.py +++ b/src/alchemlyb/tests/parsing/test_lammps.py @@ -59,8 +59,12 @@ def test_beta_from_units(): assert_almost_equal(lmp.beta_from_units(T_K, "real"), 1.6774, decimal=4) assert_almost_equal(lmp.beta_from_units(T_lj, "lj"), 1.4286, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "metal"), 38.6817, decimal=4) - assert_almost_equal(lmp.beta_from_units(T_K, "si"), 2.414323505391137e20, decimal=4) - assert_approx_equal(lmp.beta_from_units(T_K, "cgs"), 24143235053911.37, significant=7) + assert_approx_equal( + lmp.beta_from_units(T_K, "si"), 2.414323505391137e20, significant=7 + ) + assert_approx_equal( + lmp.beta_from_units(T_K, "cgs"), 24143235053911.37, significant=7 + ) assert_almost_equal(lmp.beta_from_units(T_K, "electron"), 1052.5834, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "micro"), 241432.3505, decimal=4) assert_almost_equal(lmp.beta_from_units(T_K, "nano"), 0.24143, decimal=4) From 243b5c80d3ca08460c7feac30e1e48decbd54ab5 Mon Sep 17 00:00:00 2001 From: "Jennifer A. 
Clark" Date: Sat, 4 Jan 2025 23:49:04 -0500 Subject: [PATCH 59/59] Fix test issue --- src/alchemlyb/tests/parsing/test_lammps.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/tests/parsing/test_lammps.py b/src/alchemlyb/tests/parsing/test_lammps.py index 3f696fec..c94bdeb0 100644 --- a/src/alchemlyb/tests/parsing/test_lammps.py +++ b/src/alchemlyb/tests/parsing/test_lammps.py @@ -329,11 +329,12 @@ def test_u_nk_error_inconsistent_lambda(self, data): """Test error inconsistent lambda values in filenames""" filenames, kwargs, filenames2 = copy.deepcopy(data) - filenames = filenames[:244] + filenames.sort() + filenames = filenames[:-1] with pytest.raises( ValueError, - match=r"BAR calculation cannot be performed without the following lambda-lambda prime combinations: \[\(0.75, 0.7\)\]", + match=r"BAR calculation cannot be performed without the following lambda-lambda prime combinations: \[\(1.0, 1.0\)\]", ): u_nk = lmp.extract_u_nk(filenames, 300, **kwargs)