Skip to content

Commit

Permalink
Pass rCutSq and rCutCoulombSq instead of recomputing on the GPU
Browse files Browse the repository at this point in the history
  • Loading branch information
LSchwiebert committed Dec 30, 2024
1 parent 9fb3701 commit 4dd07b9
Show file tree
Hide file tree
Showing 13 changed files with 421 additions and 372 deletions.
5 changes: 2 additions & 3 deletions src/Ewald.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1610,9 +1610,8 @@ void Ewald::BoxForceReciprocal(XYZArray const &molCoords,

CallBoxForceReciprocalGPU(ff.particles->getCUDAVars(), atomForceRec,
molForceRec, particleCharge, particleMol,
particleUsed, startMol, lengthMol, ff.alpha[box],
ff.alphaSq[box], constValue, imageSizeRef[box],
molCoords, currentAxes, box);
particleUsed, startMol, lengthMol, constValue,
imageSizeRef[box], molCoords, currentAxes, box);
#else
// molecule iterator
MoleculeLookup::box_iterator thisMol = molLookup.BoxBegin(box);
Expand Down
7 changes: 4 additions & 3 deletions src/FFParticle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,10 @@ void FFParticle::Init(ff_setup::Particle const &mie,
double diElectric_1 = 1.0 / forcefield.dielectric;
InitGPUForceField(*varCUDA, sigmaSq, epsilon_cn, n, forcefield.vdwKind,
forcefield.isMartini, count, forcefield.rCut,
forcefield.rCutCoulomb, forcefield.rCutLow,
forcefield.rswitch, forcefield.alpha, forcefield.ewald,
diElectric_1);
forcefield.rCutSq, forcefield.rCutCoulomb,
forcefield.rCutCoulombSq, forcefield.rCutLow,
forcefield.rswitch, forcefield.alpha, forcefield.alphaSq,
forcefield.ewald, diElectric_1);
#endif
}

Expand Down
4 changes: 2 additions & 2 deletions src/Forcefield.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ class Forcefield {
double tolerance; // Ewald sum terms
double rswitch; // Switch distance
double dielectric; // dielectric for martini
double scaling_14; //!< Scaling factor for 1-4 pairs' ewald interactions
double scaling_14; //!< Scaling factor for 1-4 pairs' Ewald interactions
double sc_alpha; // Free energy parameter
double sc_sigma, sc_sigma_6; // Free energy parameter

bool OneThree, OneFour, OneN; // To include 1-3, 1-4 and more interaction
bool electrostatic, ewald; // To consider columb interaction
bool electrostatic, ewald; // To consider coulomb interaction
bool vdwGeometricSigma; // For sigma combining rule
bool isMartini;
bool exp6;
Expand Down
159 changes: 81 additions & 78 deletions src/GPU/CalculateEnergyCUDAKernel.cu

Large diffs are not rendered by default.

60 changes: 30 additions & 30 deletions src/GPU/CalculateEnergyCUDAKernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -32,35 +32,35 @@ BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn,
double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n,
int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count,
double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow,
double *gpu_rOn, double *gpu_alpha, int *gpu_ewald,
double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x,
double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x,
double *gpu_Invcell_y, double *gpu_Invcell_z, bool sc_coul,
double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_rMin,
double *gpu_rCut, double *gpu_rCutSq, double *gpu_rCutCoulomb,
double *gpu_rCutCoulombSq, double *gpu_rCutLow, double *gpu_rOn,
double *gpu_alpha, int *gpu_ewald, double *gpu_diElectric_1,
int *gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y,
double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
double *gpu_Invcell_z, bool sc_coul, double sc_sigma_6,
double sc_alpha, uint sc_power, double *gpu_rMin,
double *gpu_rMaxSq, double *gpu_expConst, int *gpu_molIndex,
double *gpu_lambdaVDW, double *gpu_lambdaCoulomb,
bool *gpu_isFraction, int box);

__device__ double
CalcCoulombGPU(double distSq, int kind1, int kind2, double qi_qj_fact,
double gpu_rCutLow, int gpu_ewald, int gpu_VDW_Kind,
double gpu_alpha, double gpu_rCutCoulomb, int gpu_isMartini,
double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul,
double sc_sigma_6, double sc_alpha, uint sc_power,
double *gpu_sigmaSq, int gpu_count);
__device__ double CalcCoulombGPU(
double distSq, int kind1, int kind2, double qi_qj_fact, double gpu_rCutLow,
int gpu_ewald, int gpu_VDW_Kind, double gpu_alpha, double gpu_rCutCoulomb,
double gpu_rCutCoulombSq, int gpu_isMartini, double gpu_diElectric_1,
double gpu_lambdaCoulomb, bool sc_coul, double sc_sigma_6, double sc_alpha,
uint sc_power, double *gpu_sigmaSq, int gpu_count);
__device__ double CalcCoulombVirGPU(double distSq, double qi_qj,
double gpu_rCutCoulomb, double gpu_alpha,
int gpu_VDW_Kind, int gpu_ewald,
double gpu_diElectric_1, int gpu_isMartini);
__device__ double CalcEnGPU(double distSq, int kind1, int kind2,
double *gpu_sigmaSq, double *gpu_n,
double *gpu_epsilon_Cn, int gpu_VDW_Kind,
int gpu_isMartini, double gpu_rCut, double gpu_rOn,
int gpu_count, double gpu_lambdaVDW,
double sc_sigma_6, double sc_alpha, uint sc_power,
double *gpu_rMin, double *gpu_rMaxSq,
double *gpu_expConst);
int gpu_isMartini, double gpu_rCut,
double gpu_rCutSq, double gpu_rOn, int gpu_count,
double gpu_lambdaVDW, double sc_sigma_6,
double sc_alpha, uint sc_power, double *gpu_rMin,
double *gpu_rMaxSq, double *gpu_expConst);

// ElectroStatic Calculation
//**************************************************************//
Expand Down Expand Up @@ -91,25 +91,25 @@ __device__ double CalcCoulombExp6GPU(double distSq, int index,
double *gpu_sigmaSq);
__device__ double CalcCoulombExp6GPUNoLambda(double distSq, double qi_qj_fact,
int gpu_ewald, double gpu_alpha);
__device__ double
CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact,
int gpu_ewald, double gpu_alpha, double gpu_rCut,
double gpu_diElectric_1, double gpu_lambdaCoulomb,
bool sc_coul, double sc_sigma_6, double sc_alpha,
uint sc_power, double *gpu_sigmaSq);
__device__ double
CalcCoulombSwitchMartiniGPUNoLambda(double distSq, double qi_qj_fact,
int gpu_ewald, double gpu_alpha,
double gpu_rCut, double gpu_diElectric_1);
__device__ double CalcCoulombSwitchMartiniGPU(
double distSq, int index, double qi_qj_fact, int gpu_ewald,
double gpu_alpha, double gpu_rCut, double gpu_rCutSq,
double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul,
double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_sigmaSq);
__device__ double CalcCoulombSwitchMartiniGPUNoLambda(
double distSq, double qi_qj_fact, int gpu_ewald, double gpu_alpha,
double gpu_rCut, double gpu_rCutSq, double gpu_diElectric_1);
__device__ double CalcCoulombSwitchGPU(double distSq, int index,
double qi_qj_fact, double gpu_alpha,
int gpu_ewald, double gpu_rCut,
double gpu_rCutSq,
double gpu_lambdaCoulomb, bool sc_coul,
double sc_sigma_6, double sc_alpha,
uint sc_power, double *gpu_sigmaSq);
__device__ double CalcCoulombSwitchGPUNoLambda(double distSq, double qi_qj_fact,
int gpu_ewald, double gpu_alpha,
double gpu_rCut);
double gpu_rCut,
double gpu_rCutSq);

// VDW Calculation
//*****************************************************************//
Expand All @@ -123,7 +123,7 @@ __device__ double CalcEnParticleGPUNoLambda(double distSq, int index,
double *gpu_epsilon_Cn);
__device__ double CalcEnShiftGPU(double distSq, int index, double *gpu_sigmaSq,
double *gpu_n, double *gpu_epsilon_Cn,
double gpu_rCut, double gpu_lambdaVDW,
double gpu_rCutSq, double gpu_lambdaVDW,
double sc_sigma_6, double sc_alpha,
uint sc_power);
__device__ double CalcEnShiftGPUNoLambda(double distSq, int index,
Expand Down
72 changes: 37 additions & 35 deletions src/GPU/CalculateEwaldCUDAKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,11 @@ __global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y,
int image = blockIdx.x;
double sumR = 0.0, sumI = 0.0;
#pragma unroll 8
for (int particleID = threadIdx.x; particleID < atomNumber; particleID += THREADS_PER_BLOCK_SM) {
double dot = DotProductGPU(gpu_kx[image], gpu_ky[image],
gpu_kz[image], gpu_x[particleID],
gpu_y[particleID],
gpu_z[particleID]);
for (int particleID = threadIdx.x; particleID < atomNumber;
particleID += THREADS_PER_BLOCK_SM) {
double dot =
DotProductGPU(gpu_kx[image], gpu_ky[image], gpu_kz[image],
gpu_x[particleID], gpu_y[particleID], gpu_z[particleID]);
double dotsin, dotcos;
sincos(dot, &dotsin, &dotcos);
sumR += gpu_molCharge[particleID] * dotcos;
Expand Down Expand Up @@ -343,8 +343,8 @@ void CallBoxForceReciprocalGPU(
const std::vector<double> &particleCharge,
const std::vector<int> &particleMol, const std::vector<int> &particleUsed,
const std::vector<int> &startMol, const std::vector<int> &lengthMol,
double alpha, double alphaSq, double constValue, uint imageSize,
XYZArray const &molCoords, BoxDimensions const &boxAxes, int box) {
double constValue, uint imageSize, XYZArray const &molCoords,
BoxDimensions const &boxAxes, int box) {
int atomCount = atomForceRec.Count();
int molCount = molForceRec.Count();
int *gpu_particleUsed;
Expand All @@ -370,8 +370,8 @@ void CallBoxForceReciprocalGPU(
cudaMemcpyHostToDevice);
cudaMemcpy(vars->gpu_mForceRecz, molForceRec.z, sizeof(double) * molCount,
cudaMemcpyHostToDevice);
cudaMemcpy(gpu_particleUsed, &particleUsed[0], sizeof(int) * particleUsed.size(),
cudaMemcpyHostToDevice);
cudaMemcpy(gpu_particleUsed, &particleUsed[0],
sizeof(int) * particleUsed.size(), cudaMemcpyHostToDevice);
cudaMemcpy(vars->gpu_x, molCoords.x, sizeof(double) * atomCount,
cudaMemcpyHostToDevice);
cudaMemcpy(vars->gpu_y, molCoords.y, sizeof(double) * atomCount,
Expand All @@ -391,15 +391,15 @@ void CallBoxForceReciprocalGPU(
vars->gpu_aForceRecx, vars->gpu_aForceRecy, vars->gpu_aForceRecz,
vars->gpu_mForceRecx, vars->gpu_mForceRecy, vars->gpu_mForceRecz,
vars->gpu_particleCharge, vars->gpu_particleMol, gpu_particleUsed,
gpu_startMol, gpu_lengthMol, alpha, alphaSq, constValue, imageSize,
vars->gpu_kxRef[box], vars->gpu_kyRef[box], vars->gpu_kzRef[box],
vars->gpu_x, vars->gpu_y, vars->gpu_z, vars->gpu_prefactRef[box],
vars->gpu_sumRnew[box], vars->gpu_sumInew[box], vars->gpu_isFraction,
vars->gpu_molIndex, vars->gpu_lambdaCoulomb, vars->gpu_cell_x[box],
vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box],
vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth,
boxAxes.GetAxis(box).x, boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z,
box);
gpu_startMol, gpu_lengthMol, vars->gpu_alpha, vars->gpu_alphaSq,
constValue, imageSize, vars->gpu_kxRef[box], vars->gpu_kyRef[box],
vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, vars->gpu_z,
vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box],
vars->gpu_isFraction, vars->gpu_molIndex, vars->gpu_lambdaCoulomb,
vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
vars->gpu_Invcell_z[box], vars->gpu_nonOrth, boxAxes.GetAxis(box).x,
boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z, box);
#ifndef NDEBUG
cudaDeviceSynchronize();
checkLastErrorCUDA(__FILE__, __LINE__);
Expand Down Expand Up @@ -429,18 +429,19 @@ void CallBoxForceReciprocalGPU(
__global__ void BoxForceReciprocalGPU(
double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz,
double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz,
double *gpu_particleCharge, int *gpu_particleMol, const int *gpu_particleUsed,
int *gpu_startMol, int *gpu_lengthMol, double alpha, double alphaSq,
double constValue, int imageSize, double *gpu_kx, double *gpu_ky,
double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z,
double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew,
bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb,
double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
int *gpu_nonOrth, double axx, double axy, double axz, int box) {

__shared__ int particleID, moleculeID;
__shared__ double x, y, z, lambdaCoef, fixed;
double *gpu_particleCharge, int *gpu_particleMol,
const int *gpu_particleUsed, int *gpu_startMol, int *gpu_lengthMol,
double *gpu_alpha, double *gpu_alphaSq, double constValue, int imageSize,
double *gpu_kx, double *gpu_ky, double *gpu_kz, double *gpu_x,
double *gpu_y, double *gpu_z, double *gpu_prefact, double *gpu_sumRnew,
double *gpu_sumInew, bool *gpu_isFraction, int *gpu_molIndex,
double *gpu_lambdaCoulomb, double *gpu_cell_x, double *gpu_cell_y,
double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
double *gpu_Invcell_z, int *gpu_nonOrth, double axx, double axy, double axz,
int box) {

__shared__ int particleID, moleculeID;
__shared__ double x, y, z, lambdaCoef, fixed;

if (threadIdx.x == 0) {
// The particleID is the atom that corresponds to this particleUsed entry
Expand All @@ -462,16 +463,16 @@ __global__ void BoxForceReciprocalGPU(
double dot = x * gpu_kx[image] + y * gpu_ky[image] + z * gpu_kz[image];
double dotsin, dotcos;
sincos(dot, &dotsin, &dotcos);
double factor = fixed * gpu_prefact[image] * (dotsin * gpu_sumRnew[image] -
dotcos * gpu_sumInew[image]);
double factor = fixed * gpu_prefact[image] *
(dotsin * gpu_sumRnew[image] - dotcos * gpu_sumInew[image]);
forceX += factor * gpu_kx[image];
forceY += factor * gpu_ky[image];
forceZ += factor * gpu_kz[image];
}

// loop over other particles within the same molecule
// Pick the thread most likely to exit the for loop early
if (threadIdx.x == THREADS_PER_BLOCK-1) {
if (threadIdx.x == THREADS_PER_BLOCK - 1) {
double intraForce = 0.0, distSq = 0.0, dist = 0.0;
double3 distVect;
int lastParticleWithinSameMolecule =
Expand All @@ -485,11 +486,12 @@ __global__ void BoxForceReciprocalGPU(
gpu_Invcell_z);
dist = sqrt(distSq);

double expConstValue = exp(-1.0 * alphaSq * distSq);
double expConstValue = exp(-1.0 * gpu_alphaSq[box] * distSq);
double qiqj = gpu_particleCharge[particleID] *
gpu_particleCharge[otherParticle] * qqFactGPU;
intraForce = qiqj * lambdaCoef * lambdaCoef / distSq;
intraForce *= (erf(alpha * dist) / dist) - constValue * expConstValue;
intraForce *=
(erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue;
forceX -= intraForce * distVect.x;
forceY -= intraForce * distVect.y;
forceZ -= intraForce * distVect.z;
Expand Down
23 changes: 12 additions & 11 deletions src/GPU/CalculateEwaldCUDAKernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ void CallBoxForceReciprocalGPU(
const std::vector<double> &particleCharge,
const std::vector<int> &particleMol, const std::vector<int> &particleUsed,
const std::vector<int> &startMol, const std::vector<int> &lengthMol,
double alpha, double alphaSq, double constValue, uint imageSize,
XYZArray const &molCoords, BoxDimensions const &boxAxes, int box);
double constValue, uint imageSize, XYZArray const &molCoords,
BoxDimensions const &boxAxes, int box);

void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, XYZArray const &coords,
double const *kx, double const *ky,
Expand Down Expand Up @@ -57,15 +57,16 @@ void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize, uint box,
__global__ void BoxForceReciprocalGPU(
double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz,
double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz,
double *gpu_particleCharge, int *gpu_particleMol, const int *gpu_particleUsed,
int *gpu_startMol, int *gpu_lengthMol, double alpha, double alphaSq,
double constValue, int imageSize, double *gpu_kx, double *gpu_ky,
double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z,
double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew,
bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb,
double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
int *gpu_nonOrth, double axx, double axy, double axz, int box);
double *gpu_particleCharge, int *gpu_particleMol,
const int *gpu_particleUsed, int *gpu_startMol, int *gpu_lengthMol,
double *gpu_alpha, double *gpu_alphaSq, double constValue, int imageSize,
double *gpu_kx, double *gpu_ky, double *gpu_kz, double *gpu_x,
double *gpu_y, double *gpu_z, double *gpu_prefact, double *gpu_sumRnew,
double *gpu_sumInew, bool *gpu_isFraction, int *gpu_molIndex,
double *gpu_lambdaCoulomb, double *gpu_cell_x, double *gpu_cell_y,
double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
double *gpu_Invcell_z, int *gpu_nonOrth, double axx, double axy, double axz,
int box);

__global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y,
double *gpu_z, double *gpu_kx,
Expand Down
Loading

0 comments on commit 4dd07b9

Please sign in to comment.