From 3c7b0bbf37a29086fc5cc016409dcd157d47193b Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Fri, 12 Jul 2024 08:39:34 -0400 Subject: [PATCH 1/3] Fix calls to Mersenne Twister for CBMC functions --- src/cbmc/DCFreeCycle.cpp | 12 ++++++++++-- src/cbmc/DCFreeCycleSeed.cpp | 12 ++++++++++-- src/cbmc/DCFreeHedron.cpp | 12 ++++++++++-- src/cbmc/DCFreeHedronSeed.cpp | 12 ++++++++++-- src/cbmc/DCRotateCOM.cpp | 12 ++++++++++-- 5 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/cbmc/DCFreeCycle.cpp b/src/cbmc/DCFreeCycle.cpp index e12557a19..5dc4eb307 100644 --- a/src/cbmc/DCFreeCycle.cpp +++ b/src/cbmc/DCFreeCycle.cpp @@ -139,9 +139,13 @@ void DCFreeCycle::BuildNew(TrialMol &newMol, uint molIndex) { positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0)); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 0;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); @@ -217,9 +221,13 @@ void DCFreeCycle::BuildOld(TrialMol &oldMol, uint molIndex) { positions[hed.NumBond()].Add(0, -center); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 1;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); diff --git a/src/cbmc/DCFreeCycleSeed.cpp b/src/cbmc/DCFreeCycleSeed.cpp index 2ed2829fd..f57a2aced 100644 --- a/src/cbmc/DCFreeCycleSeed.cpp +++ b/src/cbmc/DCFreeCycleSeed.cpp @@ -138,9 +138,13 @@ void DCFreeCycleSeed::BuildNew(TrialMol &newMol, uint molIndex) { positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0)); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 0;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); @@ -216,9 +220,13 @@ void DCFreeCycleSeed::BuildOld(TrialMol &oldMol, uint molIndex) { positions[hed.NumBond()].Add(0, -center); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 1;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); diff --git a/src/cbmc/DCFreeHedron.cpp b/src/cbmc/DCFreeHedron.cpp index f13c03e0a..89aa59efc 100644 --- a/src/cbmc/DCFreeHedron.cpp +++ b/src/cbmc/DCFreeHedron.cpp @@ -118,9 +118,13 @@ void DCFreeHedron::BuildNew(TrialMol &newMol, uint molIndex) { positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0)); // counting backward to preserve 
prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 0;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); @@ -196,9 +200,13 @@ void DCFreeHedron::BuildOld(TrialMol &oldMol, uint molIndex) { positions[hed.NumBond()].Add(0, -center); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 1;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); diff --git a/src/cbmc/DCFreeHedronSeed.cpp b/src/cbmc/DCFreeHedronSeed.cpp index 6907994c1..b6d527069 100644 --- a/src/cbmc/DCFreeHedronSeed.cpp +++ b/src/cbmc/DCFreeHedronSeed.cpp @@ -117,9 +117,13 @@ void DCFreeHedronSeed::BuildNew(TrialMol &newMol, uint molIndex) { positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0)); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 0;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); @@ -195,9 +199,13 @@ void DCFreeHedronSeed::BuildOld(TrialMol &oldMol, uint molIndex) { positions[hed.NumBond()].Add(0, -center); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 1;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); diff --git a/src/cbmc/DCRotateCOM.cpp b/src/cbmc/DCRotateCOM.cpp index ff112b468..19471438f 100644 --- a/src/cbmc/DCRotateCOM.cpp +++ b/src/cbmc/DCRotateCOM.cpp @@ -182,7 +182,11 @@ void DCRotateCOM::BuildNew(TrialMol &newMol, uint molIndex) { RandRotateZ(); } else { // convert chosen torsion to 3D positions - spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + double u1, u2, u3; + u1 = prng(); + u2 = prng(); + u3 = prng(); + spin = RotationMatrix::UniformRandom(u1, u2, u3); } for (uint a = 0; a < atomNumber; ++a) { @@ -287,7 +291,11 @@ void DCRotateCOM::BuildOld(TrialMol &oldMol, uint molIndex) { RandRotateZ(); } else { // convert chosen torsion to 3D positions - spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + double u1, u2, u3; + u1 = prng(); + u2 = prng(); + u3 = prng(); + spin = RotationMatrix::UniformRandom(u1, u2, u3); } for (uint a = 0; a < atomNumber; ++a) { From 9563e139ed3323933cf3067c01ab9f0577898aa7 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Thu, 12 Dec 2024 21:47:34 -0500 Subject: [PATCH 2/3] Pass more forcefield parameters to the GPU for consistency --- src/Ewald.cpp | 3 +- src/FFParticle.cpp | 7 ++- 
src/Forcefield.h | 4 +- src/GPU/CalculateEwaldCUDAKernel.cu | 24 ++++---- src/GPU/CalculateEwaldCUDAKernel.cuh | 12 ++-- src/GPU/CalculateForceCUDAKernel.cu | 71 ++++++++++++----------- src/GPU/CalculateForceCUDAKernel.cuh | 40 +++++++------ src/GPU/ConstantDefinitionsCUDAKernel.cu | 17 ++++-- src/GPU/ConstantDefinitionsCUDAKernel.cuh | 12 ++-- src/GPU/VariablesCUDA.cuh | 9 ++- 10 files changed, 106 insertions(+), 93 deletions(-) diff --git a/src/Ewald.cpp b/src/Ewald.cpp index de23f1bc7..d97d192cc 100644 --- a/src/Ewald.cpp +++ b/src/Ewald.cpp @@ -1530,8 +1530,7 @@ void Ewald::BoxForceReciprocal(XYZArray const &molCoords, CallBoxForceReciprocalGPU( ff.particles->getCUDAVars(), atomForceRec, molForceRec, particleCharge, particleMol, particleHasNoCharge, particleUsed, startMol, lengthMol, - ff.alpha[box], ff.alphaSq[box], constValue, imageSizeRef[box], - molCoords, currentAxes, box); + constValue, imageSizeRef[box], molCoords, currentAxes, box); delete[] particleUsed; #else // molecule iterator diff --git a/src/FFParticle.cpp b/src/FFParticle.cpp index db35b589d..11e169b8b 100644 --- a/src/FFParticle.cpp +++ b/src/FFParticle.cpp @@ -88,9 +88,10 @@ void FFParticle::Init(ff_setup::Particle const &mie, double diElectric_1 = 1.0 / forcefield.dielectric; InitGPUForceField(*varCUDA, sigmaSq, epsilon_cn, n, forcefield.vdwKind, forcefield.isMartini, count, forcefield.rCut, - forcefield.rCutCoulomb, forcefield.rCutLow, - forcefield.rswitch, forcefield.alpha, forcefield.ewald, - diElectric_1); + forcefield.rCutSq, forcefield.rCutCoulomb, + forcefield.rCutCoulombSq, forcefield.rCutLow, + forcefield.rswitch, forcefield.alpha, forcefield.alphaSq, + forcefield.ewald, diElectric_1); #endif } diff --git a/src/Forcefield.h b/src/Forcefield.h index 85ddcd5d5..64ac8f6ba 100644 --- a/src/Forcefield.h +++ b/src/Forcefield.h @@ -54,12 +54,12 @@ class Forcefield { double tolerance; // Ewald sum terms double rswitch; // Switch distance double dielectric; // dielectric for martini - double scaling_14; //!< Scaling factor for 1-4 pairs' ewald interactions + double scaling_14; //!< Scaling factor for 1-4 pairs' Ewald interactions double sc_alpha; // Free energy parameter double sc_sigma, sc_sigma_6; // Free energy parameter bool OneThree, OneFour, OneN; // To include 1-3, 1-4 and more interaction - bool electrostatic, ewald; // To consider columb interaction + bool electrostatic, ewald; // To consider coulomb interaction bool vdwGeometricSigma; // For sigma combining rule bool isMartini; bool exp6; diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu index 5812426b6..164091427 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cu +++ b/src/GPU/CalculateEwaldCUDAKernel.cu @@ -450,8 +450,8 @@ void CallBoxForceReciprocalGPU( const std::vector &particleMol, const std::vector &particleHasNoCharge, const bool *particleUsed, const std::vector &startMol, const std::vector &lengthMol, - double alpha, double alphaSq, double constValue, uint imageSize, - XYZArray const &molCoords, BoxDimensions const &boxAxes, int box) { + double constValue, uint imageSize, XYZArray const &molCoords, + BoxDimensions const &boxAxes, int box) { int atomCount = atomForceRec.Count(); int molCount = molForceRec.Count(); double *gpu_particleCharge; @@ -518,13 +518,13 @@ void CallBoxForceReciprocalGPU( vars->gpu_aForceRecx, vars->gpu_aForceRecy, vars->gpu_aForceRecz, vars->gpu_mForceRecx, vars->gpu_mForceRecy, vars->gpu_mForceRecz, gpu_particleCharge, gpu_particleMol, gpu_particleHasNoCharge, - gpu_particleUsed, 
gpu_startMol, gpu_lengthMol, alpha, alphaSq, constValue, - imageSize, vars->gpu_kxRef[box], vars->gpu_kyRef[box], - vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, vars->gpu_z, - vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box], - vars->gpu_isFraction, vars->gpu_molIndex, vars->gpu_lambdaCoulomb, - vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], - vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], + gpu_particleUsed, gpu_startMol, gpu_lengthMol, vars->gpu_alpha, + vars->gpu_alphaSq, constValue, imageSize, vars->gpu_kxRef[box], + vars->gpu_kyRef[box], vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, + vars->gpu_z, vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], + vars->gpu_sumInew[box], vars->gpu_isFraction, vars->gpu_molIndex, + vars->gpu_lambdaCoulomb, vars->gpu_cell_x[box], vars->gpu_cell_y[box], + vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth, boxAxes.GetAxis(box).x, boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z, box, atomCount); cudaDeviceSynchronize(); @@ -558,7 +558,7 @@ __global__ void BoxForceReciprocalGPU( double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz, double *gpu_particleCharge, int *gpu_particleMol, bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol, - int *gpu_lengthMol, double alpha, double alphaSq, double constValue, + int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, double constValue, int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction, @@ -627,11 +627,11 @@ __global__ void BoxForceReciprocalGPU( gpu_Invcell_z); dist = sqrt(distSq); - double expConstValue = exp(-1.0 * alphaSq * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq[box] * distSq); double qiqj = gpu_particleCharge[particleID] * gpu_particleCharge[otherParticle] * qqFactGPU; intraForce = qiqj * lambdaCoef * lambdaCoef / distSq; - intraForce *= ((erf(alpha * dist) / dist) - constValue * expConstValue); + intraForce *= ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue); forceX -= intraForce * distVect.x; forceY -= intraForce * distVect.y; forceZ -= intraForce * distVect.z; diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh index 16a3fc532..80062586b 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cuh +++ b/src/GPU/CalculateEwaldCUDAKernel.cuh @@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt along with this program, also can be found at . 
********************************************************************************/ -#ifndef CALCULATE_EWALD_CUDA_KERNEL -#define CALCULATE_EWALD_CUDA_KERNEL +#ifndef CALCULATE_EWALD_CUDA_KERNEL_H +#define CALCULATE_EWALD_CUDA_KERNEL_H #ifdef GOMC_CUDA #include @@ -23,8 +23,6 @@ void CallBoxForceReciprocalGPU(VariablesCUDA *vars, const bool *particleUsed, const std::vector &startMol, const std::vector &lengthMol, - double alpha, - double alphaSq, double constValue, uint imageSize, XYZArray const &molCoords, @@ -103,8 +101,8 @@ __global__ void BoxForceReciprocalGPU(double *gpu_aForceRecx, bool *gpu_particleUsed, int *gpu_startMol, int *gpu_lengthMol, - double alpha, - double alphaSq, + double *gpu_alpha, + double *gpu_alphaSq, double constValue, int imageSize, double *gpu_kx, @@ -190,4 +188,4 @@ __global__ void BoxReciprocalGPU(double *gpu_prefact, int imageSize); #endif /*GOMC_CUDA*/ -#endif /*CALCULATE_EWALD_CUDA_KERNEL*/ +#endif /*CALCULATE_EWALD_CUDA_KERNEL_H*/ diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu index c827a803d..219a6cafb 100644 --- a/src/GPU/CalculateForceCUDAKernel.cu +++ b/src/GPU/CalculateForceCUDAKernel.cu @@ -125,7 +125,7 @@ void CallBoxInterForceGPU( vars->gpu_vT13, vars->gpu_vT22, vars->gpu_vT23, vars->gpu_vT33, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth, @@ -302,7 +302,7 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector &cellVector, gpu_particleKind, gpu_particleMol, gpu_REn, gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth, vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], @@ -469,7 +469,7 @@ __global__ void BoxInterForceGPU( double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, - double *gpu_rOn, double *gpu_alpha, int *gpu_ewald, + double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6, @@ -577,7 +577,7 @@ __global__ void BoxInterForceGPU( mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb); double pRF = CalcCoulombForceGPU( distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0], - gpu_alpha[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], + gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, kB); @@ -608,7 +608,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int 
*gpu_neighborList, double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, - double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, @@ -708,7 +708,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, forces += CalcCoulombForceGPU( distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0], - gpu_isMartini[0], gpu_alpha[box], gpu_rCutCoulomb[box], + gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, kB); } @@ -868,12 +868,12 @@ CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, - int index, double gpu_sigmaSq, + double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } if (sc_coul) { @@ -886,20 +886,20 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } else { return gpu_lambdaCoulomb * - CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha) { + int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -909,13 +909,13 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } if (sc_coul) { @@ -928,20 +928,20 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirShiftGPU(softRsq, qi_qj, 
gpu_ewald, gpu_alpha); + CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } else { return gpu_lambdaCoulomb * - CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha) { + int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -950,13 +950,13 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } if (sc_coul) { double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq; @@ -968,20 +968,20 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } else { return gpu_lambdaCoulomb * - CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha) { + int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = erfc(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -990,12 +990,12 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchMartiniGPU( - double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } @@ -1009,11 +1009,11 @@ __device__ double CalcCoulombVirSwitchMartiniGPU( double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, 
gpu_rCut, gpu_diElectric_1); } else { return gpu_lambdaCoulomb * - CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } } @@ -1021,13 +1021,14 @@ __device__ double CalcCoulombVirSwitchMartiniGPU( __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -1053,7 +1054,7 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, @@ -1061,7 +1062,7 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, - gpu_rCut); + gpu_alphaSq, gpu_rCut); } if (sc_coul) { @@ -1075,21 +1076,21 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * CalcCoulombVirSwitchGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, - gpu_rCut); + gpu_alphaSq, gpu_rCut); } else { return gpu_lambdaCoulomb * CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, - gpu_alpha, gpu_rCut); + gpu_alpha, gpu_alphaSq, gpu_rCut); } } __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, - double gpu_rCut) { + double gpu_alphaSq, double gpu_rCut) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { diff --git a/src/GPU/CalculateForceCUDAKernel.cuh b/src/GPU/CalculateForceCUDAKernel.cuh index 29d7953db..072939c24 100644 --- a/src/GPU/CalculateForceCUDAKernel.cuh +++ b/src/GPU/CalculateForceCUDAKernel.cuh @@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt along with this program, also can be found at . 
********************************************************************************/ -#ifndef CALCULATE_FORCE_CUDA_KERNEL -#define CALCULATE_FORCE_CUDA_KERNEL +#ifndef CALCULATE_FORCE_CUDA_KERNEL_H +#define CALCULATE_FORCE_CUDA_KERNEL_H #ifdef GOMC_CUDA #include @@ -114,6 +114,7 @@ __global__ void BoxForceGPU(int *gpu_cellStartIndex, double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, @@ -183,6 +184,7 @@ __global__ void BoxInterForceGPU(int *gpu_cellStartIndex, double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, double *gpu_cell_x, @@ -249,32 +251,32 @@ __device__ double CalcEnForceGPU(double distSq, int kind1, int kind2, //ElectroStatic Calculation //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha); + int gpu_ewald, double gpu_alpha, double gpu_alphaSq); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha); + int gpu_ewald, double gpu_alpha, double gpu_alphaSq); __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha); + int gpu_ewald, double gpu_alpha, double gpu_alphaSq); __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, - double gpu_alpha, + double gpu_alpha, double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, @@ -286,18 +288,18 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, - double gpu_alpha, + double gpu_alpha, double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut); //VDW Calculation @@ -359,7 +361,7 @@ __device__ double CalcVirSwitchGPU(double distSq, int index, __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, int gpu_VDW_Kind, int gpu_ewald, int gpu_isMartini, - double gpu_alpha, + double gpu_alpha, double gpu_alphaSq, double gpu_rCutCoulomb, double 
gpu_diElectric_1, double *gpu_sigmaSq, @@ -377,25 +379,25 @@ __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, int index = FlatIndexGPU(kind1, kind2, gpu_count); if(gpu_VDW_Kind == GPU_VDW_STD_KIND) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index, + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else if(gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index, + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else if(gpu_VDW_Kind == GPU_VDW_EXP6_KIND) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index, + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else if(gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb, gpu_diElectric_1, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else - return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); @@ -403,4 +405,4 @@ __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, #endif /*GOMC_CUDA*/ -#endif /*CALCULATE_FORCE_CUDA_KERNEL*/ +#endif /*CALCULATE_FORCE_CUDA_KERNEL_H*/ diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cu b/src/GPU/ConstantDefinitionsCUDAKernel.cu index eda129319..e32aa3098 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cu +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cu @@ -31,9 +31,10 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW, void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, double const *epsilon_Cn, double const *n, int VDW_Kind, - int isMartini, int count, double Rcut, - double const *rCutCoulomb, double RcutLow, double Ron, - double const *alpha, int ewald, double diElectric_1) { + int isMartini, int count, double Rcut, double RcutSq, + double const *rCutCoulomb, double const *rCutCoulombSq, + double RcutLow, double Ron, double const *alpha, + double const *alphaSq, int ewald, double diElectric_1) { int countSq = count * count; CUMALLOC((void **)&vars.gpu_sigmaSq, countSq * sizeof(double)); CUMALLOC((void **)&vars.gpu_epsilon_Cn, countSq * sizeof(double)); @@ -42,14 +43,17 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, CUMALLOC((void **)&vars.gpu_isMartini, sizeof(int)); CUMALLOC((void **)&vars.gpu_count, sizeof(int)); CUMALLOC((void **)&vars.gpu_rCut, sizeof(double)); + CUMALLOC((void **)&vars.gpu_rCutSq, sizeof(double)); CUMALLOC((void **)&vars.gpu_rCutCoulomb, BOX_TOTAL * sizeof(double)); + CUMALLOC((void **)&vars.gpu_rCutCoulombSq, BOX_TOTAL * sizeof(double)); CUMALLOC((void **)&vars.gpu_rCutLow, sizeof(double)); CUMALLOC((void **)&vars.gpu_rOn, sizeof(double)); CUMALLOC((void **)&vars.gpu_alpha, BOX_TOTAL * sizeof(double)); + CUMALLOC((void **)&vars.gpu_alphaSq, BOX_TOTAL * sizeof(double)); CUMALLOC((void 
**)&vars.gpu_ewald, sizeof(int)); CUMALLOC((void **)&vars.gpu_diElectric_1, sizeof(double)); - // allocate gpu memory for lambda variables + // allocate GPU memory for lambda variables CUMALLOC((void **)&vars.gpu_molIndex, (int)BOX_TOTAL * sizeof(int)); CUMALLOC((void **)&vars.gpu_lambdaVDW, (int)BOX_TOTAL * sizeof(double)); CUMALLOC((void **)&vars.gpu_lambdaCoulomb, (int)BOX_TOTAL * sizeof(double)); @@ -65,13 +69,18 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_count, &count, sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rCut, &Rcut, sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(vars.gpu_rCutSq, &RcutSq, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rCutCoulomb, rCutCoulomb, BOX_TOTAL * sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(vars.gpu_rCutCoulombSq, rCutCoulombSq, BOX_TOTAL * sizeof(double), + cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rCutLow, &RcutLow, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rOn, &Ron, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_alpha, alpha, BOX_TOTAL * sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(vars.gpu_alphaSq, alphaSq, BOX_TOTAL * sizeof(double), + cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_ewald, &ewald, sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_diElectric_1, &diElectric_1, sizeof(double), cudaMemcpyHostToDevice); diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh index 68eafb71b..3584e46c2 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh @@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt along with this program, also can be found at . 
********************************************************************************/ -#ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL -#define CONSTANT_DEFINITIONS_CUDA_KERNEL +#ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL_H +#define CONSTANT_DEFINITIONS_CUDA_KERNEL_H #ifdef GOMC_CUDA #include @@ -25,9 +25,9 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW, void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, double const *epsilon_Cn, double const *n, int VDW_Kind, int isMartini, int count, - double Rcut, double const *rCutCoulomb, - double RcutLow, double Ron, double const *alpha, - int ewald, double diElectric_1); + double Rcut, double RcutSq, double const *rCutCoulomb, + double const *rCutCoulombSq, double RcutLow, double Ron, double const *alpha, + double const *alphaSq, int ewald, double diElectric_1); void InitCoordinatesCUDA(VariablesCUDA *vars, uint atomNumber, uint maxAtomsInMol, uint maxMolNumber); void InitEwaldVariablesCUDA(VariablesCUDA *vars, uint imageTotal); @@ -47,4 +47,4 @@ void DestroyExp6CUDAVars(VariablesCUDA *vars); void DestroyCUDAVars(VariablesCUDA *vars); #endif /*GOMC_CUDA*/ -#endif /*CONSTANT_DEFINITIONS_CUDA_KERNEL*/ +#endif /*CONSTANT_DEFINITIONS_CUDA_KERNEL_H*/ diff --git a/src/GPU/VariablesCUDA.cuh b/src/GPU/VariablesCUDA.cuh index 687b6e2c6..00b63e272 100644 --- a/src/GPU/VariablesCUDA.cuh +++ b/src/GPU/VariablesCUDA.cuh @@ -65,10 +65,13 @@ public: gpu_isMartini = NULL; gpu_count = NULL; gpu_rCut = NULL; + gpu_rCutSq = NULL; gpu_rCutLow = NULL; gpu_rOn = NULL; gpu_alpha = NULL; + gpu_alphaSq = NULL; gpu_rCutCoulomb = NULL; + gpu_rCutCoulombSq = NULL; gpu_ewald = NULL; gpu_diElectric_1 = NULL; gpu_aForcex = NULL; @@ -92,11 +95,11 @@ public: int *gpu_isMartini; int *gpu_count; int *gpu_startAtomIdx; //start atom index of the molecule - double *gpu_rCut; - double *gpu_rCutCoulomb; + double *gpu_rCut, *gpu_rCutSq; + double *gpu_rCutCoulomb, *gpu_rCutCoulombSq; double *gpu_rCutLow; double *gpu_rOn; - double *gpu_alpha; + double *gpu_alpha, *gpu_alphaSq; int *gpu_ewald; double *gpu_diElectric_1; double *gpu_x, *gpu_y, *gpu_z; From b0b0b3f1234016bdaefbcc61378eb8fe66e39763 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Thu, 12 Dec 2024 21:56:49 -0500 Subject: [PATCH 3/3] Reformat per clang formatting rules --- src/BlockOutput.h | 8 +- src/ConfigSetup.cpp | 59 +-- src/GPU/CUDAMemoryManager.cuh | 19 +- src/GPU/CalculateEnergyCUDAKernel.cuh | 238 ++++------ src/GPU/CalculateEwaldCUDAKernel.cu | 20 +- src/GPU/CalculateEwaldCUDAKernel.cuh | 215 +++------ src/GPU/CalculateForceCUDAKernel.cu | 185 ++++---- src/GPU/CalculateForceCUDAKernel.cuh | 521 ++++++++-------------- src/GPU/ConstantDefinitionsCUDAKernel.cuh | 19 +- src/GPU/VariablesCUDA.cuh | 62 +-- 10 files changed, 544 insertions(+), 802 deletions(-) diff --git a/src/BlockOutput.h b/src/BlockOutput.h index 302a427a3..047bcb226 100644 --- a/src/BlockOutput.h +++ b/src/BlockOutput.h @@ -111,13 +111,13 @@ struct BlockAverages : OutputableBase { stepsPerOut = event.frequency; invSteps = 1.0 / stepsPerOut; firstInvSteps = invSteps; - //Handle the case where we are restarting from a checkpoint and the first - //interval is smaller than expected because we create a checkpoint more - //often than the Block output frequency. + // Handle the case where we are restarting from a checkpoint and the first + // interval is smaller than expected because we create a checkpoint more + // often than the Block output frequency. 
if (startStep != 0 && (startStep % stepsPerOut) != 0) { ulong diff; diff = stepsPerOut - (startStep % stepsPerOut); - firstInvSteps = 1.0/diff; + firstInvSteps = 1.0 / diff; } enableOut = event.enable; } diff --git a/src/ConfigSetup.cpp b/src/ConfigSetup.cpp index 5e087604d..7cd429c54 100644 --- a/src/ConfigSetup.cpp +++ b/src/ConfigSetup.cpp @@ -1897,7 +1897,6 @@ void ConfigSetup::verifyInputs(void) { std::cout << "ERROR: Impulse Pressure Correction cannot be " << "used with LJ long-range corrections." << std::endl; exit(EXIT_FAILURE); - } if (((sys.ff.VDW_KIND == sys.ff.VDW_SHIFT_KIND) || (sys.ff.VDW_KIND == sys.ff.VDW_SWITCH_KIND)) && @@ -1905,7 +1904,6 @@ void ConfigSetup::verifyInputs(void) { std::cout << "ERROR: Impulse Pressure Correction is not supported " << "for SWITCH or SHIFT potentials." << std::endl; exit(EXIT_FAILURE); - } if (sys.ff.doImpulsePressureCorr && sys.ff.doTailCorr) { std::cout << "ERROR: Both LRC (Long Range Correction) and " @@ -2104,9 +2102,10 @@ void ConfigSetup::verifyInputs(void) { if (in.restart.restartFromBinaryCoorFile) { for (i = 0; i < BOX_TOTAL; i++) { if (!in.files.binaryCoorInput.defined[i]) { - std::cout << "ERROR: Binary coordinate file was not specified for box " - "number " - << i << "!" << std::endl; + std::cout + << "ERROR: Binary coordinate file was not specified for box " + "number " + << i << "!" << std::endl; exit(EXIT_FAILURE); } } @@ -2174,25 +2173,30 @@ void ConfigSetup::verifyInputs(void) { if ((sys.memcVal.MEMC1 && sys.memcVal.MEMC2) || (sys.memcVal.MEMC1 && sys.memcVal.MEMC3) || (sys.memcVal.MEMC2 && sys.memcVal.MEMC3)) { - std::cout << "ERROR: Multiple MEMC methods were specified, but only one is allowed!\n"; + std::cout << "ERROR: Multiple MEMC methods were specified, but only one " + "is allowed!\n"; exit(EXIT_FAILURE); } if ((sys.intraMemcVal.MEMC1 && sys.intraMemcVal.MEMC2) || (sys.intraMemcVal.MEMC1 && sys.intraMemcVal.MEMC3) || (sys.intraMemcVal.MEMC2 && sys.intraMemcVal.MEMC3)) { - std::cout << "ERROR: Multiple Intra-MEMC methods are specified, but only one is allowed!\n"; + std::cout << "ERROR: Multiple Intra-MEMC methods are specified, but only " + "one is allowed!\n"; exit(EXIT_FAILURE); } if (!sys.memcVal.readVol || !sys.intraMemcVal.readVol) { - std::cout << "ERROR: In the MEMC method, the Sub-Volume was not specified!\n"; + std::cout + << "ERROR: In the MEMC method, the Sub-Volume was not specified!\n"; exit(EXIT_FAILURE); } if (!sys.memcVal.readRatio || !sys.intraMemcVal.readRatio) { - std::cout << "ERROR: In the MEMC method, Exchange Ratio was not specified!\n"; + std::cout + << "ERROR: In the MEMC method, Exchange Ratio was not specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.largeKind.size() != sys.memcVal.exchangeRatio.size()) { - std::cout << "ERROR: In the MEMC method, the specified number of Large Kinds was " + std::cout << "ERROR: In the MEMC method, the specified number of Large " + "Kinds was " << sys.memcVal.largeKind.size() << ", but " << sys.memcVal.exchangeRatio.size() << " exchange ratio was specified!\n"; @@ -2209,49 +2213,52 @@ void ConfigSetup::verifyInputs(void) { if ((sys.memcVal.largeKind.size() != sys.memcVal.smallKind.size()) || (sys.intraMemcVal.largeKind.size() != sys.intraMemcVal.smallKind.size())) { - std::cout - << "ERROR: In the MEMC method, the specified number of Large Kinds is not " - << " equal as specified number of Small Kinds!\n"; + std::cout << "ERROR: In the MEMC method, the specified number of Large " + "Kinds is not " + << " equal as specified number of Small Kinds!\n"; 
exit(EXIT_FAILURE); } if (!sys.memcVal.readLargeBB || !sys.intraMemcVal.readLargeBB) { - std::cout - << "ERROR: In the MEMC method, Large Kind BackBone was not specified!\n"; + std::cout << "ERROR: In the MEMC method, Large Kind BackBone was not " + "specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.largeKind.size() != sys.memcVal.largeBBAtom1.size()) { - std::cout << "ERROR: In the MEMC method, the specified number of Large Kinds was " + std::cout << "ERROR: In the MEMC method, the specified number of Large " + "Kinds was " << sys.memcVal.largeKind.size() << ", but " << sys.memcVal.largeBBAtom1.size() << " sets of Large Molecule BackBone was specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.MEMC2 && !sys.memcVal.readSmallBB) { - std::cout - << "ERROR: In the MEMC-2 method, Small Kind BackBone was not specified!\n"; + std::cout << "ERROR: In the MEMC-2 method, Small Kind BackBone was not " + "specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.MEMC2 && (sys.memcVal.smallKind.size() != sys.memcVal.smallBBAtom1.size())) { - std::cout - << "ERROR: In the MEMC-2 method, the specified number of Small Kinds was " - << sys.memcVal.smallKind.size() << ", but " - << sys.memcVal.smallBBAtom1.size() - << " sets of Small Molecule BackBone was specified!\n"; + std::cout << "ERROR: In the MEMC-2 method, the specified number of Small " + "Kinds was " + << sys.memcVal.smallKind.size() << ", but " + << sys.memcVal.smallBBAtom1.size() + << " sets of Small Molecule BackBone was specified!\n"; exit(EXIT_FAILURE); } if (sys.intraMemcVal.MEMC2 && !sys.intraMemcVal.readSmallBB) { - std::cout << "ERROR: In the Intra-MEMC-2 method, Small Kind BackBone was not " - "specified!\n"; + std::cout + << "ERROR: In the Intra-MEMC-2 method, Small Kind BackBone was not " + "specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.enable && sys.intraMemcVal.enable) { if ((sys.memcVal.MEMC1 && !sys.intraMemcVal.MEMC1) || (sys.memcVal.MEMC2 && !sys.intraMemcVal.MEMC2) || (sys.memcVal.MEMC3 && !sys.intraMemcVal.MEMC3)) { - std::cout << "ERROR: The selected intra-MEMC method was not same as the inter-MEMC method!\n"; + std::cout << "ERROR: The selected intra-MEMC method was not same as " + "the inter-MEMC method!\n"; exit(EXIT_FAILURE); } } diff --git a/src/GPU/CUDAMemoryManager.cuh b/src/GPU/CUDAMemoryManager.cuh index 0b5b52dcf..06e476a9d 100644 --- a/src/GPU/CUDAMemoryManager.cuh +++ b/src/GPU/CUDAMemoryManager.cuh @@ -2,28 +2,31 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. 
********************************************************************************/ #pragma once #ifdef GOMC_CUDA #include #include -#include #include +#include -#define CUMALLOC(address,size) CUDAMemoryManager::mallocMemory(address,size,#address) -#define CUFREE(address) CUDAMemoryManager::freeMemory(address,#address) +#define CUMALLOC(address, size) \ + CUDAMemoryManager::mallocMemory(address, size, #address) +#define CUFREE(address) CUDAMemoryManager::freeMemory(address, #address) -class CUDAMemoryManager -{ +class CUDAMemoryManager { public: - static cudaError_t mallocMemory(void **address, unsigned int size, std::string var_name); + static cudaError_t mallocMemory(void **address, unsigned int size, + std::string var_name); static cudaError_t freeMemory(void *address, std::string var_name); static bool isFreed(); private: static long long totalAllocatedBytes; - static std::unordered_map > allocatedPointers; + static std::unordered_map> + allocatedPointers; }; #endif diff --git a/src/GPU/CalculateEnergyCUDAKernel.cuh b/src/GPU/CalculateEnergyCUDAKernel.cuh index 9f1366400..b0367cc3c 100644 --- a/src/GPU/CalculateEnergyCUDAKernel.cuh +++ b/src/GPU/CalculateEnergyCUDAKernel.cuh @@ -2,93 +2,54 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #pragma once #ifdef GOMC_CUDA +#include "BoxDimensions.h" +#include "VariablesCUDA.cuh" +#include "XYZArray.h" #include #include #include -#include "XYZArray.h" -#include "BoxDimensions.h" -#include "VariablesCUDA.cuh" -void CallBoxInterGPU(VariablesCUDA *vars, - const std::vector &cellVector, +void CallBoxInterGPU(VariablesCUDA *vars, const std::vector &cellVector, const std::vector &cellStartIndex, - const std::vector > &neighborList, - XYZArray const &coords, - BoxDimensions const &boxAxes, + const std::vector> &neighborList, + XYZArray const &coords, BoxDimensions const &boxAxes, bool electrostatic, const std::vector &particleCharge, const std::vector &particleKind, - const std::vector &particleMol, - double &REn, - double &LJEn, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - uint const box); - -__global__ void BoxInterGPU(int *gpu_cellStartIndex, - int *gpu_cellVector, - int *gpu_neighborList, - int numberOfCells, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double3 axis, - double3 halfAx, - bool electrostatic, - double *gpu_particleCharge, - int *gpu_particleKind, - int *gpu_particleMol, - double *gpu_REn, - double *gpu_LJEn, - double *gpu_sigmaSq, - double *gpu_epsilon_Cn, - double *gpu_n, - int *gpu_VDW_Kind, - int *gpu_isMartini, - int *gpu_count, - double *gpu_rCut, - double *gpu_rCutCoulomb, - double *gpu_rCutLow, - double *gpu_rOn, - double *gpu_alpha, - int *gpu_ewald, - double *gpu_diElectric_1, - int *gpu_nonOrth, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst, - int *gpu_molIndex, - double *gpu_lambdaVDW, - double *gpu_lambdaCoulomb, - bool *gpu_isFraction, - int box); + const std::vector &particleMol, double &REn, + double &LJEn, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, uint const box); 
+__global__ void +BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, double *gpu_x, double *gpu_y, double *gpu_z, + double3 axis, double3 halfAx, bool electrostatic, + double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, + double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, + double *gpu_rOn, double *gpu_alpha, int *gpu_ewald, + double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, + double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, + double *gpu_Invcell_y, double *gpu_Invcell_z, bool sc_coul, + double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_rMin, + double *gpu_rMaxSq, double *gpu_expConst, int *gpu_molIndex, + double *gpu_lambdaVDW, double *gpu_lambdaCoulomb, + bool *gpu_isFraction, int box); -__device__ double CalcCoulombGPU(double distSq, int kind1, int kind2, - double qi_qj_fact, double gpu_rCutLow, - int gpu_ewald, int gpu_VDW_Kind, - double gpu_alpha, double gpu_rCutCoulomb, - int gpu_isMartini, double gpu_diElectric_1, - double gpu_lambdaCoulomb, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, double *gpu_sigmaSq, - int gpu_count); +__device__ double +CalcCoulombGPU(double distSq, int kind1, int kind2, double qi_qj_fact, + double gpu_rCutLow, int gpu_ewald, int gpu_VDW_Kind, + double gpu_alpha, double gpu_rCutCoulomb, int gpu_isMartini, + double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul, + double sc_sigma_6, double sc_alpha, uint sc_power, + double *gpu_sigmaSq, int gpu_count); __device__ double CalcCoulombVirGPU(double distSq, double qi_qj, double gpu_rCutCoulomb, double gpu_alpha, int gpu_VDW_Kind, int gpu_ewald, @@ -102,78 +63,74 @@ __device__ double CalcEnGPU(double distSq, int kind1, int kind2, double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst); -//ElectroStatic Calculation +// ElectroStatic Calculation //**************************************************************// -__device__ double CalcCoulombParticleGPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_lambdaCoulomb, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, double *gpu_sigmaSq); +__device__ double CalcCoulombParticleGPU(double distSq, int index, + double qi_qj_fact, int gpu_ewald, + double gpu_alpha, + double gpu_lambdaCoulomb, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_sigmaSq); __device__ double CalcCoulombParticleGPUNoLambda(double distSq, - double qi_qj_fact, - int gpu_ewald, - double gpu_alpha); -__device__ double CalcCoulombShiftGPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_rCut, double gpu_lambdaCoulomb, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double *gpu_sigmaSq); + double qi_qj_fact, + int gpu_ewald, + double gpu_alpha); +__device__ double CalcCoulombShiftGPU(double distSq, int index, + double qi_qj_fact, int gpu_ewald, + double gpu_alpha, double gpu_rCut, + double gpu_lambdaCoulomb, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_sigmaSq); __device__ double CalcCoulombShiftGPUNoLambda(double distSq, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_rCut); -__device__ double CalcCoulombExp6GPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, 
double gpu_alpha, - double gpu_lambdaCoulomb, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, double *gpu_sigmaSq); + int gpu_ewald, double gpu_alpha, + double gpu_rCut); +__device__ double CalcCoulombExp6GPU(double distSq, int index, + double qi_qj_fact, int gpu_ewald, + double gpu_alpha, double gpu_lambdaCoulomb, + bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, + double *gpu_sigmaSq); __device__ double CalcCoulombExp6GPUNoLambda(double distSq, double qi_qj_fact, - int gpu_ewald, double gpu_alpha); -__device__ double CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_rCut, - double gpu_diElectric_1, - double gpu_lambdaCoulomb, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double *gpu_sigmaSq); -__device__ double CalcCoulombSwitchMartiniGPUNoLambda(double distSq, - double qi_qj_fact, - int gpu_ewald, - double gpu_alpha, - double gpu_rCut, - double gpu_diElectric_1); -__device__ double CalcCoulombSwitchGPU(double distSq, int index, double qi_qj_fact, - double gpu_alpha, int gpu_ewald, - double gpu_rCut, + int gpu_ewald, double gpu_alpha); +__device__ double +CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact, + int gpu_ewald, double gpu_alpha, double gpu_rCut, + double gpu_diElectric_1, double gpu_lambdaCoulomb, + bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_sigmaSq); +__device__ double +CalcCoulombSwitchMartiniGPUNoLambda(double distSq, double qi_qj_fact, + int gpu_ewald, double gpu_alpha, + double gpu_rCut, double gpu_diElectric_1); +__device__ double CalcCoulombSwitchGPU(double distSq, int index, + double qi_qj_fact, double gpu_alpha, + int gpu_ewald, double gpu_rCut, double gpu_lambdaCoulomb, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_sigmaSq); __device__ double CalcCoulombSwitchGPUNoLambda(double distSq, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, double gpu_rCut); - + int gpu_ewald, double gpu_alpha, + double gpu_rCut); -//VDW Calculation +// VDW Calculation //*****************************************************************// __device__ double CalcEnParticleGPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, - double gpu_lambdaVDW, - double sc_sigma_6, - double sc_alpha, - uint sc_power); + double gpu_lambdaVDW, double sc_sigma_6, + double sc_alpha, uint sc_power); __device__ double CalcEnParticleGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn); + double *gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn); __device__ double CalcEnShiftGPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, double gpu_lambdaVDW, double sc_sigma_6, double sc_alpha, uint sc_power); __device__ double CalcEnShiftGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, - double *gpu_n, double *gpu_epsilon_Cn, - double gpu_rCut); + double *gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut); __device__ double CalcEnExp6GPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double gpu_lambdaVDW, double sc_sigma_6, double sc_alpha, @@ -181,28 +138,23 @@ __device__ double CalcEnExp6GPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_rMaxSq, double *gpu_expConst); __device__ double CalcEnExp6GPUNoLambda(double distSq, int index, double *gpu_n, double *gpu_rMin, double *gpu_expConst); 
-__device__ double CalcEnSwitchMartiniGPU(double distSq, int index, - double *gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double gpu_rOn, - double gpu_lambdaVDW, - double sc_sigma_6, - double sc_alpha, - uint sc_power); -__device__ double CalcEnSwitchMartiniGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, - double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, - double gpu_rOn); +__device__ double +CalcEnSwitchMartiniGPU(double distSq, int index, double *gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, + double gpu_rOn, double gpu_lambdaVDW, double sc_sigma_6, + double sc_alpha, uint sc_power); +__device__ double +CalcEnSwitchMartiniGPUNoLambda(double distSq, int index, double *gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, + double gpu_rCut, double gpu_rOn); __device__ double CalcEnSwitchGPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, double gpu_rOn, double gpu_lambdaVDW, double sc_sigma_6, double sc_alpha, uint sc_power); __device__ double CalcEnSwitchGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double gpu_rOn); + double *gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut, double gpu_rOn); #endif /*GOMC_CUDA*/ diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu index 164091427..8c92d9f26 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cu +++ b/src/GPU/CalculateEwaldCUDAKernel.cu @@ -558,14 +558,15 @@ __global__ void BoxForceReciprocalGPU( double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz, double *gpu_particleCharge, int *gpu_particleMol, bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol, - int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, double constValue, - int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz, - double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact, - double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction, - int *gpu_molIndex, double *gpu_lambdaCoulomb, double *gpu_cell_x, - double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, - double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, double axx, - double axy, double axz, int box, int atomCount) { + int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, + double constValue, int imageSize, double *gpu_kx, double *gpu_ky, + double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z, + double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew, + bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb, + double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, + double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, + int *gpu_nonOrth, double axx, double axy, double axz, int box, + int atomCount) { __shared__ double shared_kvector[IMAGES_PER_BLOCK * 3]; int particleID = blockDim.x * blockIdx.x + threadIdx.x; int offset_vector_index = blockIdx.y * IMAGES_PER_BLOCK; @@ -631,7 +632,8 @@ __global__ void BoxForceReciprocalGPU( double qiqj = gpu_particleCharge[particleID] * gpu_particleCharge[otherParticle] * qqFactGPU; intraForce = qiqj * lambdaCoef * lambdaCoef / distSq; - intraForce *= ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue); + intraForce *= + ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue); forceX -= intraForce * distVect.x; forceY -= intraForce * distVect.y; forceZ -= intraForce * 
distVect.z; diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh index 80062586b..638d5662c 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cuh +++ b/src/GPU/CalculateEwaldCUDAKernel.cuh @@ -2,189 +2,112 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #ifndef CALCULATE_EWALD_CUDA_KERNEL_H #define CALCULATE_EWALD_CUDA_KERNEL_H #ifdef GOMC_CUDA +#include "VariablesCUDA.cuh" +#include "XYZArray.h" #include #include -#include "VariablesCUDA.cuh" #include -#include "XYZArray.h" -void CallBoxForceReciprocalGPU(VariablesCUDA *vars, - XYZArray &atomForceRec, - XYZArray &molForceRec, - const std::vector &particleCharge, - const std::vector &particleMol, - const std::vector &particleHasNoCharge, - const bool *particleUsed, - const std::vector &startMol, - const std::vector &lengthMol, - double constValue, - uint imageSize, - XYZArray const &molCoords, - BoxDimensions const &boxAxes, - int box); +void CallBoxForceReciprocalGPU( + VariablesCUDA *vars, XYZArray &atomForceRec, XYZArray &molForceRec, + const std::vector &particleCharge, + const std::vector &particleMol, + const std::vector &particleHasNoCharge, const bool *particleUsed, + const std::vector &startMol, const std::vector &lengthMol, + double constValue, uint imageSize, XYZArray const &molCoords, + BoxDimensions const &boxAxes, int box); -void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, - XYZArray const &coords, - double const *kx, - double const *ky, +void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, XYZArray const &coords, + double const *kx, double const *ky, double const *kz, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double *prefact, - double *hsqr, - double &energyRecip, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + double *prefact, double *hsqr, + double &energyRecip, uint box); -void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, - XYZArray const &coords, +void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double &energyRecip, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + double &energyRecip, uint box); -void CallMolReciprocalGPU(VariablesCUDA *vars, - XYZArray const ¤tCoords, +void CallMolReciprocalGPU(VariablesCUDA *vars, XYZArray const ¤tCoords, XYZArray const &newCoords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double &energyRecipNew, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + double &energyRecipNew, uint box); -//Calculate reciprocal term for lambdaNew and Old with same coordinates +// Calculate reciprocal term for lambdaNew and Old with same coordinates void CallChangeLambdaMolReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double &energyRecipNew, - const double lambdaCoef, - uint box); + uint imageSize, double *sumRnew, + double *sumInew, double &energyRecipNew, + const double lambdaCoef, uint box); -void CallSwapReciprocalGPU(VariablesCUDA *vars, - XYZArray const &coords, +void CallSwapReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const 
std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - const bool insert, - double &energyRecipNew, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + const bool insert, double &energyRecipNew, uint box); -void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, - uint imageSize, - double *sumRnew, - double *sumInew, - uint box); +void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize, + double *sumRnew, double *sumInew, uint box); -__global__ void BoxForceReciprocalGPU(double *gpu_aForceRecx, - double *gpu_aForceRecy, - double *gpu_aForceRecz, - double *gpu_mForceRecx, - double *gpu_mForceRecy, - double *gpu_mForceRecz, - double *gpu_particleCharge, - int *gpu_particleMol, - bool *gpu_particleHasNoCharge, - bool *gpu_particleUsed, - int *gpu_startMol, - int *gpu_lengthMol, - double *gpu_alpha, - double *gpu_alphaSq, - double constValue, - int imageSize, - double *gpu_kx, - double *gpu_ky, - double *gpu_kz, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double *gpu_prefact, - double *gpu_sumRnew, - double *gpu_sumInew, - bool *gpu_isFraction, - int *gpu_molIndex, - double *gpu_lambdaCoulomb, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - int *gpu_nonOrth, - double axx, - double axy, - double axz, - int box, - int atomCount); +__global__ void BoxForceReciprocalGPU( + double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz, + double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz, + double *gpu_particleCharge, int *gpu_particleMol, + bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol, + int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, + double constValue, int imageSize, double *gpu_kx, double *gpu_ky, + double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z, + double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew, + bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb, + double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, + double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, + int *gpu_nonOrth, double axx, double axy, double axz, int box, + int atomCount); -__global__ void BoxReciprocalSumsGPU(double * gpu_x, - double * gpu_y, - double * gpu_z, - double * gpu_kx, - double * gpu_ky, - double * gpu_kz, - int atomNumber, - double * gpu_particleCharge, - double * gpu_sumRnew, - double * gpu_sumInew, +__global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y, + double *gpu_z, double *gpu_kx, + double *gpu_ky, double *gpu_kz, + int atomNumber, double *gpu_particleCharge, + double *gpu_sumRnew, double *gpu_sumInew, int imageSize); __global__ void MolReciprocalGPU(double *gpu_cx, double *gpu_cy, double *gpu_cz, double *gpu_nx, double *gpu_ny, double *gpu_nz, double *gpu_kx, double *gpu_ky, double *gpu_kz, - int atomNumber, - double *gpu_particleCharge, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_sumRref, - double *gpu_sumIref, + int atomNumber, double *gpu_particleCharge, + double *gpu_sumRnew, double *gpu_sumInew, + double *gpu_sumRref, double *gpu_sumIref, double *gpu_prefactRef, - double *gpu_energyRecipNew, - int imageSize); + double *gpu_energyRecipNew, int imageSize); -__global__ void ChangeLambdaMolReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z, - double *gpu_kx, double *gpu_ky, double *gpu_kz, - int atomNumber, - double *gpu_particleCharge, - double *gpu_sumRnew, - double 
*gpu_sumInew, - double *gpu_sumRref, - double *gpu_sumIref, - double *gpu_prefactRef, - double *gpu_energyRecipNew, - double lambdaCoef, - int imageSize); +__global__ void ChangeLambdaMolReciprocalGPU( + double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_kx, double *gpu_ky, + double *gpu_kz, int atomNumber, double *gpu_particleCharge, + double *gpu_sumRnew, double *gpu_sumInew, double *gpu_sumRref, + double *gpu_sumIref, double *gpu_prefactRef, double *gpu_energyRecipNew, + double lambdaCoef, int imageSize); __global__ void SwapReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z, - double *gpu_kx, double *gpu_ky, double *gpu_kz, - int atomNumber, + double *gpu_kx, double *gpu_ky, + double *gpu_kz, int atomNumber, double *gpu_particleCharge, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_sumRref, - double *gpu_sumIref, - double *gpu_prefactRef, - const bool insert, - double *gpu_energyRecipNew, - int imageSize); + double *gpu_sumRnew, double *gpu_sumInew, + double *gpu_sumRref, double *gpu_sumIref, + double *gpu_prefactRef, const bool insert, + double *gpu_energyRecipNew, int imageSize); -__global__ void BoxReciprocalGPU(double *gpu_prefact, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_energyRecip, +__global__ void BoxReciprocalGPU(double *gpu_prefact, double *gpu_sumRnew, + double *gpu_sumInew, double *gpu_energyRecip, int imageSize); #endif /*GOMC_CUDA*/ diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu index 219a6cafb..79360471a 100644 --- a/src/GPU/CalculateForceCUDAKernel.cu +++ b/src/GPU/CalculateForceCUDAKernel.cu @@ -125,13 +125,14 @@ void CallBoxInterForceGPU( vars->gpu_vT13, vars->gpu_vT22, vars->gpu_vT23, vars->gpu_vT33, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, - vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_cell_x[box], - vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], - vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth, - sc_coul, sc_sigma_6, sc_alpha, sc_power, vars->gpu_rMin, vars->gpu_rMaxSq, - vars->gpu_expConst, vars->gpu_molIndex, vars->gpu_lambdaVDW, - vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box); + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, + vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], + vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], + vars->gpu_Invcell_z[box], vars->gpu_nonOrth, sc_coul, sc_sigma_6, + sc_alpha, sc_power, vars->gpu_rMin, vars->gpu_rMaxSq, vars->gpu_expConst, + vars->gpu_molIndex, vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb, + vars->gpu_isFraction, box); checkLastErrorCUDA(__FILE__, __LINE__); cudaDeviceSynchronize(); // ReduceSum // Virial of LJ @@ -302,10 +303,10 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector &cellVector, gpu_particleKind, gpu_particleMol, gpu_REn, gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, - vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth, - vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], - vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], + vars->gpu_rCutCoulomb, 
vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, + vars->gpu_nonOrth, vars->gpu_cell_x[box], vars->gpu_cell_y[box], + vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_aForcex, vars->gpu_aForcey, vars->gpu_aForcez, vars->gpu_mForcex, vars->gpu_mForcey, vars->gpu_mForcez, sc_coul, sc_sigma_6, sc_alpha, sc_power, @@ -577,9 +578,9 @@ __global__ void BoxInterForceGPU( mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb); double pRF = CalcCoulombForceGPU( distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0], - gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], - gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power, - lambdaCoulomb, gpu_count[0], kA, kB); + gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], + gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, + sc_power, lambdaCoulomb, gpu_count[0], kA, kB); gpu_rT11[threadID] += pRF * (virComponents.x * diff_com.x); gpu_rT22[threadID] += pRF * (virComponents.y * diff_com.y); @@ -599,25 +600,24 @@ __global__ void BoxInterForceGPU( } } -__global__ void -BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, - int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, - double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, - double3 halfAx, bool electrostatic, double *gpu_particleCharge, - int *gpu_particleKind, int *gpu_particleMol, double *gpu_REn, - double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, - int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, - double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, - int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, - double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, - double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, - double *gpu_aForcex, double *gpu_aForcey, double *gpu_aForcez, - double *gpu_mForcex, double *gpu_mForcey, double *gpu_mForcez, - bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, - double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst, - int *gpu_molIndex, double *gpu_lambdaVDW, double *gpu_lambdaCoulomb, - bool *gpu_isFraction, int box) { +__global__ void BoxForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, + double *gpu_rCutCoulomb, double *gpu_rCutLow, double *gpu_rOn, + double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, + double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, + double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, + double *gpu_Invcell_y, double *gpu_Invcell_z, double *gpu_aForcex, + double *gpu_aForcey, double *gpu_aForcez, double *gpu_mForcex, + double *gpu_mForcey, double *gpu_mForcez, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, + double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW, + double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box) { __shared__ double 
shr_cutoff; __shared__ int shr_particlesInsideCurrentCell, shr_numberOfPairs; __shared__ int shr_currentCellStartIndex, shr_neighborCellStartIndex; @@ -708,9 +708,10 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, forces += CalcCoulombForceGPU( distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0], - gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], - gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, - sc_power, lambdaCoulomb, gpu_count[0], kA, kB); + gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], + gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, + sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, + kB); } } @@ -868,12 +869,14 @@ CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, - double gpu_alphaSq, int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } if (sc_coul) { @@ -886,15 +889,18 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return gpu_lambdaCoulomb * CalcCoulombVirParticleGPU(distSq, qi_qj, + gpu_ewald, gpu_alpha, + gpu_alphaSq); } } __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -909,13 +915,15 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } if (sc_coul) { @@ -928,15 +936,17 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirShiftGPU(distSq, qi_qj, 
gpu_ewald, gpu_alpha, gpu_alphaSq); + return gpu_lambdaCoulomb * CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, + gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -950,13 +960,15 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } if (sc_coul) { double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq; @@ -968,15 +980,17 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return gpu_lambdaCoulomb * CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, + gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -990,13 +1004,14 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchMartiniGPU( - double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb) { + double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, + double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCut, gpu_diElectric_1); + return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCut, + gpu_diElectric_1); } if (sc_coul) { @@ -1009,21 +1024,20 @@ __device__ double CalcCoulombVirSwitchMartiniGPU( double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCut, gpu_diElectric_1); + CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCut, + gpu_diElectric_1); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCut, 
gpu_diElectric_1); + return gpu_lambdaCoulomb * CalcCoulombVirSwitchMartiniGPU( + distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } } -__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, - int gpu_ewald, - double gpu_alpha, - double gpu_alphaSq, - double gpu_rCut, - double gpu_diElectric_1) { +__device__ double +CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, + double gpu_alpha, double gpu_alphaSq, + double gpu_rCut, double gpu_diElectric_1) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -1054,11 +1068,11 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, int index, - double gpu_sigmaSq, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, + int index, double gpu_sigmaSq, + bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, @@ -1079,7 +1093,8 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, gpu_alphaSq, gpu_rCut); } else { return gpu_lambdaCoulomb * CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, - gpu_alpha, gpu_alphaSq, gpu_rCut); + gpu_alpha, gpu_alphaSq, + gpu_rCut); } } diff --git a/src/GPU/CalculateForceCUDAKernel.cuh b/src/GPU/CalculateForceCUDAKernel.cuh index 072939c24..87aaca0c8 100644 --- a/src/GPU/CalculateForceCUDAKernel.cuh +++ b/src/GPU/CalculateForceCUDAKernel.cuh @@ -2,407 +2,244 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. 
********************************************************************************/ #ifndef CALCULATE_FORCE_CUDA_KERNEL_H #define CALCULATE_FORCE_CUDA_KERNEL_H #ifdef GOMC_CUDA -#include -#include "XYZArray.h" #include "BoxDimensions.h" -#include "VariablesCUDA.cuh" -#include "ConstantDefinitionsCUDAKernel.cuh" #include "CalculateMinImageCUDAKernel.cuh" +#include "ConstantDefinitionsCUDAKernel.cuh" +#include "VariablesCUDA.cuh" +#include "XYZArray.h" +#include -void CallBoxForceGPU(VariablesCUDA *vars, - const std::vector &cellVector, +void CallBoxForceGPU(VariablesCUDA *vars, const std::vector &cellVector, const std::vector &cellStartIndex, - const std::vector > &neighborList, + const std::vector> &neighborList, const std::vector &mapParticleToCell, - XYZArray const &coords, - BoxDimensions const &boxAxes, + XYZArray const &coords, BoxDimensions const &boxAxes, bool electrostatic, const std::vector &particleCharge, const std::vector &particleKind, - const std::vector &particleMol, - double &REn, - double &LJEn, - double *aForcex, - double *aForcey, - double *aForcez, - double *mForcex, - double *mForcey, - double *mForcez, - int atomCount, - int molCount, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, + const std::vector &particleMol, double &REn, + double &LJEn, double *aForcex, double *aForcey, + double *aForcez, double *mForcex, double *mForcey, + double *mForcez, int atomCount, int molCount, bool sc_coul, + double sc_sigma_6, double sc_alpha, uint sc_power, uint const box); -void CallBoxInterForceGPU(VariablesCUDA *vars, - const std::vector &cellVector, - const std::vector &cellStartIndex, - const std::vector > &neighborList, - const std::vector &mapParticleToCell, - XYZArray const ¤tCoords, - XYZArray const ¤tCOM, - BoxDimensions const& boxAxes, - bool electrostatic, - const std::vector &particleCharge, - const std::vector &particleKind, - const std::vector &particleMol, - double &rT11, - double &rT12, - double &rT13, - double &rT22, - double &rT23, - double &rT33, - double &vT11, - double &vT12, - double &vT13, - double &vT22, - double &vT23, - double &vT33, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - uint const box); +void CallBoxInterForceGPU( + VariablesCUDA *vars, const std::vector &cellVector, + const std::vector &cellStartIndex, + const std::vector> &neighborList, + const std::vector &mapParticleToCell, XYZArray const ¤tCoords, + XYZArray const ¤tCOM, BoxDimensions const &boxAxes, + bool electrostatic, const std::vector &particleCharge, + const std::vector &particleKind, const std::vector &particleMol, + double &rT11, double &rT12, double &rT13, double &rT22, double &rT23, + double &rT33, double &vT11, double &vT12, double &vT13, double &vT22, + double &vT23, double &vT33, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, uint const box); -void CallVirialReciprocalGPU(VariablesCUDA *vars, - XYZArray const ¤tCoords, +void CallVirialReciprocalGPU(VariablesCUDA *vars, XYZArray const ¤tCoords, XYZArray const ¤tCOMDiff, const std::vector &particleCharge, - double &rT11, - double &rT12, - double &rT13, - double &rT22, - double &rT23, - double &rT33, - uint imageSize, - double constVal, - uint box); + double &rT11, double &rT12, double &rT13, + double &rT22, double &rT23, double &rT33, + uint imageSize, double constVal, uint box); -__global__ void BoxForceGPU(int *gpu_cellStartIndex, - int *gpu_cellVector, - int *gpu_neighborList, - int numberOfCells, - int atomNumber, - int *gpu_mapParticleToCell, - double 
*gpu_x, - double *gpu_y, - double *gpu_z, - double3 axis, - double3 halfAx, - bool electrostatic, - double *gpu_particleCharge, - int *gpu_particleKind, - int *gpu_particleMol, - double *gpu_REn, - double *gpu_LJEn, - double *gpu_sigmaSq, - double *gpu_epsilon_Cn, - double *gpu_n, - int *gpu_VDW_Kind, - int *gpu_isMartini, - int *gpu_count, - double *gpu_rCut, - double *gpu_rCutCoulomb, - double *gpu_rCutLow, - double *gpu_rOn, - double *gpu_alpha, - double *gpu_alphaSq, - int *gpu_ewald, - double *gpu_diElectric_1, - int *gpu_nonOrth, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - double *gpu_aForcex, - double *gpu_aForcey, - double *gpu_aForcez, - double *gpu_mForcex, - double *gpu_mForcey, - double *gpu_mForcez, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst, - int *gpu_molIndex, - double *gpu_lambdaVDW, - double *gpu_lambdaCoulomb, - bool *gpu_isFraction, - int box); +__global__ void BoxForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, + double *gpu_rCutCoulomb, double *gpu_rCutLow, double *gpu_rOn, + double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, + double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, + double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, + double *gpu_Invcell_y, double *gpu_Invcell_z, double *gpu_aForcex, + double *gpu_aForcey, double *gpu_aForcez, double *gpu_mForcex, + double *gpu_mForcey, double *gpu_mForcez, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, + double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW, + double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box); -__global__ void BoxInterForceGPU(int *gpu_cellStartIndex, - int *gpu_cellVector, - int *gpu_neighborList, - int numberOfCells, - int atomNumber, - int *gpu_mapParticleToCell, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double *gpu_comx, - double *gpu_comy, - double *gpu_comz, - double3 axis, - double3 halfAx, - bool electrostatic, - double *gpu_particleCharge, - int *gpu_particleKind, - int *gpu_particleMol, - double *gpu_rT11, - double *gpu_rT12, - double *gpu_rT13, - double *gpu_rT22, - double *gpu_rT23, - double *gpu_rT33, - double *gpu_vT11, - double *gpu_vT12, - double *gpu_vT13, - double *gpu_vT22, - double *gpu_vT23, - double *gpu_vT33, - double *gpu_sigmaSq, - double *gpu_epsilon_Cn, - double *gpu_n, - int *gpu_VDW_Kind, - int *gpu_isMartini, - int *gpu_count, - double *gpu_rCut, - double *gpu_rCutCoulomb, - double *gpu_rCutLow, - double *gpu_rOn, - double *gpu_alpha, - double *gpu_alphaSq, - int *gpu_ewald, - double *gpu_diElectric_1, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - int *gpu_nonOrth, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst, - int *gpu_molIndex, - double 
*gpu_lambdaVDW, - double *gpu_lambdaCoulomb, - bool *gpu_isFraction, - int box); +__global__ void BoxInterForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_comx, + double *gpu_comy, double *gpu_comz, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_rT11, double *gpu_rT12, double *gpu_rT13, + double *gpu_rT22, double *gpu_rT23, double *gpu_rT33, double *gpu_vT11, + double *gpu_vT12, double *gpu_vT13, double *gpu_vT22, double *gpu_vT23, + double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn, + double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, + double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, + double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, + double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y, + double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, + double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, + double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW, + double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box); -__global__ void VirialReciprocalGPU(double *gpu_x, - double *gpu_y, - double *gpu_z, - double *gpu_comDx, - double *gpu_comDy, - double *gpu_comDz, - double *gpu_kxRef, - double *gpu_kyRef, - double *gpu_kzRef, - double *gpu_prefactRef, - double *gpu_hsqrRef, - double *gpu_sumRref, - double *gpu_sumIref, - double *gpu_particleCharge, - double *gpu_rT11, - double *gpu_rT12, - double *gpu_rT13, - double *gpu_rT22, - double *gpu_rT23, - double *gpu_rT33, - double constVal, - uint imageSize, - uint atomNumber); +__global__ void VirialReciprocalGPU( + double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_comDx, + double *gpu_comDy, double *gpu_comDz, double *gpu_kxRef, double *gpu_kyRef, + double *gpu_kzRef, double *gpu_prefactRef, double *gpu_hsqrRef, + double *gpu_sumRref, double *gpu_sumIref, double *gpu_particleCharge, + double *gpu_rT11, double *gpu_rT12, double *gpu_rT13, double *gpu_rT22, + double *gpu_rT23, double *gpu_rT33, double constVal, uint imageSize, + uint atomNumber); -__device__ double CalcEnForceGPU(double distSq, int kind1, int kind2, - double *gpu_sigmaSq, - double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, - double gpu_rOn, - int gpu_isMartini, - int gpu_VDW_Kind, - int gpu_count, - double gpu_lambdaVDW, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst); +__device__ double +CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, + double gpu_rOn, int gpu_isMartini, int gpu_VDW_Kind, + int gpu_count, double gpu_lambdaVDW, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, + double *gpu_rMaxSq, double *gpu_expConst); -//ElectroStatic Calculation +// ElectroStatic Calculation //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, 
int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, + double gpu_lambdaCoulomb); __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, + double gpu_lambdaCoulomb); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq); -__device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq); +__device__ double +CalcCoulombVirExp6GPU(double distSq, double qi_qj, int gpu_ewald, + double gpu_alpha, double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq); -__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, - int gpu_ewald, - double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, - double gpu_diElectric_1, - int index, - double gpu_sigmaSq, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double gpu_lambdaCoulomb); -__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, - int gpu_ewald, - double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, - double gpu_diElectric_1); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq); +__device__ double CalcCoulombVirSwitchMartiniGPU( + double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, + double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb); +__device__ double +CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, + double gpu_alpha, double gpu_alphaSq, + double gpu_rCut, double gpu_diElectric_1); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, int index, - double gpu_sigmaSq, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, + int index, double gpu_sigmaSq, + bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, + double gpu_lambdaCoulomb); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut); -//VDW Calculation +// VDW Calculation //*****************************************************************// __device__ double CalcVirParticleGPU(double distSq, int index, double gpu_sigmaSq, double *gpu_n, - double 
*gpu_epsilon_Cn, - double sc_sigma_6, + double *gpu_epsilon_Cn, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaVDW); __device__ double CalcVirParticleGPU(double distSq, int index, double gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn); -__device__ double CalcVirShiftGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, +__device__ double CalcVirShiftGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaVDW); -__device__ double CalcVirShiftGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn); -__device__ double CalcVirExp6GPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_rMin, double *gpu_rMaxSq, - double *gpu_expConst, + uint sc_power, double gpu_lambdaVDW); +__device__ double CalcVirShiftGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn); +__device__ double CalcVirExp6GPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_n, double *gpu_rMin, + double *gpu_rMaxSq, double *gpu_expConst, double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaVDW); + uint sc_power, double gpu_lambdaVDW); __device__ double CalcVirExp6GPU(double distSq, int index, double *gpu_n, double *gpu_rMin, double *gpu_expConst); __device__ double CalcVirSwitchMartiniGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double rOn, - double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaVDW); + double gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut, double rOn, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaVDW); __device__ double CalcVirSwitchMartiniGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double rOn); -__device__ double CalcVirSwitchGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, double gpu_rCut, - double gpu_rOn, + double gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut, double rOn); +__device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_epsilon_Cn, double *gpu_n, + double gpu_rCut, double gpu_rOn, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaVDW); -__device__ double CalcVirSwitchGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, double gpu_rCut, - double gpu_rOn); - +__device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_epsilon_Cn, double *gpu_n, + double gpu_rCut, double gpu_rOn); // Have to move the implementation for some functions here // since CUDA doesn't allow __global__ to call __device__ // from different files // Wanted to call CalcCoulombForceGPU() from CalculateEnergyCUDAKernel.cu file -__device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, - int gpu_VDW_Kind, int gpu_ewald, - int gpu_isMartini, - double gpu_alpha, double gpu_alphaSq, - double gpu_rCutCoulomb, - double gpu_diElectric_1, - double *gpu_sigmaSq, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double gpu_lambdaCoulomb, - int gpu_count, int kind1, - int kind2) -{ - if((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) { +__device__ inline double CalcCoulombForceGPU( + double distSq, double qi_qj, 
int gpu_VDW_Kind, int gpu_ewald, + int gpu_isMartini, double gpu_alpha, double gpu_alphaSq, + double gpu_rCutCoulomb, double gpu_diElectric_1, double *gpu_sigmaSq, + bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, + double gpu_lambdaCoulomb, int gpu_count, int kind1, int kind2) { + if ((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) { return 0.0; } int index = FlatIndexGPU(kind1, kind2, gpu_count); - if(gpu_VDW_Kind == GPU_VDW_STD_KIND) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, - gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, - sc_power, gpu_lambdaCoulomb); - } else if(gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, - gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, - sc_power, gpu_lambdaCoulomb); - } else if(gpu_VDW_Kind == GPU_VDW_EXP6_KIND) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, - gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, - sc_power, gpu_lambdaCoulomb); - } else if(gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCutCoulomb, gpu_diElectric_1, - index, gpu_sigmaSq[index], sc_coul, - sc_sigma_6, sc_alpha, sc_power, - gpu_lambdaCoulomb); + if (gpu_VDW_Kind == GPU_VDW_STD_KIND) { + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, index, gpu_sigmaSq[index], + sc_coul, sc_sigma_6, sc_alpha, sc_power, + gpu_lambdaCoulomb); + } else if (gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) { + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, index, gpu_sigmaSq[index], + sc_coul, sc_sigma_6, sc_alpha, sc_power, + gpu_lambdaCoulomb); + } else if (gpu_VDW_Kind == GPU_VDW_EXP6_KIND) { + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, index, gpu_sigmaSq[index], + sc_coul, sc_sigma_6, sc_alpha, sc_power, + gpu_lambdaCoulomb); + } else if (gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) { + return CalcCoulombVirSwitchMartiniGPU( + distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb, + gpu_diElectric_1, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, + sc_alpha, sc_power, gpu_lambdaCoulomb); } else - return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCutCoulomb, index, gpu_sigmaSq[index], sc_coul, - sc_sigma_6, sc_alpha, sc_power, - gpu_lambdaCoulomb); + return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCutCoulomb, index, + gpu_sigmaSq[index], sc_coul, sc_sigma_6, + sc_alpha, sc_power, gpu_lambdaCoulomb); } - #endif /*GOMC_CUDA*/ #endif /*CALCULATE_FORCE_CUDA_KERNEL_H*/ diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh index 3584e46c2..c5cabf281 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh @@ -2,17 +2,18 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. 
********************************************************************************/ #ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL_H #define CONSTANT_DEFINITIONS_CUDA_KERNEL_H #ifdef GOMC_CUDA -#include -#include +#include "EnsemblePreprocessor.h" #include "GeomLib.h" #include "VariablesCUDA.cuh" -#include "EnsemblePreprocessor.h" +#include +#include #define GPU_VDW_STD_KIND 0 #define GPU_VDW_SHIFT_KIND 1 @@ -21,12 +22,12 @@ along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #pragma once #ifdef GOMC_CUDA -#include -#include -#include #include "EnsemblePreprocessor.h" #include "NumLib.h" +#include +#include +#include -//Need a separate float constant for device code with the MSVC compiler -//See CUDA Programming Guide section I.4.13 for details +// Need a separate float constant for device code with the MSVC compiler +// See CUDA Programming Guide section I.4.13 for details static const __device__ double qqFactGPU = num::qqFact; -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) -{ +#define gpuErrchk(ans) \ + { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, + bool abort = true) { if (code != cudaSuccess) { - fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, + line); + if (abort) + exit(code); } } -inline void checkLastErrorCUDA(const char *file, int line) -{ +inline void checkLastErrorCUDA(const char *file, int line) { cudaError_t code = cudaGetLastError(); if (code != cudaSuccess) { - fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, + line); exit(code); } } -inline void printFreeMemory() -{ - size_t free_byte ; - size_t total_byte ; - cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ; +inline void printFreeMemory() { + size_t free_byte; + size_t total_byte; + cudaError_t cuda_status = cudaMemGetInfo(&free_byte, &total_byte); - if ( cudaSuccess != cuda_status ) { + if (cudaSuccess != cuda_status) { printf("Error: cudaMemGetInfo fails, %s \n", - cudaGetErrorString(cuda_status) ); + cudaGetErrorString(cuda_status)); exit(1); } - double free_db = (double)free_byte ; - double total_db = (double)total_byte ; - double used_db = total_db - free_db ; + double free_db = (double)free_byte; + double total_db = (double)total_byte; + double used_db = total_db - free_db; printf("GPU memory usage: used = %f, free = %f MB, total = %f MB\n", - used_db / 1024.0 / 1024.0, free_db / 1024.0 / 1024.0, total_db / 1024.0 / 1024.0); + used_db / 1024.0 / 1024.0, free_db / 1024.0 / 1024.0, + total_db / 1024.0 / 1024.0); } -class VariablesCUDA -{ +class VariablesCUDA { public: - VariablesCUDA() - { + VariablesCUDA() { gpu_sigmaSq = NULL; gpu_epsilon_Cn = NULL; gpu_n = NULL; @@ -94,7 +96,7 @@ public: int *gpu_VDW_Kind; int *gpu_isMartini; int *gpu_count; - int *gpu_startAtomIdx; //start atom index of the molecule + int *gpu_startAtomIdx; // start atom index of the molecule double *gpu_rCut, *gpu_rCutSq; double *gpu_rCutCoulomb, *gpu_rCutCoulombSq; double *gpu_rCutLow;
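[Editor's note, not part of the patch: the reformatted gpuErrchk/gpuAssert macro and checkLastErrorCUDA helper above are the error-handling idiom used around the CUDA calls in these files -- gpuErrchk wraps any runtime call that returns a cudaError_t, while checkLastErrorCUDA picks up the sticky error a kernel launch leaves behind (the CallBoxInterForceGPU hunk earlier in this patch pairs it with cudaDeviceSynchronize()). A minimal usage sketch; allocateAndZero is an illustrative name, not a GOMC function:

#include <cuda_runtime.h>

void allocateAndZero(double **devPtr, size_t n) {
  // Wrap runtime calls; on failure gpuAssert prints file/line and exits.
  gpuErrchk(cudaMalloc((void **)devPtr, n * sizeof(double)));
  gpuErrchk(cudaMemset(*devPtr, 0, n * sizeof(double)));
}

// A kernel launch expression returns no status, so the launch is checked
// afterward via the sticky error:
//   someKernel<<<blocks, threads>>>(args);
//   checkLastErrorCUDA(__FILE__, __LINE__);

End of editor's note.]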