Iteration 0016 — ab55ea3e8508 (accepted)¶
GitHub commit: ab55ea3e8508 Published branch: fermilink-optimize/lammps-tip4p
Change summary¶
Restore safe hydrogen-neighbor LJ pruning in pair_lj_cut_tip4p_long and reuse cached TIP4P oxygen M/H state across PPPM particle_map()/make_rho()/fieldforce_ik() with memset density zeroing and linear IK brick indexing
Acceptance rationale¶
Correctness passed and 78.12 beats incumbent 79.88 by 2.20%, clearing the 2% acceptance bar.
Guardrails & metrics¶
field |
value |
|---|---|
decision |
ACCEPTED |
correctness |
ok |
correctness mode |
field_tolerances |
hard reject |
no |
guardrail errors |
0 |
incumbent commit |
|
candidate commit |
|
incumbent metric |
79.88 |
candidate metric |
78.12 |
baseline metric |
94.717 |
Δ vs incumbent |
+2.203% (lower-is-better sign) |
changed files |
src/KSPACE/pair_lj_cut_tip4p_long.cpp, src/KSPACE/pppm_tip4p.cpp, src/KSPACE/pppm_tip4p.h |
Diffstat¶
src/KSPACE/pair_lj_cut_tip4p_long.cpp | 10 +--
src/KSPACE/pppm_tip4p.cpp | 131 +++++++++++++++++++++++++---------
src/KSPACE/pppm_tip4p.h | 9 +++
3 files changed, 114 insertions(+), 36 deletions(-)
Diff¶
diff --git a/src/KSPACE/pair_lj_cut_tip4p_long.cpp b/src/KSPACE/pair_lj_cut_tip4p_long.cpp
index c7b794571f..a070ca005d 100644
--- a/src/KSPACE/pair_lj_cut_tip4p_long.cpp
+++ b/src/KSPACE/pair_lj_cut_tip4p_long.cpp
@@ -173,7 +173,7 @@ void PairLJCutTIP4PLong::eval_i(int i, int itype, double qtmp, double xtmp, doub
int vlist[6];
double delx, dely, delz, evdwl, ecoul, fraction, table;
double r, rsq, r2inv, r6inv, forcecoul, forcelj, cforce;
- double factor_coul, factor_lj;
+ double factor_coul;
double grij, expm2, prefactor, t, erfc;
double fO[3], fH[3], fd[3], v[6];
double *x2, *xH1, *xH2;
@@ -206,8 +206,8 @@ void PairLJCutTIP4PLong::eval_i(int i, int itype, double qtmp, double xtmp, doub
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
- factor_lj = special_lj[sbmask(j)];
- factor_coul = special_coul[sbmask(j)];
+ const int sb = sbmask(j);
+ factor_coul = special_coul[sb];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
@@ -216,9 +216,11 @@ void PairLJCutTIP4PLong::eval_i(int i, int itype, double qtmp, double xtmp, doub
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
const bool j_is_oxygen = (jtype == typeO);
+ const bool j_is_hydrogen = (jtype == typeH);
if constexpr (!I_HYDROGEN) {
- if (rsq < cut_ljsq_i[jtype]) {
+ if (!j_is_hydrogen && rsq < cut_ljsq_i[jtype]) {
+ const double factor_lj = special_lj[sb];
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1_i[jtype]*r6inv - lj2_i[jtype]);
diff --git a/src/KSPACE/pppm_tip4p.cpp b/src/KSPACE/pppm_tip4p.cpp
index 631eebc65c..dae2a023f0 100644
--- a/src/KSPACE/pppm_tip4p.cpp
+++ b/src/KSPACE/pppm_tip4p.cpp
@@ -23,8 +23,10 @@
#include "force.h"
#include "error.h"
#include "math_const.h"
+#include "memory.h"
#include <cmath>
+#include <cstring>
using namespace LAMMPS_NS;
using namespace MathConst;
@@ -38,6 +40,22 @@ PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp) : PPPM(lmp)
{
triclinic_support = 1;
tip4pflag = 1;
+
+ tip4p_cache_nmax = 0;
+ tip4p_h1 = nullptr;
+ tip4p_h2 = nullptr;
+ tip4p_cache_valid = nullptr;
+ tip4p_xm = nullptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PPPMTIP4P::~PPPMTIP4P()
+{
+ memory->destroy(tip4p_h1);
+ memory->destroy(tip4p_h2);
+ memory->destroy(tip4p_cache_valid);
+ memory->destroy(tip4p_xm);
}
/* ---------------------------------------------------------------------- */
@@ -52,6 +70,40 @@ void PPPMTIP4P::init()
PPPM::init();
}
+/* ---------------------------------------------------------------------- */
+
+void PPPMTIP4P::grow_tip4p_cache()
+{
+ if (atom->nmax <= tip4p_cache_nmax) return;
+
+ tip4p_cache_nmax = atom->nmax;
+
+ memory->destroy(tip4p_h1);
+ memory->create(tip4p_h1,tip4p_cache_nmax,"pppm/tip4p:h1");
+ memory->destroy(tip4p_h2);
+ memory->create(tip4p_h2,tip4p_cache_nmax,"pppm/tip4p:h2");
+ memory->destroy(tip4p_cache_valid);
+ memory->create(tip4p_cache_valid,tip4p_cache_nmax,"pppm/tip4p:valid");
+ memory->destroy(tip4p_xm);
+ memory->create(tip4p_xm,tip4p_cache_nmax,3,"pppm/tip4p:xm");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMTIP4P::find_M_cached(int i, int &iH1, int &iH2, double *&xM)
+{
+ if (atom->nmax > tip4p_cache_nmax) grow_tip4p_cache();
+
+ if (!tip4p_cache_valid[i]) {
+ find_M(i,tip4p_h1[i],tip4p_h2[i],tip4p_xm[i]);
+ tip4p_cache_valid[i] = 1;
+ }
+
+ iH1 = tip4p_h1[i];
+ iH2 = tip4p_h2[i];
+ xM = tip4p_xm[i];
+}
+
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
@@ -61,19 +113,24 @@ void PPPMTIP4P::init()
void PPPMTIP4P::particle_map()
{
int nx,ny,nz,iH1,iH2;
- double *xi,xM[3];
+ double *xi,*xM;
int *type = atom->type;
double **x = atom->x;
int nlocal = atom->nlocal;
+ if (nlocal > 0) {
+ grow_tip4p_cache();
+ std::memset(tip4p_cache_valid,0,nlocal*sizeof(int));
+ }
+
if (!std::isfinite(boxlo[0]) || !std::isfinite(boxlo[1]) || !std::isfinite(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable" + utils::errorurl(6));
int flag = 0;
for (int i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
- find_M(i,iH1,iH2,xM);
+ find_M_cached(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
@@ -110,14 +167,14 @@ void PPPMTIP4P::particle_map()
void PPPMTIP4P::make_rho()
{
- int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
+ int l,m,n,nx,ny,nz,iH1,iH2;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
- double *xi,xM[3];
+ double *xi,*xM;
// clear 3d density array
- FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
- for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
+ FFT_SCALAR *density = &density_brick[nzlo_out][nylo_out][nxlo_out];
+ std::memset(density,0,ngrid*sizeof(FFT_SCALAR));
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@@ -128,10 +185,13 @@ void PPPMTIP4P::make_rho()
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
+ const int nx_out = nxhi_out - nxlo_out + 1;
+ const int ny_out = nyhi_out - nylo_out + 1;
+ const int nyz_out = nx_out * ny_out;
for (int i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
- find_M(i,iH1,iH2,xM);
+ find_M_cached(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
@@ -146,15 +206,15 @@ void PPPMTIP4P::make_rho()
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
+ int index = ((nz+n-nzlo_out) * nyz_out) + (ny+nlower-nylo_out) * nx_out +
+ (nx+nlower-nxlo_out);
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
- my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- density_brick[mz][my][mx] += x0*rho1d[0][l];
+ density[index+l-nlower] += x0*rho1d[0][l];
}
+ index += nx_out;
}
}
}
@@ -166,12 +226,11 @@ void PPPMTIP4P::make_rho()
void PPPMTIP4P::fieldforce_ik()
{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
+ int i,l,m,n,nx,ny,nz,iH1,iH2;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
double *xi;
- int iH1,iH2;
- double xM[3];
+ double *xM;
double fx,fy,fz;
// loop over my charges, interpolate electric field from nearby grid points
@@ -182,13 +241,22 @@ void PPPMTIP4P::fieldforce_ik()
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
+ const double half_alpha = 0.5 * alpha;
+ const double one_minus_alpha = 1.0 - alpha;
+ const int zforce_active = (slabflag != 2);
+ FFT_SCALAR *vdx = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *vdy = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *vdz = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
int *type = atom->type;
int nlocal = atom->nlocal;
+ const int nx_out = nxhi_out - nxlo_out + 1;
+ const int ny_out = nyhi_out - nylo_out + 1;
+ const int nyz_out = nx_out * ny_out;
for (i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
- find_M(i,iH1,iH2,xM);
+ find_M_cached(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
@@ -203,18 +271,18 @@ void PPPMTIP4P::fieldforce_ik()
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
+ int index = ((nz+n-nzlo_out) * nyz_out) + (ny+nlower-nylo_out) * nx_out +
+ (nx+nlower-nxlo_out);
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
- my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
x0 = y0*rho1d[0][l];
- ekx -= x0*vdx_brick[mz][my][mx];
- eky -= x0*vdy_brick[mz][my][mx];
- ekz -= x0*vdz_brick[mz][my][mx];
+ ekx -= x0*vdx[index+l-nlower];
+ eky -= x0*vdy[index+l-nlower];
+ ekz -= x0*vdz[index+l-nlower];
}
+ index += nx_out;
}
}
@@ -224,25 +292,24 @@ void PPPMTIP4P::fieldforce_ik()
if (type[i] != typeO) {
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
- if (slabflag != 2) f[i][2] += qfactor*ekz;
+ if (zforce_active) f[i][2] += qfactor*ekz;
} else {
fx = qfactor * ekx;
fy = qfactor * eky;
fz = qfactor * ekz;
- find_M(i,iH1,iH2,xM);
- f[i][0] += fx*(1 - alpha);
- f[i][1] += fy*(1 - alpha);
- if (slabflag != 2) f[i][2] += fz*(1 - alpha);
+ f[i][0] += fx * one_minus_alpha;
+ f[i][1] += fy * one_minus_alpha;
+ if (zforce_active) f[i][2] += fz * one_minus_alpha;
- f[iH1][0] += 0.5*alpha*fx;
- f[iH1][1] += 0.5*alpha*fy;
- if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz;
+ f[iH1][0] += half_alpha * fx;
+ f[iH1][1] += half_alpha * fy;
+ if (zforce_active) f[iH1][2] += half_alpha * fz;
- f[iH2][0] += 0.5*alpha*fx;
- f[iH2][1] += 0.5*alpha*fy;
- if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz;
+ f[iH2][0] += half_alpha * fx;
+ f[iH2][1] += half_alpha * fy;
+ if (zforce_active) f[iH2][2] += half_alpha * fz;
}
}
}
diff --git a/src/KSPACE/pppm_tip4p.h b/src/KSPACE/pppm_tip4p.h
index 6af532c409..abe3418959 100644
--- a/src/KSPACE/pppm_tip4p.h
+++ b/src/KSPACE/pppm_tip4p.h
@@ -27,6 +27,7 @@ namespace LAMMPS_NS {
class PPPMTIP4P : public PPPM {
public:
PPPMTIP4P(class LAMMPS *);
+ ~PPPMTIP4P() override;
void init() override;
protected:
@@ -38,6 +39,14 @@ class PPPMTIP4P : public PPPM {
void slabcorr() override;
private:
+ int tip4p_cache_nmax;
+ int *tip4p_h1;
+ int *tip4p_h2;
+ int *tip4p_cache_valid;
+ double **tip4p_xm;
+
+ void grow_tip4p_cache();
+ void find_M_cached(int, int &, int &, double *&);
void find_M(int, int &, int &, double *);
};