Iteration 0003 — 3a45586c8afa (accepted)¶
GitHub commit: 3a45586c8afa Published branch: fermilink-optimize/lammps-tip4p
Change summary¶
Specialize pair_lj_cut_tip4p_long with compile-time energy/virial/table dispatch, a shared TIP4P site-cache helper, and separate oxygen/non-oxygen i hot paths while preserving the original force writes and ev_tally_tip4p() semantics.
Acceptance rationale¶
Correctness passed with no guardrail errors, and the candidate improves the primary metric from 94.717 to 81.885, a 13.55% gain that clears the 2% acceptance bar.
Guardrails & metrics¶
field |
value |
|---|---|
decision |
ACCEPTED |
correctness |
ok |
correctness mode |
field_tolerances |
hard reject |
no |
guardrail errors |
0 |
incumbent commit |
|
candidate commit |
|
incumbent metric |
94.717 |
candidate metric |
81.885 |
baseline metric |
94.717 |
Δ vs incumbent |
+13.548% (lower-is-better sign) |
changed files |
src/KSPACE/pair_lj_cut_tip4p_long.cpp, src/KSPACE/pair_lj_cut_tip4p_long.h |
Diffstat¶
src/KSPACE/pair_lj_cut_tip4p_long.cpp | 616 +++++++++++++++++-----------------
src/KSPACE/pair_lj_cut_tip4p_long.h | 5 +
2 files changed, 322 insertions(+), 299 deletions(-)
Diff¶
diff --git a/src/KSPACE/pair_lj_cut_tip4p_long.cpp b/src/KSPACE/pair_lj_cut_tip4p_long.cpp
index d9f8daf751..5534584ad0 100644
--- a/src/KSPACE/pair_lj_cut_tip4p_long.cpp
+++ b/src/KSPACE/pair_lj_cut_tip4p_long.cpp
@@ -71,28 +71,14 @@ PairLJCutTIP4PLong::~PairLJCutTIP4PLong()
void PairLJCutTIP4PLong::compute(int eflag, int vflag)
{
- int i,j,ii,jj,inum,jnum,itype,jtype,itable,key;
- int n,vlist[6];
- int iH1,iH2,jH1,jH2;
- double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul;
- double fraction,table;
- double r,r2inv,r6inv,forcecoul,forcelj,cforce;
- double factor_coul,factor_lj;
- double grij,expm2,prefactor,t,erfc;
- double fO[3],fH[3],fd[3],v[6];
- double *x1,*x2,*xH1,*xH2;
- int *ilist,*jlist,*numneigh,**firstneigh;
- double rsq;
-
- evdwl = ecoul = 0.0;
ev_init(eflag,vflag);
// reallocate hneigh & newsite if necessary
// initialize hneigh[0] to -1 on steps when reneighboring occurred
// initialize hneigh[2] to 0 every step
- int nlocal = atom->nlocal;
- int nall = nlocal + atom->nghost;
+ const int nlocal = atom->nlocal;
+ const int nall = nlocal + atom->nghost;
if (atom->nmax > nmax) {
nmax = atom->nmax;
@@ -102,321 +88,353 @@ void PairLJCutTIP4PLong::compute(int eflag, int vflag)
memory->create(newsite,nmax,3,"pair:newsite");
}
if (neighbor->ago == 0)
- for (i = 0; i < nall; i++) hneigh[i][0] = -1;
- for (i = 0; i < nall; i++) hneigh[i][2] = 0;
+ for (int i = 0; i < nall; i++) hneigh[i][0] = -1;
+ for (int i = 0; i < nall; i++) hneigh[i][2] = 0;
+
+ if (!ncoultablebits) {
+ if (evflag) {
+ if (eflag) {
+ if (vflag) return eval<1,1,1,1>();
+ else return eval<1,1,1,0>();
+ } else {
+ if (vflag) return eval<1,1,0,1>();
+ else return eval<1,1,0,0>();
+ }
+ } else return eval<1,0,0,0>();
+ } else {
+ if (evflag) {
+ if (eflag) {
+ if (vflag) return eval<0,1,1,1>();
+ else return eval<0,1,1,0>();
+ } else {
+ if (vflag) return eval<0,1,0,1>();
+ else return eval<0,1,0,0>();
+ }
+ } else return eval<0,0,0,0>();
+ }
+}
- double **f = atom->f;
+/* ---------------------------------------------------------------------- */
+
+template <int CTABLE, int EVFLAG, int EFLAG, int VFLAG>
+void PairLJCutTIP4PLong::eval()
+{
double **x = atom->x;
double *q = atom->q;
tagint *tag = atom->tag;
int *type = atom->type;
- double *special_coul = force->special_coul;
- double *special_lj = force->special_lj;
- int newton_pair = force->newton_pair;
- double qqrd2e = force->qqrd2e;
- double cut_coulsqplus = (cut_coul+2.0*qdist) * (cut_coul+2.0*qdist);
+ int *ilist = list->ilist;
+ int *numneigh = list->numneigh;
+ int **firstneigh = list->firstneigh;
+ const int inum = list->inum;
+ const int nlocal = atom->nlocal;
+ const double cut_coulsqplus = (cut_coul+2.0*qdist) * (cut_coul+2.0*qdist);
+
+ for (int ii = 0; ii < inum; ii++) {
+ const int i = ilist[ii];
+ const double qtmp = q[i];
+ const double xtmp = x[i][0];
+ const double ytmp = x[i][1];
+ const double ztmp = x[i][2];
+ const int itype = type[i];
+ int *jlist = firstneigh[i];
+ const int jnum = numneigh[i];
- inum = list->inum;
- ilist = list->ilist;
- numneigh = list->numneigh;
- firstneigh = list->firstneigh;
-
- // loop over neighbors of my atoms
+ if (itype == typeO) {
+ int iH1, iH2;
+ double *x1 = tip4p_site(i,iH1,iH2,x,tag,type);
+ eval_i<CTABLE,EVFLAG,EFLAG,VFLAG,1>(i,itype,qtmp,xtmp,ytmp,ztmp,iH1,iH2,x1,jlist,jnum,
+ nlocal,cut_coulsqplus);
+ } else {
+ eval_i<CTABLE,EVFLAG,EFLAG,VFLAG,0>(i,itype,qtmp,xtmp,ytmp,ztmp,-1,-1,x[i],jlist,jnum,
+ nlocal,cut_coulsqplus);
+ }
+ }
+}
- for (ii = 0; ii < inum; ii++) {
- i = ilist[ii];
- qtmp = q[i];
- xtmp = x[i][0];
- ytmp = x[i][1];
- ztmp = x[i][2];
- itype = type[i];
+/* ---------------------------------------------------------------------- */
- // if atom I = water O, set x1 = offset charge site
- // else x1 = x of atom I
+template <int CTABLE, int EVFLAG, int EFLAG, int VFLAG, int I_WATER>
+void PairLJCutTIP4PLong::eval_i(int i, int itype, double qtmp, double xtmp, double ytmp,
+ double ztmp, int iH1, int iH2, double *x1, int *jlist,
+ int jnum, int nlocal, double cut_coulsqplus)
+{
+ int j, jj, jtype, itable, key = 0, n = 0, jH1 = -1, jH2 = -1;
+ int vlist[6];
+ double delx, dely, delz, evdwl, ecoul, fraction, table;
+ double r, rsq, r2inv, r6inv, forcecoul, forcelj, cforce;
+ double factor_coul, factor_lj;
+ double grij, expm2, prefactor, t, erfc;
+ double fO[3], fH[3], fd[3], v[6];
+ double *x2, *xH1, *xH2;
- if (itype == typeO) {
- if (hneigh[i][0] < 0) {
- iH1 = atom->map(tag[i] + 1);
- iH2 = atom->map(tag[i] + 2);
- if (iH1 == -1 || iH2 == -1)
- error->one(FLERR,"TIP4P hydrogen is missing");
- if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
- error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
- // set iH1,iH2 to closest image to O
- iH1 = domain->closest_image(i,iH1);
- iH2 = domain->closest_image(i,iH2);
- compute_newsite(x[i],x[iH1],x[iH2],newsite[i]);
- hneigh[i][0] = iH1;
- hneigh[i][1] = iH2;
- hneigh[i][2] = 1;
+ double **f = atom->f;
+ double **x = atom->x;
+ double *q = atom->q;
+ tagint *tag = atom->tag;
+ int *type = atom->type;
+ double *special_coul = force->special_coul;
+ double *special_lj = force->special_lj;
+ const double qqrd2e = force->qqrd2e;
+ const double half_alpha = 0.5 * alpha;
+ const double one_minus_alpha = 1.0 - alpha;
+ double *cut_ljsq_i = cut_ljsq[itype];
+ double *lj1_i = lj1[itype];
+ double *lj2_i = lj2[itype];
+ double *lj3_i = lj3[itype];
+ double *lj4_i = lj4[itype];
+ double *offset_i = offset[itype];
+
+ for (jj = 0; jj < jnum; jj++) {
+ j = jlist[jj];
+ factor_lj = special_lj[sbmask(j)];
+ factor_coul = special_coul[sbmask(j)];
+ j &= NEIGHMASK;
+
+ delx = xtmp - x[j][0];
+ dely = ytmp - x[j][1];
+ delz = ztmp - x[j][2];
+ rsq = delx*delx + dely*dely + delz*delz;
+ jtype = type[j];
+
+ if (rsq < cut_ljsq_i[jtype]) {
+ r2inv = 1.0/rsq;
+ r6inv = r2inv*r2inv*r2inv;
+ forcelj = r6inv * (lj1_i[jtype]*r6inv - lj2_i[jtype]);
+ forcelj *= factor_lj * r2inv;
+
+ f[i][0] += delx*forcelj;
+ f[i][1] += dely*forcelj;
+ f[i][2] += delz*forcelj;
+ f[j][0] -= delx*forcelj;
+ f[j][1] -= dely*forcelj;
+ f[j][2] -= delz*forcelj;
+
+ if constexpr (EFLAG) {
+ evdwl = r6inv*(lj3_i[jtype]*r6inv-lj4_i[jtype]) - offset_i[jtype];
+ evdwl *= factor_lj;
+ } else evdwl = 0.0;
+
+ if constexpr (EVFLAG)
+ ev_tally(i,j,nlocal,1,evdwl,0.0,forcelj,delx,dely,delz);
+ }
- } else {
- iH1 = hneigh[i][0];
- iH2 = hneigh[i][1];
- if (hneigh[i][2] == 0) {
- hneigh[i][2] = 1;
- compute_newsite(x[i],x[iH1],x[iH2],newsite[i]);
- }
- }
- x1 = newsite[i];
- } else x1 = x[i];
-
- jlist = firstneigh[i];
- jnum = numneigh[i];
-
- for (jj = 0; jj < jnum; jj++) {
- j = jlist[jj];
- factor_lj = special_lj[sbmask(j)];
- factor_coul = special_coul[sbmask(j)];
- j &= NEIGHMASK;
-
- delx = xtmp - x[j][0];
- dely = ytmp - x[j][1];
- delz = ztmp - x[j][2];
- rsq = delx*delx + dely*dely + delz*delz;
- jtype = type[j];
-
- // LJ interaction based on true rsq
-
- if (rsq < cut_ljsq[itype][jtype]) {
- r2inv = 1.0/rsq;
- r6inv = r2inv*r2inv*r2inv;
- forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
- forcelj *= factor_lj * r2inv;
-
- f[i][0] += delx*forcelj;
- f[i][1] += dely*forcelj;
- f[i][2] += delz*forcelj;
- f[j][0] -= delx*forcelj;
- f[j][1] -= dely*forcelj;
- f[j][2] -= delz*forcelj;
-
- if (eflag) {
- evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
- offset[itype][jtype];
- evdwl *= factor_lj;
- } else evdwl = 0.0;
-
- if (evflag) ev_tally(i,j,nlocal,newton_pair,
- evdwl,0.0,forcelj,delx,dely,delz);
+ if (rsq < cut_coulsqplus) {
+ if constexpr (I_WATER) {
+ if (jtype == typeO) x2 = tip4p_site(j,jH1,jH2,x,tag,type);
+ else x2 = x[j];
+
+ delx = x1[0] - x2[0];
+ dely = x1[1] - x2[1];
+ delz = x1[2] - x2[2];
+ rsq = delx*delx + dely*dely + delz*delz;
+ } else if (jtype == typeO) {
+ x2 = tip4p_site(j,jH1,jH2,x,tag,type);
+ delx = x1[0] - x2[0];
+ dely = x1[1] - x2[1];
+ delz = x1[2] - x2[2];
+ rsq = delx*delx + dely*dely + delz*delz;
}
- // adjust rsq and delxyz for off-site O charge(s) if necessary
- // but only if they are within reach
-
- if (rsq < cut_coulsqplus) {
- if (itype == typeO || jtype == typeO) {
-
- // if atom J = water O, set x2 = offset charge site
- // else x2 = x of atom J
-
- if (jtype == typeO) {
- if (hneigh[j][0] < 0) {
- jH1 = atom->map(tag[j] + 1);
- jH2 = atom->map(tag[j] + 2);
- if (jH1 == -1 || jH2 == -1)
- error->one(FLERR,"TIP4P hydrogen is missing");
- if (atom->type[jH1] != typeH || atom->type[jH2] != typeH)
- error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
- // set jH1,jH2 to closest image to O
- jH1 = domain->closest_image(j,jH1);
- jH2 = domain->closest_image(j,jH2);
- compute_newsite(x[j],x[jH1],x[jH2],newsite[j]);
- hneigh[j][0] = jH1;
- hneigh[j][1] = jH2;
- hneigh[j][2] = 1;
-
- } else {
- jH1 = hneigh[j][0];
- jH2 = hneigh[j][1];
- if (hneigh[j][2] == 0) {
- hneigh[j][2] = 1;
- compute_newsite(x[j],x[jH1],x[jH2],newsite[j]);
- }
- }
- x2 = newsite[j];
- } else x2 = x[j];
-
- delx = x1[0] - x2[0];
- dely = x1[1] - x2[1];
- delz = x1[2] - x2[2];
- rsq = delx*delx + dely*dely + delz*delz;
- }
-
- // Coulombic interaction based on modified rsq
-
- if (rsq < cut_coulsq) {
- r2inv = 1 / rsq;
- if (!ncoultablebits || rsq <= tabinnersq) {
- r = sqrt(rsq);
- grij = g_ewald * r;
- expm2 = exp(-grij*grij);
- t = 1.0 / (1.0 + EWALD_P*grij);
- erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
- prefactor = qqrd2e * qtmp*q[j]/r;
- forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
- if (factor_coul < 1.0) {
- forcecoul -= (1.0-factor_coul)*prefactor;
- }
- } else {
- union_int_float_t rsq_lookup;
- rsq_lookup.f = rsq;
- itable = rsq_lookup.i & ncoulmask;
- itable >>= ncoulshiftbits;
- fraction = ((double) rsq_lookup.f - rtable[itable]) * drtable[itable];
- table = ftable[itable] + fraction*dftable[itable];
- forcecoul = qtmp*q[j] * table;
- if (factor_coul < 1.0) {
- table = ctable[itable] + fraction*dctable[itable];
- prefactor = qtmp*q[j] * table;
- forcecoul -= (1.0-factor_coul)*prefactor;
- }
+ if (rsq < cut_coulsq) {
+ r2inv = 1.0 / rsq;
+ if (CTABLE || rsq <= tabinnersq) {
+ r = sqrt(rsq);
+ grij = g_ewald * r;
+ expm2 = exp(-grij*grij);
+ t = 1.0 / (1.0 + EWALD_P*grij);
+ erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+ prefactor = qqrd2e * qtmp*q[j]/r;
+ forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+ if (factor_coul < 1.0) {
+ forcecoul -= (1.0-factor_coul)*prefactor;
}
+ } else {
+ union_int_float_t rsq_lookup;
+ rsq_lookup.f = rsq;
+ itable = rsq_lookup.i & ncoulmask;
+ itable >>= ncoulshiftbits;
+ fraction = ((double) rsq_lookup.f - rtable[itable]) * drtable[itable];
+ table = ftable[itable] + fraction*dftable[itable];
+ forcecoul = qtmp*q[j] * table;
+ if (factor_coul < 1.0) {
+ table = ctable[itable] + fraction*dctable[itable];
+ prefactor = qtmp*q[j] * table;
+ forcecoul -= (1.0-factor_coul)*prefactor;
+ }
+ }
- cforce = forcecoul * r2inv;
-
- // if i,j are not O atoms, force is applied directly
- // if i or j are O atoms, force is on fictitious atom & partitioned
- // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999)
- // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f
- // preserves total force and torque on water molecule
- // virial = sum(r x F) where each water's atoms are near xi and xj
- // vlist stores 2,4,6 atoms whose forces contribute to virial
+ cforce = forcecoul * r2inv;
+ if constexpr (EVFLAG) {
n = 0;
- key = 0;
-
- if (itype != typeO) {
- f[i][0] += delx * cforce;
- f[i][1] += dely * cforce;
- f[i][2] += delz * cforce;
-
- if (vflag) {
- v[0] = x[i][0] * delx * cforce;
- v[1] = x[i][1] * dely * cforce;
- v[2] = x[i][2] * delz * cforce;
- v[3] = x[i][0] * dely * cforce;
- v[4] = x[i][0] * delz * cforce;
- v[5] = x[i][1] * delz * cforce;
- }
- vlist[n++] = i;
+ key = I_WATER;
+ }
- } else {
- key++;
-
- fd[0] = delx*cforce;
- fd[1] = dely*cforce;
- fd[2] = delz*cforce;
-
- fO[0] = fd[0]*(1 - alpha);
- fO[1] = fd[1]*(1 - alpha);
- fO[2] = fd[2]*(1 - alpha);
-
- fH[0] = 0.5 * alpha * fd[0];
- fH[1] = 0.5 * alpha * fd[1];
- fH[2] = 0.5 * alpha * fd[2];
-
- f[i][0] += fO[0];
- f[i][1] += fO[1];
- f[i][2] += fO[2];
-
- f[iH1][0] += fH[0];
- f[iH1][1] += fH[1];
- f[iH1][2] += fH[2];
-
- f[iH2][0] += fH[0];
- f[iH2][1] += fH[1];
- f[iH2][2] += fH[2];
-
- if (vflag) {
- xH1 = x[iH1];
- xH2 = x[iH2];
- v[0] = x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
- v[1] = x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
- v[2] = x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
- v[3] = x[i][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
- v[4] = x[i][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
- v[5] = x[i][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2];
- }
+ if constexpr (!I_WATER) {
+ f[i][0] += delx * cforce;
+ f[i][1] += dely * cforce;
+ f[i][2] += delz * cforce;
+
+ if constexpr (VFLAG) {
+ v[0] = xtmp * delx * cforce;
+ v[1] = ytmp * dely * cforce;
+ v[2] = ztmp * delz * cforce;
+ v[3] = xtmp * dely * cforce;
+ v[4] = xtmp * delz * cforce;
+ v[5] = ytmp * delz * cforce;
+ }
+ if constexpr (EVFLAG) vlist[n++] = i;
+
+ } else {
+ fd[0] = delx*cforce;
+ fd[1] = dely*cforce;
+ fd[2] = delz*cforce;
+
+ fO[0] = fd[0] * one_minus_alpha;
+ fO[1] = fd[1] * one_minus_alpha;
+ fO[2] = fd[2] * one_minus_alpha;
+
+ fH[0] = half_alpha * fd[0];
+ fH[1] = half_alpha * fd[1];
+ fH[2] = half_alpha * fd[2];
+
+ f[i][0] += fO[0];
+ f[i][1] += fO[1];
+ f[i][2] += fO[2];
+
+ f[iH1][0] += fH[0];
+ f[iH1][1] += fH[1];
+ f[iH1][2] += fH[2];
+
+ f[iH2][0] += fH[0];
+ f[iH2][1] += fH[1];
+ f[iH2][2] += fH[2];
+
+ if constexpr (VFLAG) {
+ xH1 = x[iH1];
+ xH2 = x[iH2];
+ v[0] = xtmp*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
+ v[1] = ytmp*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
+ v[2] = ztmp*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
+ v[3] = xtmp*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
+ v[4] = xtmp*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
+ v[5] = ytmp*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2];
+ }
+ if constexpr (EVFLAG) {
vlist[n++] = i;
vlist[n++] = iH1;
vlist[n++] = iH2;
}
+ }
- if (jtype != typeO) {
- f[j][0] -= delx * cforce;
- f[j][1] -= dely * cforce;
- f[j][2] -= delz * cforce;
-
- if (vflag) {
- v[0] -= x[j][0] * delx * cforce;
- v[1] -= x[j][1] * dely * cforce;
- v[2] -= x[j][2] * delz * cforce;
- v[3] -= x[j][0] * dely * cforce;
- v[4] -= x[j][0] * delz * cforce;
- v[5] -= x[j][1] * delz * cforce;
- }
- vlist[n++] = j;
-
- } else {
- key += 2;
-
- fd[0] = -delx*cforce;
- fd[1] = -dely*cforce;
- fd[2] = -delz*cforce;
-
- fO[0] = fd[0]*(1 - alpha);
- fO[1] = fd[1]*(1 - alpha);
- fO[2] = fd[2]*(1 - alpha);
-
- fH[0] = 0.5 * alpha * fd[0];
- fH[1] = 0.5 * alpha * fd[1];
- fH[2] = 0.5 * alpha * fd[2];
-
- f[j][0] += fO[0];
- f[j][1] += fO[1];
- f[j][2] += fO[2];
-
- f[jH1][0] += fH[0];
- f[jH1][1] += fH[1];
- f[jH1][2] += fH[2];
-
- f[jH2][0] += fH[0];
- f[jH2][1] += fH[1];
- f[jH2][2] += fH[2];
-
- if (vflag) {
- xH1 = x[jH1];
- xH2 = x[jH2];
- v[0] += x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
- v[1] += x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
- v[2] += x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
- v[3] += x[j][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
- v[4] += x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
- v[5] += x[j][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2];
- }
+ if (jtype != typeO) {
+ f[j][0] -= delx * cforce;
+ f[j][1] -= dely * cforce;
+ f[j][2] -= delz * cforce;
+
+ if constexpr (VFLAG) {
+ v[0] -= x[j][0] * delx * cforce;
+ v[1] -= x[j][1] * dely * cforce;
+ v[2] -= x[j][2] * delz * cforce;
+ v[3] -= x[j][0] * dely * cforce;
+ v[4] -= x[j][0] * delz * cforce;
+ v[5] -= x[j][1] * delz * cforce;
+ }
+ if constexpr (EVFLAG) vlist[n++] = j;
+
+ } else {
+ if constexpr (EVFLAG) key += 2;
+
+ fd[0] = -delx*cforce;
+ fd[1] = -dely*cforce;
+ fd[2] = -delz*cforce;
+
+ fO[0] = fd[0] * one_minus_alpha;
+ fO[1] = fd[1] * one_minus_alpha;
+ fO[2] = fd[2] * one_minus_alpha;
+
+ fH[0] = half_alpha * fd[0];
+ fH[1] = half_alpha * fd[1];
+ fH[2] = half_alpha * fd[2];
+
+ f[j][0] += fO[0];
+ f[j][1] += fO[1];
+ f[j][2] += fO[2];
+
+ f[jH1][0] += fH[0];
+ f[jH1][1] += fH[1];
+ f[jH1][2] += fH[2];
+
+ f[jH2][0] += fH[0];
+ f[jH2][1] += fH[1];
+ f[jH2][2] += fH[2];
+
+ if constexpr (VFLAG) {
+ xH1 = x[jH1];
+ xH2 = x[jH2];
+ v[0] += x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
+ v[1] += x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
+ v[2] += x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
+ v[3] += x[j][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
+ v[4] += x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
... (88 more lines; see download link above)