Iteration 0003 — 380bf2b0146b (accepted)

GitHub commit: 380bf2b0146b
Published branch: fermilink-optimize/lammps-neighbor

Change summary

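Add a benchmark-specific fast path in NPairBin<1,1,0,0,0>::build(), taken when `!exclude && molecular == Atom::MOLECULAR` (standard molecular systems without neighbor exclusions). The fast path hoists the per-atom special[i]/nspecial[i] setup out of the inner loops and removes the exclude checks from the hot orthogonal half/bin/newton loops.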

Acceptance rationale

Candidate improves weighted_median_neigh_seconds from 0.33535 to 0.32842 with correctness intact, so it becomes the new incumbent.

Guardrails & metrics

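| field | value |
| --- | --- |
| decision | ACCEPTED |
| correctness | ok |
| correctness mode | field_tolerances |
| hard reject | no |
| guardrail errors | 0 |
| incumbent commit | 32b495fdb66b |
| candidate commit | 380bf2b0146b |
| incumbent metric | 0.33535 |
| candidate metric | 0.32842 |
| baseline metric | 0.36158 |
| Δ vs incumbent | +2.066% (improvement; the metric is lower-is-better, so a positive value means the candidate is faster) |
| changed files | src/npair_bin.cpp |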

Diffstat

src/npair_bin.cpp | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 76 insertions(+)

Diff

download full diff

diff --git a/src/npair_bin.cpp b/src/npair_bin.cpp
index 490bb45169..bd1fc1d338 100644
--- a/src/npair_bin.cpp
+++ b/src/npair_bin.cpp
@@ -69,6 +69,82 @@ void NPairBin<1, 1, 0, 0, 0>::build(NeighList *list)
   int inum = 0;
   ipage->reset();

+  if (!exclude && molecular == Atom::MOLECULAR) {
+    for (i = 0; i < nlocal; i++) {
+      n = 0;
+      neighptr = ipage->vget();
+
+      itype = type[i];
+      const double *const cutneighsq_i = cutneighsq[itype];
+      const tagint *const special_i = special[i];
+      const int *const nspecial_i = nspecial[i];
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+
+      ibin = atom2bin[i];
+
+      for (j = bins[i]; j >= 0; j = bins[j]) {
+        const double *const xj = x[j];
+        if (j >= nlocal) {
+          if (xj[2] < ztmp) continue;
+          if (xj[2] == ztmp) {
+            if (xj[1] < ytmp) continue;
+            if ((xj[1] == ytmp) && (xj[0] < xtmp)) continue;
+          }
+        }
+
+        jtype = type[j];
+
+        delx = xtmp - xj[0];
+        dely = ytmp - xj[1];
+        delz = ztmp - xj[2];
+        rsq = delx * delx + dely * dely + delz * delz;
+
+        if (rsq > cutneighsq_i[jtype]) continue;
+
+        which = find_special(special_i, nspecial_i, tag[j]);
+        if (which == 0)
+          neighptr[n++] = j;
+        else if (domain->minimum_image_check(delx, dely, delz))
+          neighptr[n++] = j;
+        else if (which > 0)
+          neighptr[n++] = j ^ (which << SBBITS);
+      }
+
+      for (k = 1; k < nstencil; k++) {
+        for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
+          const double *const xj = x[j];
+          jtype = type[j];
+
+          delx = xtmp - xj[0];
+          dely = ytmp - xj[1];
+          delz = ztmp - xj[2];
+          rsq = delx * delx + dely * dely + delz * delz;
+
+          if (rsq > cutneighsq_i[jtype]) continue;
+
+          which = find_special(special_i, nspecial_i, tag[j]);
+          if (which == 0)
+            neighptr[n++] = j;
+          else if (domain->minimum_image_check(delx, dely, delz))
+            neighptr[n++] = j;
+          else if (which > 0)
+            neighptr[n++] = j ^ (which << SBBITS);
+        }
+      }
+
+      ilist[inum++] = i;
+      firstneigh[i] = neighptr;
+      numneigh[i] = n;
+      ipage->vgot(n);
+      if (ipage->status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36));
+    }
+
+    list->inum = inum;
+    return;
+  }
+
   for (i = 0; i < nlocal; i++) {
     n = 0;
     neighptr = ipage->vget();