# Benchmark specification header: the schema revision of this file followed by
# the identifiers for this benchmark and its package.
schema_version: 1
benchmark_id: lammps-tip4p-water-nve-neighbor-bin
package_id: lammps
# Free-text description of the optimization goal. The two long strings use
# folded block scalars (`>-`): line breaks fold to single spaces and the
# trailing newline is stripped, so each loads as one single-line string.
goal_context:
  target: >-
    Optimize the TIP4P long-range water NVE neighbor rebuild hot path for
    `neighbor 2.0 bin`, especially rebuild-time binning and pair-list
    generation.
  initial_hypothesis: >-
    Start by reducing redundant memory traffic and branch overhead in
    `Neighbor::build()` binning/setup and the orthogonal half-list stencil
    walk in `NPairBin::build()` while preserving rebuild cadence, exclusions,
    and list completeness.
  intent_level: guidance
# Path policy for the repository: editable_paths lists the four neighbor-list
# source files, immutable_paths lists everything declared off-limits.
repo:
  editable_paths:
    - src/neighbor.cpp
    - src/neighbor.h
    - src/npair_bin.cpp
    - src/npair_bin.h
  immutable_paths:
    - .fermilink-optimize/**
    - skills/**
    # NOTE(review): absolute, machine-specific path — confirm it is valid on
    # the host that runs this benchmark.
    - /anvil/scratch/x-tli22/fermilink_optimize/project_lammps/input_cpp_lammps/**
    - examples/**
    - unittest/**
    - cmake/**
    # NOTE(review): unlike the glob entries above, this entry is an English
    # phrase ("... except ..."); confirm the consumer interprets it as
    # intended rather than as a literal pattern.
    - src/** except src/neighbor.cpp, src/neighbor.h, src/npair_bin.cpp, src/npair_bin.h
# Campaign-level limits.
# NOTE(review): presumably the campaign ends after max_iterations attempts or
# after this many consecutive rejected candidates — confirm with the consumer.
campaign:
  max_iterations: 120
  stop_on_consecutive_rejections: 30
# Worker-level limits.
# NOTE(review): the exact meaning of these keys is not visible in this file;
# presumably a per-worker iteration budget and a wait/poll delay in seconds —
# verify against the consumer.
worker:
  max_iterations: 8
  wait_seconds: 1
# Controller settings: run timeout, warm-up and measured run counts, the
# objective metric, and the conditions that reject a candidate.
controller:
  timeout_seconds: 1800
  warmup_runs: 1
  measured_runs: 3
  # Objective: weighted_median_neigh_seconds, to be minimized.
  # NOTE(review): min_relative_improvement presumably requires a candidate to
  # improve the metric by at least 2% to count — confirm with the consumer.
  objective:
    primary_metric: weighted_median_neigh_seconds
    direction: minimize
    min_relative_improvement: 0.02
  # Outcomes listed as grounds for rejection.
  reject_on:
    - crash
    - timeout
    - missing_metrics
    - correctness_failure
# Correctness gate expressed as per-field tolerances.
# NOTE(review): presumably each delta is measured against a reference run —
# confirm what the comparison baseline is.
correctness:
  mode: field_tolerances
  require_all_cases_converged: true
  field_tolerances:
    # Step bookkeeping: zero tolerance.
    - field: thermo_steps
      abs_delta: 0.0
    # Energies and thermodynamic output: relative tolerances, except the
    # energy drift, which uses an absolute one. Pressure is the loosest
    # (1.0e-6) and density the tightest (1.0e-10).
    - field: etotal_series
      relative_delta: 1.0e-8
    - field: energy_drift_per_atom_per_step
      abs_delta: 1.0e-8
    - field: pe_series
      relative_delta: 1.0e-8
    - field: ke_series
      relative_delta: 1.0e-8
    - field: temp_series
      relative_delta: 1.0e-8
    - field: press_series
      relative_delta: 1.0e-6
    - field: density_series
      relative_delta: 1.0e-10
    # Force sampling: the sampled atom ids have zero tolerance; forces_xyz is
    # the only field that carries two tolerances (rms_delta and abs_delta).
    - field: force_sample_atom_ids
      abs_delta: 0.0
    - field: forces_xyz
      rms_delta: 1.0e-8
      abs_delta: 1.0e-7
    # Counts: zero tolerance on atom count and on both neighbor-build
    # counters.
    - field: atom_count
      abs_delta: 0.0
    - field: neighbor_list_builds
      abs_delta: 0.0
    - field: dangerous_builds
      abs_delta: 0.0
# How the benchmark is launched: the runner command (as an argv list), its
# environment variables, and the commands listed under pre_commands.
runtime:
  mode: direct
  command:
    - python3
    - .fermilink-optimize/autogen/benchmark_runner.py
    - --benchmark
    # Quoted because a plain scalar starting with `{` would be parsed as a
    # flow mapping. NOTE(review): presumably a placeholder substituted by the
    # consumer — confirm.
    - '{benchmark}'
    - --emit-json
  # Numeric values are quoted so they load as strings, not integers.
  env:
    OMP_NUM_THREADS: '1'
    OMP_PROC_BIND: 'close'
    OMP_PLACES: 'cores'
    OPENBLAS_NUM_THREADS: '1'
    MKL_NUM_THREADS: '1'
    FERMILINK_GOAL_INPUT_ROOT: .fermilink-optimize/inputs/all
  # Each entry is an argv list run through `bash -lc`: first configure with
  # CMake in ./build using the most.cmake and nolib.cmake presets with
  # PKG_GPU=off, then build with 4 parallel jobs.
  pre_commands:
    - - bash
      - -lc
      - mkdir -p build && cd build && cmake -C ../cmake/presets/most.cmake -C ../cmake/presets/nolib.cmake -D PKG_GPU=off ../cmake
    - - bash
      - -lc
      - cd build && cmake --build . -j4
# Case ids assigned to the training split.
# NOTE(review): the `cases` list also defines test-16r-short and
# test-64r-short, which are not referenced in this mapping; presumably any
# case not listed here is treated as held-out — confirm with the consumer.
split:
  train_case_ids:
    - train-16r-long
    - train-32r-long
    - train-32r-short
# Benchmark cases. Every case uses the same data file, weight 1.0 and one
# OpenMP thread; the cases differ in MPI rank count (16, 32 or 64) and in
# which input script they run (in.tip4p_nve_long or in.tip4p_nve).
cases:
  # Training cases (ids prefixed `train-`).
  - id: train-16r-long
    weight: 1.0
    description: Longer-run 16-rank training case with repeated rebuilds and lower communication noise.
    input_script: in.tip4p_nve_long
    data_file: water_216_data.lmp
    mpi_ranks: 16
    omp_threads: 1
  - id: train-32r-long
    weight: 1.0
    description: Longer-run 32-rank training case for a second domain decomposition.
    input_script: in.tip4p_nve_long
    data_file: water_216_data.lmp
    mpi_ranks: 32
    omp_threads: 1
  - id: train-32r-short
    weight: 1.0
    description: Shorter-turnaround 32-rank training case with the same neighbor settings.
    input_script: in.tip4p_nve
    data_file: water_216_data.lmp
    mpi_ranks: 32
    omp_threads: 1
  # Held-out validation cases (ids prefixed `test-`), per their descriptions.
  - id: test-16r-short
    weight: 1.0
    description: Held-out lower-rank validation case.
    input_script: in.tip4p_nve
    data_file: water_216_data.lmp
    mpi_ranks: 16
    omp_threads: 1
  - id: test-64r-short
    weight: 1.0
    description: Held-out scaling-sensitive validation case.
    input_script: in.tip4p_nve
    data_file: water_216_data.lmp
    mpi_ranks: 64
    omp_threads: 1