diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 04049a330..29e872fae 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -82,7 +82,7 @@ jobs:
- name: Checkout bpfilter
uses: actions/checkout@v2
- name: Configure the build
- run: cmake -S $GITHUB_WORKSPACE -B $GITHUB_WORKSPACE/build -DNO_BENCHMARKS=1
+ run: cmake -S $GITHUB_WORKSPACE -B $GITHUB_WORKSPACE/build
- name: Build all
run: make -C $GITHUB_WORKSPACE/build -j `nproc`
@@ -177,7 +177,8 @@ jobs:
BENCH_INCLUDE=""
BENCH_FAIL_ON=""
fi
- tools/benchmarks/bfbencher \
+ tests/benchmarks/bfbencher \
+ history \
--since 30bd49f \
--until $BENCH_UNTIL \
$BENCH_INCLUDE \
diff --git a/CLAUDE.md b/CLAUDE.md
index fe4f209b1..ecf74b6c2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -68,7 +68,7 @@ make -C build doc
```
**Build options:**
-- `-DNO_DOCS=1`, `-DNO_TESTS=1`, `-DNO_CHECKS=1`, `-DNO_BENCHMARKS=1`
+- `-DNO_DOCS=1`, `-DNO_TESTS=1`, `-DNO_CHECKS=1`
## Code style
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9e787ed8a..d3b5f48c6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,7 +32,6 @@ include(GNUInstallDirs)
option(NO_DOCS "Disable documentation generation" 0)
option(NO_TESTS "Disable unit, end-to-end, and integration tests" 0)
option(NO_CHECKS "Disable the check target (clang-tidy and clang-format" 0)
-option(NO_BENCHMARKS "Disable the benchmark" 0)
option(WITH_COVERAGE "Build with code coverage support. Disabled by default" 0)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -100,7 +99,3 @@ if (NOT ${NO_TESTS})
enable_testing()
add_subdirectory(tests)
endif ()
-
-if (NOT ${NO_BENCHMARKS})
- add_subdirectory(tools/benchmarks)
-endif ()
diff --git a/README.md b/README.md
index 4705845a1..feab00460 100644
--- a/README.md
+++ b/README.md
@@ -54,7 +54,7 @@ sudo dnf install -y bpfilter bpfilter-devel
sudo dnf install -y clang cmake gcc libbpf-devel bison flex sed xxd
# Configure the project and build bpfilter
-cmake -S $SOURCES_DIR -B $BUILD_DIR -DNO_DOCS=ON -DNO_TESTS=ON -DNO_CHECKS=ON -DNO_BENCHMARKS=ON
+cmake -S $SOURCES_DIR -B $BUILD_DIR -DNO_DOCS=ON -DNO_TESTS=ON -DNO_CHECKS=ON
make -C $BUILD_DIR
```
diff --git a/derivation.nix b/derivation.nix
index 02da8cdb6..82b736e2b 100644
--- a/derivation.nix
+++ b/derivation.nix
@@ -66,7 +66,6 @@ in
"-DNO_DOCS=1"
"-DNO_TESTS=1"
"-DNO_CHECKS=1"
- "-DNO_BENCHMARKS=1"
];
# We do not run the unit tests because the nix build sandbox doesn't
diff --git a/doc/developers/build.rst b/doc/developers/build.rst
index f979cac32..25e4fa1d6 100644
--- a/doc/developers/build.rst
+++ b/doc/developers/build.rst
@@ -95,9 +95,8 @@ You can then use CMake to generate the build system:
The usual CMake options are allowed (e.g. ``CMAKE_BUILD_TYPE``, ``CMAKE_INSTALL_PREFIX``...). The build configuration is modular, so you're free to enable/disable some parts of the projects according to your needs:
- ``-DNO_DOCS``: disable the documentation, including the coverage and benchmarks report.
-- ``-DNO_TESTS``: disable unit tests, end-to-end tests, and integration tests.
+- ``-DNO_TESTS``: disable all tests.
- ``-DNO_CHECKS``: disable style check and static analyzer.
-- ``-DNO_BENCHMARKS``: disable benchmarks.
A full configuration (without any part disabled) will provide the following targets:
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index b3efbb8f3..4c8d0d6f9 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -7,6 +7,7 @@ add_subdirectory(e2e)
add_subdirectory(fuzz)
add_subdirectory(integration)
add_subdirectory(check)
+add_subdirectory(benchmarks)
add_custom_target(test_bin
DEPENDS unit_bin e2e_bin fuzz_parser
diff --git a/tools/benchmarks/CMakeLists.txt b/tests/benchmarks/CMakeLists.txt
similarity index 98%
rename from tools/benchmarks/CMakeLists.txt
rename to tests/benchmarks/CMakeLists.txt
index a9fad4f2a..e5d3b54a3 100644
--- a/tools/benchmarks/CMakeLists.txt
+++ b/tests/benchmarks/CMakeLists.txt
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
-enable_language(CXX)
-
find_package(benchmark REQUIRED)
find_package(PkgConfig REQUIRED)
pkg_check_modules(bpf REQUIRED IMPORTED_TARGET libbpf)
diff --git a/tools/benchmarks/benchmark.cpp b/tests/benchmarks/benchmark.cpp
similarity index 100%
rename from tools/benchmarks/benchmark.cpp
rename to tests/benchmarks/benchmark.cpp
diff --git a/tools/benchmarks/benchmark.hpp b/tests/benchmarks/benchmark.hpp
similarity index 100%
rename from tools/benchmarks/benchmark.hpp
rename to tests/benchmarks/benchmark.hpp
diff --git a/tools/benchmarks/bfbencher b/tests/benchmarks/bfbencher
similarity index 76%
rename from tools/benchmarks/bfbencher
rename to tests/benchmarks/bfbencher
index 08f62939a..9fe089403 100755
--- a/tools/benchmarks/bfbencher
+++ b/tests/benchmarks/bfbencher
@@ -36,8 +36,8 @@ DEFAULT_LAST_COMMIT_REF = "wip"
DEFAULT_SOURCE_PATH = pathlib.Path(".")
DEFAULT_CACHE_PATH = pathlib.Path(".cache/bfbencher")
DEFAULT_USERNAME = getpass.getuser()
-DEFAULT_REPORT_TEMPLATE_PATH = pathlib.Path("tools/benchmarks/results.html.j2")
-DEFAULT_PR_REPORT_TEMPLATE_PATH = pathlib.Path("tools/benchmarks/summary.html.j2")
+DEFAULT_REPORT_TEMPLATE_PATH = pathlib.Path("tests/benchmarks/results.html.j2")
+DEFAULT_PR_REPORT_TEMPLATE_PATH = pathlib.Path("tests/benchmarks/summary.html.j2")
DEFAULT_HOST = [socket.gethostname(), "localhost"]
SHORT_SHA_LEN = 7
@@ -53,6 +53,7 @@ class Stats:
self.n_failures = 0
self.n_cache_hits = 0
self.n_cache_misses = 0
+ self.n_benchmark_errors = 0
def success(self, from_cache: bool = False) -> None:
if from_cache:
@@ -111,6 +112,47 @@ class Renderer:
self.console.print(table)
+ def print_compare_report(
+ self,
+ rows: list[Report.CompareRow],
+ base_sha: str,
+ ref_sha: str,
+ ) -> None:
+ def format_pct(pct: float) -> str:
+ color = "green" if pct < 0 else ("red" if pct > 0 else "white")
+ return f"[{color}]{pct:+.1f}%[/{color}]"
+
+ table = rich.table.Table(
+ title=f"{base_sha[:SHORT_SHA_LEN]} → {ref_sha[:SHORT_SHA_LEN]}",
+ show_header=True,
+ )
+ table.add_column("Benchmark", style="cyan")
+ table.add_column("Base", justify="right")
+ table.add_column("Ref", justify="right")
+ table.add_column("ΔTime", justify="right")
+ table.add_column("ΔTime%", justify="right")
+ table.add_column("Base Insn", justify="right")
+ table.add_column("Ref Insn", justify="right")
+ table.add_column("ΔInsn", justify="right")
+ table.add_column("ΔInsn%", justify="right")
+
+ for row in rows:
+ table.add_row(
+ row.name,
+ row.base_time_str,
+ row.ref_time_str,
+ row.delta_time_str,
+ format_pct(row.delta_time_pct),
+ str(row.base_insn) if row.base_insn is not None else "-",
+ str(row.ref_insn) if row.ref_insn is not None else "-",
+ f"{row.delta_insn:+d}" if row.delta_insn is not None else "-",
+ format_pct(row.delta_insn_pct)
+ if row.delta_insn_pct is not None
+ else "-",
+ )
+
+ self.console.print(table)
+
renderer: Renderer = Renderer()
@@ -221,6 +263,10 @@ class Benchmark:
"nInsn": Analyzer(nInsns[-1], nInsns[-n - 1 : -1]) if nInsns else None,
}
+ @property
+ def results(self) -> list[Result]:
+ return list(self._results)
+
@property
def last(self) -> Result | None:
return self._results[-1] if self._results else None
@@ -548,10 +594,56 @@ class FilesystemSource:
self._local = local_src_dir
shutil.copytree(self._path, self._local, dirs_exist_ok=True)
+ self._detach_if_worktree()
self._repo: git.Repo = git.Repo(self._local)
self._retry_all: bool = False
self._retry_failed: bool = False
+ def _detach_if_worktree(self) -> None:
+ """Convert a copied git worktree into a standalone repository.
+
+ In a git worktree the .git entry is a file containing a gitdir pointer
+ to the original repo's worktree-specific state. shutil.copytree copies
+ that file verbatim, so all git operations on the copy would modify the
+ original worktree's HEAD and index. This method detects that case and
+ replaces the .git file with a self-contained .git directory built from
+ the worktree-specific state (HEAD, index) and the shared commondir
+ (objects, refs, config, …).
+ """
+ git_entry = self._local / ".git"
+ if not git_entry.is_file():
+ return
+
+ content = git_entry.read_text().strip()
+ if not content.startswith("gitdir:"):
+ return
+
+ wt_gitdir = pathlib.Path(content.split(":", 1)[1].strip())
+ if not wt_gitdir.is_absolute():
+ wt_gitdir = (self._local / wt_gitdir).resolve()
+
+ commondir_file = wt_gitdir / "commondir"
+ if commondir_file.exists():
+ commondir = (wt_gitdir / commondir_file.read_text().strip()).resolve()
+ else:
+ commondir = wt_gitdir
+
+ # Replace the .git pointer file with a full standalone git directory.
+ git_entry.unlink()
+ shutil.copytree(commondir, self._local / ".git")
+
+ # Drop worktrees/ — those entries are specific to the original repo.
+ worktrees_dir = self._local / ".git" / "worktrees"
+ if worktrees_dir.exists():
+ shutil.rmtree(worktrees_dir)
+
+ # Apply the worktree-specific HEAD and index, which track the state of
+ # this worktree and differ from the main worktree's equivalents.
+ for fname in ("HEAD", "index"):
+ src = wt_gitdir / fname
+ if src.exists():
+ shutil.copy2(src, self._local / ".git" / fname)
+
@property
def local(self) -> pathlib.Path:
"""Local path to the source repository copy."""
@@ -749,7 +841,7 @@ class BenchmarkContext:
return self.build_dir / "output/sbin/bfcli"
def configure(
- self, doc: bool = False, tests: bool = False, checks: bool = False
+ self, doc: bool = False, checks: bool = False
) -> bool:
cmd: list[str] = [
"cmake",
@@ -761,8 +853,6 @@ class BenchmarkContext:
if not doc:
cmd += ["-DNO_DOCS=1"]
- if not tests:
- cmd += ["-DNO_TESTS=1"]
if not checks:
cmd += ["-DNO_CHECKS=1"]
@@ -884,6 +974,23 @@ class Report:
runtime_ns: float = 0 # Runtime in nanoseconds for sorting
insn_count: int = 0 # Instruction count for sorting
+ @dataclasses.dataclass
+ class CompareRow:
+ """Prepared data for a single benchmark row in compare mode."""
+
+ name: str
+ label: str
+ base_time_str: str
+ ref_time_str: str
+ delta_time_str: str
+ delta_time_pct: float
+ base_insn: int | None
+ ref_insn: int | None
+ delta_insn: int | None
+ delta_insn_pct: float | None
+ base_time_ns: float
+ ref_time_ns: float
+
def __init__(self, history: History):
self._history = history
@@ -1023,6 +1130,120 @@ class Report:
rows = self._get_benchmark_rows(terms)
renderer.print_report(rows, terms)
+ def _get_compare_rows(self, base_sha: str, ref_sha: str) -> list[CompareRow]:
+ rows = []
+ for benchmark in self._history.sorted_benchmarks():
+ base_result = next(
+ (r for r in benchmark.results if r.commit_sha == base_sha), None
+ )
+ ref_result = next(
+ (r for r in benchmark.results if r.commit_sha == ref_sha), None
+ )
+ if not base_result or not ref_result:
+ continue
+
+ base_ns: float = float(base_result.time.to("ns").magnitude) # type: ignore[union-attr]
+ ref_ns: float = float(ref_result.time.to("ns").magnitude) # type: ignore[union-attr]
+ delta_ns: float = ref_ns - base_ns
+ delta_pct: float = (delta_ns / base_ns * 100) if base_ns else 0.0
+
+ base_insn = int(base_result.nInsn) if base_result.nInsn else None
+ ref_insn = int(ref_result.nInsn) if ref_result.nInsn else None
+ if base_insn is not None and ref_insn is not None:
+ delta_insn: int | None = int(ref_insn) - int(base_insn)
+ delta_insn_pct: float | None = (
+ (delta_insn / base_insn * 100) if base_insn else 0
+ )
+ else:
+ delta_insn = None
+ delta_insn_pct = None
+
+ rows.append(
+ Report.CompareRow(
+ name=benchmark.name,
+ label=benchmark.label,
+ base_time_str=f"{base_result.time:~.2f}", # type: ignore[union-attr]
+ ref_time_str=f"{ref_result.time:~.2f}", # type: ignore[union-attr]
+ delta_time_str=f"{delta_ns:+.2f} ns",
+ delta_time_pct=delta_pct,
+ base_insn=base_insn,
+ ref_insn=ref_insn,
+ delta_insn=delta_insn,
+ delta_insn_pct=delta_insn_pct,
+ base_time_ns=base_ns,
+ ref_time_ns=ref_ns,
+ )
+ )
+
+ return rows
+
+ def print_compare_report(self, base_sha: str, ref_sha: str) -> None:
+ rows = self._get_compare_rows(base_sha, ref_sha)
+ renderer.print_compare_report(rows, base_sha, ref_sha)
+
+ def write_compare_json(
+ self,
+ path: pathlib.Path,
+ base_sha: str,
+ ref_sha: str,
+ host: str,
+ ) -> None:
+ rows = self._get_compare_rows(base_sha, ref_sha)
+ data = {
+ "base": base_sha,
+ "ref": ref_sha,
+ "host": host,
+ "benchmarks": [
+ {
+ "name": r.name,
+ "base_time_ns": r.base_time_ns,
+ "ref_time_ns": r.ref_time_ns,
+ "delta_time_ns": r.ref_time_ns - r.base_time_ns,
+ "delta_time_pct": r.delta_time_pct,
+ "base_insn": r.base_insn,
+ "ref_insn": r.ref_insn,
+ "delta_insn": r.delta_insn,
+ "delta_insn_pct": r.delta_insn_pct,
+ }
+ for r in rows
+ ],
+ }
+
+ with open(path, "w") as f:
+ json.dump(data, f, indent=2)
+
+
+def _benchmark_commits(executor: Executor, args: argparse.Namespace) -> None:
+ for ctx in BenchmarkContext.commits(executor):
+ if not ctx.configure():
+ continue
+
+ if not ctx.make("bfcli"):
+ continue
+
+ if not ctx.make("benchmark_bin"):
+ continue
+
+ if not ctx.run_benchmark(
+ args.bind_node, args.no_preempt, args.cpu_pin, args.slice
+ ):
+ continue
+
+ results = ctx.results
+ if not results:
+ executor.log(f"could not find {ctx.results_path}")
+ continue
+
+ for r in results:
+ if r.get("error_occurred"):
+ executor.stats.n_benchmark_errors += 1
+ executor.log(
+ f"[red bold]benchmark error: {r['name']}: {r.get('error_message', '')}[/]"
+ )
+
+ executor.add_results(ctx.commit, results)
+ executor.log("Done!")
+
def run_benchmarks(args: argparse.Namespace):
executor = (
@@ -1072,6 +1293,9 @@ def run_benchmarks(args: argparse.Namespace):
)
report.print_report([20])
+ if args.fail_on_benchmark_error and executor.stats.n_benchmark_errors > 0:
+ raise SystemExit(1)
+
if args.fail_on_significant_change:
terms = [20]
for benchmark in executor.results.sorted_benchmarks():
@@ -1081,72 +1305,56 @@ def run_benchmarks(args: argparse.Namespace):
raise SystemExit(1)
-def main():
- parser = argparse.ArgumentParser(
- prog="bfbencher",
- description="Benchmark bpfilter performance across git commits.",
- )
+def run_compare(args: argparse.Namespace):
+ source_repo = git.Repo(args.sources)
+ base_sha: str = source_repo.git.rev_parse(args.base)
+ ref_sha: str = source_repo.git.rev_parse(args.ref)
- parser.add_argument(
- "--since",
- type=str,
- help=f'oldest commit to benchmark. Use "wip" to start from the uncommitted changes (committed as "bfbencher: WIP"). Must be older than --until, or the same. Defaults to "{DEFAULT_FIRST_COMMIT_REF}"',
- default=DEFAULT_FIRST_COMMIT_REF,
- )
- parser.add_argument(
- "--include",
- type=str,
- action="append",
- default=[],
- help='include an extra commit outside the range. Can be specified multiple times. Use "wip" to include uncommitted changes. Commits are sorted in git order with the range commits.',
- )
- parser.add_argument(
- "--until",
- type=str,
- help=f'newest commit to benchmark. Use "wip" to include uncommitted changes (committed as "bfbencher: WIP"). Must be newer than --since, or the same. Defaults to "{DEFAULT_LAST_COMMIT_REF}"',
- default=DEFAULT_LAST_COMMIT_REF,
+ args.since = args.base
+ args.until = args.base
+ args.include = [args.ref]
+
+ executor = (
+ LocalExecutor(args) if args.host in DEFAULT_HOST else RemoteExecutor(args)
)
- parser.add_argument(
+
+ with executor:
+ _benchmark_commits(executor, args)
+
+ report = Report(executor.results)
+ report.print_compare_report(base_sha, ref_sha)
+
+ if args.json_output:
+ report.write_compare_json(
+ args.json_output, base_sha, ref_sha, executor.host
+ )
+
+ if args.fail_on_benchmark_error and executor.stats.n_benchmark_errors > 0:
+ raise SystemExit(1)
+
+
+def main():
+ # Options shared across all subcommands.
+ shared = argparse.ArgumentParser(add_help=False)
+ shared.add_argument(
"--sources",
type=pathlib.Path,
help=f'path to the bpfilter sources directory. Defaults to "{DEFAULT_SOURCE_PATH}".',
default=DEFAULT_SOURCE_PATH,
)
- parser.add_argument(
+ shared.add_argument(
"--host",
type=str,
help=f'host to run the benchmark on. bfbencher will connect to the host using SSH, copy the project sources on it, and run the benchmarks. Defaults to "{DEFAULT_HOST[0]}" (current host).',
default=DEFAULT_HOST[0],
)
- parser.add_argument(
+ shared.add_argument(
"--cache-dir",
type=pathlib.Path,
help=f"path to the directory containing the cached results. The cache is used to store benchmark results based on the hostname and the commit SHA, it is stored on the host running bfbencher. Defaults to {DEFAULT_CACHE_PATH}.",
default=DEFAULT_CACHE_PATH,
)
- parser.add_argument(
- "--report-template-path",
- type=pathlib.Path,
- help=f'path to the Jinja2 template use to generate the HTML report. Defaults to "{DEFAULT_REPORT_TEMPLATE_PATH}"',
- default=DEFAULT_REPORT_TEMPLATE_PATH,
- )
- parser.add_argument(
- "--report-path",
- type=pathlib.Path,
- help="path of the final HTML report.",
- )
- parser.add_argument(
- "--pr-report-template-path",
- type=pathlib.Path,
- help=f'path to the Jinja2 template use to generate the HTML pull-request report. Defaults to "{DEFAULT_PR_REPORT_TEMPLATE_PATH}"',
- default=DEFAULT_PR_REPORT_TEMPLATE_PATH,
- )
- parser.add_argument(
- "--pr-report-path",
- type=pathlib.Path,
- help="path of the HTML summary report for pull requests (shows only significant changes).",
- )
- parser.add_argument(
+ shared.add_argument(
"--retry",
"-r",
type=str,
@@ -1154,41 +1362,130 @@ def main():
default=[],
help='retry benchmarks for specific commits, ignoring cached results. Use "failed" to retry all failed commits, "all" to retry everything, or a commit ref to retry a specific commit. Can be specified multiple times.',
)
- parser.add_argument(
+ shared.add_argument(
+ "--fail-on-benchmark-error",
+ action="store_true",
+ help="exit with non-zero status if any benchmark reports an error during execution",
+ default=False,
+ )
+ shared.add_argument(
"--fail-on-significant-change",
choices=["better", "worse", "any"],
help="exit with non-zero status if any benchmark has a statistically significant change (better=improvement, worse=regression, any=either)",
default=None,
)
- parser.add_argument(
+ shared.add_argument(
"--bind-node",
type=int,
help="CPU and memory node to bind the benchmark to.",
default=None,
)
- parser.add_argument(
+ shared.add_argument(
"--no-preempt",
action="store_true",
- help="if set, use chrt to run the bechmark with real-time scheduling policy at the highest priority. This option should reduce jitter as only kernel threads could preempt it.",
+ help="if set, use chrt to run the benchmark with real-time scheduling policy at the highest priority. This option should reduce jitter as only kernel threads could preempt it.",
default=False,
)
- parser.add_argument(
+ shared.add_argument(
"--cpu-pin",
type=int,
help="if set, defines the CPU to pin the benchmark to. If the CPU is isolated, it will reduce variability between runs.",
default=None,
)
- parser.add_argument(
+ shared.add_argument(
"--slice",
type=str,
help="systemd slice to run the benchmark into. Required if --cpu-pin is isolated at the systemd level.",
default=None,
)
+ parser = argparse.ArgumentParser(
+ prog="bfbencher",
+ description="Benchmark bpfilter performance across git commits.",
+ )
+ subparsers = parser.add_subparsers(dest="command")
+
+ history_parser = subparsers.add_parser(
+ "history",
+ parents=[shared],
+ help="benchmark performance across a range of commits",
+ description="Benchmark bpfilter performance across a range of commits and report changes over time.",
+ )
+ history_parser.add_argument(
+ "--report-template-path",
+ type=pathlib.Path,
+        help=f'path to the Jinja2 template used to generate the HTML report. Defaults to "{DEFAULT_REPORT_TEMPLATE_PATH}"',
+ default=DEFAULT_REPORT_TEMPLATE_PATH,
+ )
+ history_parser.add_argument(
+ "--report-path",
+ type=pathlib.Path,
+ help="path of the final HTML report.",
+ )
+ history_parser.add_argument(
+ "--pr-report-template-path",
+ type=pathlib.Path,
+        help=f'path to the Jinja2 template used to generate the HTML pull-request report. Defaults to "{DEFAULT_PR_REPORT_TEMPLATE_PATH}"',
+ default=DEFAULT_PR_REPORT_TEMPLATE_PATH,
+ )
+ history_parser.add_argument(
+ "--pr-report-path",
+ type=pathlib.Path,
+ help="path of the HTML summary report for pull requests (shows only significant changes).",
+ )
+ history_parser.add_argument(
+ "--since",
+ type=str,
+ help=f'oldest commit to benchmark. Use "wip" to start from the uncommitted changes (committed as "bfbencher: WIP"). Must be older than --until, or the same. Defaults to "{DEFAULT_FIRST_COMMIT_REF}"',
+ default=DEFAULT_FIRST_COMMIT_REF,
+ )
+ history_parser.add_argument(
+ "--until",
+ type=str,
+ help=f'newest commit to benchmark. Use "wip" to include uncommitted changes (committed as "bfbencher: WIP"). Must be newer than --since, or the same. Defaults to "{DEFAULT_LAST_COMMIT_REF}"',
+ default=DEFAULT_LAST_COMMIT_REF,
+ )
+ history_parser.add_argument(
+ "--include",
+ type=str,
+ action="append",
+ default=[],
+ help='include an extra commit outside the range. Can be specified multiple times. Use "wip" to include uncommitted changes. Commits are sorted in git order with the range commits.',
+ )
+
+ compare_parser = subparsers.add_parser(
+ "compare",
+ parents=[shared],
+ help="compare performance between two specific commits",
+ description="Benchmark two specific commits and report the performance difference.",
+ )
+ compare_parser.add_argument(
+ "base",
+ type=str,
+ help='baseline commit ref. Use "wip" for uncommitted changes.',
+ )
+ compare_parser.add_argument(
+ "ref",
+ type=str,
+ help='commit ref to compare against the baseline. Use "wip" for uncommitted changes.',
+ )
+ compare_parser.add_argument(
+ "--json-output",
+ type=pathlib.Path,
+ help="write comparison results to a JSON file.",
+ )
+
args = parser.parse_args()
+ if args.command is None:
+ parser.print_help()
+ raise SystemExit(1)
+
try:
- run_benchmarks(args)
+ if args.command == "history":
+ run_benchmarks(args)
+ elif args.command == "compare":
+ run_compare(args)
except KeyboardInterrupt:
renderer.log("Command interrupted by user")
raise SystemExit(1)
diff --git a/tools/benchmarks/main.cpp b/tests/benchmarks/main.cpp
similarity index 86%
rename from tools/benchmarks/main.cpp
rename to tests/benchmarks/main.cpp
index c8f634aba..7517ef68b 100644
--- a/tools/benchmarks/main.cpp
+++ b/tests/benchmarks/main.cpp
@@ -94,6 +94,108 @@ void chain_policy_c(::benchmark::State &state)
BENCHMARK(chain_policy_c);
+void xdp_prologue_c(::benchmark::State &state)
+{
+ Chain chain("bf_benchmark", BF_HOOK_XDP, BF_VERDICT_ACCEPT);
+
+ auto chainp = chain.get();
+ int ret = bf_chain_set(chainp.get(), nullptr);
+ if (ret < 0)
+ throw std::runtime_error("failed to load chain");
+
+ auto prog = bft::Program(chain.name());
+
+ while (state.KeepRunningBatch(::bft::progRunRepeat)) {
+ auto stats = prog.run(::bft::pkt_local_ip6_tcp);
+ if (stats.retval != XDP_PASS)
+ state.SkipWithError("benchmark run failed");
+
+ state.SetIterationTime((double)stats.duration * stats.repeat);
+ }
+
+ state.counters["nInsn"] = prog.nInsn();
+ state.SetLabel("XDP prologue, accept policy");
+}
+
+BENCHMARK(xdp_prologue_c);
+
+void tc_ingress_prologue_c(::benchmark::State &state)
+{
+ Chain chain("bf_benchmark", BF_HOOK_TC_INGRESS, BF_VERDICT_ACCEPT);
+
+ auto chainp = chain.get();
+ int ret = bf_chain_set(chainp.get(), nullptr);
+ if (ret < 0)
+ throw std::runtime_error("failed to load chain");
+
+ auto prog = bft::Program(chain.name());
+
+ // TC_ACT_OK = 0
+ while (state.KeepRunningBatch(::bft::progRunRepeat)) {
+ auto stats = prog.run(::bft::pkt_local_ip6_tcp);
+ if (stats.retval != 0)
+ state.SkipWithError("benchmark run failed");
+
+ state.SetIterationTime((double)stats.duration * stats.repeat);
+ }
+
+ state.counters["nInsn"] = prog.nInsn();
+ state.SetLabel("TC_INGRESS prologue, accept policy");
+}
+
+BENCHMARK(tc_ingress_prologue_c);
+
+void cgroup_skb_ingress_prologue_c(::benchmark::State &state)
+{
+ Chain chain("bf_benchmark", BF_HOOK_CGROUP_SKB_INGRESS, BF_VERDICT_ACCEPT);
+
+ auto chainp = chain.get();
+ int ret = bf_chain_set(chainp.get(), nullptr);
+ if (ret < 0)
+ throw std::runtime_error("failed to load chain");
+
+ auto prog = bft::Program(chain.name());
+
+ while (state.KeepRunningBatch(::bft::progRunRepeat)) {
+ auto stats = prog.run(::bft::pkt_local_ip6_tcp);
+ if (stats.retval != ::bft::CGROUP_SKB_ACCEPT)
+ state.SkipWithError("benchmark run failed");
+
+ state.SetIterationTime((double)stats.duration * stats.repeat);
+ }
+
+ state.counters["nInsn"] = prog.nInsn();
+ state.SetLabel("CGROUP_SKB_INGRESS prologue, accept policy");
+}
+
+BENCHMARK(cgroup_skb_ingress_prologue_c);
+
+void nf_local_in_prologue_c(::benchmark::State &state)
+{
+ Chain chain("bf_benchmark", BF_HOOK_NF_LOCAL_IN, BF_VERDICT_ACCEPT);
+
+ auto chainp = chain.get();
+ int ret = bf_chain_set(chainp.get(), nullptr);
+ if (ret < 0)
+ throw std::runtime_error("failed to load chain");
+
+ auto prog = bft::Program(chain.name());
+
+ // NF_ACCEPT = 1
+ while (state.KeepRunningBatch(::bft::progRunRepeat)) {
+ auto stats = prog.run(::bft::pkt_local_ip6_tcp);
+ if (stats.retval != 1)
+ state.SkipWithError("benchmark run failed");
+
+ state.SetIterationTime((double)stats.duration * stats.repeat);
+ }
+
+ state.counters["nInsn"] = prog.nInsn();
+ state.SetLabel("NF_LOCAL_IN prologue, accept policy");
+}
+
+BENCHMARK(nf_local_in_prologue_c);
+
void single_rule__ip4_saddr(::benchmark::State &state)
{
Chain chain("bf_benchmark", BF_HOOK_XDP, BF_VERDICT_ACCEPT);
diff --git a/tools/benchmarks/summary.html.j2 b/tests/benchmarks/summary.html.j2
similarity index 100%
rename from tools/benchmarks/summary.html.j2
rename to tests/benchmarks/summary.html.j2
diff --git a/tests/check/CMakeLists.txt b/tests/check/CMakeLists.txt
index e26f9baf1..d62632af0 100644
--- a/tests/check/CMakeLists.txt
+++ b/tests/check/CMakeLists.txt
@@ -13,7 +13,7 @@ file(GLOB_RECURSE bf_srcs
file(GLOB_RECURSE bf_test_srcs
${CMAKE_SOURCE_DIR}/tests/*.h ${CMAKE_SOURCE_DIR}/tests/*.c
${CMAKE_SOURCE_DIR}/tests/*.hpp ${CMAKE_SOURCE_DIR}/tests/*.cpp
- ${CMAKE_SOURCE_DIR}/tools/benchmarks/*.hpp ${CMAKE_SOURCE_DIR}/tools/benchmarks/*.cpp
+ ${CMAKE_SOURCE_DIR}/tests/benchmarks/*.hpp ${CMAKE_SOURCE_DIR}/tests/benchmarks/*.cpp
)
set(bf_all_srcs ${bf_srcs} ${bf_test_srcs})
diff --git a/tools/benchmarks/results.html.j2 b/tools/benchmarks/results.html.j2
deleted file mode 100644
index 60d54a0c1..000000000
--- a/tools/benchmarks/results.html.j2
+++ /dev/null
@@ -1,819 +0,0 @@
-
-
-
-
-
-
- Benchmark Results
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Commit range
-
{{ first_commit_sha[:7] }}..{{ last_commit_sha[:7] }}
-
-
-
Host
-
{{ hostname }}
-
-
-
Commits
-
{{ n_commits }}
-
-
-
Results
- {% if stats.n_failures %}
-
{{ stats.n_successes }} ({{ stats.n_failures }} failures)
- {% else %}
-
{{ stats.n_successes }}
- {% endif %}
-
-
-
-
-
-
-
-
-
-
-
-
- Each row shows performance data for a single benchmark. Runtime is the CPU time
- measured for the most recent commit, and Instructions is the BPF instruction count
- of the generated program. The Δ (delta) columns show the percentage change compared
- to the mean of the previous N commits (e.g., "5 commits" compares against the average of
- commits 2-6). Only statistically significant changes (z-score > 2.5) are colored:
- green for improvements (faster or fewer instructions),
- red for regressions. Uncolored values indicate changes within
- normal variance. Click a benchmark name to jump to its historical chart below.
-
-
- {%- from "summary.html.j2" import render_table -%}
-
- {{ render_table(rows, terms, none, "bootstrap", true, ureg, none, get_class) }}
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/tools/bfoptimize b/tools/bfoptimize
new file mode 100755
index 000000000..77d716b36
--- /dev/null
+++ b/tools/bfoptimize
@@ -0,0 +1,672 @@
+#!/usr/bin/env python3
+"""bfoptimize — LLM-driven BPF bytecode optimization loop for bpfilter."""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import datetime
+import json
+import multiprocessing
+import os
+import pathlib
+import shutil
+import subprocess
+from typing import Any
+
+import anthropic
+import diskcache # type: ignore[import-untyped]
+import git
+import numpy
+import rich.console
+import rich.table
+from claude_agent_sdk import (
+ AssistantMessage,
+ ClaudeAgentOptions,
+ ResultMessage,
+ TextBlock,
+ query,
+)
+
+DEFAULT_SOURCES = pathlib.Path(".")
+DEFAULT_BUILD_DIR = pathlib.Path("build")
+DEFAULT_CACHE_DIR = pathlib.Path(".cache/bfoptimize")
+DEFAULT_ITERATIONS = 10
+DEFAULT_MODEL = "claude-opus-4-6"
+DEFAULT_EFFORT = "high"
+CGEN_DIR = "src/libbpfilter/cgen"
+SHORT_SHA_LEN = 7
+
+console = rich.console.Console(log_path=False)
+
+
+# ---------------------------------------------------------------------------
+# History
+# ---------------------------------------------------------------------------
+
+
+class History:
+ """Persists attempt records and the current baseline SHA."""
+
+ def __init__(self, cache_dir: pathlib.Path) -> None:
+ self._path = cache_dir / "history.json"
+ self._data: dict[str, Any] = self._load()
+
+ def _load(self) -> dict[str, Any]:
+ if self._path.exists():
+ return json.loads(self._path.read_text())
+ return {"baseline_sha": None, "attempts": []}
+
+ def save(self) -> None:
+ self._path.parent.mkdir(parents=True, exist_ok=True)
+ self._path.write_text(json.dumps(self._data, indent=2))
+
+ @property
+ def baseline_sha(self) -> str | None:
+ return self._data.get("baseline_sha")
+
+ @baseline_sha.setter
+ def baseline_sha(self, sha: str) -> None:
+ self._data["baseline_sha"] = sha
+
+ @property
+ def attempts(self) -> list[dict[str, Any]]:
+ return self._data.get("attempts", [])
+
+ def next_id(self) -> int:
+ attempts = self.attempts
+ return (attempts[-1]["id"] + 1) if attempts else 1
+
+ def add_attempt(self, attempt: dict[str, Any]) -> None:
+ self._data.setdefault("attempts", []).append(attempt)
+ self.save()
+
+ def summary(self) -> str:
+ attempts = self.attempts
+ if not attempts:
+ return "No previous attempts."
+ lines = ["Previous optimization attempts (do not repeat these):"]
+ for a in attempts:
+ delta = (
+ f"{a['delta_time_pct']:+.1f}%"
+ if a.get("delta_time_pct") is not None
+ else "N/A"
+ )
+ lines.append(
+ f" #{a['id']} [{a['status']}] {a['description']} (weighted runtime delta: {delta})"
+ )
+ return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Source loading
+# ---------------------------------------------------------------------------
+
+
def load_cgen_sources(sources_dir: pathlib.Path) -> str:
    """Concatenate every .c/.h file under the cgen directory into one string.

    Each file is prefixed with a ``=== <relative path> ===`` header so the
    model can attribute code to files. Files are visited in sorted order for
    deterministic prompts; unreadable entries are skipped.
    """
    cgen_path = sources_dir / CGEN_DIR
    parts: list[str] = []
    for f in sorted(cgen_path.rglob("*.[ch]")):
        # rglob matches directories too if their names happen to end in .c/.h.
        if not f.is_file():
            continue
        rel = f.relative_to(sources_dir)
        try:
            content = f.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            # Skip binary or unreadable files rather than aborting planning.
            continue
        parts.append(f"=== {rel} ===\n{content}")
    return "\n\n".join(parts)
+
+
def load_last_benchmark(cache_dir: pathlib.Path) -> str:
    """Summarise the most recent bfbencher comparison as plain text.

    Reads ``<cache_dir>/last_bench.json``; returns a human-readable fallback
    string when the file is missing or malformed.
    """
    bench_path = cache_dir / "last_bench.json"
    if not bench_path.exists():
        return "No benchmark data available yet."
    try:
        data = json.loads(bench_path.read_text())
        base = data.get("base", "?")[:SHORT_SHA_LEN]
        ref = data.get("ref", "?")[:SHORT_SHA_LEN]
        lines = [f"Last benchmark results ({base} → {ref}):"]
        lines += [
            f" {b['name']}: base={b['base_time_ns']:.1f}ns"
            f" ref={b['ref_time_ns']:.1f}ns delta={b.get('delta_time_pct', 0):+.1f}%"
            for b in data.get("benchmarks", [])
        ]
        return "\n".join(lines)
    except Exception:
        return "Benchmark data unavailable."
+
+
+# ---------------------------------------------------------------------------
+# Planning phase (extended thinking)
+# ---------------------------------------------------------------------------
+
+
def plan_optimization(
    client: anthropic.Anthropic,
    cgen_sources: str,
    history_summary: str,
    benchmark_results: str,
    cache_dir: pathlib.Path,
    attempt_id: int,
    model: str = DEFAULT_MODEL,
    thinking: bool = True,
    effort: str = DEFAULT_EFFORT,
    context_1m: bool = False,
    hint: str | None = None,
) -> str:
    """Ask the model for a single optimization proposal over the cgen sources.

    Builds a prompt from the cgen sources, the latest benchmark summary and
    the attempt history, streams the response (printing thinking/text deltas
    to the console as they arrive), persists any thinking blocks to
    ``<cache_dir>/<attempt_id>-thinking.txt``, and returns the first text
    block of the final message.

    Raises StopIteration if the response contains no text block, and
    propagates any Anthropic SDK error; the caller catches and skips the
    iteration.
    """
    prompt = f"""You are optimizing the BPF bytecode generation logic in the bpfilter project.

The cgen directory generates BPF programs that run in the Linux kernel for packet filtering.
Every nanosecond saved matters — these programs execute for every packet received by the host.

## Source files in {CGEN_DIR}/

{cgen_sources}

## Current benchmark results

{benchmark_results}

## Optimization history

{history_summary}

## Task

Propose exactly ONE concrete optimization to the cgen code. Describe:
1. Which file(s) you will change and what specifically you will change
2. Why this will reduce the runtime of the generated BPF programs
3. Any risks or tricky edge cases to handle

Be specific and actionable. Do not repeat any previously attempted optimization.
Output only the optimization proposal — no code yet."""

    if hint:
        # The hint is advisory: appended as context, never enforced.
        prompt += (
            f"\n\n## Hint\n\n{hint}\n\n"
            "This is a direction to consider, not a constraint — "
            "you may propose a different optimization if you judge it more impactful."
        )

    # NOTE(review): the "output_config"/"thinking" payload shapes and the
    # context-1m beta header should be confirmed against the installed
    # anthropic SDK version.
    stream_kwargs: dict[str, Any] = {
        "model": model,
        "max_tokens": 128000,
        "messages": [{"role": "user", "content": prompt}],
        "output_config": {"effort": effort},
    }
    if thinking:
        stream_kwargs["thinking"] = {"type": "adaptive"}
    if context_1m:
        stream_kwargs["extra_headers"] = {"anthropic-beta": "context-1m-2025-08-07"}

    # Echo deltas live so the operator can follow the planning in real time.
    with client.messages.stream(**stream_kwargs) as stream:
        for event in stream:
            if event.type == "content_block_delta":
                if event.delta.type == "thinking_delta":
                    console.print(event.delta.thinking, end="")
                elif event.delta.type == "text_delta":
                    console.print(event.delta.text, end="")
        console.print("")
        response = stream.get_final_message()

    # Persist thinking blocks for post-hoc inspection
    thinking_texts = [b.thinking for b in response.content if b.type == "thinking"]
    if thinking_texts:
        thinking_path = cache_dir / f"{attempt_id}-thinking.txt"
        thinking_path.write_text("\n\n---\n\n".join(thinking_texts))  # type: ignore[arg-type]

    # Return only the text block: the proposal the executor will implement.
    return next(b.text for b in response.content if b.type == "text")  # type: ignore[union-attr]
+
+
+# ---------------------------------------------------------------------------
+# Execution phase (Agent SDK)
+# ---------------------------------------------------------------------------
+
+
async def execute_optimization(
    sources_dir: pathlib.Path,
    build_dir: pathlib.Path,
    optimization_plan: str,
    baseline_sha: str,
) -> str | None:
    """Run the agent to implement the optimization. Returns new HEAD sha if committed.

    Drives a Claude agent session (claude_agent_sdk.query) with a prompt that
    restricts edits to the cgen directory and requires green tests before
    committing. Success is detected purely by comparing the repository HEAD
    against the baseline once the session ends — the agent's own messages are
    only logged, never parsed.
    """
    ncpus = multiprocessing.cpu_count()
    abs_sources = sources_dir.resolve()
    abs_build = build_dir.resolve()

    prompt = f"""You are implementing a performance optimization to the bpfilter BPF bytecode generator.

## Optimization to implement

{optimization_plan}

## Rules

- Modify ONLY files under `{abs_sources}/{CGEN_DIR}/`. Do not touch any other files.
- Build: cmake -S {abs_sources} -B {abs_build} -DNO_DOCS=1 -DNO_TESTS=1 -DNO_CHECKS=1 \
-DCMAKE_BUILD_TYPE=release && make -C {abs_build} -j{ncpus} bpfilter
- Test: make -C {abs_build} -j{ncpus} unit e2e integration
- If tests pass: commit with `git -C {abs_sources} commit -am "daemon: cgen: "`
- If build or tests fail: diagnose and fix. If you cannot make tests pass, revert ALL your \
changes with `git -C {abs_sources} checkout -- {abs_sources}/{CGEN_DIR}/` and exit without \
committing.
- The current baseline is {baseline_sha[:SHORT_SHA_LEN]}. Only commit when tests are green."""

    # bypassPermissions: the agent builds, tests and commits non-interactively.
    async for message in query(
        prompt=prompt,
        options=ClaudeAgentOptions(
            cwd=str(abs_sources),
            permission_mode="bypassPermissions",
            allowed_tools=["Read", "Edit", "Write", "Bash", "Glob", "Grep"],
        ),
    ):
        # Log a truncated trace of the session for the operator.
        if isinstance(message, AssistantMessage):
            for block in message.content:
                if isinstance(block, TextBlock) and block.text.strip():
                    console.log(f"[dim]{block.text.strip()[:200]}[/dim]")
        elif isinstance(message, ResultMessage) and message.result:
            console.log(f"Agent done: {message.result[:200]}")

    # HEAD moved → the agent committed; unchanged HEAD → no commit was made.
    repo = git.Repo(sources_dir)
    new_sha = repo.head.commit.hexsha
    return new_sha if new_sha != baseline_sha else None
+
+
+# ---------------------------------------------------------------------------
+# Benchmark step
+# ---------------------------------------------------------------------------
+
+
def _benchmark_noise(bfbencher_cache_dir: pathlib.Path) -> dict[str, float]:
    """Estimate per-benchmark noise (CV) from bfbencher's accumulated cache.

    Iterates all cached commit results and computes the coefficient of variation
    (MAD/median, normalised via the 1.4826 consistency factor) for each
    benchmark. Benchmarks with fewer than 3 data points are excluded so that
    noise estimates are not based on a single outlier.

    Returns a mapping from benchmark name to CV. An empty dict is returned
    when the cache is absent or unreadable.
    """
    series: dict[str, list[float]] = {}
    try:
        cache = diskcache.Cache(bfbencher_cache_dir)
        try:
            for key in cache:
                val = cache.get(key)
                if not isinstance(val, dict) or not val.get("success"):
                    continue
                for r in val.get("results", []):
                    name = r.get("name", "")
                    t = float(r.get("cpu_time", 0))
                    if t > 0:
                        series.setdefault(name, []).append(t)
        finally:
            # Always release the cache handle — the previous code leaked it
            # whenever iteration raised.
            cache.close()
    except Exception:
        return {}

    noise: dict[str, float] = {}
    for name, times in series.items():
        if len(times) < 3:
            continue
        arr = numpy.array(times)
        median = float(numpy.median(arr))
        if median == 0:
            continue
        mad = float(numpy.median(numpy.abs(arr - median)))
        # 1.4826 makes MAD a consistent estimator of sigma for normal data.
        noise[name] = (mad * 1.4826) / median
    return noise
+
+
def run_benchmark(
    sources_dir: pathlib.Path,
    cache_dir: pathlib.Path,
    bfbencher: pathlib.Path,
    baseline_sha: str,
    result_sha: str,
    extra_args: list[str],
    attempt_id: int,
) -> float | None:
    """Compare baseline vs. result with bfbencher and return the noise-weighted
    mean runtime delta in percent, or None when the run or parsing fails."""
    json_path = cache_dir / "last_bench.json"
    cmd = [
        str(bfbencher),
        "compare",
        baseline_sha,
        result_sha,
        "--sources",
        str(sources_dir),
        "--cache-dir",
        str(cache_dir / "bfbencher"),
        "--json-output",
        str(json_path),
        "--fail-on-benchmark-error",
        *extra_args,
    ]

    console.log(
        f"Running bfbencher compare {baseline_sha[:SHORT_SHA_LEN]} → {result_sha[:SHORT_SHA_LEN]}"
    )
    env = {**os.environ, "PYTHONUNBUFFERED": "1"}
    if subprocess.run(cmd, text=True, env=env).returncode != 0:
        console.log("[red]bfbencher compare failed[/red]")
        return None

    # Keep a per-attempt copy of the raw comparison for the web UI.
    shutil.copy(json_path, cache_dir / f"bench-{attempt_id}.json")

    try:
        data = json.loads(json_path.read_text())
        noise = _benchmark_noise(cache_dir / "bfbencher")

        weighted: list[tuple[float, float]] = []
        for bench in data.get("benchmarks", []):
            delta = bench.get("delta_time_pct")
            if delta is None:
                continue
            cv = noise.get(bench["name"], 0.0)
            # Weight = 1 / (1 + CV): high-noise benchmarks contribute less.
            weighted.append((delta, 1.0 / (1.0 + cv)))

        if not weighted:
            return None

        volatile = sorted(
            ((name, cv) for name, cv in noise.items() if cv > 0.01),
            key=lambda item: -item[1],
        )
        if volatile:
            noisy_str = ", ".join(f"{name} (CV={cv:.1%})" for name, cv in volatile)
            console.log(f"[dim]Volatile benchmarks (down-weighted): {noisy_str}[/dim]")

        total_weight = sum(w for _, w in weighted)
        return sum(d * w for d, w in weighted) / total_weight
    except Exception as e:
        console.log(f"[red]Failed to parse benchmark output: {e}[/red]")
        return None
+
+
+# ---------------------------------------------------------------------------
+# Summary table
+# ---------------------------------------------------------------------------
+
+
def print_summary(history: History) -> None:
    """Render every recorded attempt as a rich table on the console."""
    # accepted → green, rejected_bench → red, anything else → yellow.
    status_colors = {"accepted": "green", "rejected_bench": "red"}

    table = rich.table.Table(title="bfoptimize summary", show_header=True)
    table.add_column("#", justify="right")
    table.add_column("Status", justify="center")
    table.add_column("Δ Runtime", justify="right")
    table.add_column("Description", style="cyan")

    for attempt in history.attempts:
        status = attempt["status"]
        color = status_colors.get(status, "yellow")
        pct = attempt.get("delta_time_pct")
        table.add_row(
            str(attempt["id"]),
            f"[{color}]{status}[/{color}]",
            "-" if pct is None else f"{pct:+.1f}%",
            attempt["description"][:80],
        )

    console.print(table)
+
+
+# ---------------------------------------------------------------------------
+# Main loop
+# ---------------------------------------------------------------------------
+
+
async def run_optimize(args: argparse.Namespace) -> None:
    """Main loop: plan → execute → benchmark → accept/reject, N times.

    Accepted attempts advance the baseline SHA; rejected ones hard-reset the
    repository back to the baseline. Every attempt (except planning failures,
    which are skipped without a record) is appended to the persisted History.
    """
    args.cache_dir.mkdir(parents=True, exist_ok=True)
    history = History(args.cache_dir)
    client = anthropic.Anthropic()
    repo = git.Repo(args.sources)

    # First run: the current HEAD becomes the baseline.
    if history.baseline_sha is None:
        history.baseline_sha = repo.head.commit.hexsha
        history.save()

    console.log(f"Baseline: {history.baseline_sha[:SHORT_SHA_LEN]}")

    bfbencher = (args.sources / "tests/benchmarks/bfbencher").resolve()

    # Pass-through flags forwarded verbatim to every bfbencher invocation.
    bench_extra: list[str] = []
    if args.host:
        bench_extra += ["--host", args.host]
    if args.bind_node is not None:
        bench_extra += ["--bind-node", str(args.bind_node)]
    if args.no_preempt:
        bench_extra += ["--no-preempt"]
    if args.cpu_pin is not None:
        bench_extra += ["--cpu-pin", str(args.cpu_pin)]
    if args.slice:
        bench_extra += ["--slice", args.slice]

    for i in range(args.iterations):
        attempt_id = history.next_id()
        console.log(
            f"\n[bold cyan]─── Iteration {i + 1}/{args.iterations}"
            f" (attempt #{attempt_id}) ───[/bold cyan]"
        )

        # Re-read each iteration: an accepted attempt moves the baseline.
        baseline_sha = history.baseline_sha
        assert baseline_sha is not None

        # ── Plan ──────────────────────────────────────────────────────────
        console.log("[bold]Planning...[/bold]")
        cgen_sources = load_cgen_sources(args.sources)
        benchmark_results = load_last_benchmark(args.cache_dir)
        try:
            plan = plan_optimization(
                client,
                cgen_sources,
                history.summary(),
                benchmark_results,
                args.cache_dir,
                attempt_id,
                model=args.model,
                thinking=args.thinking,
                effort=args.effort,
                context_1m=args.context_1m,
                hint=args.hint,
            )
        except Exception as e:
            # Planning failures are skipped without recording an attempt.
            console.log(f"[red]Planning failed: {e}[/red]")
            continue

        # First line of the proposal doubles as the attempt description.
        # NOTE(review): IndexError if the plan is empty/whitespace — confirm
        # plan_optimization can never return an empty string.
        description = plan.strip().splitlines()[0][:120]
        console.log(f"Proposal: {description}")

        # ── Execute ───────────────────────────────────────────────────────
        console.log("[bold]Executing...[/bold]")
        try:
            result_sha = await execute_optimization(
                args.sources,
                args.build_dir,
                plan,
                baseline_sha,
            )
        except Exception as e:
            console.log(f"[red]Agent execution failed: {e}[/red]")
            history.add_attempt(
                {
                    "id": attempt_id,
                    "description": description,
                    "status": "rejected_tests",
                    "baseline_sha": baseline_sha,
                    "result_sha": None,
                    "delta_time_pct": None,
                    "timestamp": datetime.datetime.now().isoformat(),
                }
            )
            continue

        # No commit means the agent could not make the tests pass.
        if result_sha is None:
            console.log("[yellow]Agent did not commit — rejected_tests[/yellow]")
            history.add_attempt(
                {
                    "id": attempt_id,
                    "description": description,
                    "status": "rejected_tests",
                    "baseline_sha": baseline_sha,
                    "result_sha": None,
                    "delta_time_pct": None,
                    "timestamp": datetime.datetime.now().isoformat(),
                }
            )
            continue

        console.log(f"Agent committed: {result_sha[:SHORT_SHA_LEN]}")

        # ── Benchmark ─────────────────────────────────────────────────────
        console.log("[bold]Benchmarking...[/bold]")
        delta = run_benchmark(
            args.sources,
            args.cache_dir,
            bfbencher,
            baseline_sha,
            result_sha,
            bench_extra,
            attempt_id,
        )

        # Accept only a strictly negative (faster) weighted mean delta;
        # a failed or neutral benchmark discards the commit entirely.
        if delta is None or delta >= 0:
            delta_str = f"{delta:+.1f}%" if delta is not None else "N/A"
            console.log(f"[red]Rejected (bench): mean delta {delta_str}[/red]")
            repo.git.reset("--hard", baseline_sha)
            status = "rejected_bench"
        else:
            console.log(f"[green]Accepted: mean delta {delta:+.1f}%[/green]")
            history.baseline_sha = result_sha
            status = "accepted"

        history.add_attempt(
            {
                "id": attempt_id,
                "description": description,
                "status": status,
                "baseline_sha": baseline_sha,
                "result_sha": result_sha,
                "delta_time_pct": delta,
                "timestamp": datetime.datetime.now().isoformat(),
            }
        )

    print_summary(history)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
def main() -> None:
    """CLI entry point: parse options and run the optimization loop."""
    cli = argparse.ArgumentParser(
        prog="bfoptimize",
        description="LLM-driven BPF bytecode optimization loop for bpfilter.",
    )

    # Declarative option table: (flags, add_argument keyword arguments).
    option_table: list[tuple[tuple[str, ...], dict[str, Any]]] = [
        (
            ("--iterations", "-n"),
            dict(
                type=int,
                default=DEFAULT_ITERATIONS,
                help=f"number of optimization iterations to run (default: {DEFAULT_ITERATIONS})",
            ),
        ),
        (
            ("--sources",),
            dict(
                type=pathlib.Path,
                default=DEFAULT_SOURCES,
                help=f'path to the bpfilter source directory (default: "{DEFAULT_SOURCES}")',
            ),
        ),
        (
            ("--build-dir",),
            dict(
                type=pathlib.Path,
                default=DEFAULT_BUILD_DIR,
                help=f'cmake build directory (default: "{DEFAULT_BUILD_DIR}")',
            ),
        ),
        (
            ("--cache-dir",),
            dict(
                type=pathlib.Path,
                default=DEFAULT_CACHE_DIR,
                help=f'directory for history and benchmark cache (default: "{DEFAULT_CACHE_DIR}")',
            ),
        ),
        (
            ("--host",),
            dict(
                type=str,
                default=None,
                help="remote host for benchmarking (passed through to bfbencher)",
            ),
        ),
        (
            ("--bind-node",),
            dict(
                type=int,
                default=None,
                help="CPU/memory NUMA node to bind benchmarks to",
            ),
        ),
        (
            ("--no-preempt",),
            dict(
                action="store_true",
                default=False,
                help="run benchmarks with real-time scheduling (chrt -f 99)",
            ),
        ),
        (
            ("--cpu-pin",),
            dict(
                type=int,
                default=None,
                help="CPU to pin benchmark to",
            ),
        ),
        (
            ("--slice",),
            dict(
                type=str,
                default=None,
                help="systemd slice for benchmark execution",
            ),
        ),
        (
            ("--model",),
            dict(
                type=str,
                default=DEFAULT_MODEL,
                choices=["claude-opus-4-6", "claude-sonnet-4-6"],
                help=f"Claude model to use for planning (default: {DEFAULT_MODEL})",
            ),
        ),
        (
            ("--thinking",),
            dict(
                action=argparse.BooleanOptionalAction,
                default=True,
                help="enable adaptive thinking during planning (default: enabled)",
            ),
        ),
        (
            ("--effort",),
            dict(
                type=str,
                default=DEFAULT_EFFORT,
                choices=["low", "medium", "high", "max"],
                help=f"effort level for the planning call (default: {DEFAULT_EFFORT}; max is Opus only)",
            ),
        ),
        (
            ("--context-1m",),
            dict(
                action="store_true",
                default=False,
                help="enable 1M context window beta (claude-opus-4-6 and claude-sonnet-4-6 only)",
            ),
        ),
        (
            ("--hint",),
            dict(
                type=str,
                default=None,
                help="optional direction for the model (e.g. 'look into XXX'); it is provided as context but not enforced",
            ),
        ),
    ]
    for flags, spec in option_table:
        cli.add_argument(*flags, **spec)

    args = cli.parse_args()

    try:
        asyncio.run(run_optimize(args))
    except KeyboardInterrupt:
        console.log("Interrupted by user")
        raise SystemExit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/bfoptimize-web b/tools/bfoptimize-web
new file mode 100755
index 000000000..fefdb952a
--- /dev/null
+++ b/tools/bfoptimize-web
@@ -0,0 +1,1174 @@
+#!/usr/bin/env python3
+"""bfoptimize-web — Local web UI for bfoptimize."""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+import pathlib
+import signal
+from typing import Any, AsyncGenerator
+
+import uvicorn
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import HTMLResponse, PlainTextResponse, StreamingResponse
+from pydantic import BaseModel
+
+# ---------------------------------------------------------------------------
+# Global state
+# ---------------------------------------------------------------------------
+
+_process: asyncio.subprocess.Process | None = None
+_log_lines: list[str] = []
+_subscribers: list[asyncio.Queue[str | None]] = []
+
+# Updated at startup from CLI args; overridden per-run from POST /run body
+_cache_dir: pathlib.Path = pathlib.Path(".cache/bfoptimize")
+_sources_dir: pathlib.Path = pathlib.Path(".")
+_bfoptimize: pathlib.Path = pathlib.Path(__file__).parent / "bfoptimize"
+
+app = FastAPI()
+
+# ---------------------------------------------------------------------------
+# Request models
+# ---------------------------------------------------------------------------
+
+
class RunRequest(BaseModel):
    """Body of POST /run — mirrors the bfoptimize CLI options one-to-one."""

    iterations: int = 10  # number of optimization iterations
    sources: str = "."  # bpfilter source checkout
    build_dir: str = "build"  # cmake build directory
    cache_dir: str = ".cache/bfoptimize"  # history/benchmark cache location
    host: str | None = None  # remote benchmark host (bfbencher --host)
    bind_node: int | None = None  # NUMA node to bind benchmarks to
    no_preempt: bool = False  # run benchmarks under real-time scheduling
    cpu_pin: int | None = None  # CPU to pin the benchmark to
    slice: str | None = None  # systemd slice for benchmark execution
    model: str = "claude-opus-4-6"  # planning model name
    thinking: bool = True  # enable adaptive thinking during planning
    effort: str = "high"  # planning effort level
    context_1m: bool = False  # enable the 1M-context beta
    hint: str | None = None  # optional direction hint for the planner
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
def _build_cmd(body: RunRequest) -> list[str]:
    """Translate a RunRequest into the bfoptimize argv (flag order preserved)."""
    cmd = [
        str(_bfoptimize),
        "--iterations",
        str(body.iterations),
        "--sources",
        body.sources,
        "--build-dir",
        body.build_dir,
        "--cache-dir",
        body.cache_dir,
    ]
    if body.host:
        cmd.extend(("--host", body.host))
    if body.bind_node is not None:
        cmd.extend(("--bind-node", str(body.bind_node)))
    if body.no_preempt:
        cmd.append("--no-preempt")
    if body.cpu_pin is not None:
        cmd.extend(("--cpu-pin", str(body.cpu_pin)))
    if body.slice:
        cmd.extend(("--slice", body.slice))
    cmd.extend(("--model", body.model))
    cmd.append("--thinking" if body.thinking else "--no-thinking")
    cmd.extend(("--effort", body.effort))
    if body.context_1m:
        cmd.append("--context-1m")
    if body.hint:
        cmd.extend(("--hint", body.hint))
    return cmd
+
+
async def _broadcast_output(proc: asyncio.subprocess.Process) -> None:
    """Fan the child's stdout out to the log buffer and all SSE subscribers.

    A trailing ``None`` is pushed to every queue once the process exits so
    streaming generators know to stop.
    """
    assert proc.stdout is not None
    while True:
        raw = await proc.stdout.readline()
        if not raw:  # EOF: child closed stdout
            break
        line = raw.decode(errors="replace").rstrip()
        _log_lines.append(line)
        for queue in list(_subscribers):
            await queue.put(line)
    await proc.wait()
    for queue in list(_subscribers):
        await queue.put(None)
+
+
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
+
+
@app.get("/", response_class=HTMLResponse)
async def index() -> str:
    """Serve the embedded single-page frontend (module-level HTML constant)."""
    return HTML
+
+
@app.post("/run", status_code=202)
async def start_run(body: RunRequest) -> dict[str, str]:
    """Launch a bfoptimize subprocess; 409 if one is already running.

    Returns 202 immediately; output is streamed via GET /stream.
    """
    global _process, _log_lines, _cache_dir, _sources_dir
    if _process is not None and _process.returncode is None:
        raise HTTPException(409, "Already running")
    _cache_dir = pathlib.Path(body.cache_dir)
    _sources_dir = pathlib.Path(body.sources)
    _log_lines = []
    cmd = _build_cmd(body)
    _process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,  # interleave stderr into one stream
    )
    # Keep a strong reference to the broadcaster task: the event loop only
    # holds a weak reference to tasks, so a bare create_task() result can be
    # garbage-collected mid-flight and silently stop streaming output.
    start_run._broadcast_task = asyncio.create_task(  # type: ignore[attr-defined]
        _broadcast_output(_process)
    )
    return {"status": "started"}
+
+
@app.delete("/run")
async def stop_run() -> dict[str, str]:
    """Ask the running bfoptimize process to terminate (SIGTERM)."""
    if _process is None or _process.returncode is not None:
        raise HTTPException(404, "No running process")
    try:
        _process.send_signal(signal.SIGTERM)
    except ProcessLookupError:
        # The process exited between the check above and the signal.
        raise HTTPException(404, "No running process")
    return {"status": "stopping"}
+
+
@app.get("/stream")
async def stream() -> StreamingResponse:
    """SSE endpoint: replay the log backlog, then follow live output."""

    async def generator() -> AsyncGenerator[str, None]:
        q: asyncio.Queue[str | None] = asyncio.Queue()
        _subscribers.append(q)
        try:
            # Subscribe first, then snapshot the backlog. There is no await
            # between append() and list(), so on a single-threaded event loop
            # no line can land in both the snapshot and the queue (no dupes);
            # lines produced during the replay yields go only to the queue.
            for line in list(_log_lines):
                yield f"data: {line}\n\n"
            # No active run: the backlog is all there is.
            if _process is None or _process.returncode is not None:
                return
            while True:
                try:
                    item = await asyncio.wait_for(q.get(), timeout=15.0)
                except asyncio.TimeoutError:
                    # SSE comment line — stops proxies closing an idle stream.
                    yield ": keepalive\n\n"
                    continue
                if item is None:
                    break  # producer signalled process exit
                yield f"data: {item}\n\n"
        finally:
            try:
                _subscribers.remove(q)
            except ValueError:
                pass  # already removed
    return StreamingResponse(generator(), media_type="text/event-stream")
+
+
@app.get("/history")
async def get_history() -> Any:
    """Return the persisted bfoptimize history, or an empty skeleton."""
    path = _cache_dir / "history.json"
    if path.exists():
        return json.loads(path.read_text())
    return {"baseline_sha": None, "attempts": []}
+
+
@app.delete("/history")
async def reset_history() -> dict[str, str]:
    """Delete every cached artifact (history, bench JSONs, thinking logs)."""
    import shutil

    # The cache directory may not exist yet (no run started): iterating it
    # unconditionally raised FileNotFoundError. Treat "nothing there" as
    # already reset.
    if _cache_dir.exists():
        for entry in _cache_dir.iterdir():
            if entry.is_dir():
                shutil.rmtree(entry)
            else:
                entry.unlink()
    return {"status": "reset"}
+
+
@app.get("/status")
async def get_status() -> dict[str, Any]:
    """Report whether a bfoptimize run is idle, running, or stopped."""
    proc = _process
    if proc is None:
        state, rc = "idle", None
    elif proc.returncode is None:
        state, rc = "running", None
    else:
        state, rc = "stopped", proc.returncode
    return {"state": state, "returncode": rc}
+
+
@app.get("/bench/{bench_id}")
async def get_bench(bench_id: int) -> Any:
    """Return the stored benchmark comparison JSON for one attempt."""
    bench_file = _cache_dir / f"bench-{bench_id}.json"
    if not bench_file.exists():
        raise HTTPException(404, "Bench data not found")
    return json.loads(bench_file.read_text())
+
+
@app.get("/diff/{bench_id}")
async def get_diff(bench_id: int) -> PlainTextResponse:
    """Return the git diff of one attempt, restricted to the cgen directory."""
    history_path = _cache_dir / "history.json"
    if not history_path.exists():
        raise HTTPException(404, "No history")
    history = json.loads(history_path.read_text())
    attempt = next(
        (a for a in history.get("attempts", []) if a["id"] == bench_id), None
    )
    if attempt is None or not attempt.get("result_sha"):
        raise HTTPException(404, "Attempt not found or has no result")
    proc = await asyncio.create_subprocess_exec(
        "git",
        "-C",
        str(_sources_dir.resolve()),
        "diff",
        attempt["baseline_sha"],
        attempt["result_sha"],
        "--",
        "src/libbpfilter/cgen/",
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        # Previously a git failure (bad SHA, not a repo, ...) came back as an
        # empty 200 body; surface it as an explicit server error instead.
        raise HTTPException(
            500, f"git diff failed: {stderr.decode(errors='replace').strip()}"
        )
    return PlainTextResponse(stdout.decode(errors="replace"))
+
+
+# ---------------------------------------------------------------------------
+# Embedded frontend
+# ---------------------------------------------------------------------------
+
+HTML = """
+
+
+
+
+bfoptimize
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Stop
+
+
+
+
+
+
+ Idle
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Runtime delta per attempt
+ negative = faster than baseline
+
+
+
+
+
+
+
+
+
+
Benchmark history
+
+
+
+ ◕
+ No results yet — start a run to populate this section
+
+
+
+
Benchmark comparison
+
+
+
+ Benchmark
+ Base (ns) Ref (ns)
+ ΔTime (ns) ΔTime %
+ Base Insn Ref Insn
+ ΔInsn ΔInsn %
+
+
+
+
+
+
+
+ Code changes
+ src/libbpfilter/cgen/
+
+
+
+
+
+
+
+
+
+"""
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
def main() -> None:
    """Parse CLI options and serve the web UI on localhost."""
    global _cache_dir, _sources_dir

    cli = argparse.ArgumentParser(
        prog="bfoptimize-web",
        description="Local web UI for bfoptimize.",
    )
    cli.add_argument(
        "--port",
        type=int,
        default=8080,
        help="TCP port to listen on (default: 8080)",
    )
    cli.add_argument(
        "--sources",
        type=pathlib.Path,
        default=pathlib.Path("."),
        help='bpfilter source directory for git diff (default: ".")',
    )
    cli.add_argument(
        "--cache-dir",
        type=pathlib.Path,
        default=pathlib.Path(".cache/bfoptimize"),
        help='bfoptimize cache directory (default: ".cache/bfoptimize")',
    )
    opts = cli.parse_args()

    # Publish the chosen paths to the module globals the endpoints read.
    _cache_dir = opts.cache_dir
    _sources_dir = opts.sources

    # Bind to loopback only: this UI can start arbitrary subprocesses.
    uvicorn.run(app, host="127.0.0.1", port=opts.port)
+
+
+if __name__ == "__main__":
+ main()