Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ permissions:
pull-requests: write

jobs:
check-generated-docs:
timeout-minutes: 5
runs-on: [ "8-core-ubuntu" ]
Comment thread
yaakov-stein marked this conversation as resolved.
name: "Check generated docs"
steps:
- name: Checkout bpfilter
uses: actions/checkout@v4
- name: Check matcher hook documentation
run: |
python3 $GITHUB_WORKSPACE/doc/generate_matcher_hook_compat.py \
--matcher-c $GITHUB_WORKSPACE/src/libbpfilter/matcher.c \
--matcher-h $GITHUB_WORKSPACE/src/libbpfilter/include/bpfilter/matcher.h \
--hook-h $GITHUB_WORKSPACE/src/libbpfilter/include/bpfilter/hook.h \
--output $GITHUB_WORKSPACE/doc/usage/_generated/bfcli_matcher_hook_compatibility.rst \
--check

create-images:
timeout-minutes: 15
strategy:
Expand Down
5 changes: 5 additions & 0 deletions doc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@
find_package(Doxygen REQUIRED)
find_program(SPHINX_BIN sphinx-build REQUIRED)

set(bfcli_matcher_hook_compatibility
${CMAKE_CURRENT_SOURCE_DIR}/usage/_generated/bfcli_matcher_hook_compatibility.rst
)

file(GLOB_RECURSE bf_srcs
${CMAKE_SOURCE_DIR}/src/*.h ${CMAKE_SOURCE_DIR}/src/*.c
${CMAKE_SOURCE_DIR}/tests/harness/*.h ${CMAKE_SOURCE_DIR}/tests/harness/*.c
Expand All @@ -80,6 +84,7 @@ list(FILTER bf_srcs EXCLUDE REGEX "${CMAKE_SOURCE_DIR}/src/external/.*")
set(doc_srcs
${CMAKE_CURRENT_SOURCE_DIR}/index.rst
${CMAKE_CURRENT_SOURCE_DIR}/usage/bfcli.rst
${bfcli_matcher_hook_compatibility}
${CMAKE_CURRENT_SOURCE_DIR}/usage/index.rst
${CMAKE_CURRENT_SOURCE_DIR}/developers/build.rst
${CMAKE_CURRENT_SOURCE_DIR}/developers/contributing.rst
Expand Down
331 changes: 331 additions & 0 deletions doc/generate_matcher_hook_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,331 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
Comment thread
yaakov-stein marked this conversation as resolved.

"""Generate the bfcli matcher-to-hook compatibility table.

The table is derived from the matcher metadata declared in
src/libbpfilter/matcher.c. Each matcher's unsupported hooks are extracted
from its `unsupported_hooks` bitmask.
"""

from __future__ import annotations

import argparse
import pathlib
import re
import sys


def _warn(message: str) -> None:
print(f"warning: {message}", file=sys.stderr)


def _read(path: pathlib.Path) -> str:
return path.read_text(encoding="utf-8")


def _read_enum_order(
path: pathlib.Path,
enum_name: str,
prefix: str,
) -> list[str]:
content = _read(path)
match = re.search(
rf"enum\s+{re.escape(enum_name)}\s*\{{(?P<body>.*?)\n\}};",
content,
re.DOTALL,
)
if not match:
raise ValueError(f"unable to find enum {enum_name} in {path}")

values: list[str] = []
for raw_line in match.group("body").splitlines():
line = raw_line.split("//", 1)[0].split("/*", 1)[0].strip()
if not line:
continue

token = line.rstrip(",")
if "=" in token:
token = token.split("=", 1)[0].strip()

if token.startswith(prefix):
values.append(token)

return values


def _read_string_map(
path: pathlib.Path,
array_name: str,
prefix: str,
) -> dict[str, str]:
content = _read(path)
match = re.search(
rf"{re.escape(array_name)}\[\]\s*=\s*\{{(?P<body>.*?)\n\}};",
content,
re.DOTALL,
)
if not match:
Comment thread
yaakov-stein marked this conversation as resolved.
raise ValueError(f"unable to find array {array_name} in {path}")

mapping: dict[str, str] = {}
for enum_name, value in re.findall(
rf"\[({prefix}[A-Z0-9_]+)\]\s*=\s*\"([^\"]+)\"",
match.group("body"),
):
mapping[enum_name] = value

return mapping


def _read_hook_macros(path: pathlib.Path) -> dict[str, list[str]]:
flattened = _read(path).replace("\\\n", " ")
macros: dict[str, list[str]] = {}
for name, expr in re.findall(
Comment thread
yaakov-stein marked this conversation as resolved.
r"#define\s+(_BF_HOOKS_[A-Z0-9_]+)\s+([^\n]+)",
flattened,
):
macros[name] = [
token.strip() for token in expr.split(",") if token.strip()
]

if not macros:
_warn(f"unable to find any _BF_HOOKS_* macros in {path}")

return macros


def _extract_initializer_blocks(
content: str,
array_name: str,
) -> dict[str, str]:
start = content.find(array_name)
if start < 0:
raise ValueError(f"unable to find {array_name}")

array_start = content.find("{", start)
if array_start < 0:
raise ValueError(f"unable to find initializer for {array_name}")
Comment thread
yaakov-stein marked this conversation as resolved.

depth = 0
array_end = -1
# This lightweight parser assumes braces only delimit initializer blocks.
# Braces inside comments or string literals would confuse the depth count.
for idx in range(array_start, len(content)):
if content[idx] == "{":
depth += 1
elif content[idx] == "}":
depth -= 1
if depth == 0:
array_end = idx
break

if array_end < 0:
raise ValueError(f"unable to find end of initializer for {array_name}")

body = content[array_start + 1 : array_end]
blocks: dict[str, str] = {}
i = 0
while i < len(body):
if body[i] == "}":
break

match = re.search(r"\[(BF_MATCHER_[A-Z0-9_]+)\]\s*=", body[i:])
if not match:
break

enum_name = match.group(1)
i += match.end()

while i < len(body) and body[i].isspace():
i += 1

if i >= len(body) or body[i] != "{":
raise ValueError(f"unable to find block start for {enum_name}")

depth = 0
block_start = i
while i < len(body):
if body[i] == "{":
depth += 1
elif body[i] == "}":
depth -= 1
if depth == 0:
blocks[enum_name] = body[block_start : i + 1]
i += 1
break
i += 1
Comment thread
yaakov-stein marked this conversation as resolved.

return blocks


def _extract_unsupported_hooks(block: str) -> list[str]:
match = re.search(
r"\.unsupported_hooks\s*=\s*BF_FLAGS\((?P<expr>.*?)\)\s*,",
block,
re.DOTALL,
)
if not match:
return []

return [
token.strip()
for token in match.group("expr").replace("\n", " ").split(",")
if token.strip()
]

Comment thread
yaakov-stein marked this conversation as resolved.

def _expand_hooks(
tokens: list[str],
macros: dict[str, list[str]],
seen: set[str] | None = None,
) -> list[str]:
if seen is None:
seen = set()

expanded: list[str] = []
for token in tokens:
if token in macros:
Comment thread
yaakov-stein marked this conversation as resolved.
if token in seen:
raise ValueError(f"circular hook macro reference: {token}")

expanded.extend(
_expand_hooks(macros[token], macros, seen | {token})
)
else:
expanded.append(token)

return expanded


def _validate_hooks(
matcher: str,
hooks: list[str],
hook_order: list[str],
) -> None:
unknown = sorted(set(hook for hook in hooks if hook not in hook_order))
if unknown:
unknown_str = ", ".join(unknown)
raise ValueError(
f"unknown hooks for {matcher}: {unknown_str}; "
"update the parser if hook macros moved"
)


def _render_table(
matcher_order: list[str],
matcher_names: dict[str, str],
hook_order: list[str],
unsupported: dict[str, list[str]],
) -> str:
lines = [
(
".. This file is auto-generated by "
"doc/generate_matcher_hook_compat.py."
),
".. Do not edit manually.",
"",
".. list-table::",
" :header-rows: 1",
" :widths: 2 5",
"",
" * - Matcher",
" - Unsupported hooks",
]

for matcher in matcher_order:
matcher_name = matcher_names.get(matcher)
if not matcher_name or matcher_name == "<set>":
continue

blocked = set(unsupported.get(matcher, []))
blocked_hooks = [hook for hook in hook_order if hook in blocked]
rendered_hooks = ", ".join(f"``{hook}``" for hook in blocked_hooks)
if not rendered_hooks:
rendered_hooks = "None"

lines.extend(
[
f" * - ``{matcher_name}``",
f" - {rendered_hooks}",
]
Comment thread
yaakov-stein marked this conversation as resolved.
)

lines.append("")
return "\n".join(lines)


def render(
    matcher_c: pathlib.Path,
    matcher_h: pathlib.Path,
    hook_h: pathlib.Path,
) -> str:
    """Build the complete RST compatibility table from the C sources.

    Reads the matcher and hook enums, the matcher name strings, the
    hook macros, and each matcher's metadata block, then renders the
    table. Raises ValueError when any of the expected declarations
    cannot be parsed, or when a matcher references an unknown hook.
    """
    source = _read(matcher_c)
    matchers = _read_enum_order(matcher_h, "bf_matcher_type", "BF_MATCHER_")
    hooks = _read_enum_order(hook_h, "bf_hook", "BF_HOOK_")
    names = _read_string_map(matcher_c, "_bf_matcher_type_strs", "BF_MATCHER_")
    macros = _read_hook_macros(matcher_c)
    metas = _extract_initializer_blocks(source, "_bf_matcher_metas")

    blocked: dict[str, list[str]] = {}
    for matcher, block in metas.items():
        expanded = _expand_hooks(_extract_unsupported_hooks(block), macros)
        blocked[matcher] = expanded
        _validate_hooks(matcher, expanded, hooks)

    return _render_table(matchers, names, hooks, blocked)


def generate(
    matcher_c: pathlib.Path,
    matcher_h: pathlib.Path,
    hook_h: pathlib.Path,
    output: pathlib.Path,
) -> str:
    """Render the compatibility table and write it to *output*.

    Parent directories are created as needed. Returns the rendered text.
    """
    table = render(matcher_c, matcher_h, hook_h)
    output.parent.mkdir(parents=True, exist_ok=True)
    output.write_text(table, encoding="utf-8")
    return table


def main() -> int:
    """Command-line entry point.

    Without ``--check``, (re)generates the output file. With ``--check``,
    verifies the existing output matches a fresh render without writing
    anything.

    Returns:
        0 on success; 1 when ``--check`` finds the output missing or stale.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--matcher-c", type=pathlib.Path, required=True,
                        help="path to libbpfilter's matcher.c")
    parser.add_argument("--matcher-h", type=pathlib.Path, required=True,
                        help="path to the bpfilter matcher.h header")
    parser.add_argument("--hook-h", type=pathlib.Path, required=True,
                        help="path to the bpfilter hook.h header")
    parser.add_argument("--output", type=pathlib.Path, required=True,
                        help="path of the generated .rst table")
    parser.add_argument("--check", action="store_true",
                        help="verify the output is up to date instead of writing it")
    args = parser.parse_args()

    if not args.check:
        generate(args.matcher_c, args.matcher_h, args.hook_h, args.output)
        return 0

    # Fail fast: report a missing output before parsing the C sources, so
    # a broken source tree cannot mask the more actionable message.
    if not args.output.exists():
        print(
            f"{args.output} does not exist; run without --check "
            "to generate it",
            file=sys.stderr,
        )
        return 1

    rendered = render(args.matcher_c, args.matcher_h, args.hook_h)
    if _read(args.output) != rendered:
        print(f"{args.output} is out of date", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status.
    sys.exit(main())
Loading
Loading