diff --git a/pyproject.toml b/pyproject.toml index a143b0d..3fafc5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harness-python-react" -version = "0.2.4" +version = "0.2.5" description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend." readme = "README.md" requires-python = ">=3.14" diff --git a/tests/test_otel_semconv.py b/tests/test_otel_semconv.py new file mode 100644 index 0000000..f318818 --- /dev/null +++ b/tests/test_otel_semconv.py @@ -0,0 +1,225 @@ +"""Assertions that emitted spans use OTel semantic-convention attribute names. + +Test-side enforcement of the rule "use the official OTel semconv attribute +names where one exists" (see `src/observability/spans.py` and +`docs/INVARIANTS.md`). + +The scaffold ships only semconv-defined keys (no custom ones); the +`ALLOWED_CUSTOM_KEYS` set below is intentionally empty. When a project +that extends this template needs a custom attribute (e.g. +`agent.tool_calls_count`), it MUST add a constant to +`src/observability/spans.py` AND register it in `ALLOWED_CUSTOM_KEYS` — +the registry-closure test fails otherwise. That's the forcing function +that keeps custom keys to their documented set rather than letting them +proliferate as one-off raw strings. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, +) + +from src.observability import spans as spans_module +from src.observability.spans import ( + DB_QUERY_TEXT, + DB_RESPONSE_RETURNED_ROWS, + GENAI_CONVERSATION_ID, + GENAI_OPERATION_NAME, + GENAI_PROVIDER_NAME, + GENAI_REQUEST_MODEL, + GENAI_TOOL_CALL_ARGUMENTS, + GENAI_TOOL_CALL_RESULT, + GENAI_TOOL_NAME, + GENAI_USAGE_INPUT_TOKENS, + GENAI_USAGE_OUTPUT_TOKENS, + agent_span, +) + +if TYPE_CHECKING: + from collections.abc import Sequence + + from opentelemetry.sdk.trace import ReadableSpan + + +SEMCONV_KEYS: frozenset[str] = frozenset( + { + GENAI_CONVERSATION_ID, + GENAI_REQUEST_MODEL, + GENAI_OPERATION_NAME, + GENAI_PROVIDER_NAME, + GENAI_USAGE_INPUT_TOKENS, + GENAI_USAGE_OUTPUT_TOKENS, + GENAI_TOOL_NAME, + GENAI_TOOL_CALL_ARGUMENTS, + GENAI_TOOL_CALL_RESULT, + DB_QUERY_TEXT, + DB_RESPONSE_RETURNED_ROWS, + } +) + +# Custom keys allowed because no semconv equivalent exists. Empty in the +# scaffold — projects that extend this template add entries here as they +# add custom span attributes (and constants in `src/observability/spans.py`). +ALLOWED_CUSTOM_KEYS: frozenset[str] = frozenset() + +ACCEPTED_KEYS: frozenset[str] = SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS + + +@pytest.fixture +def memory_exporter(monkeypatch: pytest.MonkeyPatch) -> InMemorySpanExporter: + """Install a TracerProvider that records spans into an in-memory exporter. + + Uses `monkeypatch.setattr` to swap `trace.get_tracer_provider` for the + test's lifetime instead of `trace.set_tracer_provider` — the OTel SDK + refuses to override the global provider once any other test has called + `set_tracer_provider`, which would silently route spans to the wrong + exporter. + + Uses `SimpleSpanProcessor` so spans are visible to `get_finished_spans()` + immediately after the span context exits — no flush race. + """ + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + monkeypatch.setattr(trace, "get_tracer_provider", lambda: provider) + return exporter + + +def _span_by_name(spans: Sequence[ReadableSpan], name: str) -> ReadableSpan: + """Return the unique span with the given name (assert exactly one).""" + matches = [s for s in spans if s.name == name] + assert len(matches) == 1, ( + f"expected exactly one span named {name!r}, got {len(matches)} " + f"(all names: {[s.name for s in spans]})" + ) + return matches[0] + + +def test_tool_call_span_carries_genai_tool_attributes( + memory_exporter: InMemorySpanExporter, +) -> None: + """A tool-call span carries the documented `gen_ai.tool.*` keys.""" + tool_attrs: dict[str, str | int] = { + GENAI_TOOL_NAME: "echo", + GENAI_TOOL_CALL_ARGUMENTS: '{"msg": "hi"}', + GENAI_TOOL_CALL_RESULT: '{"echoed": "hi"}', + } + with agent_span("tool.echo", tool_attrs): + pass + + spans = memory_exporter.get_finished_spans() + tool_span = _span_by_name(spans, "tool.echo") + assert tool_span.attributes is not None + keys = set(tool_span.attributes) + + assert GENAI_TOOL_NAME in keys + assert GENAI_TOOL_CALL_ARGUMENTS in keys + assert GENAI_TOOL_CALL_RESULT in keys + assert tool_span.attributes[GENAI_TOOL_NAME] == "echo" + + +def test_llm_call_span_carries_genai_attributes( + memory_exporter: InMemorySpanExporter, +) -> None: + """An LLM-style span carries the documented `gen_ai.*` keys + token usage.""" + llm_attrs: dict[str, str | int] = { + GENAI_CONVERSATION_ID: "session-test-1", + GENAI_REQUEST_MODEL: "gpt-4o-mini", + GENAI_OPERATION_NAME: "chat", + GENAI_PROVIDER_NAME: "openai", + GENAI_USAGE_INPUT_TOKENS: 42, + GENAI_USAGE_OUTPUT_TOKENS: 100, + } + with agent_span("agent.llm_call", llm_attrs): + pass + + spans = memory_exporter.get_finished_spans() + llm_span = _span_by_name(spans, "agent.llm_call") + assert llm_span.attributes is not None + + for key, value in llm_attrs.items(): + assert key in llm_span.attributes, f"missing {key}" + assert llm_span.attributes[key] == value + + +def test_db_query_span_carries_db_attributes( + memory_exporter: InMemorySpanExporter, +) -> None: + """A DB-query span carries `db.query.text` + `db.response.returned_rows`.""" + db_attrs: dict[str, str | int] = { + DB_QUERY_TEXT: "SELECT 1", + DB_RESPONSE_RETURNED_ROWS: 1, + } + with agent_span("db.example_query", db_attrs): + pass + + spans = memory_exporter.get_finished_spans() + db_span = _span_by_name(spans, "db.example_query") + assert db_span.attributes is not None + keys = set(db_span.attributes) + + assert DB_QUERY_TEXT in keys + assert DB_RESPONSE_RETURNED_ROWS in keys + + +def test_no_unregistered_attribute_keys_emitted( + memory_exporter: InMemorySpanExporter, +) -> None: + """Every emitted attribute key is in `SEMCONV_KEYS` or `ALLOWED_CUSTOM_KEYS`.""" + with agent_span( + "tool.echo", + { + GENAI_TOOL_NAME: "echo", + GENAI_TOOL_CALL_ARGUMENTS: "{}", + GENAI_TOOL_CALL_RESULT: "{}", + }, + ): + pass + + spans = memory_exporter.get_finished_spans() + + rogue: list[tuple[str, str]] = [] + for span in spans: + if span.attributes is None: + continue + for key in span.attributes: + if key not in ACCEPTED_KEYS: + rogue.append((span.name, key)) + + assert not rogue, ( + "Span attributes emitted with keys outside the documented registry: " + f"{rogue}. Either use a semconv name from `src/observability/spans.py` " + "or add the new key to both `spans.py` (with a comment explaining why " + "no semconv equivalent applies) and `ALLOWED_CUSTOM_KEYS` in this test." + ) + + +def test_spans_module_constants_are_registered() -> None: + """Every uppercase `str` constant in `spans.py` belongs to the test allowlist. + + The allowlist is `SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS`. Catches the + "added a new constant in `spans.py` without updating the test" drift class. + If this fails, either: + + - the new constant is a semconv name → add it to `SEMCONV_KEYS`. + - the new constant is a custom name → add it to `ALLOWED_CUSTOM_KEYS` + AND ensure the comment in `spans.py` explains why no semconv applies. + """ + declared = { + getattr(spans_module, name) + for name in dir(spans_module) + if name.isupper() and isinstance(getattr(spans_module, name), str) + } + unregistered = declared - ACCEPTED_KEYS + assert not unregistered, ( + f"`spans.py` defines constants not in the test allowlist: " + f"{sorted(unregistered)!r}. Update `SEMCONV_KEYS` or " + "`ALLOWED_CUSTOM_KEYS` in this test." + ) diff --git a/tests/test_route_versioning.py b/tests/test_route_versioning.py new file mode 100644 index 0000000..450c736 --- /dev/null +++ b/tests/test_route_versioning.py @@ -0,0 +1,105 @@ +"""Audit every FastAPI route is `/api/v*/...` or in the documented allowlist. + +Backstops invariant 7 in `docs/INVARIANTS.md` (was *aspirational on test*). +A future contributor adding `@app.get("/foo")` instead of registering the +endpoint on the versioned `APIRouter(prefix="/api/v1")` would silently +introduce an un-versioned route — breaking the API-versioning contract from +`docs/DEVELOPMENT.md` without any gate firing. This test is the gate. +""" + +from __future__ import annotations + +import re + +from src.api.main import app + +VERSIONED_PREFIX = re.compile(r"^/api/v\d+/") + +# Routes deliberately NOT under `/api/v*/`. Add an entry here only with a +# comment naming the structural reason — never just "we needed an exception". +UNVERSIONED_ALLOWLIST: frozenset[str] = frozenset( + { + # FastAPI auto-generated documentation surface. Disabled by passing + # docs_url=None / redoc_url=None to FastAPI() if needed; the + # scaffold keeps them enabled at the unversioned root. + "/openapi.json", + "/docs", + "/docs/oauth2-redirect", + "/redoc", + } +) + + +def test_every_route_is_versioned_or_allowlisted() -> None: + """Every routable path is `/api/v*/` or in the explicit allowlist.""" + offenders: list[str] = [] + for route in app.routes: + path = getattr(route, "path", None) + if path is None: + # Not an APIRoute (e.g. Mount). Skip cleanly — `path` is the + # discriminator we care about. + continue + if path in UNVERSIONED_ALLOWLIST: + continue + if not VERSIONED_PREFIX.match(path): + offenders.append(path) + + assert not offenders, ( + f"un-versioned route(s) detected in app.routes: {offenders!r}. Per " + "docs/DEVELOPMENT.md API Versioning, every routable path must match " + "`/api/v*/<…>` or be added to UNVERSIONED_ALLOWLIST in this test " + "with a comment naming the structural reason. Common entry points " + 'for an un-versioned path: a router wired without `prefix="/api/vN"`, ' + "an `@app.` decorator on the FastAPI app instead of a versioned " + "APIRouter, a manual `app.add_api_route(...)` call, or a Mount that " + "exposes paths at the unversioned root. Fix at the wiring site or, " + "if the path is genuinely unversioned by design (cf. `/api/health`), " + "add it to the allowlist." + ) + + +def test_allowlist_has_no_dead_entries() -> None: + """Every UNVERSIONED_ALLOWLIST entry must correspond to a real route. + + Without this assertion, removing a docs surface (e.g. constructing + FastAPI with `docs_url=None`) leaves `/docs` in the allowlist as cruft; + over time the allowlist becomes a graveyard that masks what is + *actually* deliberately unversioned. Any drop-out fails fast and forces + a deletion in the same PR. + """ + actual_paths = {getattr(r, "path", None) for r in app.routes} + actual_paths.discard(None) + dead = sorted(UNVERSIONED_ALLOWLIST - actual_paths) + assert not dead, ( + f"UNVERSIONED_ALLOWLIST entries no longer present in app.routes: " + f"{dead}. Remove the dead entry — keeping it is allowlist cruft. " + "If the path was intentionally removed (e.g. FastAPI docs disabled " + "via `docs_url=None`), drop the corresponding allowlist line in " + "this same PR so the allowlist tracks reality." + ) + + +# ---------- Regex sanity checks ---------- +# +# Belt-and-braces against the allowlist swallowing a typo that the route +# walk wouldn't catch (e.g. someone mutating the regex to be too permissive). + + +def test_regex_accepts_v1_routes() -> None: + assert VERSIONED_PREFIX.match("/api/v1/chat") + assert VERSIONED_PREFIX.match("/api/v1/sessions") + assert VERSIONED_PREFIX.match("/api/v1/sessions/{session_id}") + + +def test_regex_accepts_future_major_versions() -> None: + """`/api/v2/...` and beyond are accepted; the rule isn't v1-only.""" + assert VERSIONED_PREFIX.match("/api/v2/chat") + assert VERSIONED_PREFIX.match("/api/v10/some-endpoint") + + +def test_regex_rejects_unversioned_paths() -> None: + assert not VERSIONED_PREFIX.match("/foo") + assert not VERSIONED_PREFIX.match("/api/foo") + assert not VERSIONED_PREFIX.match("/api/health") # in allowlist, not regex + assert not VERSIONED_PREFIX.match("/api/v/chat") # missing version digit + assert not VERSIONED_PREFIX.match("/api/version1/chat") # wrong prefix shape diff --git a/uv.lock b/uv.lock index a444275..bd15374 100644 --- a/uv.lock +++ b/uv.lock @@ -328,7 +328,7 @@ wheels = [ [[package]] name = "harness-python-react" -version = "0.2.4" +version = "0.2.5" source = { virtual = "." } dependencies = [ { name = "fastapi" },