Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "harness-python-react"
version = "0.2.4"
version = "0.2.5"
description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
readme = "README.md"
requires-python = ">=3.14"
Expand Down
225 changes: 225 additions & 0 deletions tests/test_otel_semconv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
"""Assertions that emitted spans use OTel semantic-convention attribute names.

Test-side enforcement of the rule "use the official OTel semconv attribute
names where one exists" (see `src/observability/spans.py` and
`docs/INVARIANTS.md`).

The scaffold ships only semconv-defined keys (no custom ones); the
`ALLOWED_CUSTOM_KEYS` set below is intentionally empty. When a project
that extends this template needs a custom attribute (e.g.
`agent.tool_calls_count`), it MUST add a constant to
`src/observability/spans.py` AND register it in `ALLOWED_CUSTOM_KEYS` —
the registry-closure test fails otherwise. That's the forcing function
that keeps custom keys to their documented set rather than letting them
proliferate as one-off raw strings.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

import pytest
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
InMemorySpanExporter,
)

from src.observability import spans as spans_module
from src.observability.spans import (
DB_QUERY_TEXT,
DB_RESPONSE_RETURNED_ROWS,
GENAI_CONVERSATION_ID,
GENAI_OPERATION_NAME,
GENAI_PROVIDER_NAME,
GENAI_REQUEST_MODEL,
GENAI_TOOL_CALL_ARGUMENTS,
GENAI_TOOL_CALL_RESULT,
GENAI_TOOL_NAME,
GENAI_USAGE_INPUT_TOKENS,
GENAI_USAGE_OUTPUT_TOKENS,
agent_span,
)

if TYPE_CHECKING:
from collections.abc import Sequence

from opentelemetry.sdk.trace import ReadableSpan


SEMCONV_KEYS: frozenset[str] = frozenset(
{
GENAI_CONVERSATION_ID,
GENAI_REQUEST_MODEL,
GENAI_OPERATION_NAME,
GENAI_PROVIDER_NAME,
GENAI_USAGE_INPUT_TOKENS,
GENAI_USAGE_OUTPUT_TOKENS,
GENAI_TOOL_NAME,
GENAI_TOOL_CALL_ARGUMENTS,
GENAI_TOOL_CALL_RESULT,
DB_QUERY_TEXT,
DB_RESPONSE_RETURNED_ROWS,
}
)

# Custom keys allowed because no semconv equivalent exists. Empty in the
# scaffold — projects that extend this template add entries here as they
# add custom span attributes (and constants in `src/observability/spans.py`).
ALLOWED_CUSTOM_KEYS: frozenset[str] = frozenset()

ACCEPTED_KEYS: frozenset[str] = SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS


@pytest.fixture
def memory_exporter(monkeypatch: pytest.MonkeyPatch) -> InMemorySpanExporter:
"""Install a TracerProvider that records spans into an in-memory exporter.

Uses `monkeypatch.setattr` to swap `trace.get_tracer_provider` for the
test's lifetime instead of `trace.set_tracer_provider` — the OTel SDK
refuses to override the global provider once any other test has called
`set_tracer_provider`, which would silently route spans to the wrong
exporter.

Uses `SimpleSpanProcessor` so spans are visible to `get_finished_spans()`
immediately after the span context exits — no flush race.
"""
exporter = InMemorySpanExporter()
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(exporter))
monkeypatch.setattr(trace, "get_tracer_provider", lambda: provider)
return exporter


def _span_by_name(spans: Sequence[ReadableSpan], name: str) -> ReadableSpan:
"""Return the unique span with the given name (assert exactly one)."""
matches = [s for s in spans if s.name == name]
assert len(matches) == 1, (
f"expected exactly one span named {name!r}, got {len(matches)} "
f"(all names: {[s.name for s in spans]})"
)
return matches[0]


def test_tool_call_span_carries_genai_tool_attributes(
memory_exporter: InMemorySpanExporter,
) -> None:
"""A tool-call span carries the documented `gen_ai.tool.*` keys."""
tool_attrs: dict[str, str | int] = {
GENAI_TOOL_NAME: "echo",
GENAI_TOOL_CALL_ARGUMENTS: '{"msg": "hi"}',
GENAI_TOOL_CALL_RESULT: '{"echoed": "hi"}',
}
with agent_span("tool.echo", tool_attrs):
pass

spans = memory_exporter.get_finished_spans()
tool_span = _span_by_name(spans, "tool.echo")
assert tool_span.attributes is not None
keys = set(tool_span.attributes)

assert GENAI_TOOL_NAME in keys
assert GENAI_TOOL_CALL_ARGUMENTS in keys
assert GENAI_TOOL_CALL_RESULT in keys
assert tool_span.attributes[GENAI_TOOL_NAME] == "echo"


def test_llm_call_span_carries_genai_attributes(
memory_exporter: InMemorySpanExporter,
) -> None:
"""An LLM-style span carries the documented `gen_ai.*` keys + token usage."""
llm_attrs: dict[str, str | int] = {
GENAI_CONVERSATION_ID: "session-test-1",
GENAI_REQUEST_MODEL: "gpt-4o-mini",
GENAI_OPERATION_NAME: "chat",
GENAI_PROVIDER_NAME: "openai",
GENAI_USAGE_INPUT_TOKENS: 42,
GENAI_USAGE_OUTPUT_TOKENS: 100,
}
with agent_span("agent.llm_call", llm_attrs):
pass

spans = memory_exporter.get_finished_spans()
llm_span = _span_by_name(spans, "agent.llm_call")
assert llm_span.attributes is not None

for key, value in llm_attrs.items():
assert key in llm_span.attributes, f"missing {key}"
assert llm_span.attributes[key] == value


def test_db_query_span_carries_db_attributes(
memory_exporter: InMemorySpanExporter,
) -> None:
"""A DB-query span carries `db.query.text` + `db.response.returned_rows`."""
db_attrs: dict[str, str | int] = {
DB_QUERY_TEXT: "SELECT 1",
DB_RESPONSE_RETURNED_ROWS: 1,
}
with agent_span("db.example_query", db_attrs):
pass

spans = memory_exporter.get_finished_spans()
db_span = _span_by_name(spans, "db.example_query")
assert db_span.attributes is not None
keys = set(db_span.attributes)

assert DB_QUERY_TEXT in keys
assert DB_RESPONSE_RETURNED_ROWS in keys


def test_no_unregistered_attribute_keys_emitted(
memory_exporter: InMemorySpanExporter,
) -> None:
"""Every emitted attribute key is in `SEMCONV_KEYS` or `ALLOWED_CUSTOM_KEYS`."""
with agent_span(
"tool.echo",
{
GENAI_TOOL_NAME: "echo",
GENAI_TOOL_CALL_ARGUMENTS: "{}",
GENAI_TOOL_CALL_RESULT: "{}",
},
):
pass

spans = memory_exporter.get_finished_spans()

rogue: list[tuple[str, str]] = []
for span in spans:
if span.attributes is None:
continue
for key in span.attributes:
if key not in ACCEPTED_KEYS:
rogue.append((span.name, key))

assert not rogue, (
"Span attributes emitted with keys outside the documented registry: "
f"{rogue}. Either use a semconv name from `src/observability/spans.py` "
"or add the new key to both `spans.py` (with a comment explaining why "
"no semconv equivalent applies) and `ALLOWED_CUSTOM_KEYS` in this test."
)


def test_spans_module_constants_are_registered() -> None:
"""Every uppercase `str` constant in `spans.py` belongs to the test allowlist.

The allowlist is `SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS`. Catches the
"added a new constant in `spans.py` without updating the test" drift class.
If this fails, either:

- the new constant is a semconv name → add it to `SEMCONV_KEYS`.
- the new constant is a custom name → add it to `ALLOWED_CUSTOM_KEYS`
AND ensure the comment in `spans.py` explains why no semconv applies.
"""
declared = {
getattr(spans_module, name)
for name in dir(spans_module)
if name.isupper() and isinstance(getattr(spans_module, name), str)
}
unregistered = declared - ACCEPTED_KEYS
assert not unregistered, (
f"`spans.py` defines constants not in the test allowlist: "
f"{sorted(unregistered)!r}. Update `SEMCONV_KEYS` or "
"`ALLOWED_CUSTOM_KEYS` in this test."
)
105 changes: 105 additions & 0 deletions tests/test_route_versioning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""Audit every FastAPI route is `/api/v*/...` or in the documented allowlist.

Backstops invariant 7 in `docs/INVARIANTS.md` (was *aspirational on test*).
A future contributor adding `@app.get("/foo")` instead of registering the
endpoint on the versioned `APIRouter(prefix="/api/v1")` would silently
introduce an un-versioned route — breaking the API-versioning contract from
`docs/DEVELOPMENT.md` without any gate firing. This test is the gate.
"""

from __future__ import annotations

import re

from src.api.main import app

VERSIONED_PREFIX = re.compile(r"^/api/v\d+/")

# Routes deliberately NOT under `/api/v*/`. Add an entry here only with a
# comment naming the structural reason — never just "we needed an exception".
UNVERSIONED_ALLOWLIST: frozenset[str] = frozenset(
{
# FastAPI auto-generated documentation surface. Disabled by passing
# docs_url=None / redoc_url=None to FastAPI() if needed; the
# scaffold keeps them enabled at the unversioned root.
"/openapi.json",
"/docs",
"/docs/oauth2-redirect",
"/redoc",
}
)


def test_every_route_is_versioned_or_allowlisted() -> None:
"""Every routable path is `/api/v*/` or in the explicit allowlist."""
offenders: list[str] = []
for route in app.routes:
path = getattr(route, "path", None)
if path is None:
# Not an APIRoute (e.g. Mount). Skip cleanly — `path` is the
# discriminator we care about.
continue
if path in UNVERSIONED_ALLOWLIST:
continue
if not VERSIONED_PREFIX.match(path):
offenders.append(path)

assert not offenders, (
f"un-versioned route(s) detected in app.routes: {offenders!r}. Per "
"docs/DEVELOPMENT.md API Versioning, every routable path must match "
"`/api/v*/<…>` or be added to UNVERSIONED_ALLOWLIST in this test "
"with a comment naming the structural reason. Common entry points "
'for an un-versioned path: a router wired without `prefix="/api/vN"`, '
"an `@app.<verb>` decorator on the FastAPI app instead of a versioned "
"APIRouter, a manual `app.add_api_route(...)` call, or a Mount that "
"exposes paths at the unversioned root. Fix at the wiring site or, "
"if the path is genuinely unversioned by design (cf. `/api/health`), "
"add it to the allowlist."
)


def test_allowlist_has_no_dead_entries() -> None:
"""Every UNVERSIONED_ALLOWLIST entry must correspond to a real route.

Without this assertion, removing a docs surface (e.g. constructing
FastAPI with `docs_url=None`) leaves `/docs` in the allowlist as cruft;
over time the allowlist becomes a graveyard that masks what is
*actually* deliberately unversioned. Any drop-out fails fast and forces
a deletion in the same PR.
"""
actual_paths = {getattr(r, "path", None) for r in app.routes}
actual_paths.discard(None)
dead = sorted(UNVERSIONED_ALLOWLIST - actual_paths)
assert not dead, (
f"UNVERSIONED_ALLOWLIST entries no longer present in app.routes: "
f"{dead}. Remove the dead entry — keeping it is allowlist cruft. "
"If the path was intentionally removed (e.g. FastAPI docs disabled "
"via `docs_url=None`), drop the corresponding allowlist line in "
"this same PR so the allowlist tracks reality."
)


# ---------- Regex sanity checks ----------
#
# Belt-and-braces against the allowlist swallowing a typo that the route
# walk wouldn't catch (e.g. someone mutating the regex to be too permissive).


def test_regex_accepts_v1_routes() -> None:
assert VERSIONED_PREFIX.match("/api/v1/chat")
assert VERSIONED_PREFIX.match("/api/v1/sessions")
assert VERSIONED_PREFIX.match("/api/v1/sessions/{session_id}")


def test_regex_accepts_future_major_versions() -> None:
"""`/api/v2/...` and beyond are accepted; the rule isn't v1-only."""
assert VERSIONED_PREFIX.match("/api/v2/chat")
assert VERSIONED_PREFIX.match("/api/v10/some-endpoint")


def test_regex_rejects_unversioned_paths() -> None:
assert not VERSIONED_PREFIX.match("/foo")
assert not VERSIONED_PREFIX.match("/api/foo")
assert not VERSIONED_PREFIX.match("/api/health") # in allowlist, not regex
assert not VERSIONED_PREFIX.match("/api/v/chat") # missing version digit
assert not VERSIONED_PREFIX.match("/api/version1/chat") # wrong prefix shape
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading