Skip to content

Commit 8116629

Browse files
authored
feat: observability setup (OTel SDK, OTLP exporter, structured logging, span helpers) (#19) (#56)
1 parent 6a3516b commit 8116629

7 files changed

Lines changed: 610 additions & 0 deletions

File tree

pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ dependencies = [
3434
"pydantic>=2.11.0",
3535
"pydantic-settings>=2.9.0",
3636
"httpx>=0.28.1",
37+
"opentelemetry-api>=1.33.0",
38+
"opentelemetry-sdk>=1.33.0",
39+
"opentelemetry-exporter-otlp-proto-grpc>=1.33.0",
40+
"opentelemetry-instrumentation-fastapi>=0.62b0",
41+
"opentelemetry-instrumentation-httpx>=0.54b0",
42+
"opentelemetry-instrumentation-logging>=0.54b0",
3743
]
3844

3945
[project.optional-dependencies]

src/api/main.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@
1212

1313
from src.api.routes import router as v1_router
1414
from src.api.sessions import SessionStore
15+
from src.observability.logging import setup_logging
16+
from src.observability.tracing import (
17+
instrument_fastapi,
18+
instrument_httpx,
19+
setup_tracing,
20+
)
1521

1622
if TYPE_CHECKING:
1723
from collections.abc import AsyncIterator
@@ -36,6 +42,10 @@ def _package_version() -> str:
3642
@asynccontextmanager
3743
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
3844
"""Application lifespan: initialise process-wide services on startup."""
45+
setup_tracing()
46+
setup_logging()
47+
instrument_httpx()
48+
instrument_fastapi(app)
3949
app.state.session_store = SessionStore()
4050
logger.info("harness-python-react API started (v%s)", _package_version())
4151
yield

src/observability/logging.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""Structured JSON logging with OpenTelemetry trace correlation."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import logging
7+
import os
8+
import sys
9+
from datetime import UTC, datetime
10+
11+
12+
class _JSONFormatter(logging.Formatter):
13+
"""Format log records as single-line JSON with OTel trace context."""
14+
15+
def format(self, record: logging.LogRecord) -> str:
16+
entry: dict[str, str | int | float] = {
17+
"timestamp": datetime.fromtimestamp(record.created, tz=UTC).isoformat(),
18+
"level": record.levelname,
19+
"module": record.module,
20+
"message": record.getMessage(),
21+
}
22+
23+
# Trace context injected by OTel logging instrumentation
24+
entry["trace_id"] = getattr(record, "otelTraceID", "0")
25+
entry["span_id"] = getattr(record, "otelSpanID", "0")
26+
27+
return json.dumps(entry, default=str)
28+
29+
30+
def setup_logging(level: str | None = None) -> None:
31+
"""Configure Python logging with structured JSON output and OTel correlation.
32+
33+
Reads ``LOG_LEVEL`` from the environment if *level* is not provided.
34+
35+
Args:
36+
level: Logging level name (e.g. "INFO", "DEBUG"). Falls back to the
37+
``LOG_LEVEL`` environment variable, then to ``"INFO"``.
38+
"""
39+
from opentelemetry.instrumentation.logging import LoggingInstrumentor
40+
41+
resolved_level = (level if level else os.getenv("LOG_LEVEL", "INFO")).upper()
42+
43+
# Instrument stdlib logging so trace/span IDs are injected
44+
LoggingInstrumentor().instrument(set_logging_format=False)
45+
46+
root = logging.getLogger()
47+
root.setLevel(resolved_level)
48+
49+
# Remove existing handlers to avoid duplicate output
50+
root.handlers.clear()
51+
52+
handler = logging.StreamHandler(sys.stderr)
53+
handler.setFormatter(_JSONFormatter())
54+
root.addHandler(handler)

src/observability/spans.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""Span helper utilities and OTel semantic-convention attribute keys.
2+
3+
Use semconv-defined attribute names where one exists. The full GenAI registry
4+
is at https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/
5+
and the DB registry at https://opentelemetry.io/docs/specs/semconv/database/.
6+
7+
Semconv-stable attribute keys live as module constants so a typo at the
8+
call site is a NameError, not a silently-different attribute.
9+
"""
10+
11+
from __future__ import annotations
12+
13+
from contextlib import contextmanager
14+
from typing import TYPE_CHECKING
15+
16+
from opentelemetry import trace
17+
18+
if TYPE_CHECKING:
19+
from collections.abc import Iterator, Mapping
20+
21+
# ---------------------------------------------------------------------------
22+
# GenAI semantic convention attribute keys
23+
# https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/
24+
# ---------------------------------------------------------------------------
25+
26+
# GenAI — conversation & request
27+
GENAI_CONVERSATION_ID: str = "gen_ai.conversation.id"
28+
GENAI_REQUEST_MODEL: str = "gen_ai.request.model"
29+
GENAI_OPERATION_NAME: str = "gen_ai.operation.name"
30+
GENAI_PROVIDER_NAME: str = "gen_ai.provider.name"
31+
32+
# GenAI — usage (token counts)
33+
GENAI_USAGE_INPUT_TOKENS: str = "gen_ai.usage.input_tokens"
34+
GENAI_USAGE_OUTPUT_TOKENS: str = "gen_ai.usage.output_tokens"
35+
36+
# GenAI — tool calling
37+
GENAI_TOOL_NAME: str = "gen_ai.tool.name"
38+
GENAI_TOOL_CALL_ARGUMENTS: str = "gen_ai.tool.call.arguments"
39+
GENAI_TOOL_CALL_RESULT: str = "gen_ai.tool.call.result"
40+
41+
# DB semantic convention attribute keys
42+
# https://opentelemetry.io/docs/specs/semconv/database/
43+
DB_QUERY_TEXT: str = "db.query.text"
44+
DB_RESPONSE_RETURNED_ROWS: str = "db.response.returned_rows"
45+
46+
47+
# ---------------------------------------------------------------------------
48+
# Span helpers
49+
# ---------------------------------------------------------------------------
50+
51+
52+
@contextmanager
53+
def agent_span(
54+
name: str,
55+
attributes: Mapping[str, str | int | float | bool] | None = None,
56+
) -> Iterator[trace.Span]:
57+
"""Create a span and yield it for further mutation.
58+
59+
Args:
60+
name: Human-readable span name.
61+
attributes: Initial key-value attributes to set on the span.
62+
"""
63+
tracer = trace.get_tracer(__name__)
64+
with tracer.start_as_current_span(name) as span:
65+
if attributes:
66+
for key, value in attributes.items():
67+
span.set_attribute(key, value)
68+
yield span
69+
70+
71+
def set_span_attributes(
72+
span: trace.Span,
73+
**kwargs: str | int | float | bool | None,
74+
) -> None:
75+
"""Set multiple attributes on a span, filtering ``None`` values.
76+
77+
Args:
78+
span: The span to annotate.
79+
**kwargs: Attribute key-value pairs. ``None`` values are silently
80+
skipped.
81+
"""
82+
for key, value in kwargs.items():
83+
if value is not None:
84+
span.set_attribute(key, value)

src/observability/tracing.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
"""Tracer provider setup, exporters, and auto-instrumentation."""
2+
3+
from __future__ import annotations
4+
5+
import os
6+
from typing import TYPE_CHECKING
7+
8+
from opentelemetry import trace
9+
from opentelemetry.sdk.resources import Resource
10+
from opentelemetry.sdk.trace import TracerProvider
11+
from opentelemetry.sdk.trace.export import (
12+
BatchSpanProcessor,
13+
ConsoleSpanExporter,
14+
)
15+
16+
if TYPE_CHECKING:
17+
from fastapi import FastAPI
18+
19+
20+
def setup_tracing() -> TracerProvider:
21+
"""Create and configure the global tracer provider.
22+
23+
Reads configuration from environment variables:
24+
25+
- ``OTEL_SERVICE_NAME`` — Resource ``service.name`` attribute.
26+
Default ``harness-python-react``.
27+
- ``OTEL_EXPORTER`` — ``otlp`` (default) or ``console``.
28+
- ``OTEL_EXPORTER_OTLP_ENDPOINT`` — gRPC endpoint for OTLP exporter
29+
(default ``http://localhost:4317``;
30+
Jaeger via docker-compose).
31+
32+
Returns the configured TracerProvider.
33+
"""
34+
service_name = os.getenv("OTEL_SERVICE_NAME", "harness-python-react")
35+
resource = Resource.create({"service.name": service_name})
36+
provider = TracerProvider(resource=resource)
37+
38+
exporter_type = os.getenv("OTEL_EXPORTER", "otlp")
39+
40+
if exporter_type == "console":
41+
provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
42+
else:
43+
endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317")
44+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
45+
OTLPSpanExporter,
46+
)
47+
48+
exporter = OTLPSpanExporter(endpoint=endpoint, insecure=True)
49+
provider.add_span_processor(BatchSpanProcessor(exporter))
50+
51+
trace.set_tracer_provider(provider)
52+
return provider
53+
54+
55+
def get_tracer(name: str) -> trace.Tracer:
56+
"""Get a tracer from the global provider.
57+
58+
Args:
59+
name: Logical name for the tracer, typically the module path.
60+
"""
61+
return trace.get_tracer(name)
62+
63+
64+
def instrument_fastapi(app: FastAPI) -> None:
65+
"""Apply OpenTelemetry auto-instrumentation to a FastAPI application."""
66+
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
67+
68+
FastAPIInstrumentor.instrument_app(app)
69+
70+
71+
def instrument_httpx() -> None:
72+
"""Apply OpenTelemetry auto-instrumentation to httpx HTTP clients."""
73+
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
74+
75+
HTTPXClientInstrumentor().instrument()

tests/test_observability.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""Tests for src/observability/ — tracing, logging, span helpers."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import logging
7+
from io import StringIO
8+
9+
import pytest
10+
from opentelemetry import trace
11+
from opentelemetry.sdk.trace import TracerProvider
12+
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
13+
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
14+
InMemorySpanExporter,
15+
)
16+
17+
from src.observability.logging import _JSONFormatter, setup_logging
18+
from src.observability.spans import (
19+
GENAI_CONVERSATION_ID,
20+
GENAI_OPERATION_NAME,
21+
GENAI_REQUEST_MODEL,
22+
GENAI_TOOL_NAME,
23+
agent_span,
24+
set_span_attributes,
25+
)
26+
27+
28+
@pytest.fixture()
29+
def exporter() -> InMemorySpanExporter:
30+
"""Attach an in-memory span exporter to the (possibly pre-existing)
31+
global tracer provider. Clears captured spans on entry so each test
32+
sees a clean slate even when OTel's provider override has already
33+
been set by a prior test."""
34+
captured = InMemorySpanExporter()
35+
provider = trace.get_tracer_provider()
36+
if not isinstance(provider, TracerProvider):
37+
provider = TracerProvider()
38+
trace.set_tracer_provider(provider)
39+
provider.add_span_processor(SimpleSpanProcessor(captured))
40+
captured.clear()
41+
return captured
42+
43+
44+
def test_agent_span_records(exporter: InMemorySpanExporter) -> None:
45+
with agent_span("op", attributes={GENAI_OPERATION_NAME: "echo"}):
46+
pass
47+
48+
spans = exporter.get_finished_spans()
49+
assert len(spans) == 1
50+
assert spans[0].name == "op"
51+
attrs = dict(spans[0].attributes or {})
52+
assert attrs[GENAI_OPERATION_NAME] == "echo"
53+
54+
55+
def test_set_span_attributes_skips_none(exporter: InMemorySpanExporter) -> None:
56+
with agent_span("op") as span:
57+
set_span_attributes(
58+
span,
59+
**{
60+
GENAI_REQUEST_MODEL: "gpt-4o-mini",
61+
GENAI_CONVERSATION_ID: None,
62+
GENAI_TOOL_NAME: "echo_tool",
63+
},
64+
)
65+
66+
spans = exporter.get_finished_spans()
67+
attrs = dict(spans[0].attributes or {})
68+
assert attrs[GENAI_REQUEST_MODEL] == "gpt-4o-mini"
69+
assert attrs[GENAI_TOOL_NAME] == "echo_tool"
70+
assert GENAI_CONVERSATION_ID not in attrs
71+
72+
73+
def test_semconv_attributes_have_dotted_names() -> None:
74+
"""Sanity check: every exported semconv key uses the official dotted form."""
75+
for key in (
76+
GENAI_CONVERSATION_ID,
77+
GENAI_REQUEST_MODEL,
78+
GENAI_OPERATION_NAME,
79+
GENAI_TOOL_NAME,
80+
):
81+
assert key.startswith("gen_ai.")
82+
83+
84+
def test_json_formatter_emits_trace_and_span_ids() -> None:
85+
formatter = _JSONFormatter()
86+
record = logging.LogRecord(
87+
name="test",
88+
level=logging.INFO,
89+
pathname="test.py",
90+
lineno=1,
91+
msg="hello %s",
92+
args=("world",),
93+
exc_info=None,
94+
)
95+
record.otelTraceID = "abc123"
96+
record.otelSpanID = "def456"
97+
98+
payload = json.loads(formatter.format(record))
99+
assert payload["message"] == "hello world"
100+
assert payload["trace_id"] == "abc123"
101+
assert payload["span_id"] == "def456"
102+
103+
104+
def test_setup_logging_attaches_json_handler(
105+
monkeypatch: pytest.MonkeyPatch,
106+
) -> None:
107+
monkeypatch.setenv("LOG_LEVEL", "DEBUG")
108+
setup_logging()
109+
110+
root = logging.getLogger()
111+
assert root.level == logging.DEBUG
112+
assert any(isinstance(h.formatter, _JSONFormatter) for h in root.handlers)
113+
114+
# Round-trip: emit a log record and assert the JSON is single-line valid.
115+
handler = next(h for h in root.handlers if isinstance(h.formatter, _JSONFormatter))
116+
assert isinstance(handler, logging.StreamHandler)
117+
buf = StringIO()
118+
handler.setStream(buf)
119+
root.info("ping")
120+
line = buf.getvalue().strip()
121+
assert "\n" not in line
122+
parsed = json.loads(line)
123+
assert parsed["message"] == "ping"
124+
assert parsed["level"] == "INFO"

0 commit comments

Comments
 (0)