Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions docs/benchmarks/layered_admissibility.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,14 @@ Deterministically compare admissibility outcomes across fixture bundles using Co
- no fuzzy matching
- no semantic equivalence

## Visualization

![Layered admissibility degradation curve](../media/layered_admissibility_curve.svg)

This SVG is a deterministic benchmark artifact generated directly from `artifacts/layered_admissibility_results.json` via the hand-written renderer (`src/visualization/svg_curve_renderer.py`). Rendering is pure SVG text generation with fixed canvas geometry, stable ordering, and fixed float precision (three decimals), so output is CI-friendly and reproducible with no stochastic rendering.

## Future

- add more fixture families
- add progressive degradation levels
- add SVG curve visualization later
- extend deterministic benchmark artifacts
- keep visualization static and reproducible
36 changes: 36 additions & 0 deletions docs/media/layered_admissibility_curve.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
21 changes: 21 additions & 0 deletions scripts/render_curve_svg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import annotations

import json
from pathlib import Path
import sys

# Repository root: this script lives in ``<root>/scripts/``.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Make the ``src`` package importable when the script is launched directly
# (e.g. ``python scripts/render_curve_svg.py``) rather than as a module.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src.visualization.svg_curve_renderer import SVGCurveRenderer  # noqa: E402

# Anchor both paths at the repository root so the script behaves the same
# regardless of the current working directory it is started from.
INPUT_PATH = PROJECT_ROOT / "artifacts" / "layered_admissibility_results.json"
OUTPUT_PATH = PROJECT_ROOT / "docs" / "media" / "layered_admissibility_curve.svg"


def main() -> None:
    """Render the benchmark results JSON to the committed SVG artifact.

    Reads ``INPUT_PATH``, renders it with ``SVGCurveRenderer`` and writes the
    result to ``OUTPUT_PATH``, creating the output directory if needed.
    """
    payload = json.loads(INPUT_PATH.read_text(encoding="utf-8"))
    svg = SVGCurveRenderer().render(payload)
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Force LF line endings so the artifact is byte-identical on every
    # platform (the default would translate "\n" to CRLF on Windows).
    OUTPUT_PATH.write_text(svg, encoding="utf-8", newline="\n")


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions src/visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Deterministic visualization helpers."""
131 changes: 131 additions & 0 deletions src/visualization/svg_curve_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from __future__ import annotations

from dataclasses import dataclass
from html import escape


@dataclass(frozen=True, slots=True)
class _PointLayout:
fixture_id: str
score: float
x: float
y: float
failure_labels: tuple[str, ...]


class SVGCurveRenderer:
    """Render the layered-admissibility curve as a static SVG document.

    The renderer is pure text generation: canvas geometry, tick order, colours
    and float precision are all fixed by class constants, so the same input
    always yields the same output string.
    """

    # Canvas size and the margins that bound the plot area, in SVG user units.
    WIDTH = 1000
    HEIGHT = 520
    MARGIN_LEFT = 90
    MARGIN_RIGHT = 40
    MARGIN_TOP = 70
    MARGIN_BOTTOM = 140

    TITLE = "Layered Admissibility Degradation Curve"
    X_LABEL = "Fixture progression"
    Y_LABEL = "overall_admissibility_score"

    # (fixture_id, stage label) pairs, in the left-to-right order they are drawn.
    X_TICKS: tuple[tuple[str, str], ...] = (
        ("coding_workflow_pr_review_v1", "positive"),
        ("coding_workflow_pr_review_mild_v1", "mild"),
        ("coding_workflow_pr_review_moderate_v1", "moderate"),
        ("coding_workflow_pr_review_degraded_v1", "severe"),
    )

    LEGEND_ITEMS: tuple[str, ...] = ("structural", "relational", "operational", "governance")

    # Display priority for failure labels; labels not listed here are shown
    # after these, in sorted order.
    FAILURE_ANNOTATION_ORDER: tuple[str, ...] = (
        "RECOVERY_PATH_INVALID",
        "CAUSAL_DEPENDENCY_LOSS",
        "POLICY_ORDER_BROKEN",
        "INVARIANT_VIOLATION",
    )

    def _fmt(self, value: float) -> str:
        """Format *value* with exactly three decimals."""
        return f"{value:.3f}"

    def _x_for_index(self, index: int) -> float:
        """Return the canvas x coordinate of the tick at position *index*.

        Ticks are spread evenly from the left to the right plot edge. With a
        single configured tick there is no span to divide, so the point is
        placed on the left edge instead of dividing by zero.
        """
        plot_width = self.WIDTH - self.MARGIN_LEFT - self.MARGIN_RIGHT
        last_index = len(self.X_TICKS) - 1
        if last_index <= 0:
            return float(self.MARGIN_LEFT)
        return self.MARGIN_LEFT + (plot_width * index / last_index)

    def _y_for_score(self, score: float) -> float:
        """Map *score* to a canvas y coordinate (1.0 at the top, 0.0 at the bottom)."""
        plot_height = self.HEIGHT - self.MARGIN_TOP - self.MARGIN_BOTTOM
        return self.MARGIN_TOP + ((1.0 - score) * plot_height)

    def _ordered_failure_labels(self, labels: tuple[str, ...]) -> list[str]:
        """Order *labels* for display.

        Labels named in ``FAILURE_ANNOTATION_ORDER`` come first, in that order;
        any other labels follow in sorted order so they are not silently
        dropped from the figure.
        """
        present = set(labels)
        prioritized = [label for label in self.FAILURE_ANNOTATION_ORDER if label in present]
        extras = sorted(present.difference(self.FAILURE_ANNOTATION_ORDER))
        return prioritized + extras

    def _layout_points(self, curve_json: dict) -> tuple[_PointLayout, ...]:
        """Compute one ``_PointLayout`` per entry of ``X_TICKS``.

        Args:
            curve_json: Mapping with a ``"points"`` list; each point is read
                for ``"fixture_id"``, ``"overall_admissibility_score"`` and
                ``"failure_labels"``.

        Returns:
            Layouts in ``X_TICKS`` order.

        Raises:
            KeyError: If a fixture listed in ``X_TICKS`` has no point in
                *curve_json*, or a required key is absent.
        """
        points_by_fixture = {point["fixture_id"]: point for point in curve_json["points"]}

        layouts: list[_PointLayout] = []
        for index, (fixture_id, _) in enumerate(self.X_TICKS):
            try:
                point = points_by_fixture[fixture_id]
            except KeyError:
                # Same exception type as before, but say which fixture is missing.
                raise KeyError(f"Fixture '{fixture_id}' not found in input data.") from None
            score = float(point["overall_admissibility_score"])
            layouts.append(
                _PointLayout(
                    fixture_id=fixture_id,
                    score=score,
                    x=self._x_for_index(index),
                    y=self._y_for_score(score),
                    failure_labels=tuple(sorted(point["failure_labels"])),
                )
            )
        return tuple(layouts)

    def render(self, curve_json: dict) -> str:
        """Return the complete SVG document for *curve_json* as a string.

        Args:
            curve_json: Benchmark results mapping; see ``_layout_points`` for
                the keys that are read.

        Returns:
            SVG markup, one element per line, terminated by a newline.

        Raises:
            KeyError: Propagated from ``_layout_points`` when the input lacks
                an expected fixture or key.
        """
        layouts = self._layout_points(curve_json)
        plot_bottom = self.HEIGHT - self.MARGIN_BOTTOM
        plot_right = self.WIDTH - self.MARGIN_RIGHT

        polyline_points = " ".join(f"{self._fmt(p.x)},{self._fmt(p.y)}" for p in layouts)
        elements: list[str] = [
            f'<svg xmlns="http://www.w3.org/2000/svg" width="{self.WIDTH}" height="{self.HEIGHT}" viewBox="0 0 {self.WIDTH} {self.HEIGHT}">',
            # Background follows the canvas constants so it resizes with them.
            f' <rect x="0" y="0" width="{self.WIDTH}" height="{self.HEIGHT}" fill="#ffffff"/>',
            f' <text x="{self.WIDTH/2:.1f}" y="36" text-anchor="middle" font-size="22" font-family="monospace" fill="#111111">{self.TITLE}</text>',
            f' <line x1="{self.MARGIN_LEFT}" y1="{plot_bottom}" x2="{plot_right}" y2="{plot_bottom}" stroke="#222222" stroke-width="1"/>',
            f' <line x1="{self.MARGIN_LEFT}" y1="{self.MARGIN_TOP}" x2="{self.MARGIN_LEFT}" y2="{plot_bottom}" stroke="#222222" stroke-width="1"/>',
        ]

        # Horizontal grid lines with their y-axis tick labels.
        for tick_score in (0.0, 0.5, 1.0):
            y = self._y_for_score(tick_score)
            elements.append(
                f' <line x1="{self.MARGIN_LEFT}" y1="{self._fmt(y)}" x2="{plot_right}" y2="{self._fmt(y)}" stroke="#e0e0e0" stroke-width="1"/>'
            )
            elements.append(
                f' <text x="{self.MARGIN_LEFT-12}" y="{self._fmt(y+4)}" text-anchor="end" font-size="12" font-family="monospace" fill="#333333">{self._fmt(tick_score)}</text>'
            )

        # Stage names under the x axis, one per plotted point.
        for point, (_, stage_name) in zip(layouts, self.X_TICKS):
            elements.append(
                f' <text x="{self._fmt(point.x)}" y="{plot_bottom+22}" text-anchor="middle" font-size="12" font-family="monospace" fill="#222222">{escape(stage_name)}</text>'
            )

        elements.extend(
            [
                f' <polyline points="{polyline_points}" fill="none" stroke="#0055aa" stroke-width="3"/>',
                f' <text x="{self.WIDTH/2:.1f}" y="{self.HEIGHT-20}" text-anchor="middle" font-size="13" font-family="monospace" fill="#111111">{self.X_LABEL}</text>',
                f' <text x="20" y="{self.HEIGHT/2:.1f}" transform="rotate(-90 20 {self.HEIGHT/2:.1f})" text-anchor="middle" font-size="13" font-family="monospace" fill="#111111">{self.Y_LABEL}</text>',
            ]
        )

        # Point markers, each captioned with its fixture id and score.
        for point in layouts:
            elements.append(
                f' <circle cx="{self._fmt(point.x)}" cy="{self._fmt(point.y)}" r="5" fill="#0055aa"/>'
            )
            elements.append(
                f' <text x="{self._fmt(point.x)}" y="{self._fmt(point.y-12)}" text-anchor="middle" font-size="11" font-family="monospace" fill="#111111">{escape(point.fixture_id)} | {self._fmt(point.score)}</text>'
            )

        # Failure annotations below the stage names. The first tick is skipped
        # (presumably because it is the positive baseline -- confirm).
        y_base = plot_bottom + 44
        for point in layouts[1:]:
            ordered_labels = self._ordered_failure_labels(point.failure_labels)
            if ordered_labels:
                # Labels come from the input JSON, so escape them for XML.
                label_text = ", ".join(escape(label) for label in ordered_labels)
                elements.append(
                    f' <text x="{self._fmt(point.x)}" y="{y_base}" text-anchor="middle" font-size="10" font-family="monospace" fill="#aa2200">{label_text}</text>'
                )

        legend_x = 700
        legend_y = 84
        elements.append(f' <rect x="{legend_x}" y="{legend_y}" width="250" height="104" fill="#f8f8f8" stroke="#cccccc"/>')
        elements.append(f' <text x="{legend_x+12}" y="{legend_y+18}" font-size="12" font-family="monospace" fill="#111111">Legend (component scores)</text>')
        for idx, item in enumerate(self.LEGEND_ITEMS):
            elements.append(
                f' <text x="{legend_x+16}" y="{legend_y+36 + idx*16}" font-size="11" font-family="monospace" fill="#333333">- {item}</text>'
            )

        elements.append("</svg>")
        return "\n".join(elements) + "\n"
66 changes: 66 additions & 0 deletions tests/test_svg_curve_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from __future__ import annotations

import json
import re
from pathlib import Path

from src.visualization.svg_curve_renderer import SVGCurveRenderer

INPUT_PATH = Path("artifacts/layered_admissibility_results.json")
SVG_PATH = Path("docs/media/layered_admissibility_curve.svg")


def _render() -> str:
    """Load the results JSON from ``INPUT_PATH`` and return its rendered SVG text."""
    with INPUT_PATH.open(encoding="utf-8") as handle:
        results = json.load(handle)
    renderer = SVGCurveRenderer()
    return renderer.render(results)


def test_svg_render_is_deterministic() -> None:
    """Two consecutive renders of the same input produce identical text."""
    first_pass = _render()
    second_pass = _render()
    assert first_pass == second_pass


def test_svg_root_exists() -> None:
    """The output opens with the namespaced ``<svg>`` tag and closes with ``</svg>``."""
    svg_text = _render()
    opening_tag = '<svg xmlns="http://www.w3.org/2000/svg"'
    assert svg_text.startswith(opening_tag)
    trimmed = svg_text.strip()
    assert trimmed.endswith("</svg>")


def test_svg_contains_fixture_labels() -> None:
    """Every benchmark fixture id appears somewhere in the rendered SVG."""
    svg_text = _render()
    expected_ids = (
        "coding_workflow_pr_review_v1",
        "coding_workflow_pr_review_mild_v1",
        "coding_workflow_pr_review_moderate_v1",
        "coding_workflow_pr_review_degraded_v1",
    )
    for fixture_id in expected_ids:
        assert fixture_id in svg_text


def test_svg_contains_expected_failure_annotations() -> None:
    """Each expected failure label is present in the rendered SVG."""
    svg_text = _render()
    expected_labels = (
        "RECOVERY_PATH_INVALID",
        "CAUSAL_DEPENDENCY_LOSS",
        "POLICY_ORDER_BROKEN",
        "INVARIANT_VIOLATION",
    )
    missing = [label for label in expected_labels if label not in svg_text]
    assert not missing


def test_svg_polyline_coordinates_monotonic_degradation() -> None:
    """The polyline's y coordinates never decrease from left to right."""
    svg_text = _render()
    found = re.search(r'<polyline points="([^"]+)"', svg_text)
    assert found
    coordinate_pairs = found.group(1).split(" ")
    # Each pair is "x,y"; keep only the y component.
    y_values = [float(pair.split(",")[1]) for pair in coordinate_pairs]
    assert y_values == sorted(y_values)


def test_svg_uses_stable_float_formatting() -> None:
    """Known three-decimal coordinate and score fragments appear in the output."""
    svg_text = _render()
    expected_fragments = ("960.000,225.000", "380.000,95.833", "0.917")
    for fragment in expected_fragments:
        assert fragment in svg_text


def test_rendered_svg_matches_committed_artifact() -> None:
    """A fresh render equals the SVG file stored at ``SVG_PATH``."""
    # Render first, then read the committed file, so failures surface in the
    # same order as before.
    assert _render() == SVG_PATH.read_text(encoding="utf-8")
Loading