diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/perf/test_correctness_under_load.py b/tests/perf/test_correctness_under_load.py
new file mode 100644
index 0000000..e8ed73c
--- /dev/null
+++ b/tests/perf/test_correctness_under_load.py
@@ -0,0 +1,104 @@
+"""Correctness assertions for benchmark drivers run with a short perf profile.
+
+Each test runs one or more registered drivers and checks the
+``correctness_checks`` flags reported in the returned measurement.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+from benchmarks.registry import get_driver
+from tigrcorn.compat.perf_runner import PerfProfile
+
+# This file lives in tests/perf/, so parents[2] is the directory containing tests/.
+ROOT = Path(__file__).resolve().parents[2]
+
+# Iteration and warm-up counts placed on every profile built in this module.
+_ITERATIONS = 10
+_WARMUPS = 1
+
+
+def _make_profile(
+    driver: str,
+    *,
+    units_per_iteration: int = 1,
+    driver_config: dict | None = None,
+) -> PerfProfile:
+    """Build a ``PerfProfile`` for *driver* with this module's iteration settings.
+
+    A missing (or otherwise falsy) ``driver_config`` becomes an empty dict.
+    """
+    settings = dict(
+        profile_id=f'correctness_{driver}',
+        family='correctness',
+        description=f'correctness test for {driver}',
+        driver=driver,
+        deployment_profile=driver,
+        iterations=_ITERATIONS,
+        warmups=_WARMUPS,
+        units_per_iteration=units_per_iteration,
+        driver_config=driver_config if driver_config else {},
+    )
+    return PerfProfile(**settings)
+
+
+def _run_driver(name: str, **kwargs) -> dict:
+    """Run the driver registered as *name* and return its measurement.
+
+    Keyword arguments are forwarded to ``_make_profile``.
+    """
+    built_profile = _make_profile(name, **kwargs)
+    driver_fn = get_driver(name)
+    return driver_fn(built_profile, source_root=ROOT)
+
+
+def _assert_correctness_keys(measurement: dict, expected_keys: list[str]) -> None:
+    """Assert zero errors and that each of *expected_keys* is reported and truthy."""
+    assert measurement['error_count'] == 0
+    reported = measurement['correctness_checks']
+    for name in expected_keys:
+        assert name in reported, f'missing correctness key: {name}'
+        assert reported[name], f'correctness check failed: {name}'
+
+
+def test_http11_parser_correctness():
+    """``http11_baseline`` must report a truthy ``parsed_head`` check."""
+    _assert_correctness_keys(_run_driver('http11_baseline'), ['parsed_head'])
+
+
+def test_hpack_roundtrip_correctness():
+    """``http2_multiplex`` with ``stream_count`` set to 10 must report ``hpack_roundtrip``."""
+    result = _run_driver('http2_multiplex', units_per_iteration=10, driver_config={'stream_count': 10})
+    _assert_correctness_keys(result, ['hpack_roundtrip'])
+
+
+def test_qpack_roundtrip_correctness():
+    """``http3_clean_network`` must report ``qpack_roundtrip`` and ``quic_decode``."""
+    _assert_correctness_keys(_run_driver('http3_clean_network'), ['qpack_roundtrip', 'quic_decode'])
+
+
+def test_websocket_frame_correctness():
+    """Check the ``ws_http11`` and ``ws_http11_permessage_deflate`` drivers in turn."""
+    cases = (
+        ('ws_http11', 'frame_roundtrip'),
+        ('ws_http11_permessage_deflate', 'deflate_roundtrip'),
+    )
+    for driver, key in cases:
+        _assert_correctness_keys(_run_driver(driver), [key])
+
+
+def test_tls_context_correctness():
+    """Check the ``tls_handshake`` and ``mtls_handshake`` drivers in turn."""
+    cases = (
+        ('tls_handshake', ['context_built', 'default_alpn']),
+        ('mtls_handshake', ['context_built', 'requires_client_cert']),
+    )
+    for driver, keys in cases:
+        _assert_correctness_keys(_run_driver(driver), keys)
+
+
+def test_content_coding_correctness():
+    """``content_coding_under_load`` with an allowlist of gzip and deflate must pass its checks."""
+    config = {'policy': 'allowlist', 'codings': ['gzip', 'deflate']}
+    measurement = _run_driver('content_coding_under_load', driver_config=config)
+    _assert_correctness_keys(measurement, ['status_ok', 'selection_valid', 'body_nonempty'])
diff --git a/tests/perf/test_driver_latency.py b/tests/perf/test_driver_latency.py
new file mode 100644
index 0000000..0967a4f
--- /dev/null
+++ b/tests/perf/test_driver_latency.py
@@ -0,0 +1,152 @@
+"""Latency sanity checks for the benchmark drivers.
+
+Each test runs a registered driver with a short profile, then bounds a
+high-percentile sample latency and checks the driver's correctness flags.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+from benchmarks.registry import get_driver
+from tigrcorn.compat.perf_runner import PerfProfile
+
+# This file lives in tests/perf/, so parents[2] is the directory containing tests/.
+ROOT = Path(__file__).resolve().parents[2]
+
+# Default iteration and warm-up counts for profiles built in this module.
+_ITERATIONS = 20
+_WARMUPS = 2
+
+
+def _make_profile(
+    driver: str,
+    *,
+    iterations: int = _ITERATIONS,
+    warmups: int = _WARMUPS,
+    units_per_iteration: int = 1,
+    driver_config: dict | None = None,
+) -> PerfProfile:
+    """Build a ``PerfProfile`` for *driver*.
+
+    Iteration and warm-up counts default to the module constants; a missing
+    (or otherwise falsy) ``driver_config`` becomes an empty dict.
+    """
+    return PerfProfile(
+        profile_id=f'perf_test_{driver}',
+        family='perf',
+        description=f'perf test for {driver}',
+        driver=driver,
+        deployment_profile=driver,
+        iterations=iterations,
+        warmups=warmups,
+        units_per_iteration=units_per_iteration,
+        driver_config=driver_config or {},
+    )
+
+
+def _run_driver(name: str, **profile_kwargs) -> dict:
+    """Run the driver registered as *name* and return its measurement.
+
+    Keyword arguments are forwarded to ``_make_profile``; naming the driver once
+    keeps the profile and the driver lookup from drifting apart.
+    """
+    profile = _make_profile(name, **profile_kwargs)
+    return get_driver(name)(profile, source_root=ROOT)
+
+
+def _assert_measurement_healthy(measurement: dict, *, max_p99_ms: float = 10.0) -> None:
+    """Assert that *measurement* is error-free, non-empty, and within the latency bound.
+
+    Args:
+        measurement: Driver result; the keys ``error_count``, ``samples_ms``,
+            ``total_duration_seconds`` and ``correctness_checks`` are read.
+        max_p99_ms: Exclusive upper bound, in milliseconds, for the checked sample.
+
+    Raises:
+        AssertionError: If any check fails.
+    """
+    error_count = measurement['error_count']
+    assert error_count == 0, f'driver reported {error_count} error(s)'
+    samples = measurement['samples_ms']
+    assert len(samples) > 0, 'driver returned no latency samples'
+    assert measurement['total_duration_seconds'] > 0.0, 'total duration must be positive'
+    ordered = sorted(samples)
+    # The index is 0 for a single sample, so a lone sample is bounded too instead of
+    # skipping the check. NOTE(review): because the rank is floored, the slowest
+    # sample is never the one checked once there are two or more samples.
+    p99 = ordered[int(0.99 * (len(ordered) - 1))]
+    assert p99 < max_p99_ms, f'p99 latency {p99:.3f}ms exceeds {max_p99_ms}ms bound'
+    for key, value in measurement['correctness_checks'].items():
+        assert value, f'correctness check failed: {key}'
+
+
+def test_http11_baseline_latency():
+    """``http11_baseline`` stays under 10 ms and yields one sample per iteration."""
+    measurement = _run_driver('http11_baseline')
+    _assert_measurement_healthy(measurement, max_p99_ms=10.0)
+    assert len(measurement['samples_ms']) == _ITERATIONS
+
+
+def test_http11_keepalive_latency():
+    """``http11_keepalive`` stays under the 10 ms bound."""
+    _assert_measurement_healthy(_run_driver('http11_keepalive'), max_p99_ms=10.0)
+
+
+def test_http11_chunked_latency():
+    """``http11_chunked_upload_download`` stays under the 10 ms bound."""
+    _assert_measurement_healthy(_run_driver('http11_chunked_upload_download'), max_p99_ms=10.0)
+
+
+def test_http2_hpack_multiplex_latency():
+    """``http2_multiplex`` stays under 10 ms with ``streams`` of at least ``_ITERATIONS * 10``."""
+    measurement = _run_driver(
+        'http2_multiplex',
+        units_per_iteration=10,
+        driver_config={'stream_count': 10},
+    )
+    _assert_measurement_healthy(measurement, max_p99_ms=10.0)
+    assert measurement['streams'] >= _ITERATIONS * 10
+
+
+def test_http2_tls_context_latency():
+    """``http2_tls`` stays under the 50 ms bound."""
+    _assert_measurement_healthy(_run_driver('http2_tls'), max_p99_ms=50.0)
+
+
+def test_http3_qpack_clean_latency():
+    """``http3_clean_network`` stays under the 10 ms bound."""
+    _assert_measurement_healthy(_run_driver('http3_clean_network'), max_p99_ms=10.0)
+
+
+def test_http3_loss_recovery_latency():
+    """``http3_loss_jitter`` stays under the 10 ms bound."""
+    _assert_measurement_healthy(_run_driver('http3_loss_jitter'), max_p99_ms=10.0)
+
+
+def test_websocket_frame_latency():
+    """``ws_http11`` stays under the 10 ms bound."""
+    _assert_measurement_healthy(_run_driver('ws_http11'), max_p99_ms=10.0)
+
+
+def test_websocket_deflate_latency():
+    """``ws_http11_permessage_deflate`` stays under the 10 ms bound."""
+    _assert_measurement_healthy(_run_driver('ws_http11_permessage_deflate'), max_p99_ms=10.0)
+
+
+def test_tls_handshake_latency():
+    """``tls_handshake`` stays under the 50 ms bound."""
+    _assert_measurement_healthy(_run_driver('tls_handshake'), max_p99_ms=50.0)
+
+
+def test_mtls_handshake_latency():
+    """``mtls_handshake`` stays under the 50 ms bound."""
+    _assert_measurement_healthy(_run_driver('mtls_handshake'), max_p99_ms=50.0)
+
+
+def test_content_coding_latency():
+    """``content_coding_under_load`` (gzip/deflate allowlist) stays under the 20 ms bound."""
+    measurement = _run_driver(
+        'content_coding_under_load',
+        driver_config={'policy': 'allowlist', 'codings': ['gzip', 'deflate']},
+    )
+    _assert_measurement_healthy(measurement, max_p99_ms=20.0)
diff --git a/tests/perf/test_matrix_integration.py b/tests/perf/test_matrix_integration.py
new file mode 100644
index 0000000..077259f
--- /dev/null
+++ b/tests/perf/test_matrix_integration.py
@@ -0,0 +1,63 @@
+"""Integration checks that run selected profiles through ``run_performance_matrix``."""
+from __future__ import annotations
+
+import json
+import tempfile
+from pathlib import Path
+
+from tigrcorn.compat.perf_runner import run_performance_matrix
+
+# This file lives in tests/perf/, so parents[2] is the directory containing tests/.
+ROOT = Path(__file__).resolve().parents[2]
+
+
+def _assert_profile_result(result) -> None:
+    """Check one profile result: its metrics, correctness verdict, and artifact files."""
+    metrics = result.metrics
+    assert metrics['throughput_ops_per_sec'] > 0
+    assert metrics['error_count'] == 0
+    assert metrics['error_rate'] == 0.0
+    assert metrics['sample_count'] > 0
+    # The correctness verdict is only enforced when the result marks it as required.
+    if result.correctness.get('required'):
+        assert result.correctness['passed'], f'{result.profile_id}: correctness failed'
+    artifact_dir = Path(result.artifact_dir)
+    for artifact in ('result.json', 'summary.json', 'env.json', 'correctness.json'):
+        assert (artifact_dir / artifact).exists(), f'{result.profile_id}: missing {artifact}'
+    payload = json.loads((artifact_dir / 'result.json').read_text(encoding='utf-8'))
+    assert payload['profile_id'] == result.profile_id
+    for metric_name in ('p99_9_ms', 'throughput_ops_per_sec'):
+        assert metric_name in payload['metrics']
+
+
+def _run_profiles_and_assert(profile_ids: list[str]) -> None:
+    """Run *profile_ids* through the matrix into a temporary directory and check each result.
+
+    Results are checked inside the ``with`` block, while the temporary
+    directory still exists.
+    """
+    with tempfile.TemporaryDirectory() as scratch:
+        summary = run_performance_matrix(
+            ROOT,
+            artifact_root=Path(scratch) / 'perf',
+            profile_ids=profile_ids,
+            establish_baseline=True,
+        )
+        assert summary.total == len(profile_ids)
+        for result in summary.profiles:
+            _assert_profile_result(result)
+
+
+def test_http_profiles_pass_thresholds():
+    """Run the ``http11_baseline`` and ``http11_keepalive`` profiles through the matrix."""
+    _run_profiles_and_assert(['http11_baseline', 'http11_keepalive'])
+
+
+def test_websocket_profiles_pass_thresholds():
+    """Run the ``ws_http11`` profile through the matrix."""
+    _run_profiles_and_assert(['ws_http11'])
+
+
+def test_tls_profiles_pass_thresholds():
+    """Run the ``tls_handshake`` profile through the matrix."""
+    _run_profiles_and_assert(['tls_handshake'])