Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,35 @@ jobs:
python -m pip install --upgrade pip
pip install -r backend/requirements.txt -r backend/requirements-dev.txt
- name: Run backend tests
run: pytest testing/backend -q
run: pytest testing/backend -q -m "not benchmark"

# Performance benchmark job: installs the backend dependencies, runs
# scripts/run_benchmarks.py, uploads benchmark_results.json as an artifact and
# emits a warning annotation when the benchmark step fails.
benchmark:
runs-on: ubuntu-latest
# Only starts after the backend-lint job has completed successfully.
needs: [backend-lint]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install backend system dependencies
run: sudo apt-get update && sudo apt-get install -y libcairo2-dev pkg-config
- name: Install backend dependencies
run: |
python -m pip install --upgrade pip
pip install -r backend/requirements.txt -r backend/requirements-dev.txt
- name: Run benchmarks
# The id lets the annotation step below inspect this step's outcome.
id: run_benchmarks
run: python3 scripts/run_benchmarks.py
# A non-zero exit from the benchmark script does not fail the job; the
# remaining steps still run.
continue-on-error: true
- name: Upload benchmark results artifact
uses: actions/upload-artifact@v4
with:
name: benchmark-results
path: benchmark_results.json
- name: Add warning annotation on failure
# `outcome` is the step result before continue-on-error is applied, so this
# condition is true when the benchmark script exited non-zero.
if: steps.run_benchmarks.outcome == 'failure'
run: |
echo "::warning::Performance benchmark thresholds exceeded or benchmarks failed to run. Check the job logs for details."

frontend-checks:
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ venv_tests/
*.swo
*~
.DS_Store

# Testing
.pytest_cache/
.coverage
htmlcov/
*.cover
.hypothesis/
benchmark_results.json

# Database
*.db
Expand Down
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,10 @@ build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["."]
include = ["backend*"]

[tool.pytest.ini_options]
markers = [
"benchmark: performance benchmark tests (deselect with '-m not benchmark')",
]
asyncio_mode = "strict"
python_files = ["test_*.py", "bench_*.py"]
115 changes: 115 additions & 0 deletions scripts/run_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#!/usr/bin/env python3
"""
Benchmark Runner Script for SecuScan.
Runs the performance benchmarks, compares results against thresholds,
and exits non-zero if any regressions are detected.
"""

import json
import os
import subprocess
import sys
from pathlib import Path

# ANSI color codes
GREEN = "\033[92m"
RED = "\033[91m"
BOLD = "\033[1m"
RESET = "\033[0m"


def main():
    """Run the benchmark suite and compare its results against thresholds.

    Steps:
      1. Load ``testing/backend/benchmarks/thresholds.json``.
      2. Delete any ``benchmark_results.json`` left over from a previous run.
      3. Run pytest on the benchmarks directory with ``-m benchmark``.
      4. Read ``benchmark_results.json`` (expected to be produced by the
         benchmark run) and print a per-metric report.

    Metrics whose name contains ``"throughput"`` pass when the measured value
    is at least the threshold; every other metric passes when the measured
    value is at most the threshold.

    Exits with status 0 only when every threshold metric is present and within
    bounds AND pytest itself exited successfully. Exits with status 1 when the
    thresholds file is missing, a stale results file cannot be removed, no
    results file is produced, any metric is missing or out of bounds, or
    pytest reported a failure.
    """
    root_dir = Path(__file__).resolve().parents[1]
    thresholds_path = (
        root_dir / "testing" / "backend" / "benchmarks" / "thresholds.json"
    )
    results_path = root_dir / "benchmark_results.json"

    # 1. Load thresholds
    if not thresholds_path.exists():
        print(f"{RED}Error: Thresholds file not found at {thresholds_path}{RESET}")
        sys.exit(1)

    with open(thresholds_path, encoding="utf-8") as f:
        thresholds = json.load(f)

    # Remove stale results from a previous run. A missing file is fine, but if
    # an existing file cannot be removed we must stop: otherwise old numbers
    # could be reported as if they came from this run.
    try:
        results_path.unlink()
    except FileNotFoundError:
        pass
    except OSError as exc:
        print(
            f"{RED}Error: Could not remove stale results file {results_path}: {exc}{RESET}"
        )
        sys.exit(1)

    # 2. Run pytest benchmarks
    print(f"{BOLD}Running SecuScan Performance Benchmarks...{RESET}\n")
    cmd = [
        sys.executable,
        "-m",
        "pytest",
        str(root_dir / "testing" / "backend" / "benchmarks"),
        "-m",
        "benchmark",
        "-v",
        "-s",
    ]

    # Output is not captured: pytest writes straight to this process's
    # stdout/stderr. Only the exit code is kept for the final verdict.
    result = subprocess.run(cmd, cwd=str(root_dir))

    # 3. Read results
    if not results_path.exists():
        print(f"\n{RED}Error: Benchmark run did not produce {results_path}{RESET}")
        sys.exit(1)

    with open(results_path, encoding="utf-8") as f:
        results = json.load(f)

    # 4. Compare results against thresholds
    print(f"\n{BOLD}=== Performance Benchmark Report ==={RESET}\n")
    print(
        f"{'Benchmark Metric':<45} | {'Measured':<12} | {'Threshold':<12} | {'Status':<6}"
    )
    print("-" * 82)

    has_regression = False
    for metric, threshold in thresholds.items():
        if metric not in results:
            print(f"{metric:<45} | {'N/A':<12} | {threshold:<12} | {RED}MISSING{RESET}")
            has_regression = True
            continue

        value = results[metric]

        # Throughput metrics are "higher is better"; everything else is
        # treated as a latency where "lower is better".
        if "throughput" in metric:
            passed = value >= threshold
            unit = "calls/s"
        else:
            passed = value <= threshold
            unit = "ms"

        status_str = f"{GREEN}PASS{RESET}" if passed else f"{RED}FAIL{RESET}"
        val_fmt = f"{value:.2f} {unit}"
        thresh_fmt = f"{threshold:.2f} {unit}"

        if not passed:
            has_regression = True

        # The status is the last column and already contains ANSI escapes, so
        # width padding has no visible effect and is omitted.
        print(f"{metric:<45} | {val_fmt:<12} | {thresh_fmt:<12} | {status_str}")

    print("\n" + "=" * 82 + "\n")

    if has_regression:
        print(
            f"{RED}{BOLD}Performance regression detected! One or more metrics exceeded thresholds.{RESET}"
        )
        sys.exit(1)

    # Thresholds can all be met while pytest still failed (a failing
    # non-threshold assertion, an erroring test, ...). Do not report success
    # in that case.
    if result.returncode != 0:
        print(
            f"{RED}{BOLD}Benchmark test run failed (pytest exit code {result.returncode}) "
            f"even though all reported metrics are within thresholds.{RESET}"
        )
        sys.exit(1)

    print(f"{GREEN}{BOLD}All performance benchmarks passed!{RESET}")
    sys.exit(0)


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions testing/backend/benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Benchmark suite package
111 changes: 111 additions & 0 deletions testing/backend/benchmarks/bench_concurrent_task_start.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import asyncio
import statistics
import time
import pytest
from testing.backend.benchmarks.conftest import load_threshold


@pytest.mark.benchmark
@pytest.mark.asyncio
async def test_10_concurrent_task_creates(bench_env, record_benchmark):
    """Benchmark ten ``create_task`` calls launched together with ``asyncio.gather``.

    The wall-clock time for the whole batch is recorded under
    ``concurrent_task_creates_10_total_ms`` and must be strictly below the
    threshold returned by ``load_threshold`` for that key. Per-call mean,
    median and P95 latencies are printed for information only.
    """
    task_executor = bench_env["executor"]
    target_plugin = "icmp_ping"
    payload = {"target": "127.0.0.1"}

    # Each coroutine appends its own latency (in ms) as it finishes.
    latencies = []

    async def timed_create():
        began = time.perf_counter()
        task_id = await task_executor.create_task(target_plugin, payload)
        latencies.append((time.perf_counter() - began) * 1000.0)
        return task_id

    batch_began = time.perf_counter()
    pending = [timed_create() for _ in range(10)]
    await asyncio.gather(*pending)
    total_time_ms = (time.perf_counter() - batch_began) * 1000.0

    mean_lat = statistics.mean(latencies)
    p50_lat = statistics.median(latencies)

    # P95 is read from an ascending copy; with fewer than two samples the
    # last (only) sample is used.
    ordered = sorted(latencies)
    if len(ordered) >= 2:
        p95_lat = ordered[int(len(ordered) * 0.95)]
    else:
        p95_lat = ordered[-1]

    # Record the metric before looking up the threshold.
    record_benchmark("concurrent_task_creates_10_total_ms", total_time_ms)

    threshold_total = load_threshold("concurrent_task_creates_10_total_ms")

    print(
        f"\n[bench_10_concurrent_task_creates] Total time: {total_time_ms:.2f}ms (threshold: {threshold_total}ms)"
    )
    print(f"Mean: {mean_lat:.2f}ms, P50: {p50_lat:.2f}ms, P95: {p95_lat:.2f}ms")

    assert total_time_ms < threshold_total, (
        f"10 concurrent task creates took {total_time_ms:.2f}ms, threshold: {threshold_total}ms"
    )


@pytest.mark.benchmark
@pytest.mark.asyncio
async def test_20_sequential_task_creates(bench_env, record_benchmark):
    """Benchmark twenty ``create_task`` calls awaited one after another.

    The mean per-call latency is recorded under
    ``sequential_task_creates_mean_ms`` and must be strictly below the
    threshold returned by ``load_threshold`` for that key.
    """
    task_executor = bench_env["executor"]
    target_plugin = "icmp_ping"
    payload = {"target": "127.0.0.1"}

    async def timed_create():
        # Returns the latency of a single create_task call in milliseconds.
        began = time.perf_counter()
        await task_executor.create_task(target_plugin, payload)
        return (time.perf_counter() - began) * 1000.0

    samples_ms = [await timed_create() for _ in range(20)]
    mean_lat = statistics.mean(samples_ms)

    # Record the metric before looking up the threshold.
    record_benchmark("sequential_task_creates_mean_ms", mean_lat)

    threshold_mean = load_threshold("sequential_task_creates_mean_ms")

    print(
        f"\n[bench_20_sequential_task_creates] Mean latency: {mean_lat:.2f}ms (threshold: {threshold_mean}ms)"
    )

    assert mean_lat < threshold_mean, (
        f"Mean sequential task create took {mean_lat:.2f}ms, threshold: {threshold_mean}ms"
    )


@pytest.mark.benchmark
@pytest.mark.asyncio
async def test_concurrent_slot_saturation(bench_env, record_benchmark):
    """Benchmark how quickly the concurrency limiter rejects an extra task.

    Three slots are acquired on the module-level ``concurrent_limiter``, then
    a fourth acquisition is timed. The elapsed time is recorded under
    ``slot_rejection_ms``; the fourth acquire must be refused and must finish
    strictly below the threshold returned by ``load_threshold`` (guards
    against a spin-wait regression).

    NOTE(review): the test assumes the limiter allows exactly three concurrent
    tasks -- confirm against the limiter configuration.

    The limiter is shared module state, so its ``running_tasks`` are cleared
    again in a ``finally`` block; otherwise the slots taken here would stay
    occupied for every later test in the same process.
    """
    from backend.secuscan.ratelimit import concurrent_limiter

    # Start from an empty limiter.
    async with concurrent_limiter.lock:
        concurrent_limiter.running_tasks.clear()

    try:
        # Fill slots
        assert (await concurrent_limiter.acquire("task-1")) == (True, "")
        assert (await concurrent_limiter.acquire("task-2")) == (True, "")
        assert (await concurrent_limiter.acquire("task-3")) == (True, "")

        # Try acquiring 4th slot, measure time
        start = time.perf_counter()
        acquired, _ = await concurrent_limiter.acquire("task-4")
        elapsed_ms = (time.perf_counter() - start) * 1000.0

        # Record metric
        record_benchmark("slot_rejection_ms", elapsed_ms)

        threshold_rejection = load_threshold("slot_rejection_ms")

        print(
            f"\n[bench_concurrent_slot_saturation] Slot rejection elapsed: {elapsed_ms:.4f}ms (threshold: {threshold_rejection}ms)"
        )

        assert not acquired, "Should not be able to acquire 4th slot"
        assert elapsed_ms < threshold_rejection, (
            f"Slot rejection took {elapsed_ms:.2f}ms, threshold: {threshold_rejection}ms"
        )
    finally:
        # Free every slot taken above, whether the test passed or failed, so
        # the shared limiter does not leak state into other tests.
        async with concurrent_limiter.lock:
            concurrent_limiter.running_tasks.clear()
Loading
Loading