From 42e9694ac6b68e797e2f0954202d4f3e9d0c6314 Mon Sep 17 00:00:00 2001 From: shrutu0929 Date: Tue, 7 Apr 2026 18:31:37 +0530 Subject: [PATCH 1/3] perf: Cache or narrow TestSuiteGate test discovery (avoid full rglob per verify) --- refactron/verification/checks/test_gate.py | 28 ++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/refactron/verification/checks/test_gate.py b/refactron/verification/checks/test_gate.py index 3f8327f..3bf824b 100644 --- a/refactron/verification/checks/test_gate.py +++ b/refactron/verification/checks/test_gate.py @@ -20,6 +20,8 @@ class TestSuiteGate(BaseCheck): def __init__(self, project_root: Optional[Path] = None): self.project_root = project_root + self._test_file_cache: Optional[Dict[str, List[Path]]] = None + self._all_test_files: Optional[List[Path]] = None def verify(self, original: str, transformed: str, file_path: Path) -> CheckResult: start = time.monotonic() @@ -110,11 +112,27 @@ def _find_relevant_tests(self, file_path: Path) -> List[Path]: module_name = file_path.stem search_root = self.project_root or file_path.parent + if self._test_file_cache is None: + self._test_file_cache = {} + self._all_test_files = [] + + test_dirs = [d for d in [search_root / "tests", search_root / "test"] if d.is_dir()] + search_dirs = test_dirs if test_dirs else [search_root] + excluded_dirs = {".git", ".rag", "__pycache__", "venv", ".venv", "env", "node_modules"} + + for root_dir in search_dirs: + for py_file in root_dir.rglob("*.py"): + if any(excluded in py_file.parts for excluded in excluded_dirs): + continue + name = py_file.name + if name.startswith("test_") or name.endswith("_test.py"): + self._all_test_files.append(py_file) + + if module_name in self._test_file_cache: + return self._test_file_cache[module_name] + test_files: List[Path] = [] - for py_file in search_root.rglob("*.py"): - name = py_file.name - if not (name.startswith("test_") or name.endswith("_test.py")): - continue + for py_file in self._all_test_files: # type: ignore if py_file == file_path: continue try: @@ -123,6 +141,8 @@ def _find_relevant_tests(self, file_path: Path) -> List[Path]: test_files.append(py_file) except Exception: continue + + self._test_file_cache[module_name] = test_files return test_files @staticmethod From 16f4bb9e89a190bc3e3069a7bc66308f504a2a66 Mon Sep 17 00:00:00 2001 From: shrutu0929 Date: Fri, 15 May 2026 20:46:58 +0530 Subject: [PATCH 2/3] fix: TestSuiteGate uses project root cwd and host Python for pytest TestSuiteGate.verify ran pytest via a hard-coded python3 interpreter with cwd set to file_path.parent. For nested files this misses the repo's pyproject.toml / pytest.ini / conftest.py and may use a different interpreter/venv than the host process, causing false positives or undiscovered tests and CI-vs-local mismatch. - Run pytest via sys.executable instead of python3. - Set cwd to the resolved project root (project_root or file dir). - Prepend the project root to PYTHONPATH for layouts that rely on it. - Add tests asserting the interpreter, cwd, and PYTHONPATH. Co-Authored-By: Claude Opus 4.7 (1M context) --- refactron/verification/checks/test_gate.py | 29 +++++++--- tests/test_test_gate.py | 61 ++++++++++++++++++++++ 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/refactron/verification/checks/test_gate.py b/refactron/verification/checks/test_gate.py index 3bf824b..6186b05 100644 --- a/refactron/verification/checks/test_gate.py +++ b/refactron/verification/checks/test_gate.py @@ -4,6 +4,7 @@ import os import shutil import subprocess +import sys import time from pathlib import Path from typing import Any, Dict, List, Optional @@ -56,16 +57,30 @@ def verify(self, original: str, transformed: str, file_path: Path) -> CheckResul # Delete .pyc cache self._clear_pycache(file_path) - # Run pytest - cmd = ["python3", "-m", "pytest", "-x", "-q"] + # Run pytest from the project root with the host interpreter so + # the repo's pyproject.toml / pytest.ini / conftest.py are picked + # up and the same venv as the host process is used. + run_cwd = (self.project_root or file_path.parent).resolve() + + # Make the project root importable for edge cases where the layout + # relies on PYTHONPATH rather than an installed package. + env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"} + existing_pythonpath = env.get("PYTHONPATH", "") + env["PYTHONPATH"] = ( + os.pathsep.join([str(run_cwd), existing_pythonpath]) + if existing_pythonpath + else str(run_cwd) + ) + + cmd = [sys.executable, "-m", "pytest", "-x", "-q"] cmd += [str(f) for f in test_files] result = subprocess.run( cmd, timeout=45, capture_output=True, text=True, - env={**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}, - cwd=str(file_path.parent), + env=env, + cwd=str(run_cwd), ) elapsed = int((time.monotonic() - start) * 1000) @@ -115,11 +130,11 @@ def _find_relevant_tests(self, file_path: Path) -> List[Path]: if self._test_file_cache is None: self._test_file_cache = {} self._all_test_files = [] - + test_dirs = [d for d in [search_root / "tests", search_root / "test"] if d.is_dir()] search_dirs = test_dirs if test_dirs else [search_root] excluded_dirs = {".git", ".rag", "__pycache__", "venv", ".venv", "env", "node_modules"} - + for root_dir in search_dirs: for py_file in root_dir.rglob("*.py"): if any(excluded in py_file.parts for excluded in excluded_dirs): @@ -141,7 +156,7 @@ def _find_relevant_tests(self, file_path: Path) -> List[Path]: test_files.append(py_file) except Exception: continue - + self._test_file_cache[module_name] = test_files return test_files diff --git a/tests/test_test_gate.py b/tests/test_test_gate.py index defd8d3..fa0f09a 100644 --- a/tests/test_test_gate.py +++ b/tests/test_test_gate.py @@ -1,9 +1,12 @@ """Unit tests for TestSuiteGate (Check 3).""" +import os +import sys from pathlib import Path import pytest +from refactron.verification.checks import test_gate as test_gate_module from refactron.verification.checks.test_gate import TestSuiteGate FIXTURES_DIR = Path(__file__).parent / "fixtures" @@ -71,3 +74,61 @@ def test_confidence_is_0_9(self, gate): original = file_path.read_text(encoding="utf-8") cr = gate.verify(original, original, file_path) assert cr.confidence == 0.9 + + +class TestPytestInvocation: + """The pytest subprocess must use the host interpreter and project root.""" + + def _capture_run(self, monkeypatch): + """Patch subprocess.run to capture its arguments without running pytest.""" + captured = {} + + def fake_run(cmd, **kwargs): + captured["cmd"] = cmd + captured["cwd"] = kwargs.get("cwd") + captured["env"] = kwargs.get("env") + + class _Completed: + returncode = 0 + stdout = "" + stderr = "" + + return _Completed() + + monkeypatch.setattr(test_gate_module.subprocess, "run", fake_run) + return captured + + def test_uses_host_interpreter(self, monkeypatch): + """pytest must run via sys.executable, not a hard-coded python3.""" + captured = self._capture_run(monkeypatch) + gate = TestSuiteGate(project_root=FIXTURES_DIR) + file_path = FIXTURES_DIR / "fixture_test_break.py" + original = file_path.read_text(encoding="utf-8") + + gate.verify(original, original, file_path) + + assert captured["cmd"][0] == sys.executable + assert captured["cmd"][1:4] == ["-m", "pytest", "-x"] + + def test_cwd_is_resolved_project_root(self, monkeypatch): + """pytest must run from the resolved project root, not file_path.parent.""" + captured = self._capture_run(monkeypatch) + gate = TestSuiteGate(project_root=FIXTURES_DIR) + file_path = FIXTURES_DIR / "fixture_test_break.py" + original = file_path.read_text(encoding="utf-8") + + gate.verify(original, original, file_path) + + assert captured["cwd"] == str(FIXTURES_DIR.resolve()) + + def test_project_root_added_to_pythonpath(self, monkeypatch): + """The project root must be present on PYTHONPATH for the subprocess.""" + captured = self._capture_run(monkeypatch) + gate = TestSuiteGate(project_root=FIXTURES_DIR) + file_path = FIXTURES_DIR / "fixture_test_break.py" + original = file_path.read_text(encoding="utf-8") + + gate.verify(original, original, file_path) + + pythonpath = captured["env"]["PYTHONPATH"].split(os.pathsep) + assert str(FIXTURES_DIR.resolve()) in pythonpath From b93c4ecea698f32279c675c92235313f53d029ed Mon Sep 17 00:00:00 2001 From: shrutu0929 Date: Sat, 16 May 2026 12:11:48 +0530 Subject: [PATCH 3/3] feat: machine-readable verification output and optional run-all checks Two related improvements to the verification pipeline. Machine-readable JSON output: CI gates, bots, and parent tooling previously had to scrape Rich-formatted terminal text. This adds to_json_dict() helpers on the VerificationResult and CheckResult data contracts (backed by a JSON_SCHEMA_VERSION constant so the schema can evolve safely), a format_verification_result_json() formatter, and a new `refactron verify` CLI command with a --json flag that emits a stable, versioned JSON object and returns exit code 0 when safe to apply or 1 when blocked. CLI subcommand registration in main.py is also hardened to catch any exception rather than only ImportError, so a broken optional dependency degrades one subcommand gracefully instead of taking down the whole CLI. Optional run-all verification checks: VerificationEngine.verify previously stopped after the first failing check, hiding stacked problems. A short_circuit flag (default True, preserving existing behaviour) is added; when False the full pipeline runs so every failure category surfaces in one run, with checks_failed aggregating all failures while blocking_reason stays the first failure for stable prioritization. A --all-checks CLI flag wires this through. Co-Authored-By: Claude Opus 4.7 (1M context) --- refactron/cli/main.py | 27 ++++++---- refactron/cli/verify.py | 88 +++++++++++++++++++++++++++++++ refactron/verification/engine.py | 37 +++++++++---- refactron/verification/report.py | 14 ++++- refactron/verification/result.py | 40 ++++++++++++++ tests/test_verification_engine.py | 22 ++++++++ tests/test_verification_result.py | 81 +++++++++++++++++++++++++++- 7 files changed, 289 insertions(+), 20 deletions(-) create mode 100644 refactron/cli/verify.py diff --git a/refactron/cli/main.py b/refactron/cli/main.py index 04426b8..6540610 100644 --- a/refactron/cli/main.py +++ b/refactron/cli/main.py @@ -89,8 +89,10 @@ def main(ctx: click.Context) -> None: # Register subcommands -# We import them here to ensure they are registered with the main group -# Using a try-except block to allow partial loading during refactoring +# We import them here to ensure they are registered with the main group. +# Each block is isolated so a single broken subcommand — or a broken optional +# dependency it pulls in (e.g. a failed native-library load raising OSError, +# not ImportError) — degrades gracefully instead of taking down the whole CLI. try: from refactron.cli.auth import auth, login, logout, telemetry @@ -98,7 +100,7 @@ def main(ctx: click.Context) -> None: main.add_command(logout) main.add_command(auth) main.add_command(telemetry) -except ImportError: +except Exception: pass try: @@ -109,7 +111,7 @@ def main(ctx: click.Context) -> None: main.add_command(metrics) main.add_command(serve_metrics) main.add_command(suggest) -except ImportError: +except Exception: pass try: @@ -119,28 +121,35 @@ def main(ctx: click.Context) -> None: main.add_command(autofix) main.add_command(rollback) main.add_command(document) -except ImportError: +except Exception: + pass + +try: + from refactron.cli.verify import verify + + main.add_command(verify) +except Exception: pass try: from refactron.cli.patterns import patterns main.add_command(patterns) -except ImportError: +except Exception: pass try: from refactron.cli.repo import repo main.add_command(repo) -except ImportError: +except Exception: pass try: from refactron.cli.rag import rag main.add_command(rag) -except ImportError: +except Exception: pass try: @@ -149,5 +158,5 @@ def main(ctx: click.Context) -> None: main.add_command(generate_cicd) main.add_command(feedback) main.add_command(init) -except ImportError: +except Exception: pass diff --git a/refactron/cli/verify.py b/refactron/cli/verify.py new file mode 100644 index 0000000..bc6e15a --- /dev/null +++ b/refactron/cli/verify.py @@ -0,0 +1,88 @@ +""" +Refactron CLI - Verification Module. +Command to run the Verification Engine on a code change and emit either a +human-readable report or a stable, machine-readable JSON object for CI gates. +""" + +from pathlib import Path +from typing import Optional + +import click + +from refactron.cli.ui import _auth_banner, console +from refactron.verification.engine import VerificationEngine +from refactron.verification.report import ( + format_verification_result, + format_verification_result_json, +) + + +@click.command() +@click.argument("target", type=click.Path(exists=True, dir_okay=False)) +@click.option( + "--against", + "-a", + "candidate", + type=click.Path(exists=True, dir_okay=False), + default=None, + help=( + "Path to the proposed/modified version of TARGET. " + "If omitted, TARGET is verified against itself." + ), +) +@click.option( + "--project-root", + type=click.Path(exists=True, file_okay=False), + default=None, + help="Project root used by the test-suite gate. Defaults to the current directory.", +) +@click.option( + "--all-checks", + is_flag=True, + default=False, + help=( + "Run every check even after one fails (no short-circuit), so all " + "failure categories surface in a single run." + ), +) +@click.option( + "--json", + "as_json", + is_flag=True, + default=False, + help="Emit a stable, machine-readable JSON report instead of formatted text.", +) +def verify( + target: str, + candidate: Optional[str], + project_root: Optional[str], + all_checks: bool, + as_json: bool, +) -> None: + """ + Verify that a code change is safe to apply. + + TARGET is the file as it currently lives in the project. With --against, + the proposed new content is checked against it (syntax, import integrity, + test suite). Exit code is 0 when safe to apply, 1 when blocked — and with + --json a versioned JSON object is printed for CI dashboards and bots. + """ + target_path = Path(target) + original = target_path.read_text(encoding="utf-8") + transformed = Path(candidate).read_text(encoding="utf-8") if candidate else original + + root = Path(project_root) if project_root else Path.cwd() + engine = VerificationEngine(project_root=root) + result = engine.verify( + original, transformed, target_path, short_circuit=not all_checks + ) + + if as_json: + # JSON mode prints only the JSON object so consumers can parse stdout. + click.echo(format_verification_result_json(result)) + else: + console.print() + _auth_banner("Verification") + format_verification_result(result, console) + + raise SystemExit(0 if result.safe_to_apply else 1) diff --git a/refactron/verification/engine.py b/refactron/verification/engine.py index 3e3771a..8208188 100644 --- a/refactron/verification/engine.py +++ b/refactron/verification/engine.py @@ -21,7 +21,7 @@ def verify(self, original: str, transformed: str, file_path: Path) -> CheckResul class VerificationEngine: - """Orchestrates verification checks in a short-circuit pipeline.""" + """Orchestrates verification checks, short-circuiting by default.""" def __init__( self, @@ -43,8 +43,25 @@ def __init__( TestSuiteGate(project_root=project_root), ] - def verify(self, original: str, transformed: str, file_path: Path) -> VerificationResult: - """Run all checks in order, short-circuiting on first failure.""" + def verify( + self, + original: str, + transformed: str, + file_path: Path, + short_circuit: bool = True, + ) -> VerificationResult: + """Run the verification checks in order and aggregate the results. + + Args: + original: Source code before the transform. + transformed: Source code after the transform. + file_path: Path of the file within the project. + short_circuit: When True (default), stop after the first failing + check and record the rest in ``skipped_checks`` — fastest, but + hides stacked problems. When False, run every check so the + caller sees all failure categories in a single run; nothing is + skipped. ``blocking_reason`` is always the first failure. + """ start = time.monotonic() check_results: List[CheckResult] = [] checks_run: List[str] = [] @@ -77,12 +94,14 @@ def verify(self, original: str, transformed: str, file_path: Path) -> Verificati checks_failed.append(check.name) if blocking_reason is None: blocking_reason = cr.blocking_reason - # Short-circuit: skip remaining checks - for remaining in self.checks[i + 1 :]: - skipped_checks.append( - (remaining.name, f"Short-circuited after {check.name} failed") - ) - break + if short_circuit: + # Skip remaining checks and stop the pipeline. + for remaining in self.checks[i + 1 :]: + skipped_checks.append( + (remaining.name, f"Short-circuited after {check.name} failed") + ) + break + # short_circuit=False: keep going so every failure surfaces. elapsed_ms = int((time.monotonic() - start) * 1000) confidence = self._compute_confidence(check_results) diff --git a/refactron/verification/report.py b/refactron/verification/report.py index 5ae0807..e245597 100644 --- a/refactron/verification/report.py +++ b/refactron/verification/report.py @@ -1,4 +1,6 @@ -"""Rich CLI output formatting for VerificationResult.""" +"""CLI output formatting for VerificationResult (Rich and JSON).""" + +import json from rich.console import Console @@ -29,3 +31,13 @@ def format_verification_result(result: VerificationResult, console: Console) -> for name, reason in result.skipped_checks: console.print(f" [dim]- {name}: {reason}[/dim]") console.print(f"\n [bold red]Blocked.[/bold red] {result.blocking_reason}") + + +def format_verification_result_json(result: VerificationResult, indent: int = 2) -> str: + """Render a VerificationResult as a stable, machine-readable JSON string. + + Intended for CI gates, bots, and parent tools that need structured data + rather than scraping terminal text. The schema is versioned — see + ``VerificationResult.to_json_dict``. + """ + return json.dumps(result.to_json_dict(), indent=indent, sort_keys=True) diff --git a/refactron/verification/result.py b/refactron/verification/result.py index c37b7ea..fe5aef5 100644 --- a/refactron/verification/result.py +++ b/refactron/verification/result.py @@ -6,6 +6,10 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple +# Bumped whenever the JSON shape produced by ``to_json_dict`` changes in a +# way consumers must adapt to. Additive fields do not require a bump. +JSON_SCHEMA_VERSION = 1 + @dataclass(frozen=True) class CheckResult: @@ -18,6 +22,17 @@ class CheckResult: duration_ms: int details: Dict[str, Any] + def to_json_dict(self) -> Dict[str, Any]: + """Return a JSON-serializable dict of this check's result.""" + return { + "check_name": self.check_name, + "passed": self.passed, + "blocking_reason": self.blocking_reason, + "confidence": self.confidence, + "duration_ms": self.duration_ms, + "details": self.details, + } + @dataclass(frozen=True) class VerificationResult: @@ -39,3 +54,28 @@ class VerificationResult: confidence_score: float verification_ms: int check_results: List[CheckResult] + + def to_json_dict(self) -> Dict[str, Any]: + """Return a stable, JSON-serializable dict of the full result. + + The shape is versioned via ``schema_version`` so machine consumers + (CI gates, bots, parent tools) can evolve safely. Adding new keys is + backwards-compatible; removing or renaming keys bumps the version. + """ + return { + "schema_version": JSON_SCHEMA_VERSION, + "status": "safe" if self.safe_to_apply else "blocked", + "safe_to_apply": self.safe_to_apply, + "passed": self.passed, + "blocking_reason": self.blocking_reason, + "confidence_score": self.confidence_score, + "verification_ms": self.verification_ms, + "checks_run": list(self.checks_run), + "checks_passed": list(self.checks_passed), + "checks_failed": list(self.checks_failed), + "skipped_checks": [ + {"check_name": name, "reason": reason} + for name, reason in self.skipped_checks + ], + "checks": [cr.to_json_dict() for cr in self.check_results], + } diff --git a/tests/test_verification_engine.py b/tests/test_verification_engine.py index 4caba8f..643e2c4 100644 --- a/tests/test_verification_engine.py +++ b/tests/test_verification_engine.py @@ -71,6 +71,28 @@ def test_first_fail_short_circuits(self): assert tracker.called is False # short-circuited assert ("tracker", "Short-circuited after always_fail failed") in result.skipped_checks + def test_all_checks_runs_every_check_after_failure(self): + tracker = _TrackingCheck() + engine = VerificationEngine(checks=[_FailingCheck(), tracker]) + result = engine.verify( + "a = 1", "a = 1", Path("/tmp/test.py"), short_circuit=False + ) + assert result.safe_to_apply is False + assert tracker.called is True # ran despite the earlier failure + assert result.checks_run == ["always_fail", "tracker"] + assert result.checks_failed == ["always_fail"] + assert result.checks_passed == ["tracker"] + assert result.skipped_checks == [] # nothing skipped when not short-circuiting + + def test_all_checks_aggregates_multiple_failures(self): + engine = VerificationEngine(checks=[_FailingCheck(), _PassingCheck(), _FailingCheck()]) + result = engine.verify( + "a = 1", "a = 1", Path("/tmp/test.py"), short_circuit=False + ) + assert result.checks_failed == ["always_fail", "always_fail"] + assert result.checks_passed == ["always_pass"] + assert result.blocking_reason == "Intentional failure" # first failure + def test_confidence_is_geometric_mean(self): engine = VerificationEngine(checks=[_PassingCheck(), _TrackingCheck()]) result = engine.verify("a = 1", "a = 1", Path("/tmp/test.py")) diff --git a/tests/test_verification_result.py b/tests/test_verification_result.py index 5c7d44b..68cad62 100644 --- a/tests/test_verification_result.py +++ b/tests/test_verification_result.py @@ -1,8 +1,15 @@ """Contract tests for VerificationResult and CheckResult.""" +import json + import pytest -from refactron.verification.result import CheckResult, VerificationResult +from refactron.verification.report import format_verification_result_json +from refactron.verification.result import ( + JSON_SCHEMA_VERSION, + CheckResult, + VerificationResult, +) class TestCheckResult: @@ -95,3 +102,75 @@ def test_confidence_zero_when_no_checks_passed(self): check_results=[], ) assert vr.confidence_score == 0.0 + + +def _sample_result(safe: bool) -> VerificationResult: + cr = CheckResult( + check_name="syntax", + passed=safe, + blocking_reason="" if safe else "SyntaxError on line 5", + confidence=1.0 if safe else 0.0, + duration_ms=12, + details={"line": 5}, + ) + return VerificationResult( + safe_to_apply=safe, + passed=True, + checks_run=["syntax"], + checks_passed=["syntax"] if safe else [], + checks_failed=[] if safe else ["syntax"], + skipped_checks=[] if safe else [("test_suite", "Short-circuited after syntax failed")], + blocking_reason=None if safe else "SyntaxError on line 5", + confidence_score=1.0 if safe else 0.0, + verification_ms=42, + check_results=[cr], + ) + + +class TestCheckResultJson: + def test_to_json_dict_has_all_fields(self): + cr = CheckResult( + check_name="syntax", + passed=False, + blocking_reason="boom", + confidence=0.0, + duration_ms=7, + details={"k": "v"}, + ) + d = cr.to_json_dict() + assert d == { + "check_name": "syntax", + "passed": False, + "blocking_reason": "boom", + "confidence": 0.0, + "duration_ms": 7, + "details": {"k": "v"}, + } + + +class TestVerificationResultJson: + def test_to_json_dict_safe(self): + d = _sample_result(safe=True).to_json_dict() + assert d["schema_version"] == JSON_SCHEMA_VERSION + assert d["status"] == "safe" + assert d["safe_to_apply"] is True + assert d["blocking_reason"] is None + assert d["checks_run"] == ["syntax"] + assert d["skipped_checks"] == [] + assert d["checks"][0]["check_name"] == "syntax" + + def test_to_json_dict_blocked(self): + d = _sample_result(safe=False).to_json_dict() + assert d["status"] == "blocked" + assert d["safe_to_apply"] is False + assert d["blocking_reason"] == "SyntaxError on line 5" + assert d["checks_failed"] == ["syntax"] + assert d["skipped_checks"] == [ + {"check_name": "test_suite", "reason": "Short-circuited after syntax failed"} + ] + + def test_format_json_is_parseable(self): + text = format_verification_result_json(_sample_result(safe=True)) + parsed = json.loads(text) + assert parsed["schema_version"] == JSON_SCHEMA_VERSION + assert parsed["status"] == "safe"