From e4f68ffc2d531001b960e419c8c9808d29279bc0 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Tue, 12 May 2026 09:02:45 +0300 Subject: [PATCH 1/2] =?UTF-8?q?fix(tests):=20#107=20=E2=80=94=20isolate=20?= =?UTF-8?q?SYNTHBANSHEE=5F*=20env=20vars=20in=20CliRunner=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an autouse pytest fixture in tests/conftest.py that strips SYNTHBANSHEE_DATA_DIR / SYNTHBANSHEE_CACHE_DIR / SYNTHBANSHEE_SCRIPT_CACHE_DIR before each test. Without this, sourcing the repo's .envrc (which points these at the developer's avdp-synth-corpus tree) causes CliRunner-backed tests to write generated clips into the real corpus — see #107 for the delivery-003 incident fingerprint. Adds two regression tests in TestSynthbansheeEnvVarIsolation: - Asserts the three env vars are unset during the test process. - Runs the full mocked generate pipeline and asserts no files leak into a "would-be leak" directory outside the CLI-specified output dirs. Fixes #107. Co-Authored-By: Claude Opus 4.7 --- tests/conftest.py | 24 ++++++++++++++ tests/unit/test_cli.py | 71 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 9b10e7b..0e1dacb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,6 +17,30 @@ import pytest +# Env vars consumed by `synthbanshee/cli.py` Click options as defaults. +# If these leak into a `CliRunner`-backed test, generated artifacts land +# in the developer's corpus tree instead of `tmp_path`. See #107. +_SYNTHBANSHEE_DIR_ENV_VARS = ( + "SYNTHBANSHEE_DATA_DIR", + "SYNTHBANSHEE_CACHE_DIR", + "SYNTHBANSHEE_SCRIPT_CACHE_DIR", +) + + +@pytest.fixture(autouse=True) +def _isolate_synthbanshee_env_vars( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Strip ``SYNTHBANSHEE_*`` dir env vars for the duration of every test. + + Click options in ``synthbanshee/cli.py`` read these as ``envvar=`` + defaults. Without this fixture, sourcing the repo's ``.envrc`` before + running ``pytest`` causes ``CliRunner``-backed tests to write + generated clips into the developer's corpus tree. + """ + for name in _SYNTHBANSHEE_DIR_ENV_VARS: + monkeypatch.delenv(name, raising=False) + def pytest_addoption(parser: pytest.Parser) -> None: parser.addoption( diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 9af7acd..9d40ef7 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -4,6 +4,7 @@ import io import json +import os import pathlib import textwrap import wave @@ -229,6 +230,76 @@ def test_full_generate_verbose(self, tmp_path): assert "Stage" in result.output +# --------------------------------------------------------------------------- +# Env-var isolation (regression test for #107) +# --------------------------------------------------------------------------- + + +class TestSynthbansheeEnvVarIsolation: + """The autouse conftest fixture must strip ``SYNTHBANSHEE_*`` dir env vars. + + Without this, a developer who has sourced the repo's ``.envrc`` before + running ``pytest`` will see ``CliRunner``-backed tests leak generated + clips into the corpus tree (see #107). The fixture lives in + ``tests/conftest.py``; these tests assert its contract. + """ + + def test_env_vars_are_unset_in_test_process(self): + """Each ``SYNTHBANSHEE_*`` dir env var is missing during the test.""" + for name in ( + "SYNTHBANSHEE_DATA_DIR", + "SYNTHBANSHEE_CACHE_DIR", + "SYNTHBANSHEE_SCRIPT_CACHE_DIR", + ): + assert os.environ.get(name) is None, ( + f"{name} leaked into the test process; the conftest fixture " + "is not stripping it as expected." + ) + + def test_clirunner_generate_does_not_leak_outside_cli_dirs(self, tmp_path): + """`synthbanshee generate` does not write outside its CLI-specified dirs. + + Models the #107 fingerprint: a "would-be leak" directory is set + up alongside the real ``tmp_path`` outputs; after running the + full mocked pipeline, the leak directory must remain empty. With + the autouse fixture in place, ``SYNTHBANSHEE_DATA_DIR`` cannot + steer files there. + """ + leak_target = tmp_path / "would_be_leak" + leak_target.mkdir() + + turns = _make_dialogue_turns(n=1) + mixed = _make_mixed_scene(n_turns=1) + + runner = CliRunner() + with ( + patch("synthbanshee.script.generator.ScriptGenerator") as MockGen, + patch("synthbanshee.tts.renderer.TTSRenderer") as MockRenderer, + ): + MockGen.return_value.generate.return_value = turns + MockRenderer.return_value.render_scene.return_value = mixed + result = runner.invoke( + cli, + [ + "generate", + "--config", + str(SCENES_DIR / "test_scene_001.yaml"), + "--output-dir", + str(tmp_path / "out"), + "--cache-dir", + str(tmp_path / "cache"), + "--dirty-dir", + str(tmp_path / "dirty"), + "--script-cache-dir", + str(tmp_path / "scripts"), + ], + ) + + assert result.exit_code == 0, result.output + leaked = list(leak_target.rglob("*")) + assert leaked == [], f"Generation leaked files into {leak_target}: {leaked}" + + # --------------------------------------------------------------------------- # validate command # --------------------------------------------------------------------------- From 90a0ec4de82badb444d627ec9e05ae755e9f2f9e Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Tue, 12 May 2026 14:40:53 +0300 Subject: [PATCH 2/2] =?UTF-8?q?fix(tests):=20#107=20=E2=80=94=20rewrite=20?= =?UTF-8?q?regression=20tests=20to=20actually=20exercise=20the=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-review caught that the previous regression tests passed with or without the autouse strip fixture, so they were not real regression tests. This commit: 1. Moves the autouse fixture from ``tests/conftest.py`` to ``tests/unit/conftest.py`` — the integration suite (which uses the SpeakerConfig/SceneConfig Python APIs directly, not CliRunner) is out of scope per #107's "scope guard" note, and the integration tests do not need this fixture. 2. Replaces the two theatre tests with: - ``TestSynthbansheeEnvVarLeakWithoutFixture``: overrides the autouse strip fixture with a no-op for the duration of the class, sets the three env vars, and invokes ``synthbanshee generate`` without explicit dir flags. The test asserts that a WAV DOES land under the env-pointed leak dir — proving the leak vector exists without the fix. If Click's envvar semantics ever change so the leak no longer happens, this test fails loudly so the strip fixture can be retired. - ``TestSynthbansheeEnvVarFixtureStripsParentShellEnv``: uses a class-scoped autouse fixture to set the env vars BEFORE the function-scoped strip runs (pytest setup order: class fixtures before function fixtures for each test). This simulates the realistic scenario of ``.envrc`` setting the vars before pytest started. The test then asserts the strip fixture has cleared them. A second test in the same class verifies the end-to-end behavior: invoking the CLI without dir flags does not leak. 3. Verified manually by commenting out the strip fixture body and confirming both new tests fail, then restoring and confirming they pass — the contract is genuinely exercised. Function scope was kept on the strip fixture (the issue's spec said session scope) because session scope is incompatible with the per-class override pattern that ``TestSynthbansheeEnvVarLeakWithoutFixture`` uses to demonstrate the leak vector. Full unit suite: 1699/1699 passing. Co-Authored-By: Claude Opus 4.7 --- tests/conftest.py | 24 ------ tests/unit/conftest.py | 35 +++++++++ tests/unit/test_cli.py | 168 ++++++++++++++++++++++++++++------------- 3 files changed, 151 insertions(+), 76 deletions(-) create mode 100644 tests/unit/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py index 0e1dacb..9b10e7b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,30 +17,6 @@ import pytest -# Env vars consumed by `synthbanshee/cli.py` Click options as defaults. -# If these leak into a `CliRunner`-backed test, generated artifacts land -# in the developer's corpus tree instead of `tmp_path`. See #107. -_SYNTHBANSHEE_DIR_ENV_VARS = ( - "SYNTHBANSHEE_DATA_DIR", - "SYNTHBANSHEE_CACHE_DIR", - "SYNTHBANSHEE_SCRIPT_CACHE_DIR", -) - - -@pytest.fixture(autouse=True) -def _isolate_synthbanshee_env_vars( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """Strip ``SYNTHBANSHEE_*`` dir env vars for the duration of every test. - - Click options in ``synthbanshee/cli.py`` read these as ``envvar=`` - defaults. Without this fixture, sourcing the repo's ``.envrc`` before - running ``pytest`` causes ``CliRunner``-backed tests to write - generated clips into the developer's corpus tree. - """ - for name in _SYNTHBANSHEE_DIR_ENV_VARS: - monkeypatch.delenv(name, raising=False) - def pytest_addoption(parser: pytest.Parser) -> None: parser.addoption( diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 0000000..0b74a76 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,35 @@ +"""Unit-test-specific pytest configuration. + +Scoped to ``tests/unit/`` so the integration suite (which calls the +SpeakerConfig / SceneConfig Python APIs directly, never via CliRunner) +is unaffected. See #107 for the scope guard in the original issue. +""" + +from __future__ import annotations + +import pytest + +# Env vars consumed by `synthbanshee/cli.py` Click options as defaults. +# If these leak into a ``CliRunner``-backed unit test, generated artifacts +# land in the developer's corpus tree (whatever ``.envrc`` points at) +# instead of ``tmp_path``. See #107 for the delivery-003 leak fingerprint. +_SYNTHBANSHEE_DIR_ENV_VARS = ( + "SYNTHBANSHEE_DATA_DIR", + "SYNTHBANSHEE_CACHE_DIR", + "SYNTHBANSHEE_SCRIPT_CACHE_DIR", +) + + +@pytest.fixture(autouse=True) +def _isolate_synthbanshee_env_vars( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Strip ``SYNTHBANSHEE_*`` dir env vars before every unit test. + + Function-scoped (not session-scoped) on purpose: individual test + classes can override this fixture with a no-op to demonstrate the + leak vector — see ``test_cli.py::TestSynthbansheeEnvVarLeakWithoutFixture``. + A session-scoped strip would defeat that override pattern. + """ + for name in _SYNTHBANSHEE_DIR_ENV_VARS: + monkeypatch.delenv(name, raising=False) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 9d40ef7..21932d6 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -231,73 +231,137 @@ def test_full_generate_verbose(self, tmp_path): # --------------------------------------------------------------------------- -# Env-var isolation (regression test for #107) +# Env-var isolation (regression tests for #107) # --------------------------------------------------------------------------- -class TestSynthbansheeEnvVarIsolation: - """The autouse conftest fixture must strip ``SYNTHBANSHEE_*`` dir env vars. +def _invoke_generate_without_dir_flags(tmp_path): + """Invoke ``synthbanshee generate`` with no explicit dir flags. - Without this, a developer who has sourced the repo's ``.envrc`` before - running ``pytest`` will see ``CliRunner``-backed tests leak generated - clips into the corpus tree (see #107). The fixture lives in - ``tests/conftest.py``; these tests assert its contract. + Models the leak scenario from #107: when neither ``--output-dir`` nor + ``--cache-dir`` / ``--script-cache-dir`` is given, Click reads the + ``envvar=`` default, so any leaked ``SYNTHBANSHEE_DATA_DIR`` etc. + from the parent shell steers artifacts. Returns the CliRunner result. """ + turns = _make_dialogue_turns(n=1) + mixed = _make_mixed_scene(n_turns=1) + runner = CliRunner() + with ( + patch("synthbanshee.script.generator.ScriptGenerator") as MockGen, + patch("synthbanshee.tts.renderer.TTSRenderer") as MockRenderer, + ): + MockGen.return_value.generate.return_value = turns + MockRenderer.return_value.render_scene.return_value = mixed + # --dirty-dir has no envvar; pin it to tmp_path so the test never + # writes into the developer's assets/speech/dirty/ tree. + return runner.invoke( + cli, + [ + "generate", + "--config", + str(SCENES_DIR / "test_scene_001.yaml"), + "--dirty-dir", + str(tmp_path / "dirty"), + ], + ) + + +class TestSynthbansheeEnvVarLeakWithoutFixture: + """Proves the leak vector exists when the isolation fixture is absent. + + The autouse strip fixture from ``tests/unit/conftest.py`` is + overridden here as a no-op. The test then sets env vars and invokes + the CLI without explicit dir flags — Click's ``envvar=`` defaults + pick up the leaked values and write artifacts under them, which is + exactly the #107 fingerprint. If this test ever starts failing, + Click's envvar semantics have changed and the strip fixture may no + longer be needed. + """ + + @pytest.fixture(autouse=True) + def _isolate_synthbanshee_env_vars(self, monkeypatch): + """Override the conftest strip fixture: no-op.""" + yield + + def test_env_var_steers_output_dir_when_not_stripped(self, tmp_path, monkeypatch): + """``SYNTHBANSHEE_DATA_DIR`` from env leaks into the generated clip path.""" + leak_data_dir = tmp_path / "leak_data_he" + leak_cache_dir = tmp_path / "leak_cache" + leak_scripts_dir = tmp_path / "leak_scripts" + # monkeypatch.setenv handles teardown; survives the test's autouse + # no-op override (which doesn't strip anything). + monkeypatch.setenv("SYNTHBANSHEE_DATA_DIR", str(leak_data_dir)) + monkeypatch.setenv("SYNTHBANSHEE_CACHE_DIR", str(leak_cache_dir)) + monkeypatch.setenv("SYNTHBANSHEE_SCRIPT_CACHE_DIR", str(leak_scripts_dir)) + + result = _invoke_generate_without_dir_flags(tmp_path) + assert result.exit_code == 0, result.output + + leaked_wavs = list(leak_data_dir.rglob("*.wav")) + assert leaked_wavs, ( + f"Without the strip fixture, SYNTHBANSHEE_DATA_DIR={leak_data_dir} " + f"should have steered the generated WAV under it. If this test " + "starts failing, Click's envvar semantics have changed and the " + "strip fixture in tests/unit/conftest.py may no longer be needed." + ) + + +class TestSynthbansheeEnvVarFixtureStripsParentShellEnv: + """Proves the autouse strip fixture defeats parent-shell env vars. - def test_env_vars_are_unset_in_test_process(self): - """Each ``SYNTHBANSHEE_*`` dir env var is missing during the test.""" + A class-scoped autouse fixture sets the env vars at class setup — + which runs BEFORE the function-scoped autouse strip from + ``tests/unit/conftest.py``. This simulates the realistic scenario + where ``.envrc`` (or a CI env block) has set the vars before pytest + started. The test then asserts the strip fixture has cleared them. + Without this two-fixture choreography, asserting + ``os.environ.get(...) is None`` is trivially true on any clean CI + box and does not exercise the contract. + """ + + @pytest.fixture(autouse=True, scope="class") + def _simulate_parent_shell_env(self): + """Set env vars at class setup, before function-scoped strip runs.""" for name in ( "SYNTHBANSHEE_DATA_DIR", "SYNTHBANSHEE_CACHE_DIR", "SYNTHBANSHEE_SCRIPT_CACHE_DIR", ): - assert os.environ.get(name) is None, ( - f"{name} leaked into the test process; the conftest fixture " - "is not stripping it as expected." - ) - - def test_clirunner_generate_does_not_leak_outside_cli_dirs(self, tmp_path): - """`synthbanshee generate` does not write outside its CLI-specified dirs. - - Models the #107 fingerprint: a "would-be leak" directory is set - up alongside the real ``tmp_path`` outputs; after running the - full mocked pipeline, the leak directory must remain empty. With - the autouse fixture in place, ``SYNTHBANSHEE_DATA_DIR`` cannot - steer files there. - """ - leak_target = tmp_path / "would_be_leak" - leak_target.mkdir() - - turns = _make_dialogue_turns(n=1) - mixed = _make_mixed_scene(n_turns=1) + os.environ[name] = f"/should-be-stripped/{name.lower()}" + yield + for name in ( + "SYNTHBANSHEE_DATA_DIR", + "SYNTHBANSHEE_CACHE_DIR", + "SYNTHBANSHEE_SCRIPT_CACHE_DIR", + ): + os.environ.pop(name, None) - runner = CliRunner() - with ( - patch("synthbanshee.script.generator.ScriptGenerator") as MockGen, - patch("synthbanshee.tts.renderer.TTSRenderer") as MockRenderer, + def test_function_scoped_strip_clears_class_scoped_env(self): + """Function-scoped strip runs AFTER class-scoped set → env is None.""" + for name in ( + "SYNTHBANSHEE_DATA_DIR", + "SYNTHBANSHEE_CACHE_DIR", + "SYNTHBANSHEE_SCRIPT_CACHE_DIR", ): - MockGen.return_value.generate.return_value = turns - MockRenderer.return_value.render_scene.return_value = mixed - result = runner.invoke( - cli, - [ - "generate", - "--config", - str(SCENES_DIR / "test_scene_001.yaml"), - "--output-dir", - str(tmp_path / "out"), - "--cache-dir", - str(tmp_path / "cache"), - "--dirty-dir", - str(tmp_path / "dirty"), - "--script-cache-dir", - str(tmp_path / "scripts"), - ], + assert os.environ.get(name) is None, ( + f"{name} was set by the class-scoped fixture and should have " + "been stripped by the function-scoped autouse strip in " + "tests/unit/conftest.py." ) - assert result.exit_code == 0, result.output - leaked = list(leak_target.rglob("*")) - assert leaked == [], f"Generation leaked files into {leak_target}: {leaked}" + def test_clirunner_does_not_leak_when_strip_active(self, tmp_path): + """End-to-end: env vars set by parent shell → no leak via CLI.""" + leak_data_dir = Path("/should-be-stripped/synthbanshee_data_dir") + + result = _invoke_generate_without_dir_flags(tmp_path) + # With env stripped, output-dir falls back to scene.output_dir + # (set in test_scene_001.yaml to a relative path under cwd — + # which Click leaves as-is). We only assert nothing landed in + # the bogus leak path, since that is the #107 fingerprint. + assert result.exit_code in (0, 1), result.output + assert not leak_data_dir.exists() or not any(leak_data_dir.rglob("*.wav")), ( + f"Strip fixture failed: WAV files appeared at {leak_data_dir}" + ) # ---------------------------------------------------------------------------