From a3a8b0767d2d0269ef72a8712cd331480d98b739 Mon Sep 17 00:00:00 2001 From: Joe Doss Date: Fri, 17 Apr 2026 15:45:43 -0500 Subject: [PATCH] Restart psi-secrets after refresh so serve reloads fresh cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every time psi-{provider}-refresh.timer fires, setup re-registers secrets via delete+create through the Podman API, which assigns fresh hex IDs. Setup writes those new IDs to the on-disk cache file and the prune step from PR #32 drops the old entries. But serve holds the OLD cache in memory from its last startup and never picks up the new file state — so every lookup after the first refresh goes straight to the provider, and the cache does no work until an operator manually restarts psi-secrets. Observed on the test server: 1554 secret lookups over 30 minutes, zero cache hits. All source=provider. The refresh timer had fired 7 minutes earlier and silently broke the cache. Add a second ExecStart to the refresh wrapper that runs systemctl try-restart psi-secrets.service after setup completes. try-restart is a no-op if serve is not currently active, so this is safe on hosts that have intentionally stopped psi-secrets. There is a brief (~30s on HSM) lookup-fails-to-cache window during the serve restart, but it recurs at most once per cache.refresh_interval (default 1h); without the restart, the cache does no work at all after the first refresh. --- psi/unitgen.py | 13 ++++++++++--- tests/test_unitgen.py | 8 ++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/psi/unitgen.py b/psi/unitgen.py index 5ebc496..56f0958 100644 --- a/psi/unitgen.py +++ b/psi/unitgen.py @@ -160,9 +160,15 @@ def generate_provider_refresh_service(provider: str) -> str: This wrapper is a plain oneshot with no ``RemainAfterExit``, so its ``ActiveEnterTimestamp`` updates every run. The timer uses - ``OnUnitActiveSec`` against the wrapper and re-arms correctly. 
Each run - calls ``systemctl restart`` on the setup unit, which DOES re-run the - ExecStart even when it was ``active (exited)``. + ``OnUnitActiveSec`` against the wrapper and re-arms correctly. Each run: + + 1. ``systemctl restart psi-{provider}-setup.service`` — re-runs setup, + which re-registers secrets with fresh hex IDs and writes the updated + cache file to disk. + 2. ``systemctl try-restart psi-secrets.service`` — restarts serve so it + reloads the fresh cache from disk. Without this, serve's in-memory + cache keeps the old hex IDs after each refresh and every subsequent + lookup misses the cache until the next operator-triggered restart. """ return ( "[Unit]\n" @@ -172,6 +178,7 @@ def generate_provider_refresh_service(provider: str) -> str: "[Service]\n" "Type=oneshot\n" f"ExecStart=/usr/bin/systemctl restart psi-{provider}-setup.service\n" + "ExecStart=/usr/bin/systemctl try-restart psi-secrets.service\n" ) diff --git a/tests/test_unitgen.py b/tests/test_unitgen.py index a526ef4..0017757 100644 --- a/tests/test_unitgen.py +++ b/tests/test_unitgen.py @@ -478,6 +478,14 @@ def test_orders_after_setup_unit(self) -> None: content = generate_provider_refresh_service("infisical") assert "After=psi-infisical-setup.service" in content + def test_restarts_psi_secrets_so_serve_reloads_the_fresh_cache(self) -> None: + """After setup writes a fresh cache with new hex IDs, psi-secrets must + restart to reload it — otherwise serve keeps the old IDs in memory and + every subsequent lookup misses the cache. + """ + content = generate_provider_refresh_service("infisical") + assert "ExecStart=/usr/bin/systemctl try-restart psi-secrets.service" in content + class TestProviderRefreshTimer: def test_targets_the_refresh_wrapper_not_the_setup_unit(self) -> None: