diff --git a/CLAUDE.md b/CLAUDE.md index e7d1d27..e22e355 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -201,8 +201,11 @@ psi cache init --backend {tpm,hsm} Provision cache encryption key psi cache status [--verify] Show cache status (fast) or decrypt and count (slow) psi cache refresh Re-run setup to repopulate the cache psi cache invalidate Drop an entry and persist -# A psi-{provider}-setup.timer is auto-generated on `psi systemd install` -# when cache.backend is set; cadence via cache.refresh_interval (default 1h). +# A psi-{provider}-refresh.timer + wrapper service pair is auto-generated on +# `psi systemd install` when cache.backend is set; cadence via +# cache.refresh_interval (default 1h). The wrapper calls `systemctl restart` +# on the setup unit because its RemainAfterExit=yes freezes +# ActiveEnterTimestamp, which would otherwise break the timer's re-arming. # Infisical provider psi infisical login Test authentication diff --git a/README.md b/README.md index 40939b9..9bd0700 100644 --- a/README.md +++ b/README.md @@ -187,8 +187,9 @@ The HSM backend reuses the existing Nitrokey hybrid envelope (RSA-OAEP + AES-256 unwrapping the AES key via PKCS#11 at `psi serve` startup. With the cache enabled, `psi systemd install` also generates a periodic refresh timer -(`psi-infisical-setup.timer`) that runs the setup unit on `refresh_interval`, so a -secret rotated upstream makes its way into PSI without manual intervention. +(`psi-infisical-refresh.timer`) plus a small wrapper service that restarts the setup +unit on `refresh_interval`, so a secret rotated upstream makes its way into PSI without +manual intervention. ```bash # One-time provisioning (host) @@ -602,7 +603,7 @@ the exact invocation. Generates per-provider setup units based on configured providers: - `psi-secrets.container` — long-running lookup service - `psi-{provider}-setup.container` — oneshot per provider (e.g. `psi-infisical-setup`, `psi-nitrokeyhsm-setup`) -- `psi-infisical-setup.timer` — periodic cache refresh (only when the secret cache is enabled) +- `psi-infisical-refresh.service` + `psi-infisical-refresh.timer` — periodic cache refresh wrapper (only when the secret cache is enabled) - `psi-tls-renew.timer` + service — daily TLS renewal (if configured) When the [secret cache](docs/secret-cache.md) is configured, the generator automatically diff --git a/docs/secret-cache.md b/docs/secret-cache.md index c90cae8..79d1dc1 100644 --- a/docs/secret-cache.md +++ b/docs/secret-cache.md @@ -378,11 +378,22 @@ on cache miss at lookup time. ### Scheduled refresh (timer) -`psi systemd install` generates a `psi-{provider}-setup.timer` next to -each refreshable provider's setup service — today that means -`psi-infisical-setup.timer`. The timer triggers the same setup unit that -ran at boot, which re-fetches every configured secret value and -atomically replaces `cache.enc`. Tune the cadence in config: +`psi systemd install` generates two extra units per refreshable provider — +today that means `infisical`: + +- `psi-infisical-refresh.service` — a native oneshot whose only job is + `ExecStart=/usr/bin/systemctl restart psi-infisical-setup.service`. This + wrapper exists because the setup unit uses `RemainAfterExit=yes`, which + freezes `ActiveEnterTimestamp` after the first run. A timer anchored + directly to the setup unit via `OnUnitActiveSec` would only fire once. + The wrapper is a normal oneshot so its own `ActiveEnterTimestamp` moves + forward every cycle, and `systemctl restart` makes the setup unit + re-run even when it is currently in `active (exited)` state. +- `psi-infisical-refresh.timer` — triggers the wrapper on the configured + cadence. Each wrapper run re-fetches every configured secret value and + atomically replaces `cache.enc`. + +Tune the cadence in config: ```yaml cache: @@ -405,10 +416,11 @@ The timer is only generated when `cache.enabled` is true and and no timer is written. The nitrokeyhsm provider does not get a timer — its secrets are local-only and do not need periodic re-fetching. -To override the interval without editing config, drop a systemd override: +To override the interval without editing config, drop a systemd override +on the timer: ```bash -sudo systemctl edit psi-infisical-setup.timer +sudo systemctl edit psi-infisical-refresh.timer ``` ```ini diff --git a/psi/installer.py b/psi/installer.py index 7a74504..548ab56 100644 --- a/psi/installer.py +++ b/psi/installer.py @@ -20,7 +20,8 @@ generate_native_provider_setup_service, generate_native_serve_service, generate_native_tls_renew_service, - generate_provider_setup_timer, + generate_provider_refresh_service, + generate_provider_refresh_timer, generate_tls_renew_timer, provider_supports_refresh, ) @@ -206,7 +207,17 @@ def _write_provider_setup_units_container( def _write_refresh_timers(settings: PsiSettings, unit_dir: Path) -> list[str]: - """Write periodic cache-refresh timers for providers that support them. + """Write periodic cache-refresh wrapper services and timers. + + For each provider that supports periodic refresh, writes two units: + + - ``psi-{provider}-refresh.service`` — a native oneshot that calls + ``systemctl restart psi-{provider}-setup.service``. This exists because + the setup unit uses ``RemainAfterExit=yes`` so ``ActiveEnterTimestamp`` + stops updating after the first run, which in turn would prevent + ``OnUnitActiveSec`` on the timer from re-arming. + - ``psi-{provider}-refresh.timer`` — fires on the configured cadence and + triggers the wrapper service. Returns the list of timer unit names written. Emits nothing and returns an empty list when the cache is disabled or no backend is configured — @@ -219,10 +230,14 @@ def _write_refresh_timers(settings: PsiSettings, unit_dir: Path) -> list[str]: for provider_name in settings.providers: if not provider_supports_refresh(provider_name): continue - timer_name = f"psi-{provider_name}-setup.timer" + _write_unit( + unit_dir / f"psi-{provider_name}-refresh.service", + generate_provider_refresh_service(provider_name), + ) + timer_name = f"psi-{provider_name}-refresh.timer" _write_unit( unit_dir / timer_name, - generate_provider_setup_timer( + generate_provider_refresh_timer( provider_name, settings.cache.refresh_interval, settings.cache.refresh_randomized_delay, diff --git a/psi/unitgen.py b/psi/unitgen.py index 7c410b2..5ebc496 100644 --- a/psi/unitgen.py +++ b/psi/unitgen.py @@ -148,22 +148,49 @@ def provider_supports_refresh(provider: str) -> bool: return provider in _REFRESHABLE_PROVIDERS -def generate_provider_setup_timer( +def generate_provider_refresh_service(provider: str) -> str: + """Generate psi-{provider}-refresh.service — the wrapper unit the timer fires. + + The wrapper exists because ``psi-{provider}-setup.service`` uses + ``Type=oneshot`` with ``RemainAfterExit=yes`` so other units can depend on + "setup has successfully run" without re-triggering it on every restart. The + side effect is that ``ActiveEnterTimestamp`` on the setup unit only updates + on the first run, and a timer's ``OnUnitActiveSec`` anchored to that + timestamp will only fire once. + + This wrapper is a plain oneshot with no ``RemainAfterExit``, so its + ``ActiveEnterTimestamp`` updates every run. The timer uses + ``OnUnitActiveSec`` against the wrapper and re-arms correctly. Each run + calls ``systemctl restart`` on the setup unit, which DOES re-run the + ExecStart even when it was ``active (exited)``. + """ + return ( + "[Unit]\n" + f"Description=PSI {provider} secret cache refresh\n" + f"After=psi-{provider}-setup.service\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + f"ExecStart=/usr/bin/systemctl restart psi-{provider}-setup.service\n" + ) + + +def generate_provider_refresh_timer( provider: str, interval: str, randomized_delay: str, ) -> str: - """Generate psi-{provider}-setup.timer for periodic secret cache refresh. + """Generate psi-{provider}-refresh.timer for periodic secret cache refresh. - The timer triggers the matching ``psi-{provider}-setup.service`` unit on - a relative interval (``OnUnitActiveSec``) so the cache picks up secrets - rotated upstream between reboots. ``Persistent=true`` ensures a missed - refresh runs on the next boot rather than waiting a full interval. + Triggers :func:`generate_provider_refresh_service` on a relative interval + (``OnUnitActiveSec``) so the cache picks up secrets rotated upstream + between reboots. ``Persistent=true`` catches up missed refreshes after + downtime. Args: provider: Provider name (currently only ``infisical`` is supported). - interval: systemd time string for ``OnUnitActiveSec`` (e.g. ``"1h"``, - ``"30m"``, ``"2h"``). + interval: systemd time string for ``OnUnitActiveSec`` and + ``OnBootSec`` (e.g. ``"1h"``, ``"30m"``, ``"2h"``). randomized_delay: systemd time string for ``RandomizedDelaySec`` to spread refresh events across a fleet. """ @@ -172,7 +199,7 @@ def generate_provider_setup_timer( f"Description=Periodic PSI {provider} secret cache refresh\n" "\n" "[Timer]\n" - f"Unit=psi-{provider}-setup.service\n" + f"Unit=psi-{provider}-refresh.service\n" f"OnBootSec={interval}\n" f"OnUnitActiveSec={interval}\n" f"RandomizedDelaySec={randomized_delay}\n" diff --git a/tests/test_installer.py b/tests/test_installer.py index e515078..164eb33 100644 --- a/tests/test_installer.py +++ b/tests/test_installer.py @@ -187,7 +187,7 @@ def test_container_infisical_content(self, tmp_path: Path) -> None: class TestWriteRefreshTimers: - def test_infisical_timer_written_when_cache_enabled_with_backend( + def test_wrapper_and_timer_written_when_cache_enabled_with_backend( self, tmp_path: Path, ) -> None: @@ -199,13 +199,23 @@ def test_infisical_timer_written_when_cache_enabled_with_backend( unit_dir = tmp_path / "units" unit_dir.mkdir() timers = _write_refresh_timers(settings, unit_dir) - assert timers == ["psi-infisical-setup.timer"] - assert (unit_dir / "psi-infisical-setup.timer").exists() - content = (unit_dir / "psi-infisical-setup.timer").read_text() - assert "Unit=psi-infisical-setup.service" in content - assert "OnUnitActiveSec=1h" in content + assert timers == ["psi-infisical-refresh.timer"] - def test_no_timer_when_cache_disabled(self, tmp_path: Path) -> None: + wrapper = unit_dir / "psi-infisical-refresh.service" + timer = unit_dir / "psi-infisical-refresh.timer" + assert wrapper.exists() + assert timer.exists() + + wrapper_content = wrapper.read_text() + assert "Type=oneshot" in wrapper_content + assert "RemainAfterExit=yes" not in wrapper_content + assert "systemctl restart psi-infisical-setup.service" in wrapper_content + + timer_content = timer.read_text() + assert "Unit=psi-infisical-refresh.service" in timer_content + assert "OnUnitActiveSec=1h" in timer_content + + def test_no_units_when_cache_disabled(self, tmp_path: Path) -> None: settings = _mock_settings( tmp_path, providers={"infisical": {}}, @@ -216,9 +226,10 @@ def test_no_timer_when_cache_disabled(self, tmp_path: Path) -> None: unit_dir.mkdir() timers = _write_refresh_timers(settings, unit_dir) assert timers == [] - assert not (unit_dir / "psi-infisical-setup.timer").exists() + assert not (unit_dir / "psi-infisical-refresh.service").exists() + assert not (unit_dir / "psi-infisical-refresh.timer").exists() - def test_no_timer_when_no_backend(self, tmp_path: Path) -> None: + def test_no_units_when_no_backend(self, tmp_path: Path) -> None: settings = _mock_settings( tmp_path, providers={"infisical": {}}, @@ -228,8 +239,9 @@ def test_no_timer_when_no_backend(self, tmp_path: Path) -> None: unit_dir.mkdir() timers = _write_refresh_timers(settings, unit_dir) assert timers == [] + assert not (unit_dir / "psi-infisical-refresh.service").exists() - def test_no_timer_for_nitrokeyhsm_provider(self, tmp_path: Path) -> None: + def test_no_units_for_nitrokeyhsm_provider(self, tmp_path: Path) -> None: """HSM is local-only — nothing to periodically re-fetch.""" settings = _mock_settings( tmp_path, @@ -240,7 +252,7 @@ def test_no_timer_for_nitrokeyhsm_provider(self, tmp_path: Path) -> None: unit_dir.mkdir() timers = _write_refresh_timers(settings, unit_dir) assert timers == [] - assert not (unit_dir / "psi-nitrokeyhsm-setup.timer").exists() + assert not (unit_dir / "psi-nitrokeyhsm-refresh.service").exists() def test_custom_interval_is_honored(self, tmp_path: Path) -> None: settings = _mock_settings( @@ -253,6 +265,6 @@ def test_custom_interval_is_honored(self, tmp_path: Path) -> None: unit_dir = tmp_path / "units" unit_dir.mkdir() _write_refresh_timers(settings, unit_dir) - content = (unit_dir / "psi-infisical-setup.timer").read_text() + content = (unit_dir / "psi-infisical-refresh.timer").read_text() assert "OnUnitActiveSec=15m" in content assert "RandomizedDelaySec=1m" in content diff --git a/tests/test_unitgen.py b/tests/test_unitgen.py index c6b0c55..a526ef4 100644 --- a/tests/test_unitgen.py +++ b/tests/test_unitgen.py @@ -16,7 +16,8 @@ generate_native_provider_setup_service, generate_native_serve_service, generate_native_tls_renew_service, - generate_provider_setup_timer, + generate_provider_refresh_service, + generate_provider_refresh_timer, generate_tls_renew_timer, provider_supports_refresh, ) @@ -455,27 +456,54 @@ def test_unknown_provider_does_not_support_refresh(self) -> None: assert provider_supports_refresh("random-name") is False -class TestProviderSetupTimer: - def test_targets_matching_setup_unit(self) -> None: - content = generate_provider_setup_timer("infisical", "1h", "5m") - assert "Unit=psi-infisical-setup.service" in content +class TestProviderRefreshService: + def test_is_oneshot_without_remain_after_exit(self) -> None: + """RemainAfterExit would break OnUnitActiveSec re-arming on the timer. + + The wrapper MUST go inactive after each run so the timer's + OnUnitActiveSec has a moving ActiveEnterTimestamp to anchor to. + """ + content = generate_provider_refresh_service("infisical") + assert "Type=oneshot" in content + assert "RemainAfterExit=yes" not in content + + def test_execs_systemctl_restart_on_the_setup_unit(self) -> None: + """The wrapper restarts the setup unit so it re-runs even when it is + currently in active (exited) state from the previous run. + """ + content = generate_provider_refresh_service("infisical") + assert "ExecStart=/usr/bin/systemctl restart psi-infisical-setup.service" in content + + def test_orders_after_setup_unit(self) -> None: + content = generate_provider_refresh_service("infisical") + assert "After=psi-infisical-setup.service" in content + + +class TestProviderRefreshTimer: + def test_targets_the_refresh_wrapper_not_the_setup_unit(self) -> None: + """Regression test for PR #20's broken timer — it pointed directly at + the setup unit whose ActiveEnterTimestamp never updated. + """ + content = generate_provider_refresh_timer("infisical", "1h", "5m") + assert "Unit=psi-infisical-refresh.service" in content + assert "Unit=psi-infisical-setup.service" not in content def test_interval_and_randomized_delay_are_passed_through(self) -> None: - content = generate_provider_setup_timer("infisical", "30m", "2m") + content = generate_provider_refresh_timer("infisical", "30m", "2m") assert "OnUnitActiveSec=30m" in content assert "OnBootSec=30m" in content assert "RandomizedDelaySec=2m" in content def test_is_persistent_so_missed_refreshes_run_on_next_boot(self) -> None: - content = generate_provider_setup_timer("infisical", "1h", "5m") + content = generate_provider_refresh_timer("infisical", "1h", "5m") assert "Persistent=true" in content def test_install_section_hooks_into_timers_target(self) -> None: - content = generate_provider_setup_timer("infisical", "1h", "5m") + content = generate_provider_refresh_timer("infisical", "1h", "5m") assert "[Install]" in content assert "WantedBy=timers.target" in content def test_description_mentions_cache_refresh(self) -> None: - content = generate_provider_setup_timer("infisical", "1h", "5m") + content = generate_provider_refresh_timer("infisical", "1h", "5m") assert "Description=" in content assert "cache refresh" in content.lower()