diff --git a/pgbackrest-archive-push-wrapper.sh b/pgbackrest-archive-push-wrapper.sh index 10eb835..9c08932 100755 --- a/pgbackrest-archive-push-wrapper.sh +++ b/pgbackrest-archive-push-wrapper.sh @@ -4,20 +4,22 @@ # Wraps `pgbackrest archive-push` so that any kind of archive failure (hard # repo error, stuck async worker, anything else) cannot fill pg_wal/ and halt # Postgres. When pgbackrest fails AND pg_wal/ has grown past a threshold -# (default 500 MiB, override via WAL_DROP_THRESHOLD_MB), the wrapper returns -# success to Postgres anyway. Postgres recycles the WAL segment as if -# archiving were disabled. The PITR window gets a coverage gap from this -# segment forward; below the threshold pg_stat_archiver.failed_count climbs -# normally and the dashboard surfaces "PITR broken — fix archiving config", -# so the underlying issue (bad creds, deleted bucket, expired keys, …) gets -# fixed before the threshold trips and the failure signal disappears. +# (WAL_DROP_THRESHOLD_MB; sized by wrapper.sh's compute_volume_thresholds to +# min(500 MiB, ~10% of volume) with operator override via this env var), the +# wrapper returns success to Postgres anyway. Postgres recycles the WAL +# segment as if archiving were disabled. The PITR window gets a coverage +# gap from this segment forward; below the threshold +# pg_stat_archiver.failed_count climbs normally and the dashboard surfaces +# "PITR broken — fix archiving config", so the underlying issue (bad creds, +# deleted bucket, expired keys, …) gets fixed before the threshold trips +# and the failure signal disappears. # # The env var name avoids the PGBACKREST_* prefix on purpose: pgBackRest # treats every PGBACKREST_* variable as a config option and warns about # unknown names on every invocation. WAL_DROP_THRESHOLD_MB sits outside # that namespace so it doesn't pollute logs. # -# Why 500 MiB here, vs pgBackRest's archive-push-queue-max=5GiB: +# Why ≤500 MiB here, vs pgBackRest's archive-push-queue-max ≤5GiB: # the two thresholds gate orthogonal failure regimes. archive-push-queue-max # governs the SPOOL — graceful absorption of transient S3 stalls, where the # async worker keeps retrying and most segments eventually get pushed. A @@ -28,6 +30,9 @@ # Holding 5 GiB of pg_wal hostage waiting for a fix that requires a config # change wastes data-volume disk; 500 MiB is enough to ride out a multi- # minute config-redeploy window without eating into customer disk budgets. +# Both ceilings scale down proportionally on small volumes (1 GiB Hobby ⇒ +# ~100 MiB pg_wal / ~512 MiB spool) so a tiny volume isn't dominated by +# archive buffers; on ≥25 GiB volumes both caps hold. # # Below the threshold the wrapper surfaces pgbackrest's failure to Postgres # normally, so transient errors retry on the next archive_timeout instead diff --git a/wrapper.sh b/wrapper.sh index 353b360..53ef20c 100644 --- a/wrapper.sh +++ b/wrapper.sh @@ -131,16 +131,18 @@ fi # async mode: archive_command writes WAL into the local spool dir and # returns in milliseconds; a background worker pushes from there to S3. # Two orthogonal thresholds gate the "WAL is accumulating, do something": -# - archive-push-queue-max=5GiB (set in /etc/pgbackrest/pgbackrest.conf -# below) governs the SPOOL. Trips on transient S3 stalls; pgBackRest -# drops segments from spool and reports success to archive_command. -# Generous buffer to absorb multi-hour outages cleanly. -# - pgbackrest-archive-push-wrapper.sh's WAL_DROP_THRESHOLD_MB -# (default 500 MiB) governs pg_wal/. Trips on HARD failures (bad creds, -# deleted bucket, expired keys) where pgbackrest's foreground returns -# non-zero and retrying without operator intervention has zero chance -# of success. Smaller cap because we shouldn't hold 5 GiB of pg_wal -# hostage waiting for a config fix. +# - archive-push-queue-max (set in /etc/pgbackrest/pgbackrest.conf) governs +# the SPOOL. Trips on transient S3 stalls; pgBackRest drops segments +# from spool and reports success to archive_command. Generous buffer +# to absorb multi-hour outages cleanly. Default 5 GiB on volumes ≥10 GiB; +# scales down to ~50% of volume below that (see compute_volume_thresholds). +# - pgbackrest-archive-push-wrapper.sh's WAL_DROP_THRESHOLD_MB governs +# pg_wal/. Trips on HARD failures (bad creds, deleted bucket, expired +# keys) where pgbackrest's foreground returns non-zero and retrying +# without operator intervention has zero chance of success. Smaller +# cap because we shouldn't hold 5 GiB of pg_wal hostage waiting for a +# config fix. Default 500 MiB on volumes ≥5 GiB; scales down to ~10% +# of volume below that, floor 64 MiB. # Either way, PITR window truncates; DB stays up. # ----------------------------------------------------------------------------- @@ -279,6 +281,54 @@ detect_cpus() { nproc 2>/dev/null || echo 1 } +# Read total filesystem bytes of the data volume. Uses `df -Pk` (POSIX) and +# returns 1024-byte blocks. Echoes 0 on any failure so callers can fall back +# to the absolute defaults. +detect_volume_total_kib() { + local vol_path="${RAILWAY_VOLUME_MOUNT_PATH:-$EXPECTED_VOLUME_MOUNT_PATH}" + [ ! -d "$vol_path" ] && { echo 0; return; } + df -Pk "$vol_path" 2>/dev/null | awk 'NR==2 { print $2 }' | grep -E '^[0-9]+$' || echo 0 +} + +# Compute volume-proportional WAL/spool thresholds and write them to globals +# COMPUTED_WAL_DROP_MB + COMPUTED_QUEUE_MAX_MIB. Both scale DOWN from the +# absolute defaults (500 MiB pg_wal drop / 5 GiB spool queue-max) on smaller +# volumes — never up. Hobby's 1 GiB volume can't carry 5 GiB of spool, and +# 500 MiB of pg_wal is half the disk; on a 25+ GiB volume the absolutes hold. +# +# Ratios: wal-drop ~ 10% of volume (hard-failure pg_wal hostage), queue-max +# ~ 50% of volume (transient-stall spool absorption). The 10× spread between +# the two budgets is preserved across all volume sizes — hard failures still +# bail fast, transient stalls still absorb generously. +# +# Floor: 64 MiB on wal-drop (~4 WAL segments — enough for one short stall), +# 128 MiB on queue-max (~8 segments). Below these, archiving is effectively +# disabled and the dashboard surfaces it. +compute_volume_thresholds() { + local total_kib total_mib wal_drop queue_max cap_queue + total_kib=$(detect_volume_total_kib) + if [ "$total_kib" -lt 1 ]; then + COMPUTED_WAL_DROP_MB=500 + COMPUTED_QUEUE_MAX_MIB=5120 + echo "pgbackrest: volume size unknown; using absolute thresholds wal-drop=500 MiB queue-max=5 GiB" + return + fi + total_mib=$(( total_kib / 1024 )) + + wal_drop=$(( total_mib / 10 )) + [ "$wal_drop" -gt 500 ] && wal_drop=500 + [ "$wal_drop" -lt 64 ] && wal_drop=64 + COMPUTED_WAL_DROP_MB=$wal_drop + + cap_queue=$(( 5 * 1024 )) + queue_max=$(( total_mib / 2 )) + [ "$queue_max" -gt "$cap_queue" ] && queue_max=$cap_queue + [ "$queue_max" -lt 128 ] && queue_max=128 + COMPUTED_QUEUE_MAX_MIB=$queue_max + + echo "pgbackrest: volume ${total_mib} MiB; sized wal-drop=${wal_drop} MiB queue-max=${queue_max} MiB" +} + # Compute per-command process-max with a clamp(value, min, max) shape. # Sized off detected CPUs because: # archive-push: serial 16 MiB segment arrival + per-PUT S3 overhead. @@ -363,7 +413,7 @@ repo1-path=${WAL_ARCHIVE_PATH:-/pgbackrest} log-level-console=info log-level-file=off archive-async=y -archive-push-queue-max=5GiB +archive-push-queue-max=${COMPUTED_QUEUE_MAX_MIB:-5120}MiB archive-get-queue-max=1GiB spool-path=${PGBACKREST_SPOOL_DIR} compress-type=zst @@ -744,6 +794,15 @@ fork_pgbackrest_backup_watcher() { gosu postgres /usr/local/bin/pgbackrest-backup-watcher.sh & } +compute_volume_thresholds +# Inject the computed pg_wal drop threshold into the env that postgres (and +# its archive_command wrapper) inherits, unless the operator pinned it. The +# wrapper script reads WAL_DROP_THRESHOLD_MB at archive-push time, so this +# export must precede docker-entrypoint.sh. +if [ -z "${WAL_DROP_THRESHOLD_MB:-}" ]; then + export WAL_DROP_THRESHOLD_MB="$COMPUTED_WAL_DROP_MB" +fi + render_pgbackrest_conf restore_from_pgbackrest_if_empty_volume clear_pgbackrest_state_if_disabled