From b27a117a5d367fdc284bd6a14d154364c9c82434 Mon Sep 17 00:00:00 2001 From: Tomas Srnka Date: Sat, 30 May 2026 17:12:00 +0200 Subject: [PATCH 1/2] perf(iac): tune NFS caching and RPC parallelism for persistent volumes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes targeting NFS-volume read latency: 1. Relax the persistent-volume mount cache options from `noac, lookupcache=none` to `actimeo=1, lookupcache=positive`. The current options force every stat() and lookup to round-trip to Filestore, which dominates metadata-heavy workloads. `actimeo=1` bounds cross-host attribute staleness to ~1s — below the VM-side NFS client's own cache floor — while eliminating the bulk of redundant GETATTR RPCs. `lookupcache=positive` keeps negative lookups uncached so new files created by peer sandboxes still appear promptly. Cross-sandbox strict coherency, when actually needed, should use NLM locks (already enabled via `lock,local_lock=none`) or out-of-band signaling. 2. Raise `sunrpc.tcp_slot_table_entries` to 128 (default 2), and set `sunrpc.tcp_max_slot_table_entries` to 128 alongside it, via modprobe.d on client nodes; when the sunrpc module is already loaded at boot, the same two values are also applied with a runtime sysctl. With `nconnect=7` the host can open 7 TCP connections to Filestore, but the default 2 in-flight RPCs per connection caps total in-flight RPCs at 14 — far too low for many concurrent sandboxes generating uncached metadata RPCs. `cmd/simulate-nfs-traffic` already validated this experimentally; this wires it into the production startup script. --- iac/provider-gcp/main.tf | 9 +++++++-- .../nomad-cluster/scripts/start-client.sh | 13 +++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/iac/provider-gcp/main.tf b/iac/provider-gcp/main.tf index 747fa95db3..36641e08f3 100644 --- a/iac/provider-gcp/main.tf +++ b/iac/provider-gcp/main.tf @@ -81,8 +81,13 @@ locals { "local_lock=none", // all locks are network locks // caching - "noac", // disable attribute caching. 
slower, but more reliable - "lookupcache=none", // disable lookup caching + // Bound cross-host staleness to ~1s while avoiding a Filestore RPC on every + // metadata op. Sandboxes mounting the same volume on different hosts still see + // peer writes within the attribute timer; strict coherency for shared mutable + // state should use NLM locks (already enabled via lock,local_lock=none) or an + // out-of-band signal. + "actimeo=1", // cap all attribute caches at 1 second + "lookupcache=positive", // cache hits; negative lookups still hit the server so new files appear promptly // security "noacl", // do not use an acl diff --git a/iac/provider-gcp/nomad-cluster/scripts/start-client.sh b/iac/provider-gcp/nomad-cluster/scripts/start-client.sh index e4d6f68d10..149e798e61 100755 --- a/iac/provider-gcp/nomad-cluster/scripts/start-client.sh +++ b/iac/provider-gcp/nomad-cluster/scripts/start-client.sh @@ -83,6 +83,19 @@ echo "$SWAPFILE none swap sw 0 0" | tee -a /etc/fstab sysctl vm.swappiness=10 sysctl vm.vfs_cache_pressure=50 +# Increase RPC slot table so nconnect can actually parallelize. Default of 2 in-flight +# RPCs per TCP connection bottlenecks metadata-heavy NFS workloads (uncached lookups, +# many concurrent sandboxes). Modprobe options apply when sunrpc loads (first NFS mount); +# the runtime sysctl handles the case where the module is already loaded. 
+mkdir -p /etc/modprobe.d +cat <<'EOF' >/etc/modprobe.d/sunrpc.conf +options sunrpc tcp_slot_table_entries=128 tcp_max_slot_table_entries=128 +EOF +if [ -d /proc/sys/sunrpc ]; then + sysctl -w sunrpc.tcp_slot_table_entries=128 || true + sysctl -w sunrpc.tcp_max_slot_table_entries=128 || true +fi + # TODO: Optimize the mount more according to https://cloud.google.com/filestore/docs/mounting-fileshares %{ if USE_FILESTORE_CACHE } # Configure NFS read ahead From 9f90ac9ec996dbeffbc8130f4e95856750a89879 Mon Sep 17 00:00:00 2001 From: Tomas Srnka Date: Sat, 30 May 2026 17:12:58 +0200 Subject: [PATCH 2/2] fixup: drop `|| true` from sunrpc sysctls The [ -d /proc/sys/sunrpc ] guard already handles the "module not loaded" case; `tcp_slot_table_entries` and `tcp_max_slot_table_entries` have been stable kernel sysctls for over a decade. A write failure past the guard is a real misconfiguration we want to surface at boot, not silently swallow. --- iac/provider-gcp/nomad-cluster/scripts/start-client.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iac/provider-gcp/nomad-cluster/scripts/start-client.sh b/iac/provider-gcp/nomad-cluster/scripts/start-client.sh index 149e798e61..a1a5712051 100755 --- a/iac/provider-gcp/nomad-cluster/scripts/start-client.sh +++ b/iac/provider-gcp/nomad-cluster/scripts/start-client.sh @@ -92,8 +92,8 @@ cat <<'EOF' >/etc/modprobe.d/sunrpc.conf options sunrpc tcp_slot_table_entries=128 tcp_max_slot_table_entries=128 EOF if [ -d /proc/sys/sunrpc ]; then - sysctl -w sunrpc.tcp_slot_table_entries=128 || true - sysctl -w sunrpc.tcp_max_slot_table_entries=128 || true + sysctl -w sunrpc.tcp_slot_table_entries=128 + sysctl -w sunrpc.tcp_max_slot_table_entries=128 fi # TODO: Optimize the mount more according to https://cloud.google.com/filestore/docs/mounting-fileshares