From e86687361f83db9683e060e867ea93e84f06de2e Mon Sep 17 00:00:00 2001 From: James Purcell Date: Tue, 5 May 2026 12:42:13 +0100 Subject: [PATCH 1/7] Add rebased Helm security hardening options --- .gitignore | 3 +- .../examples/google-autopilot-cel/values.yaml | 146 ++++++++++++++++++ braintrust/templates/api-deployment.yaml | 28 +++- .../brainstore-fastreader-deployment.yaml | 15 +- .../brainstore-reader-deployment.yaml | 15 +- .../brainstore-writer-deployment.yaml | 15 +- braintrust/values.yaml | 58 +++++++ 7 files changed, 273 insertions(+), 7 deletions(-) create mode 100644 braintrust/examples/google-autopilot-cel/values.yaml diff --git a/.gitignore b/.gitignore index 62d2ec7..626fc04 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ Chart.lock **/secrets.yml values-local.yaml values-local.yml +values-*.yaml +values-*.yml # Helm output and temporary files *.tmp @@ -31,4 +33,3 @@ test-output/ manifests/ rendered/ debug/ - diff --git a/braintrust/examples/google-autopilot-cel/values.yaml b/braintrust/examples/google-autopilot-cel/values.yaml new file mode 100644 index 0000000..37b579e --- /dev/null +++ b/braintrust/examples/google-autopilot-cel/values.yaml @@ -0,0 +1,146 @@ +# Sample values for GKE Autopilot deployment with CEL policy compliance + +global: + orgName: "" + namespace: "braintrust" + +cloud: "google" + +google: + mode: "autopilot" + autopilotMachineFamily: "c4" + +objectStorage: + google: + brainstoreBucket: "" + apiBucket: "" + +api: + name: "braintrust-api" + annotations: + service: + networking.gke.io/load-balancer-type: "Internal" + replicas: 4 + service: + type: LoadBalancer + port: 8000 + portName: http + serviceAccount: + name: "braintrust-api" + googleServiceAccount: "" + enableGcsAuth: false + resources: + requests: + cpu: "4" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + tmpVolume: + enabled: true + 
sizeLimit: "1Gi" + extraEnvVars: + - name: AWS_REGION + value: "us-central1" + +brainstore: + serviceAccount: + name: "brainstore" + googleServiceAccount: "" + locksBackend: "objectStorage" + + reader: + name: "brainstore-reader" + replicas: 2 + service: + name: "" + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "16" + memory: "32Gi" + limits: + cpu: "16" + memory: "32Gi" + cacheDir: "/mnt/tmp/brainstore" + objectStoreCacheMemoryLimit: "1Gi" + objectStoreCacheFileSize: "900Gi" + verbose: true + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volume: + size: "1000Gi" + sizeLimit: "900Gi" + extraEnvVars: + + fastreader: + name: "brainstore-fastreader" + replicas: 2 + service: + name: "" + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "16" + memory: "32Gi" + limits: + cpu: "16" + memory: "32Gi" + cacheDir: "/mnt/tmp/brainstore" + objectStoreCacheMemoryLimit: "1Gi" + objectStoreCacheFileSize: "900Gi" + verbose: true + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volume: + size: "1000Gi" + sizeLimit: "900Gi" + extraEnvVars: + + writer: + name: "brainstore-writer" + replicas: 1 + service: + name: "" + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "32" + memory: "64Gi" + limits: + cpu: "32" + memory: "64Gi" + cacheDir: "/mnt/tmp/brainstore" + objectStoreCacheMemoryLimit: "1Gi" + objectStoreCacheFileSize: "900Gi" + verbose: true + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volume: + size: "1000Gi" + sizeLimit: "900Gi" + extraEnvVars: diff --git a/braintrust/templates/api-deployment.yaml b/braintrust/templates/api-deployment.yaml index 87f346c..fbbde8b 100644 --- a/braintrust/templates/api-deployment.yaml +++ b/braintrust/templates/api-deployment.yaml @@ -44,6 +44,10 @@ 
spec: {{- end }} spec: serviceAccountName: {{ .Values.api.serviceAccount.name }} + {{- with .Values.api.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} {{- with .Values.api.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} @@ -60,6 +64,10 @@ spec: - name: api image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag }}" imagePullPolicy: {{ .Values.api.image.pullPolicy }} + {{- with .Values.api.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} ports: - containerPort: {{ .Values.api.service.port }} resources: @@ -122,17 +130,32 @@ spec: {{- if .Values.api.extraEnvVars }} {{- toYaml .Values.api.extraEnvVars | nindent 12 }} {{- end }} - {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} + {{- if or .Values.api.tmpVolume.enabled (and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver) }} volumeMounts: + {{- if .Values.api.tmpVolume.enabled }} + - name: tmp-volume + mountPath: /tmp + {{- end }} + {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline mountPath: "/mnt/secrets-store" readOnly: true + {{- end }} {{- end }} {{- with .Values.api.extraContainers }} {{- toYaml . 
| nindent 8 }} {{- end }} volumes: - {{- if or (and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver) .Values.api.extraVolumes }} + {{- if or .Values.api.tmpVolume.enabled (and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver) .Values.api.extraVolumes }} + {{- if .Values.api.tmpVolume.enabled }} + - name: tmp-volume + emptyDir: + {{- if .Values.api.tmpVolume.sizeLimit }} + sizeLimit: {{ .Values.api.tmpVolume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline csi: @@ -147,4 +170,3 @@ spec: {{- else }} [] {{- end }} - diff --git a/braintrust/templates/brainstore-fastreader-deployment.yaml b/braintrust/templates/brainstore-fastreader-deployment.yaml index ebb8a7d..5852e2e 100644 --- a/braintrust/templates/brainstore-fastreader-deployment.yaml +++ b/braintrust/templates/brainstore-fastreader-deployment.yaml @@ -44,6 +44,10 @@ spec: {{- end }} spec: serviceAccountName: {{ .Values.brainstore.serviceAccount.name }} + {{- with .Values.brainstore.fastreader.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} {{- if or .Values.brainstore.fastreader.nodeSelector (and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot")) }} nodeSelector: {{- with .Values.brainstore.fastreader.nodeSelector }} @@ -67,6 +71,10 @@ spec: - name: brainstore-fastreader image: "{{ .Values.brainstore.image.repository }}:{{ .Values.brainstore.image.tag }}" imagePullPolicy: {{ .Values.brainstore.image.pullPolicy }} + {{- with .Values.brainstore.fastreader.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} command: ["brainstore"] args: ["web"] ports: @@ -155,7 +163,12 @@ spec: requests: storage: {{ required "brainstore.fastreader.volume.size must be set" .Values.brainstore.fastreader.volume.size | quote }} {{- else }} - emptyDir: {} + emptyDir: + {{- if .Values.brainstore.fastreader.volume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.fastreader.volume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline diff --git a/braintrust/templates/brainstore-reader-deployment.yaml b/braintrust/templates/brainstore-reader-deployment.yaml index 8773906..7c8e1f6 100644 --- a/braintrust/templates/brainstore-reader-deployment.yaml +++ b/braintrust/templates/brainstore-reader-deployment.yaml @@ -44,6 +44,10 @@ spec: {{- end }} spec: serviceAccountName: {{ .Values.brainstore.serviceAccount.name }} + {{- with .Values.brainstore.reader.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} {{- if or .Values.brainstore.reader.nodeSelector (and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot")) }} nodeSelector: {{- with .Values.brainstore.reader.nodeSelector }} @@ -67,6 +71,10 @@ spec: - name: brainstore-reader image: "{{ .Values.brainstore.image.repository }}:{{ .Values.brainstore.image.tag }}" imagePullPolicy: {{ .Values.brainstore.image.pullPolicy }} + {{- with .Values.brainstore.reader.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} command: ["brainstore"] args: ["web"] ports: @@ -155,7 +163,12 @@ spec: requests: storage: {{ required "brainstore.reader.volume.size must be set" .Values.brainstore.reader.volume.size | quote }} {{- else }} - emptyDir: {} + emptyDir: + {{- if .Values.brainstore.reader.volume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.reader.volume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline diff --git a/braintrust/templates/brainstore-writer-deployment.yaml b/braintrust/templates/brainstore-writer-deployment.yaml index 79b5159..de21635 100644 --- a/braintrust/templates/brainstore-writer-deployment.yaml +++ b/braintrust/templates/brainstore-writer-deployment.yaml @@ -44,6 +44,10 @@ spec: {{- end }} spec: serviceAccountName: {{ .Values.brainstore.serviceAccount.name }} + {{- with .Values.brainstore.writer.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} {{- if or .Values.brainstore.writer.nodeSelector (and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot")) }} nodeSelector: {{- with .Values.brainstore.writer.nodeSelector }} @@ -67,6 +71,10 @@ spec: - name: brainstore-writer image: "{{ .Values.brainstore.image.repository }}:{{ .Values.brainstore.image.tag }}" imagePullPolicy: {{ .Values.brainstore.image.pullPolicy }} + {{- with .Values.brainstore.writer.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} command: ["brainstore"] args: ["web"] ports: @@ -155,7 +163,12 @@ spec: requests: storage: {{ required "brainstore.writer.volume.size must be set" .Values.brainstore.writer.volume.size | quote }} {{- else }} - emptyDir: {} + emptyDir: + {{- if .Values.brainstore.writer.volume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.writer.volume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline diff --git a/braintrust/values.yaml b/braintrust/values.yaml index b3a3739..a82f67a 100644 --- a/braintrust/values.yaml +++ b/braintrust/values.yaml @@ -116,6 +116,22 @@ api: limits: cpu: "4" memory: "8Gi" + # Optional: Pod-level security context + # podSecurityContext: + # runAsUser: 1000 + # runAsGroup: 1000 + # fsGroup: 1000 + # Optional: Container-level security context + # securityContext: + # readOnlyRootFilesystem: true + # allowPrivilegeEscalation: false + # capabilities: + # drop: + # - ALL + # Optional: Writable /tmp mount for readOnlyRootFilesystem setups + tmpVolume: + enabled: false + sizeLimit: "" # Allow running user generated code functions (e.g. scorers/tools) allowCodeFunctionExecution: true # Brainstore backfill configuration. These defaults are fine for most cases. 
@@ -223,6 +239,18 @@ brainstore: limits: cpu: "16" memory: "32Gi" + # Optional: Pod-level security context + # podSecurityContext: + # runAsUser: 1000 + # runAsGroup: 1000 + # fsGroup: 1000 + # Optional: Container-level security context + # securityContext: + # readOnlyRootFilesystem: true + # allowPrivilegeEscalation: false + # capabilities: + # drop: + # - ALL cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" objectStoreCacheFileSize: "1000Gi" @@ -232,6 +260,8 @@ brainstore: volume: # Storage size for ephemeral storage requests (used with GKE Autopilot local SSDs) size: "" + # Optional emptyDir size limit for CEL policy compliance + sizeLimit: "" extraEnvVars: [] nodeSelector: {} tolerations: [] @@ -262,6 +292,18 @@ brainstore: limits: cpu: "16" memory: "32Gi" + # Optional: Pod-level security context + # podSecurityContext: + # runAsUser: 1000 + # runAsGroup: 1000 + # fsGroup: 1000 + # Optional: Container-level security context + # securityContext: + # readOnlyRootFilesystem: true + # allowPrivilegeEscalation: false + # capabilities: + # drop: + # - ALL cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" objectStoreCacheFileSize: "1000Gi" @@ -271,6 +313,8 @@ brainstore: volume: # Storage size for ephemeral storage requests (used with GKE Autopilot local SSDs) size: "" + # Optional emptyDir size limit for CEL policy compliance + sizeLimit: "" extraEnvVars: [] nodeSelector: {} tolerations: [] @@ -301,6 +345,18 @@ brainstore: limits: cpu: "32" memory: "64Gi" + # Optional: Pod-level security context + # podSecurityContext: + # runAsUser: 1000 + # runAsGroup: 1000 + # fsGroup: 1000 + # Optional: Container-level security context + # securityContext: + # readOnlyRootFilesystem: true + # allowPrivilegeEscalation: false + # capabilities: + # drop: + # - ALL cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" objectStoreCacheFileSize: "1000Gi" @@ -311,6 +367,8 @@ brainstore: # Storage size for ephemeral storage requests # Used 
with GKE Autopilot local SSDs and Azure Container Storage CSI size: "" + # Optional emptyDir size limit for CEL policy compliance + sizeLimit: "" extraEnvVars: [] # Example: # - name: MY_ENV_VAR From c37813d48ac10cf489ed506b88633cd0d4f4447f Mon Sep 17 00:00:00 2001 From: James Purcell Date: Tue, 5 May 2026 14:25:32 +0100 Subject: [PATCH 2/7] update values.yaml --- braintrust/examples/google-autopilot-cel/values.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/braintrust/examples/google-autopilot-cel/values.yaml b/braintrust/examples/google-autopilot-cel/values.yaml index 37b579e..a380cbb 100644 --- a/braintrust/examples/google-autopilot-cel/values.yaml +++ b/braintrust/examples/google-autopilot-cel/values.yaml @@ -17,6 +17,10 @@ objectStorage: api: name: "braintrust-api" + # Uncomment the following section to use a different image or tag from the version in the Helm release + #image: + #repository: public.ecr.aws/braintrust/standalone-api + #tag: "" annotations: service: networking.gke.io/load-balancer-type: "Internal" @@ -53,6 +57,10 @@ brainstore: serviceAccount: name: "brainstore" googleServiceAccount: "" + # Uncomment the following section to use a different image or tag from the version in the Helm release + #image: + #repository: public.ecr.aws/braintrust/brainstore + #tag: "" locksBackend: "objectStorage" reader: From b736b89efb5e3e8f783c9afe68b4b31a5b009484 Mon Sep 17 00:00:00 2001 From: James Purcell Date: Tue, 5 May 2026 14:38:36 +0100 Subject: [PATCH 3/7] Remove commented out code from root values.yaml --- braintrust/values.yaml | 49 ------------------------------------------ 1 file changed, 49 deletions(-) diff --git a/braintrust/values.yaml b/braintrust/values.yaml index a82f67a..5ae0088 100644 --- a/braintrust/values.yaml +++ b/braintrust/values.yaml @@ -116,19 +116,6 @@ api: limits: cpu: "4" memory: "8Gi" - # Optional: Pod-level security context - # podSecurityContext: - # runAsUser: 1000 - # runAsGroup: 1000 - # fsGroup: 1000 - # 
Optional: Container-level security context - # securityContext: - # readOnlyRootFilesystem: true - # allowPrivilegeEscalation: false - # capabilities: - # drop: - # - ALL - # Optional: Writable /tmp mount for readOnlyRootFilesystem setups tmpVolume: enabled: false sizeLimit: "" @@ -239,18 +226,6 @@ brainstore: limits: cpu: "16" memory: "32Gi" - # Optional: Pod-level security context - # podSecurityContext: - # runAsUser: 1000 - # runAsGroup: 1000 - # fsGroup: 1000 - # Optional: Container-level security context - # securityContext: - # readOnlyRootFilesystem: true - # allowPrivilegeEscalation: false - # capabilities: - # drop: - # - ALL cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" objectStoreCacheFileSize: "1000Gi" @@ -292,18 +267,6 @@ brainstore: limits: cpu: "16" memory: "32Gi" - # Optional: Pod-level security context - # podSecurityContext: - # runAsUser: 1000 - # runAsGroup: 1000 - # fsGroup: 1000 - # Optional: Container-level security context - # securityContext: - # readOnlyRootFilesystem: true - # allowPrivilegeEscalation: false - # capabilities: - # drop: - # - ALL cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" objectStoreCacheFileSize: "1000Gi" @@ -345,18 +308,6 @@ brainstore: limits: cpu: "32" memory: "64Gi" - # Optional: Pod-level security context - # podSecurityContext: - # runAsUser: 1000 - # runAsGroup: 1000 - # fsGroup: 1000 - # Optional: Container-level security context - # securityContext: - # readOnlyRootFilesystem: true - # allowPrivilegeEscalation: false - # capabilities: - # drop: - # - ALL cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" objectStoreCacheFileSize: "1000Gi" From cd6a44ec59617b77e235f6759b8b94083e397e7f Mon Sep 17 00:00:00 2001 From: James Purcell Date: Wed, 6 May 2026 16:54:26 +0100 Subject: [PATCH 4/7] Add helm examples for EKS with and without Quarantine VPC enabled --- README.md | 2 +- braintrust/README.md | 6 + .../examples/aws-eks-quarantine/values.yaml | 109 
++++++++++++++++++ braintrust/examples/aws-eks/values.yaml | 90 +++++++++++++++ 4 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 braintrust/examples/aws-eks-quarantine/values.yaml create mode 100644 braintrust/examples/aws-eks/values.yaml diff --git a/README.md b/README.md index 5ec78fd..348ec1b 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,6 @@ helm upgrade --install \ ## Prerequisites -Before installing the Braintrust Helm chart, ensure you have run the appropriate braintrust terraform module [Google](https://github.com/braintrustdata/terraform-google-braintrust-data-plane) or [Azure](https://github.com/braintrustdata/terraform-azure-braintrust-data-plane) to deploy the base infrastructure. +Before installing the Braintrust Helm chart, ensure you have run the appropriate Braintrust Terraform module for [AWS](https://github.com/braintrustdata/terraform-aws-braintrust-data-plane), [Google](https://github.com/braintrustdata/terraform-google-braintrust-data-plane), or [Azure](https://github.com/braintrustdata/terraform-azure-braintrust-data-plane) to deploy the base infrastructure. See the [Braintrust Helm Chart](./braintrust/README.md) for more details. diff --git a/braintrust/README.md b/braintrust/README.md index a912b70..d413adf 100644 --- a/braintrust/README.md +++ b/braintrust/README.md @@ -192,3 +192,9 @@ This version also adds first-class `brainstoreWalFooterVersion` support and auto ## Example Values Files Example values files for different cloud providers and configurations are located in the `examples/` folder. + +- `examples/aws-eks/values.yaml`: AWS EKS deployment without a quarantine VPC. User-defined functions execute in the API pod. Includes the API service annotations needed for the Terraform-managed CloudFront plus adopted internal NLB path. +- `examples/aws-eks-quarantine/values.yaml`: AWS EKS deployment with user-defined functions routed into the quarantine VPC. 
Includes the API service annotations needed for the Terraform-managed CloudFront plus adopted internal NLB path. +- `examples/google-autopilot/values.yaml`: GKE Autopilot deployment. +- `examples/google-autopilot-cel/values.yaml`: GKE Autopilot deployment with CEL-friendly security settings. +- `examples/google-standard/values.yaml`: GKE Standard deployment. diff --git a/braintrust/examples/aws-eks-quarantine/values.yaml b/braintrust/examples/aws-eks-quarantine/values.yaml new file mode 100644 index 0000000..1fef7f7 --- /dev/null +++ b/braintrust/examples/aws-eks-quarantine/values.yaml @@ -0,0 +1,109 @@ +# Sample values for AWS EKS deployment with a quarantine VPC + +global: + orgName: "" + namespace: "braintrust" + +cloud: "aws" + +objectStorage: + aws: + brainstoreBucket: "" + responseBucket: "" + codeBundleBucket: "" + +api: + name: "braintrust-api" + replicas: 1 + # Disable in-pod code execution so user-defined functions run in the quarantine VPC. + allowCodeFunctionExecution: false + annotations: + service: + # Internal NLB via the AWS Load Balancer Controller. + # If you are using the terraform-aws-braintrust-data-plane EKS CloudFront path, + # set these so the controller adopts the pre-created internal NLB. 
+ service.beta.kubernetes.io/aws-load-balancer-scheme: "internal" + service.beta.kubernetes.io/aws-load-balancer-type: "external" + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "instance" + service.beta.kubernetes.io/aws-load-balancer-security-groups: "" + service.beta.kubernetes.io/aws-load-balancer-name: "" + service: + type: LoadBalancer + port: 8000 + portName: http + serviceAccount: + name: "braintrust-api" + awsRoleArn: "" + resources: + requests: + cpu: "4" + memory: "16Gi" + limits: + cpu: "8" + memory: "16Gi" + extraEnvVars: + - name: QUARANTINE_INVOKE_ROLE + value: "" + - name: QUARANTINE_FUNCTION_ROLE + value: "" + - name: QUARANTINE_REGION + value: "" + - name: QUARANTINE_PRIVATE_SUBNET_1_ID + value: "" + - name: QUARANTINE_PRIVATE_SUBNET_2_ID + value: "" + - name: QUARANTINE_PRIVATE_SUBNET_3_ID + value: "" + - name: QUARANTINE_PUB_PRIVATE_VPC_DEFAULT_SECURITY_GROUP + value: "" + - name: QUARANTINE_PUB_PRIVATE_VPC_ID + value: "" + # nodeSelector: + # topology.kubernetes.io/zone: us-east-1a + +brainstore: + serviceAccount: + name: "brainstore" + awsRoleArn: "" + reader: + name: "brainstore-reader" + replicas: 2 + service: + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "16" + memory: "32Gi" + limits: + cpu: "16" + memory: "32Gi" + fastreader: + name: "brainstore-fastreader" + replicas: 2 + service: + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "16" + memory: "32Gi" + limits: + cpu: "16" + memory: "32Gi" + writer: + name: "brainstore-writer" + replicas: 1 + service: + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "32" + memory: "64Gi" + limits: + cpu: "32" + memory: "64Gi" diff --git a/braintrust/examples/aws-eks/values.yaml b/braintrust/examples/aws-eks/values.yaml new file mode 100644 index 0000000..b676ea3 --- /dev/null +++ b/braintrust/examples/aws-eks/values.yaml @@ -0,0 +1,90 @@ +# Sample values for AWS EKS deployment without a 
quarantine VPC + +global: + orgName: "" + namespace: "braintrust" + +cloud: "aws" + +objectStorage: + aws: + brainstoreBucket: "" + responseBucket: "" + codeBundleBucket: "" + +api: + name: "braintrust-api" + annotations: + service: + # Internal NLB via the AWS Load Balancer Controller. + # If you are using the terraform-aws-braintrust-data-plane EKS CloudFront path, + # set these so the controller adopts the pre-created internal NLB. + service.beta.kubernetes.io/aws-load-balancer-scheme: "internal" + service.beta.kubernetes.io/aws-load-balancer-type: "external" + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "instance" + service.beta.kubernetes.io/aws-load-balancer-security-groups: "" + service.beta.kubernetes.io/aws-load-balancer-name: "" + replicas: 1 + service: + type: LoadBalancer + port: 8000 + portName: http + serviceAccount: + name: "braintrust-api" + awsRoleArn: "" + # Keep code execution enabled when not using a quarantine VPC. + allowCodeFunctionExecution: true + resources: + requests: + cpu: "4" + memory: "16Gi" + limits: + cpu: "8" + memory: "16Gi" + +brainstore: + serviceAccount: + name: "brainstore" + awsRoleArn: "" + reader: + name: "brainstore-reader" + replicas: 2 + service: + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "16" + memory: "32Gi" + limits: + cpu: "16" + memory: "32Gi" + fastreader: + name: "brainstore-fastreader" + replicas: 2 + service: + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "16" + memory: "32Gi" + limits: + cpu: "16" + memory: "32Gi" + writer: + name: "brainstore-writer" + replicas: 1 + service: + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "32" + memory: "64Gi" + limits: + cpu: "32" + memory: "64Gi" From a36c5916961bf5375556ac02334789c898187bdc Mon Sep 17 00:00:00 2001 From: James Purcell Date: Tue, 12 May 2026 14:53:25 +0100 Subject: [PATCH 5/7] Changes for EKS & GKE deployments --- 
braintrust/examples/aws-eks-cel/values.yaml | 58 +++++++++++++++++++ .../examples/google-autopilot-cel/values.yaml | 9 +++ .../brainstore-fastreader-deployment.yaml | 19 +++++- .../brainstore-reader-deployment.yaml | 19 +++++- .../brainstore-writer-deployment.yaml | 19 +++++- braintrust/values.yaml | 14 +++++ 6 files changed, 135 insertions(+), 3 deletions(-) create mode 100644 braintrust/examples/aws-eks-cel/values.yaml diff --git a/braintrust/examples/aws-eks-cel/values.yaml b/braintrust/examples/aws-eks-cel/values.yaml new file mode 100644 index 0000000..db4d320 --- /dev/null +++ b/braintrust/examples/aws-eks-cel/values.yaml @@ -0,0 +1,58 @@ +# CEL-friendly overlay for AWS EKS deployments. +# +# Use this together with the Terraform-generated EKS values file, for example: +# helm upgrade --install braintrust ./braintrust \ +# --namespace braintrust \ +# --values /path/to/braintrust-generated-values.yaml \ +# --values ./braintrust/examples/aws-eks-cel/values.yaml +# +# This file intentionally does not repeat AWS-specific service account, bucket, +# or NLB settings. Those should continue to come from the Terraform-generated +# values so the chart stays aligned with the cluster infrastructure. 
+ +cloud: "aws" + +api: + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + tmpVolume: + enabled: true + sizeLimit: "1Gi" + +brainstore: + reader: + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + tmpVolume: + enabled: true + sizeLimit: "1Gi" + + fastreader: + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + tmpVolume: + enabled: true + sizeLimit: "1Gi" + + writer: + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + tmpVolume: + enabled: true + sizeLimit: "1Gi" diff --git a/braintrust/examples/google-autopilot-cel/values.yaml b/braintrust/examples/google-autopilot-cel/values.yaml index a380cbb..2024a9d 100644 --- a/braintrust/examples/google-autopilot-cel/values.yaml +++ b/braintrust/examples/google-autopilot-cel/values.yaml @@ -91,6 +91,9 @@ brainstore: volume: size: "1000Gi" sizeLimit: "900Gi" + tmpVolume: + enabled: true + sizeLimit: "1Gi" extraEnvVars: fastreader: @@ -121,6 +124,9 @@ brainstore: volume: size: "1000Gi" sizeLimit: "900Gi" + tmpVolume: + enabled: true + sizeLimit: "1Gi" extraEnvVars: writer: @@ -151,4 +157,7 @@ brainstore: volume: size: "1000Gi" sizeLimit: "900Gi" + tmpVolume: + enabled: true + sizeLimit: "1Gi" extraEnvVars: diff --git a/braintrust/templates/brainstore-fastreader-deployment.yaml b/braintrust/templates/brainstore-fastreader-deployment.yaml index 5852e2e..0b5fcd5 100644 --- a/braintrust/templates/brainstore-fastreader-deployment.yaml +++ b/braintrust/templates/brainstore-fastreader-deployment.yaml @@ -142,6 +142,10 @@ spec: volumeMounts: - name: cache-volume mountPath: {{ .Values.brainstore.fastreader.cacheDir }} + {{- if .Values.brainstore.fastreader.tmpVolume.enabled }} + - name: tmp-volume + mountPath: /tmp + {{- end }} {{- if and (eq .Values.cloud "azure") 
.Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline mountPath: "/mnt/secrets-store" @@ -152,7 +156,11 @@ spec: {{- end }} volumes: - name: cache-volume - {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} + {{- if .Values.brainstore.storage.hostPath }} + hostPath: + path: {{ .Values.brainstore.storage.hostPath | quote }} + type: DirectoryOrCreate + {{- else if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} ephemeral: volumeClaimTemplate: spec: @@ -170,6 +178,15 @@ spec: {} {{- end }} {{- end }} + {{- if .Values.brainstore.fastreader.tmpVolume.enabled }} + - name: tmp-volume + emptyDir: + {{- if .Values.brainstore.fastreader.tmpVolume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.fastreader.tmpVolume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline csi: diff --git a/braintrust/templates/brainstore-reader-deployment.yaml b/braintrust/templates/brainstore-reader-deployment.yaml index 7c8e1f6..4975df9 100644 --- a/braintrust/templates/brainstore-reader-deployment.yaml +++ b/braintrust/templates/brainstore-reader-deployment.yaml @@ -142,6 +142,10 @@ spec: volumeMounts: - name: cache-volume mountPath: {{ .Values.brainstore.reader.cacheDir }} + {{- if .Values.brainstore.reader.tmpVolume.enabled }} + - name: tmp-volume + mountPath: /tmp + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline mountPath: "/mnt/secrets-store" @@ -152,7 +156,11 @@ spec: {{- end }} volumes: - name: cache-volume - {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} + {{- if .Values.brainstore.storage.hostPath }} + hostPath: + path: {{ .Values.brainstore.storage.hostPath | quote }} + type: DirectoryOrCreate + {{- else if and (eq .Values.cloud "azure") 
.Values.azure.enableAzureContainerStorageDriver }} ephemeral: volumeClaimTemplate: spec: @@ -170,6 +178,15 @@ spec: {} {{- end }} {{- end }} + {{- if .Values.brainstore.reader.tmpVolume.enabled }} + - name: tmp-volume + emptyDir: + {{- if .Values.brainstore.reader.tmpVolume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.reader.tmpVolume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline csi: diff --git a/braintrust/templates/brainstore-writer-deployment.yaml b/braintrust/templates/brainstore-writer-deployment.yaml index de21635..b1cda47 100644 --- a/braintrust/templates/brainstore-writer-deployment.yaml +++ b/braintrust/templates/brainstore-writer-deployment.yaml @@ -142,6 +142,10 @@ spec: volumeMounts: - name: cache-volume mountPath: {{ .Values.brainstore.writer.cacheDir }} + {{- if .Values.brainstore.writer.tmpVolume.enabled }} + - name: tmp-volume + mountPath: /tmp + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline mountPath: "/mnt/secrets-store" @@ -152,7 +156,11 @@ spec: {{- end }} volumes: - name: cache-volume - {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} + {{- if .Values.brainstore.storage.hostPath }} + hostPath: + path: {{ .Values.brainstore.storage.hostPath | quote }} + type: DirectoryOrCreate + {{- else if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} ephemeral: volumeClaimTemplate: spec: @@ -170,6 +178,15 @@ spec: {} {{- end }} {{- end }} + {{- if .Values.brainstore.writer.tmpVolume.enabled }} + - name: tmp-volume + emptyDir: + {{- if .Values.brainstore.writer.tmpVolume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.writer.tmpVolume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - 
name: secrets-store-inline csi: diff --git a/braintrust/values.yaml b/braintrust/values.yaml index 5ae0088..1961772 100644 --- a/braintrust/values.yaml +++ b/braintrust/values.yaml @@ -202,6 +202,11 @@ brainstore: timeoutSeconds: 5 failureThreshold: 3 successThreshold: 1 + # Shared cache storage configuration. + storage: + # Optional host path for node-local storage, for example an NVMe mount on EKS nodes. + # When set, Brainstore cache volumes use hostPath instead of emptyDir / ephemeral PVCs. + hostPath: "" # Brainstore Reader configuration reader: @@ -237,6 +242,9 @@ brainstore: size: "" # Optional emptyDir size limit for CEL policy compliance sizeLimit: "" + tmpVolume: + enabled: false + sizeLimit: "" extraEnvVars: [] nodeSelector: {} tolerations: [] @@ -278,6 +286,9 @@ brainstore: size: "" # Optional emptyDir size limit for CEL policy compliance sizeLimit: "" + tmpVolume: + enabled: false + sizeLimit: "" extraEnvVars: [] nodeSelector: {} tolerations: [] @@ -320,6 +331,9 @@ brainstore: size: "" # Optional emptyDir size limit for CEL policy compliance sizeLimit: "" + tmpVolume: + enabled: false + sizeLimit: "" extraEnvVars: [] # Example: # - name: MY_ENV_VAR From d05abbb6004b5559492421eacd00dcdfa5d1a071 Mon Sep 17 00:00:00 2001 From: James Purcell Date: Wed, 13 May 2026 18:06:58 +0100 Subject: [PATCH 6/7] Updates to ephemeral storage and host path --- braintrust/README.md | 23 ++++++ braintrust/examples/aws-eks-cel/values.yaml | 9 +++ .../examples/aws-eks-quarantine/values.yaml | 9 +++ braintrust/examples/aws-eks/values.yaml | 9 +++ braintrust/templates/_helpers.tpl | 31 ++++++++ .../brainstore-fastreader-deployment.yaml | 17 ++-- .../brainstore-reader-deployment.yaml | 17 ++-- .../brainstore-writer-deployment.yaml | 17 ++-- .../tests/brainstore-fastreader_test.yaml | 58 ++++++++++++++ braintrust/tests/brainstore-reader_test.yaml | 79 +++++++++++++++++++ braintrust/tests/brainstore-writer_test.yaml | 78 ++++++++++++++++++ braintrust/values.yaml | 27 +++++-- 12 
files changed, 338 insertions(+), 36 deletions(-) diff --git a/braintrust/README.md b/braintrust/README.md index d413adf..bd8bd78 100644 --- a/braintrust/README.md +++ b/braintrust/README.md @@ -90,6 +90,17 @@ brainstore: **Supported machine families:** c4, c4d +If you need the request to cover more than the cache volume alone, set an explicit total pod-local storage budget: + +```yaml +brainstore: + reader: + volume: + size: "900Gi" + ephemeralStorage: + request: "905Gi" # cache + /tmp (if enabled) + logs/writable-layer overhead +``` + ### GKE Standard Mode For Standard mode clusters, create node pools with local SSDs, then deploy: @@ -147,6 +158,18 @@ For Standard mode clusters, create node pools with local SSDs, then deploy: - Local SSDs are automatically available via emptyDir volumes - Pod anti-affinity ensures readers and writers don't share nodes (each pod gets dedicated node access) +## AWS EKS Local Storage + +On EKS, Brainstore uses Kubernetes-managed `emptyDir` volumes for cache storage. To make scheduling reflect the real local-disk budget, set `brainstore.{reader,fastreader,writer}.ephemeralStorage.request` for each Brainstore role. + +Size the request for the pod's full local-storage usage: +- cache `emptyDir` +- optional `/tmp` `emptyDir` +- container logs +- writable layer overhead + +When you enable `tmpVolume`, make sure the `ephemeralStorage.request` still covers that extra space. + ## Testing This Helm chart includes comprehensive automated unit tests. diff --git a/braintrust/examples/aws-eks-cel/values.yaml b/braintrust/examples/aws-eks-cel/values.yaml index db4d320..bade996 100644 --- a/braintrust/examples/aws-eks-cel/values.yaml +++ b/braintrust/examples/aws-eks-cel/values.yaml @@ -31,6 +31,9 @@ brainstore: capabilities: drop: - ALL + ephemeralStorage: + # Include cache, this /tmp volume, logs, and writable layer overhead.
+ request: "" tmpVolume: enabled: true sizeLimit: "1Gi" @@ -42,6 +45,9 @@ brainstore: capabilities: drop: - ALL + ephemeralStorage: + # Include cache, this /tmp volume, logs, and writable layer overhead. + request: "" tmpVolume: enabled: true sizeLimit: "1Gi" @@ -53,6 +59,9 @@ brainstore: capabilities: drop: - ALL + ephemeralStorage: + # Include cache, this /tmp volume, logs, and writable layer overhead. + request: "" tmpVolume: enabled: true sizeLimit: "1Gi" diff --git a/braintrust/examples/aws-eks-quarantine/values.yaml b/braintrust/examples/aws-eks-quarantine/values.yaml index 1fef7f7..2858dad 100644 --- a/braintrust/examples/aws-eks-quarantine/values.yaml +++ b/braintrust/examples/aws-eks-quarantine/values.yaml @@ -79,6 +79,9 @@ brainstore: limits: cpu: "16" memory: "32Gi" + ephemeralStorage: + # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. + request: "" fastreader: name: "brainstore-fastreader" replicas: 2 @@ -93,6 +96,9 @@ brainstore: limits: cpu: "16" memory: "32Gi" + ephemeralStorage: + # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. + request: "" writer: name: "brainstore-writer" replicas: 1 @@ -107,3 +113,6 @@ brainstore: limits: cpu: "32" memory: "64Gi" + ephemeralStorage: + # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. + request: "" diff --git a/braintrust/examples/aws-eks/values.yaml b/braintrust/examples/aws-eks/values.yaml index b676ea3..a941c6d 100644 --- a/braintrust/examples/aws-eks/values.yaml +++ b/braintrust/examples/aws-eks/values.yaml @@ -60,6 +60,9 @@ brainstore: limits: cpu: "16" memory: "32Gi" + ephemeralStorage: + # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. 
+ request: "" fastreader: name: "brainstore-fastreader" replicas: 2 @@ -74,6 +77,9 @@ brainstore: limits: cpu: "16" memory: "32Gi" + ephemeralStorage: + # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. + request: "" writer: name: "brainstore-writer" replicas: 1 @@ -88,3 +94,6 @@ brainstore: limits: cpu: "32" memory: "64Gi" + ephemeralStorage: + # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. + request: "" diff --git a/braintrust/templates/_helpers.tpl b/braintrust/templates/_helpers.tpl index 88322b2..0d485eb 100644 --- a/braintrust/templates/_helpers.tpl +++ b/braintrust/templates/_helpers.tpl @@ -24,3 +24,34 @@ Static fast reader query sources used by API. -}} {{- join "," $sources -}} {{- end -}} + +{{/* +Render Brainstore container resources with provider-specific ephemeral storage. + +Google Autopilot keeps the legacy behavior of defaulting the ephemeral-storage +request to volume.size when no explicit total request is set. AWS EKS requires +an explicit total pod-local storage budget that includes cache, optional /tmp, +and normal writable-layer/log overhead. 
+*/}} +{{- define "braintrust.brainstoreResources" -}} +{{- $root := .root -}} +{{- $resources := deepCopy .resources -}} +{{- $supportsEphemeralStorage := or (eq $root.Values.cloud "aws") (and (eq $root.Values.cloud "google") (eq $root.Values.google.mode "autopilot")) -}} +{{- $request := "" -}} +{{- if and .ephemeralStorage .ephemeralStorage.request -}} +{{- $request = .ephemeralStorage.request -}} +{{- else if and (eq $root.Values.cloud "google") (eq $root.Values.google.mode "autopilot") .volumeSize -}} +{{- $request = .volumeSize -}} +{{- end -}} +{{- if and $supportsEphemeralStorage $request -}} +{{- $requests := deepCopy (default (dict) $resources.requests) -}} +{{- $_ := set $requests "ephemeral-storage" $request -}} +{{- $_ := set $resources "requests" $requests -}} +{{- end -}} +{{- if and $supportsEphemeralStorage .ephemeralStorage .ephemeralStorage.limit -}} +{{- $limits := deepCopy (default (dict) $resources.limits) -}} +{{- $_ := set $limits "ephemeral-storage" .ephemeralStorage.limit -}} +{{- $_ := set $resources "limits" $limits -}} +{{- end -}} +{{- toYaml $resources -}} +{{- end -}} diff --git a/braintrust/templates/brainstore-fastreader-deployment.yaml b/braintrust/templates/brainstore-fastreader-deployment.yaml index 0b5fcd5..ea43276 100644 --- a/braintrust/templates/brainstore-fastreader-deployment.yaml +++ b/braintrust/templates/brainstore-fastreader-deployment.yaml @@ -80,11 +80,12 @@ spec: ports: - containerPort: {{ .Values.brainstore.fastreader.service.port }} resources: - {{- $resources := .Values.brainstore.fastreader.resources }} - {{- if and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot") .Values.brainstore.fastreader.volume.size }} - {{- $resources = merge (dict "requests" (merge $resources.requests (dict "ephemeral-storage" .Values.brainstore.fastreader.volume.size))) $resources }} - {{- end }} - {{- toYaml $resources | nindent 12 }} + {{- include "braintrust.brainstoreResources" (dict + "root" . 
+ "resources" .Values.brainstore.fastreader.resources + "volumeSize" .Values.brainstore.fastreader.volume.size + "ephemeralStorage" .Values.brainstore.fastreader.ephemeralStorage + ) | nindent 12 }} {{- with .Values.brainstore.livenessProbe }} livenessProbe: {{- toYaml . | nindent 12 }} @@ -156,11 +157,7 @@ spec: {{- end }} volumes: - name: cache-volume - {{- if .Values.brainstore.storage.hostPath }} - hostPath: - path: {{ .Values.brainstore.storage.hostPath | quote }} - type: DirectoryOrCreate - {{- else if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} + {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} ephemeral: volumeClaimTemplate: spec: diff --git a/braintrust/templates/brainstore-reader-deployment.yaml b/braintrust/templates/brainstore-reader-deployment.yaml index 4975df9..d3aa0b5 100644 --- a/braintrust/templates/brainstore-reader-deployment.yaml +++ b/braintrust/templates/brainstore-reader-deployment.yaml @@ -80,11 +80,12 @@ spec: ports: - containerPort: {{ .Values.brainstore.reader.service.port }} resources: - {{- $resources := .Values.brainstore.reader.resources }} - {{- if and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot") .Values.brainstore.reader.volume.size }} - {{- $resources = merge (dict "requests" (merge $resources.requests (dict "ephemeral-storage" .Values.brainstore.reader.volume.size))) $resources }} - {{- end }} - {{- toYaml $resources | nindent 12 }} + {{- include "braintrust.brainstoreResources" (dict + "root" . + "resources" .Values.brainstore.reader.resources + "volumeSize" .Values.brainstore.reader.volume.size + "ephemeralStorage" .Values.brainstore.reader.ephemeralStorage + ) | nindent 12 }} {{- with .Values.brainstore.livenessProbe }} livenessProbe: {{- toYaml . 
| nindent 12 }} @@ -156,11 +157,7 @@ spec: {{- end }} volumes: - name: cache-volume - {{- if .Values.brainstore.storage.hostPath }} - hostPath: - path: {{ .Values.brainstore.storage.hostPath | quote }} - type: DirectoryOrCreate - {{- else if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} + {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} ephemeral: volumeClaimTemplate: spec: diff --git a/braintrust/templates/brainstore-writer-deployment.yaml b/braintrust/templates/brainstore-writer-deployment.yaml index b1cda47..0a2dab2 100644 --- a/braintrust/templates/brainstore-writer-deployment.yaml +++ b/braintrust/templates/brainstore-writer-deployment.yaml @@ -80,11 +80,12 @@ spec: ports: - containerPort: {{ .Values.brainstore.writer.service.port }} resources: - {{- $resources := .Values.brainstore.writer.resources }} - {{- if and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot") .Values.brainstore.writer.volume.size }} - {{- $resources = merge (dict "requests" (merge $resources.requests (dict "ephemeral-storage" .Values.brainstore.writer.volume.size))) $resources }} - {{- end }} - {{- toYaml $resources | nindent 12 }} + {{- include "braintrust.brainstoreResources" (dict + "root" . + "resources" .Values.brainstore.writer.resources + "volumeSize" .Values.brainstore.writer.volume.size + "ephemeralStorage" .Values.brainstore.writer.ephemeralStorage + ) | nindent 12 }} {{- with .Values.brainstore.livenessProbe }} livenessProbe: {{- toYaml . 
| nindent 12 }} @@ -156,11 +157,7 @@ spec: {{- end }} volumes: - name: cache-volume - {{- if .Values.brainstore.storage.hostPath }} - hostPath: - path: {{ .Values.brainstore.storage.hostPath | quote }} - type: DirectoryOrCreate - {{- else if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} + {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureContainerStorageDriver }} ephemeral: volumeClaimTemplate: spec: diff --git a/braintrust/tests/brainstore-fastreader_test.yaml b/braintrust/tests/brainstore-fastreader_test.yaml index 7d15760..9293725 100644 --- a/braintrust/tests/brainstore-fastreader_test.yaml +++ b/braintrust/tests/brainstore-fastreader_test.yaml @@ -44,3 +44,61 @@ tests: content: name: CUSTOM_VAR value: "custom-value" + + - it: should use explicit ephemeral-storage request for AWS + template: brainstore-fastreader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/aws-values.yaml + set: + brainstore.fastreader.ephemeralStorage.request: "101Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "101Gi" + + - it: should not inject ephemeral-storage request for Azure + template: brainstore-fastreader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/azure-values.yaml + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + + - it: should not inject ephemeral-storage request for GKE Standard + template: brainstore-fastreader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + google.mode: "standard" + brainstore.fastreader.ephemeralStorage.request: "101Gi" + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + + - it: should mount tmp volume only 
when enabled + template: brainstore-fastreader-deployment.yaml + values: + - __fixtures__/base-values.yaml + set: + brainstore.fastreader.tmpVolume.enabled: true + brainstore.fastreader.tmpVolume.sizeLimit: "1Gi" + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: tmp-volume + mountPath: /tmp + - equal: + path: spec.template.spec.volumes[1].emptyDir.sizeLimit + value: "1Gi" diff --git a/braintrust/tests/brainstore-reader_test.yaml b/braintrust/tests/brainstore-reader_test.yaml index 91e5596..b885550 100644 --- a/braintrust/tests/brainstore-reader_test.yaml +++ b/braintrust/tests/brainstore-reader_test.yaml @@ -80,6 +80,19 @@ tests: - isNotNull: path: spec.template.spec.volumes[0].emptyDir + - it: should not inject ephemeral-storage request for Azure + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/azure-values.yaml + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + - isNull: + path: spec.template.spec.containers[0].resources.limits["ephemeral-storage"] + - it: should include Azure Key Vault volume mount when enabled template: brainstore-reader-deployment.yaml values: @@ -113,6 +126,72 @@ tests: path: spec.template.spec.nodeSelector["cloud.google.com/compute-class"] value: "Performance" + - it: should use explicit ephemeral-storage request and limit for AWS + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/aws-values.yaml + set: + brainstore.reader.ephemeralStorage.request: "101Gi" + brainstore.reader.ephemeralStorage.limit: "101Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "101Gi" + - equal: + path: spec.template.spec.containers[0].resources.limits["ephemeral-storage"] + 
value: "101Gi" + + - it: should prefer explicit ephemeral-storage request over volume size on GKE Autopilot + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + brainstore.reader.volume.size: "100Gi" + brainstore.reader.ephemeralStorage.request: "102Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "102Gi" + + - it: should not inject ephemeral-storage request for GKE Standard + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + google.mode: "standard" + brainstore.reader.ephemeralStorage.request: "102Gi" + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + + - it: should mount tmp volume only when enabled + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + set: + brainstore.reader.tmpVolume.enabled: true + brainstore.reader.tmpVolume.sizeLimit: "1Gi" + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: tmp-volume + mountPath: /tmp + - equal: + path: spec.template.spec.volumes[1].emptyDir.sizeLimit + value: "1Gi" + - it: should include BRAINSTORE_LOCKS_URI when locksBackend is redis template: brainstore-reader-deployment.yaml values: diff --git a/braintrust/tests/brainstore-writer_test.yaml b/braintrust/tests/brainstore-writer_test.yaml index cc8315b..c656c32 100644 --- a/braintrust/tests/brainstore-writer_test.yaml +++ b/braintrust/tests/brainstore-writer_test.yaml @@ -80,6 +80,19 @@ tests: - isNotNull: path: spec.template.spec.volumes[0].emptyDir + - it: should not inject ephemeral-storage request for Azure + template: brainstore-writer-deployment.yaml + values: + - 
__fixtures__/base-values.yaml + - __fixtures__/azure-values.yaml + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + - isNull: + path: spec.template.spec.containers[0].resources.limits["ephemeral-storage"] + - it: should include Azure Key Vault volume mount when enabled template: brainstore-writer-deployment.yaml values: @@ -113,6 +126,71 @@ tests: path: spec.template.spec.nodeSelector["cloud.google.com/compute-class"] value: "Performance" + - it: should use explicit ephemeral-storage request and limit for AWS + template: brainstore-writer-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/aws-values.yaml + set: + brainstore.writer.ephemeralStorage.request: "201Gi" + brainstore.writer.ephemeralStorage.limit: "201Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "201Gi" + - equal: + path: spec.template.spec.containers[0].resources.limits["ephemeral-storage"] + value: "201Gi" + + - it: should keep GKE Autopilot volume size fallback when no explicit request is set + template: brainstore-writer-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + brainstore.writer.volume.size: "200Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "200Gi" + + - it: should not inject ephemeral-storage request for GKE Standard + template: brainstore-writer-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + google.mode: "standard" + brainstore.writer.ephemeralStorage.request: "201Gi" + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + + - it: should mount tmp volume only when 
enabled + template: brainstore-writer-deployment.yaml + values: + - __fixtures__/base-values.yaml + set: + brainstore.writer.tmpVolume.enabled: true + brainstore.writer.tmpVolume.sizeLimit: "1Gi" + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: tmp-volume + mountPath: /tmp + - equal: + path: spec.template.spec.volumes[1].emptyDir.sizeLimit + value: "1Gi" + - it: should include BRAINSTORE_LOCKS_URI when locksBackend is redis template: brainstore-writer-deployment.yaml values: diff --git a/braintrust/values.yaml b/braintrust/values.yaml index 1961772..d0762bb 100644 --- a/braintrust/values.yaml +++ b/braintrust/values.yaml @@ -202,12 +202,6 @@ brainstore: timeoutSeconds: 5 failureThreshold: 3 successThreshold: 1 - # Shared cache storage configuration. - storage: - # Optional host path for node-local storage, for example an NVMe mount on EKS nodes. - # When set, Brainstore cache volumes use hostPath instead of emptyDir / ephemeral PVCs. - hostPath: "" - # Brainstore Reader configuration reader: name: "brainstore-reader" @@ -242,6 +236,13 @@ brainstore: size: "" # Optional emptyDir size limit for CEL policy compliance sizeLimit: "" + ephemeralStorage: + # Total pod-local storage budget for ephemeral-storage requests. + # Size this for cache emptyDir, optional /tmp emptyDir, and writable layer/log overhead. + # On GKE Autopilot, volume.size is still used as the fallback request when this is unset. + request: "" + # Optional total pod-local storage limit. + limit: "" tmpVolume: enabled: false sizeLimit: "" @@ -286,6 +287,13 @@ brainstore: size: "" # Optional emptyDir size limit for CEL policy compliance sizeLimit: "" + ephemeralStorage: + # Total pod-local storage budget for ephemeral-storage requests. + # Size this for cache emptyDir, optional /tmp emptyDir, and writable layer/log overhead. + # On GKE Autopilot, volume.size is still used as the fallback request when this is unset. 
+ request: "" + # Optional total pod-local storage limit. + limit: "" tmpVolume: enabled: false sizeLimit: "" @@ -331,6 +339,13 @@ brainstore: size: "" # Optional emptyDir size limit for CEL policy compliance sizeLimit: "" + ephemeralStorage: + # Total pod-local storage budget for ephemeral-storage requests. + # Size this for cache emptyDir, optional /tmp emptyDir, and writable layer/log overhead. + # On GKE Autopilot, volume.size is still used as the fallback request when this is unset. + request: "" + # Optional total pod-local storage limit. + limit: "" tmpVolume: enabled: false sizeLimit: "" From 7468b515d6e2230b324377ebef00eb0f28f3a275 Mon Sep 17 00:00:00 2001 From: James Purcell Date: Fri, 15 May 2026 17:31:01 +0100 Subject: [PATCH 7/7] Remove EKS examples from security enhancements --- README.md | 2 +- braintrust/README.md | 2 - braintrust/examples/aws-eks-cel/values.yaml | 67 ---------- .../examples/aws-eks-quarantine/values.yaml | 118 ------------------ braintrust/examples/aws-eks/values.yaml | 99 --------------- 5 files changed, 1 insertion(+), 287 deletions(-) delete mode 100644 braintrust/examples/aws-eks-cel/values.yaml delete mode 100644 braintrust/examples/aws-eks-quarantine/values.yaml delete mode 100644 braintrust/examples/aws-eks/values.yaml diff --git a/README.md b/README.md index 348ec1b..5ec78fd 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,6 @@ helm upgrade --install \ ## Prerequisites -Before installing the Braintrust Helm chart, ensure you have run the appropriate Braintrust Terraform module for [AWS](https://github.com/braintrustdata/terraform-aws-braintrust-data-plane), [Google](https://github.com/braintrustdata/terraform-google-braintrust-data-plane), or [Azure](https://github.com/braintrustdata/terraform-azure-braintrust-data-plane) to deploy the base infrastructure. 
+Before installing the Braintrust Helm chart, ensure you have run the appropriate Braintrust Terraform module for [Google](https://github.com/braintrustdata/terraform-google-braintrust-data-plane) or [Azure](https://github.com/braintrustdata/terraform-azure-braintrust-data-plane) to deploy the base infrastructure. See the [Braintrust Helm Chart](./braintrust/README.md) for more details. diff --git a/braintrust/README.md b/braintrust/README.md index bd8bd78..5b66cd1 100644 --- a/braintrust/README.md +++ b/braintrust/README.md @@ -216,8 +216,6 @@ This version also adds first-class `brainstoreWalFooterVersion` support and auto Example values files for different cloud providers and configurations are located in the `examples/` folder. -- `examples/aws-eks/values.yaml`: AWS EKS deployment without a quarantine VPC. User-defined functions execute in the API pod. Includes the API service annotations needed for the Terraform-managed CloudFront plus adopted internal NLB path. -- `examples/aws-eks-quarantine/values.yaml`: AWS EKS deployment with user-defined functions routed into the quarantine VPC. Includes the API service annotations needed for the Terraform-managed CloudFront plus adopted internal NLB path. - `examples/google-autopilot/values.yaml`: GKE Autopilot deployment. - `examples/google-autopilot-cel/values.yaml`: GKE Autopilot deployment with CEL-friendly security settings. - `examples/google-standard/values.yaml`: GKE Standard deployment. diff --git a/braintrust/examples/aws-eks-cel/values.yaml b/braintrust/examples/aws-eks-cel/values.yaml deleted file mode 100644 index bade996..0000000 --- a/braintrust/examples/aws-eks-cel/values.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# CEL-friendly overlay for AWS EKS deployments.
-# -# Use this together with the Terraform-generated EKS values file, for example: -# helm upgrade --install braintrust ./braintrust \ -# --namespace braintrust \ -# --values /path/to/braintrust-generated-values.yaml \ -# --values ./braintrust/examples/aws-eks-cel/values.yaml -# -# This file intentionally does not repeat AWS-specific service account, bucket, -# or NLB settings. Those should continue to come from the Terraform-generated -# values so the chart stays aligned with the cluster infrastructure. - -cloud: "aws" - -api: - securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - tmpVolume: - enabled: true - sizeLimit: "1Gi" - -brainstore: - reader: - securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - ephemeralStorage: - # Include cache, this /tmp volume, logs, and writable layer overhead. - request: "" - tmpVolume: - enabled: true - sizeLimit: "1Gi" - - fastreader: - securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - ephemeralStorage: - # Include cache, this /tmp volume, logs, and writable layer overhead. - request: "" - tmpVolume: - enabled: true - sizeLimit: "1Gi" - - writer: - securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - ephemeralStorage: - # Include cache, this /tmp volume, logs, and writable layer overhead. 
- request: "" - tmpVolume: - enabled: true - sizeLimit: "1Gi" diff --git a/braintrust/examples/aws-eks-quarantine/values.yaml b/braintrust/examples/aws-eks-quarantine/values.yaml deleted file mode 100644 index 2858dad..0000000 --- a/braintrust/examples/aws-eks-quarantine/values.yaml +++ /dev/null @@ -1,118 +0,0 @@ -# Sample values for AWS EKS deployment with a quarantine VPC - -global: - orgName: "" - namespace: "braintrust" - -cloud: "aws" - -objectStorage: - aws: - brainstoreBucket: "" - responseBucket: "" - codeBundleBucket: "" - -api: - name: "braintrust-api" - replicas: 1 - # Disable in-pod code execution so user-defined functions run in the quarantine VPC. - allowCodeFunctionExecution: false - annotations: - service: - # Internal NLB via the AWS Load Balancer Controller. - # If you are using the terraform-aws-braintrust-data-plane EKS CloudFront path, - # set these so the controller adopts the pre-created internal NLB. - service.beta.kubernetes.io/aws-load-balancer-scheme: "internal" - service.beta.kubernetes.io/aws-load-balancer-type: "external" - service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "instance" - service.beta.kubernetes.io/aws-load-balancer-security-groups: "" - service.beta.kubernetes.io/aws-load-balancer-name: "" - service: - type: LoadBalancer - port: 8000 - portName: http - serviceAccount: - name: "braintrust-api" - awsRoleArn: "" - resources: - requests: - cpu: "4" - memory: "16Gi" - limits: - cpu: "8" - memory: "16Gi" - extraEnvVars: - - name: QUARANTINE_INVOKE_ROLE - value: "" - - name: QUARANTINE_FUNCTION_ROLE - value: "" - - name: QUARANTINE_REGION - value: "" - - name: QUARANTINE_PRIVATE_SUBNET_1_ID - value: "" - - name: QUARANTINE_PRIVATE_SUBNET_2_ID - value: "" - - name: QUARANTINE_PRIVATE_SUBNET_3_ID - value: "" - - name: QUARANTINE_PUB_PRIVATE_VPC_DEFAULT_SECURITY_GROUP - value: "" - - name: QUARANTINE_PUB_PRIVATE_VPC_ID - value: "" - # nodeSelector: - # topology.kubernetes.io/zone: us-east-1a - -brainstore: - 
serviceAccount: - name: "brainstore" - awsRoleArn: "" - reader: - name: "brainstore-reader" - replicas: 2 - service: - type: ClusterIP - port: 4000 - portName: http - resources: - requests: - cpu: "16" - memory: "32Gi" - limits: - cpu: "16" - memory: "32Gi" - ephemeralStorage: - # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. - request: "" - fastreader: - name: "brainstore-fastreader" - replicas: 2 - service: - type: ClusterIP - port: 4000 - portName: http - resources: - requests: - cpu: "16" - memory: "32Gi" - limits: - cpu: "16" - memory: "32Gi" - ephemeralStorage: - # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. - request: "" - writer: - name: "brainstore-writer" - replicas: 1 - service: - type: ClusterIP - port: 4000 - portName: http - resources: - requests: - cpu: "32" - memory: "64Gi" - limits: - cpu: "32" - memory: "64Gi" - ephemeralStorage: - # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. - request: "" diff --git a/braintrust/examples/aws-eks/values.yaml b/braintrust/examples/aws-eks/values.yaml deleted file mode 100644 index a941c6d..0000000 --- a/braintrust/examples/aws-eks/values.yaml +++ /dev/null @@ -1,99 +0,0 @@ -# Sample values for AWS EKS deployment without a quarantine VPC - -global: - orgName: "" - namespace: "braintrust" - -cloud: "aws" - -objectStorage: - aws: - brainstoreBucket: "" - responseBucket: "" - codeBundleBucket: "" - -api: - name: "braintrust-api" - annotations: - service: - # Internal NLB via the AWS Load Balancer Controller. - # If you are using the terraform-aws-braintrust-data-plane EKS CloudFront path, - # set these so the controller adopts the pre-created internal NLB. 
- service.beta.kubernetes.io/aws-load-balancer-scheme: "internal" - service.beta.kubernetes.io/aws-load-balancer-type: "external" - service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "instance" - service.beta.kubernetes.io/aws-load-balancer-security-groups: "" - service.beta.kubernetes.io/aws-load-balancer-name: "" - replicas: 1 - service: - type: LoadBalancer - port: 8000 - portName: http - serviceAccount: - name: "braintrust-api" - awsRoleArn: "" - # Keep code execution enabled when not using a quarantine VPC. - allowCodeFunctionExecution: true - resources: - requests: - cpu: "4" - memory: "16Gi" - limits: - cpu: "8" - memory: "16Gi" - -brainstore: - serviceAccount: - name: "brainstore" - awsRoleArn: "" - reader: - name: "brainstore-reader" - replicas: 2 - service: - type: ClusterIP - port: 4000 - portName: http - resources: - requests: - cpu: "16" - memory: "32Gi" - limits: - cpu: "16" - memory: "32Gi" - ephemeralStorage: - # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. - request: "" - fastreader: - name: "brainstore-fastreader" - replicas: 2 - service: - type: ClusterIP - port: 4000 - portName: http - resources: - requests: - cpu: "16" - memory: "32Gi" - limits: - cpu: "16" - memory: "32Gi" - ephemeralStorage: - # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. - request: "" - writer: - name: "brainstore-writer" - replicas: 1 - service: - type: ClusterIP - port: 4000 - portName: http - resources: - requests: - cpu: "32" - memory: "64Gi" - limits: - cpu: "32" - memory: "64Gi" - ephemeralStorage: - # Total pod-local storage budget for cache, optional /tmp, logs, and writable layers. - request: ""