diff --git a/.gitignore b/.gitignore index 62d2ec7..626fc04 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ Chart.lock **/secrets.yml values-local.yaml values-local.yml +values-*.yaml +values-*.yml # Helm output and temporary files *.tmp @@ -31,4 +33,3 @@ test-output/ manifests/ rendered/ debug/ - diff --git a/braintrust/README.md b/braintrust/README.md index a912b70..5b66cd1 100644 --- a/braintrust/README.md +++ b/braintrust/README.md @@ -90,6 +90,17 @@ brainstore: **Supported machine families:** c4, c4d +If you need the request to cover more than the cache volume alone, set an explicit total pod-local storage budget: + +```yaml +brainstore: + reader: + volume: + size: "900Gi" + ephemeralStorage: + request: "905Gi" # cache + /tmp (if enabled) + logs/writable-layer overhead +``` + ### GKE Standard Mode For Standard mode clusters, create node pools with local SSDs, then deploy: @@ -147,6 +158,18 @@ For Standard mode clusters, create node pools with local SSDs, then deploy: - Local SSDs are automatically available via emptyDir volumes - Pod anti-affinity ensures readers and writers don't share nodes (each pod gets dedicated node access) +## AWS EKS Local Storage + +On EKS, Brainstore uses Kubernetes-managed `emptyDir` volumes for cache storage. To make scheduling reflect the real local-disk budget, set `brainstore.<role>.ephemeralStorage.request` for each Brainstore role (`reader`, `fastreader`, `writer`). + +Size the request for the pod's full local-storage usage: +- cache `emptyDir` +- optional `/tmp` `emptyDir` +- container logs +- writable layer overhead + +When you enable `tmpVolume`, make sure the `ephemeralStorage.request` still covers that extra space. + ## Testing This Helm chart includes comprehensive automated unit tests. @@ -192,3 +215,7 @@ This version also adds first-class `brainstoreWalFooterVersion` support and auto ## Example Values Files Example values files for different cloud providers and configurations are located in the `examples/` folder. 
+ +- `examples/google-autopilot/values.yaml`: GKE Autopilot deployment. +- `examples/google-autopilot-cel/values.yaml`: GKE Autopilot deployment with CEL-friendly security settings. +- `examples/google-standard/values.yaml`: GKE Standard deployment. diff --git a/braintrust/examples/google-autopilot-cel/values.yaml b/braintrust/examples/google-autopilot-cel/values.yaml new file mode 100644 index 0000000..2024a9d --- /dev/null +++ b/braintrust/examples/google-autopilot-cel/values.yaml @@ -0,0 +1,163 @@ +# Sample values for GKE Autopilot deployment with CEL policy compliance + +global: + orgName: "" + namespace: "braintrust" + +cloud: "google" + +google: + mode: "autopilot" + autopilotMachineFamily: "c4" + +objectStorage: + google: + brainstoreBucket: "" + apiBucket: "" + +api: + name: "braintrust-api" + # Uncomment the following section to use a different image or tag from the version in the Helm release + #image: + #repository: public.ecr.aws/braintrust/standalone-api + #tag: "" + annotations: + service: + networking.gke.io/load-balancer-type: "Internal" + replicas: 4 + service: + type: LoadBalancer + port: 8000 + portName: http + serviceAccount: + name: "braintrust-api" + googleServiceAccount: "" + enableGcsAuth: false + resources: + requests: + cpu: "4" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + tmpVolume: + enabled: true + sizeLimit: "1Gi" + extraEnvVars: + - name: AWS_REGION + value: "us-central1" + +brainstore: + serviceAccount: + name: "brainstore" + googleServiceAccount: "" + # Uncomment the following section to use a different image or tag from the version in the Helm release + #image: + #repository: public.ecr.aws/braintrust/brainstore + #tag: "" + locksBackend: "objectStorage" + + reader: + name: "brainstore-reader" + replicas: 2 + service: + name: "" + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: 
"16" + memory: "32Gi" + limits: + cpu: "16" + memory: "32Gi" + cacheDir: "/mnt/tmp/brainstore" + objectStoreCacheMemoryLimit: "1Gi" + objectStoreCacheFileSize: "900Gi" + verbose: true + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volume: + size: "1000Gi" + sizeLimit: "900Gi" + tmpVolume: + enabled: true + sizeLimit: "1Gi" + extraEnvVars: [] + + fastreader: + name: "brainstore-fastreader" + replicas: 2 + service: + name: "" + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "16" + memory: "32Gi" + limits: + cpu: "16" + memory: "32Gi" + cacheDir: "/mnt/tmp/brainstore" + objectStoreCacheMemoryLimit: "1Gi" + objectStoreCacheFileSize: "900Gi" + verbose: true + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volume: + size: "1000Gi" + sizeLimit: "900Gi" + tmpVolume: + enabled: true + sizeLimit: "1Gi" + extraEnvVars: [] + + writer: + name: "brainstore-writer" + replicas: 1 + service: + name: "" + type: ClusterIP + port: 4000 + portName: http + resources: + requests: + cpu: "32" + memory: "64Gi" + limits: + cpu: "32" + memory: "64Gi" + cacheDir: "/mnt/tmp/brainstore" + objectStoreCacheMemoryLimit: "1Gi" + objectStoreCacheFileSize: "900Gi" + verbose: true + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volume: + size: "1000Gi" + sizeLimit: "900Gi" + tmpVolume: + enabled: true + sizeLimit: "1Gi" + extraEnvVars: [] diff --git a/braintrust/templates/_helpers.tpl b/braintrust/templates/_helpers.tpl index 88322b2..0d485eb 100644 --- a/braintrust/templates/_helpers.tpl +++ b/braintrust/templates/_helpers.tpl @@ -24,3 +24,34 @@ Static fast reader query sources used by API. -}} {{- join "," $sources -}} {{- end -}} + +{{/* +Render Brainstore container resources with provider-specific ephemeral storage. 
+ +Google Autopilot keeps the legacy behavior of defaulting the ephemeral-storage +request to volume.size when no explicit total request is set. AWS EKS requires +an explicit total pod-local storage budget that includes cache, optional /tmp, +and normal writable-layer/log overhead. +*/}} +{{- define "braintrust.brainstoreResources" -}} +{{- $root := .root -}} +{{- $resources := deepCopy (default (dict) .resources) -}} +{{- $supportsEphemeralStorage := or (eq $root.Values.cloud "aws") (and (eq $root.Values.cloud "google") (eq $root.Values.google.mode "autopilot")) -}} +{{- $request := "" -}} +{{- if and .ephemeralStorage .ephemeralStorage.request -}} +{{- $request = .ephemeralStorage.request -}} +{{- else if and (eq $root.Values.cloud "google") (eq $root.Values.google.mode "autopilot") .volumeSize -}} +{{- $request = .volumeSize -}} +{{- end -}} +{{- if and $supportsEphemeralStorage $request -}} +{{- $requests := deepCopy (default (dict) $resources.requests) -}} +{{- $_ := set $requests "ephemeral-storage" $request -}} +{{- $_ := set $resources "requests" $requests -}} +{{- end -}} +{{- if and $supportsEphemeralStorage .ephemeralStorage .ephemeralStorage.limit -}} +{{- $limits := deepCopy (default (dict) $resources.limits) -}} +{{- $_ := set $limits "ephemeral-storage" .ephemeralStorage.limit -}} +{{- $_ := set $resources "limits" $limits -}} +{{- end -}} +{{- toYaml $resources -}} +{{- end -}} diff --git a/braintrust/templates/api-deployment.yaml b/braintrust/templates/api-deployment.yaml index 87f346c..fbbde8b 100644 --- a/braintrust/templates/api-deployment.yaml +++ b/braintrust/templates/api-deployment.yaml @@ -44,6 +44,10 @@ spec: {{- end }} spec: serviceAccountName: {{ .Values.api.serviceAccount.name }} + {{- with .Values.api.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} {{- with .Values.api.nodeSelector }} nodeSelector: {{- toYaml . 
| nindent 8 }} @@ -60,6 +64,10 @@ spec: - name: api image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag }}" imagePullPolicy: {{ .Values.api.image.pullPolicy }} + {{- with .Values.api.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} ports: - containerPort: {{ .Values.api.service.port }} resources: @@ -122,17 +130,32 @@ spec: {{- if .Values.api.extraEnvVars }} {{- toYaml .Values.api.extraEnvVars | nindent 12 }} {{- end }} - {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} + {{- if or .Values.api.tmpVolume.enabled (and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver) }} volumeMounts: + {{- if .Values.api.tmpVolume.enabled }} + - name: tmp-volume + mountPath: /tmp + {{- end }} + {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline mountPath: "/mnt/secrets-store" readOnly: true + {{- end }} {{- end }} {{- with .Values.api.extraContainers }} {{- toYaml . 
| nindent 8 }} {{- end }} volumes: - {{- if or (and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver) .Values.api.extraVolumes }} + {{- if or .Values.api.tmpVolume.enabled (and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver) .Values.api.extraVolumes }} + {{- if .Values.api.tmpVolume.enabled }} + - name: tmp-volume + emptyDir: + {{- if .Values.api.tmpVolume.sizeLimit }} + sizeLimit: {{ .Values.api.tmpVolume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline csi: @@ -147,4 +170,3 @@ spec: {{- else }} [] {{- end }} - diff --git a/braintrust/templates/brainstore-fastreader-deployment.yaml b/braintrust/templates/brainstore-fastreader-deployment.yaml index ebb8a7d..ea43276 100644 --- a/braintrust/templates/brainstore-fastreader-deployment.yaml +++ b/braintrust/templates/brainstore-fastreader-deployment.yaml @@ -44,6 +44,10 @@ spec: {{- end }} spec: serviceAccountName: {{ .Values.brainstore.serviceAccount.name }} + {{- with .Values.brainstore.fastreader.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} {{- if or .Values.brainstore.fastreader.nodeSelector (and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot")) }} nodeSelector: {{- with .Values.brainstore.fastreader.nodeSelector }} @@ -67,16 +71,21 @@ spec: - name: brainstore-fastreader image: "{{ .Values.brainstore.image.repository }}:{{ .Values.brainstore.image.tag }}" imagePullPolicy: {{ .Values.brainstore.image.pullPolicy }} + {{- with .Values.brainstore.fastreader.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} command: ["brainstore"] args: ["web"] ports: - containerPort: {{ .Values.brainstore.fastreader.service.port }} resources: - {{- $resources := .Values.brainstore.fastreader.resources }} - {{- if and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot") .Values.brainstore.fastreader.volume.size }} - {{- $resources = merge (dict "requests" (merge $resources.requests (dict "ephemeral-storage" .Values.brainstore.fastreader.volume.size))) $resources }} - {{- end }} - {{- toYaml $resources | nindent 12 }} + {{- include "braintrust.brainstoreResources" (dict + "root" . + "resources" .Values.brainstore.fastreader.resources + "volumeSize" .Values.brainstore.fastreader.volume.size + "ephemeralStorage" .Values.brainstore.fastreader.ephemeralStorage + ) | nindent 12 }} {{- with .Values.brainstore.livenessProbe }} livenessProbe: {{- toYaml . | nindent 12 }} @@ -134,6 +143,10 @@ spec: volumeMounts: - name: cache-volume mountPath: {{ .Values.brainstore.fastreader.cacheDir }} + {{- if .Values.brainstore.fastreader.tmpVolume.enabled }} + - name: tmp-volume + mountPath: /tmp + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline mountPath: "/mnt/secrets-store" @@ -155,8 +168,22 @@ spec: requests: storage: {{ required "brainstore.fastreader.volume.size must be set" .Values.brainstore.fastreader.volume.size | quote }} {{- else }} - emptyDir: {} + emptyDir: + {{- if .Values.brainstore.fastreader.volume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.fastreader.volume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} {{- end }} + {{- if .Values.brainstore.fastreader.tmpVolume.enabled }} + - name: tmp-volume + emptyDir: + {{- if .Values.brainstore.fastreader.tmpVolume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.fastreader.tmpVolume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} 
- name: secrets-store-inline csi: diff --git a/braintrust/templates/brainstore-reader-deployment.yaml b/braintrust/templates/brainstore-reader-deployment.yaml index 8773906..d3aa0b5 100644 --- a/braintrust/templates/brainstore-reader-deployment.yaml +++ b/braintrust/templates/brainstore-reader-deployment.yaml @@ -44,6 +44,10 @@ spec: {{- end }} spec: serviceAccountName: {{ .Values.brainstore.serviceAccount.name }} + {{- with .Values.brainstore.reader.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} {{- if or .Values.brainstore.reader.nodeSelector (and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot")) }} nodeSelector: {{- with .Values.brainstore.reader.nodeSelector }} @@ -67,16 +71,21 @@ spec: - name: brainstore-reader image: "{{ .Values.brainstore.image.repository }}:{{ .Values.brainstore.image.tag }}" imagePullPolicy: {{ .Values.brainstore.image.pullPolicy }} + {{- with .Values.brainstore.reader.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} command: ["brainstore"] args: ["web"] ports: - containerPort: {{ .Values.brainstore.reader.service.port }} resources: - {{- $resources := .Values.brainstore.reader.resources }} - {{- if and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot") .Values.brainstore.reader.volume.size }} - {{- $resources = merge (dict "requests" (merge $resources.requests (dict "ephemeral-storage" .Values.brainstore.reader.volume.size))) $resources }} - {{- end }} - {{- toYaml $resources | nindent 12 }} + {{- include "braintrust.brainstoreResources" (dict + "root" . + "resources" .Values.brainstore.reader.resources + "volumeSize" .Values.brainstore.reader.volume.size + "ephemeralStorage" .Values.brainstore.reader.ephemeralStorage + ) | nindent 12 }} {{- with .Values.brainstore.livenessProbe }} livenessProbe: {{- toYaml . 
| nindent 12 }} @@ -134,6 +143,10 @@ spec: volumeMounts: - name: cache-volume mountPath: {{ .Values.brainstore.reader.cacheDir }} + {{- if .Values.brainstore.reader.tmpVolume.enabled }} + - name: tmp-volume + mountPath: /tmp + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline mountPath: "/mnt/secrets-store" @@ -155,8 +168,22 @@ spec: requests: storage: {{ required "brainstore.reader.volume.size must be set" .Values.brainstore.reader.volume.size | quote }} {{- else }} - emptyDir: {} + emptyDir: + {{- if .Values.brainstore.reader.volume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.reader.volume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} {{- end }} + {{- if .Values.brainstore.reader.tmpVolume.enabled }} + - name: tmp-volume + emptyDir: + {{- if .Values.brainstore.reader.tmpVolume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.reader.tmpVolume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline csi: diff --git a/braintrust/templates/brainstore-writer-deployment.yaml b/braintrust/templates/brainstore-writer-deployment.yaml index 79b5159..0a2dab2 100644 --- a/braintrust/templates/brainstore-writer-deployment.yaml +++ b/braintrust/templates/brainstore-writer-deployment.yaml @@ -44,6 +44,10 @@ spec: {{- end }} spec: serviceAccountName: {{ .Values.brainstore.serviceAccount.name }} + {{- with .Values.brainstore.writer.podSecurityContext }} + securityContext: + {{- toYaml . 
| nindent 8 }} + {{- end }} {{- if or .Values.brainstore.writer.nodeSelector (and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot")) }} nodeSelector: {{- with .Values.brainstore.writer.nodeSelector }} @@ -67,16 +71,21 @@ spec: - name: brainstore-writer image: "{{ .Values.brainstore.image.repository }}:{{ .Values.brainstore.image.tag }}" imagePullPolicy: {{ .Values.brainstore.image.pullPolicy }} + {{- with .Values.brainstore.writer.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} command: ["brainstore"] args: ["web"] ports: - containerPort: {{ .Values.brainstore.writer.service.port }} resources: - {{- $resources := .Values.brainstore.writer.resources }} - {{- if and (eq .Values.cloud "google") (eq .Values.google.mode "autopilot") .Values.brainstore.writer.volume.size }} - {{- $resources = merge (dict "requests" (merge $resources.requests (dict "ephemeral-storage" .Values.brainstore.writer.volume.size))) $resources }} - {{- end }} - {{- toYaml $resources | nindent 12 }} + {{- include "braintrust.brainstoreResources" (dict + "root" . + "resources" .Values.brainstore.writer.resources + "volumeSize" .Values.brainstore.writer.volume.size + "ephemeralStorage" .Values.brainstore.writer.ephemeralStorage + ) | nindent 12 }} {{- with .Values.brainstore.livenessProbe }} livenessProbe: {{- toYaml . 
| nindent 12 }} @@ -134,6 +143,10 @@ spec: volumeMounts: - name: cache-volume mountPath: {{ .Values.brainstore.writer.cacheDir }} + {{- if .Values.brainstore.writer.tmpVolume.enabled }} + - name: tmp-volume + mountPath: /tmp + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline mountPath: "/mnt/secrets-store" @@ -155,8 +168,22 @@ spec: requests: storage: {{ required "brainstore.writer.volume.size must be set" .Values.brainstore.writer.volume.size | quote }} {{- else }} - emptyDir: {} + emptyDir: + {{- if .Values.brainstore.writer.volume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.writer.volume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} {{- end }} + {{- if .Values.brainstore.writer.tmpVolume.enabled }} + - name: tmp-volume + emptyDir: + {{- if .Values.brainstore.writer.tmpVolume.sizeLimit }} + sizeLimit: {{ .Values.brainstore.writer.tmpVolume.sizeLimit | quote }} + {{- else }} + {} + {{- end }} + {{- end }} {{- if and (eq .Values.cloud "azure") .Values.azure.enableAzureKeyVaultDriver }} - name: secrets-store-inline csi: diff --git a/braintrust/tests/brainstore-fastreader_test.yaml b/braintrust/tests/brainstore-fastreader_test.yaml index 7d15760..9293725 100644 --- a/braintrust/tests/brainstore-fastreader_test.yaml +++ b/braintrust/tests/brainstore-fastreader_test.yaml @@ -44,3 +44,61 @@ tests: content: name: CUSTOM_VAR value: "custom-value" + + - it: should use explicit ephemeral-storage request for AWS + template: brainstore-fastreader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/aws-values.yaml + set: + brainstore.fastreader.ephemeralStorage.request: "101Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "101Gi" + + - it: should not inject ephemeral-storage request for Azure + template: brainstore-fastreader-deployment.yaml + values: + - 
__fixtures__/base-values.yaml + - __fixtures__/azure-values.yaml + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + + - it: should not inject ephemeral-storage request for GKE Standard + template: brainstore-fastreader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + google.mode: "standard" + brainstore.fastreader.ephemeralStorage.request: "101Gi" + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + + - it: should mount tmp volume only when enabled + template: brainstore-fastreader-deployment.yaml + values: + - __fixtures__/base-values.yaml + set: + brainstore.fastreader.tmpVolume.enabled: true + brainstore.fastreader.tmpVolume.sizeLimit: "1Gi" + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: tmp-volume + mountPath: /tmp + - equal: + path: spec.template.spec.volumes[1].emptyDir.sizeLimit + value: "1Gi" diff --git a/braintrust/tests/brainstore-reader_test.yaml b/braintrust/tests/brainstore-reader_test.yaml index 91e5596..b885550 100644 --- a/braintrust/tests/brainstore-reader_test.yaml +++ b/braintrust/tests/brainstore-reader_test.yaml @@ -80,6 +80,19 @@ tests: - isNotNull: path: spec.template.spec.volumes[0].emptyDir + - it: should not inject ephemeral-storage request for Azure + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/azure-values.yaml + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + - isNull: + path: spec.template.spec.containers[0].resources.limits["ephemeral-storage"] + - it: should include Azure Key Vault volume mount when enabled template: brainstore-reader-deployment.yaml values: @@ 
-113,6 +126,72 @@ tests: path: spec.template.spec.nodeSelector["cloud.google.com/compute-class"] value: "Performance" + - it: should use explicit ephemeral-storage request and limit for AWS + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/aws-values.yaml + set: + brainstore.reader.ephemeralStorage.request: "101Gi" + brainstore.reader.ephemeralStorage.limit: "101Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "101Gi" + - equal: + path: spec.template.spec.containers[0].resources.limits["ephemeral-storage"] + value: "101Gi" + + - it: should prefer explicit ephemeral-storage request over volume size on GKE Autopilot + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + brainstore.reader.volume.size: "100Gi" + brainstore.reader.ephemeralStorage.request: "102Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "102Gi" + + - it: should not inject ephemeral-storage request for GKE Standard + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + google.mode: "standard" + brainstore.reader.ephemeralStorage.request: "102Gi" + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + + - it: should mount tmp volume only when enabled + template: brainstore-reader-deployment.yaml + values: + - __fixtures__/base-values.yaml + set: + brainstore.reader.tmpVolume.enabled: true + brainstore.reader.tmpVolume.sizeLimit: "1Gi" + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: tmp-volume + mountPath: /tmp + 
- equal: + path: spec.template.spec.volumes[1].emptyDir.sizeLimit + value: "1Gi" + - it: should include BRAINSTORE_LOCKS_URI when locksBackend is redis template: brainstore-reader-deployment.yaml values: diff --git a/braintrust/tests/brainstore-writer_test.yaml b/braintrust/tests/brainstore-writer_test.yaml index cc8315b..c656c32 100644 --- a/braintrust/tests/brainstore-writer_test.yaml +++ b/braintrust/tests/brainstore-writer_test.yaml @@ -80,6 +80,19 @@ tests: - isNotNull: path: spec.template.spec.volumes[0].emptyDir + - it: should not inject ephemeral-storage request for Azure + template: brainstore-writer-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/azure-values.yaml + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + - isNull: + path: spec.template.spec.containers[0].resources.limits["ephemeral-storage"] + - it: should include Azure Key Vault volume mount when enabled template: brainstore-writer-deployment.yaml values: @@ -113,6 +126,71 @@ tests: path: spec.template.spec.nodeSelector["cloud.google.com/compute-class"] value: "Performance" + - it: should use explicit ephemeral-storage request and limit for AWS + template: brainstore-writer-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/aws-values.yaml + set: + brainstore.writer.ephemeralStorage.request: "201Gi" + brainstore.writer.ephemeralStorage.limit: "201Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "201Gi" + - equal: + path: spec.template.spec.containers[0].resources.limits["ephemeral-storage"] + value: "201Gi" + + - it: should keep GKE Autopilot volume size fallback when no explicit request is set + template: brainstore-writer-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + 
brainstore.writer.volume.size: "200Gi" + release: + namespace: "braintrust" + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + value: "200Gi" + + - it: should not inject ephemeral-storage request for GKE Standard + template: brainstore-writer-deployment.yaml + values: + - __fixtures__/base-values.yaml + - __fixtures__/google-values.yaml + set: + google.mode: "standard" + brainstore.writer.ephemeralStorage.request: "201Gi" + release: + namespace: "braintrust" + asserts: + - isNull: + path: spec.template.spec.containers[0].resources.requests["ephemeral-storage"] + + - it: should mount tmp volume only when enabled + template: brainstore-writer-deployment.yaml + values: + - __fixtures__/base-values.yaml + set: + brainstore.writer.tmpVolume.enabled: true + brainstore.writer.tmpVolume.sizeLimit: "1Gi" + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: tmp-volume + mountPath: /tmp + - equal: + path: spec.template.spec.volumes[1].emptyDir.sizeLimit + value: "1Gi" + - it: should include BRAINSTORE_LOCKS_URI when locksBackend is redis template: brainstore-writer-deployment.yaml values: diff --git a/braintrust/values.yaml b/braintrust/values.yaml index b3a3739..d0762bb 100644 --- a/braintrust/values.yaml +++ b/braintrust/values.yaml @@ -116,6 +116,9 @@ api: limits: cpu: "4" memory: "8Gi" + tmpVolume: + enabled: false + sizeLimit: "" # Allow running user generated code functions (e.g. scorers/tools) allowCodeFunctionExecution: true # Brainstore backfill configuration. These defaults are fine for most cases. 
@@ -199,7 +202,6 @@ brainstore: timeoutSeconds: 5 failureThreshold: 3 successThreshold: 1 - # Brainstore Reader configuration reader: name: "brainstore-reader" @@ -232,6 +234,18 @@ brainstore: volume: # Storage size for ephemeral storage requests (used with GKE Autopilot local SSDs) size: "" + # Optional emptyDir size limit for CEL policy compliance + sizeLimit: "" + ephemeralStorage: + # Total pod-local storage budget for ephemeral-storage requests. + # Size this for cache emptyDir, optional /tmp emptyDir, and writable layer/log overhead. + # On GKE Autopilot, volume.size is still used as the fallback request when this is unset. + request: "" + # Optional total pod-local storage limit. + limit: "" + tmpVolume: + enabled: false + sizeLimit: "" extraEnvVars: [] nodeSelector: {} tolerations: [] @@ -271,6 +285,18 @@ brainstore: volume: # Storage size for ephemeral storage requests (used with GKE Autopilot local SSDs) size: "" + # Optional emptyDir size limit for CEL policy compliance + sizeLimit: "" + ephemeralStorage: + # Total pod-local storage budget for ephemeral-storage requests. + # Size this for cache emptyDir, optional /tmp emptyDir, and writable layer/log overhead. + # On GKE Autopilot, volume.size is still used as the fallback request when this is unset. + request: "" + # Optional total pod-local storage limit. + limit: "" + tmpVolume: + enabled: false + sizeLimit: "" extraEnvVars: [] nodeSelector: {} tolerations: [] @@ -311,6 +337,18 @@ brainstore: # Storage size for ephemeral storage requests # Used with GKE Autopilot local SSDs and Azure Container Storage CSI size: "" + # Optional emptyDir size limit for CEL policy compliance + sizeLimit: "" + ephemeralStorage: + # Total pod-local storage budget for ephemeral-storage requests. + # Size this for cache emptyDir, optional /tmp emptyDir, and writable layer/log overhead. + # On GKE Autopilot, volume.size is still used as the fallback request when this is unset. 
+ request: "" + # Optional total pod-local storage limit. + limit: "" + tmpVolume: + enabled: false + sizeLimit: "" extraEnvVars: [] # Example: # - name: MY_ENV_VAR