Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generation rules for the k8s-litellm-spend-governance codebundle.
# A single rule: Kubernetes Service resources whose name or label values
# contain the substring "litellm" produce one SLX with an SLI and a runbook.
# NOTE(review): substring matching on label values may also select companion
# Services that merely carry a "litellm" label value — confirm this is wanted.
apiVersion: runwhen.com/v1
kind: GenerationRules
spec:
  generationRules:
    - resourceTypes:
        - service
      matchRules:
        # Pattern is tested against the resource name and its label values.
        - type: pattern
          pattern: 'litellm'
          properties:
            - name
            - label-values
          mode: substring
      slxs:
        - baseName: litellm-spend-gov
          shortenedBaseName: llm-spend-gov
          levelOfDetail: basic
          qualifiers:
            - resource
            - namespace
            - cluster
          baseTemplateName: k8s-litellm-spend-governance
          outputItems:
            - type: slx
            - type: sli
            # Only the runbook item names its template file explicitly.
            - type: runbook
              templateName: k8s-litellm-spend-governance-taskset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# ServiceLevelIndicator template (rendered with Jinja) for the LiteLLM
# spend-governance codebundle. It points at sli.robot and sets
# intervalSeconds to 300.
apiVersion: runwhen.com/v1
kind: ServiceLevelIndicator
metadata:
name: {{slx_name}}
labels:
{% include "common-labels.yaml" %}
annotations:
{% include "common-annotations.yaml" %}
spec:
displayUnitsLong: OK
displayUnitsShort: ok
locations:
- {{default_location}}
description: Scores LiteLLM proxy reachability, global spend versus threshold, and spend-log failure heuristics.
# repoUrl and ref use the repo_url / ref template variables when they are set;
# otherwise they fall back to the public rw-cli-codecollection repo on main.
codeBundle:
{% if repo_url %}
repoUrl: {{repo_url}}
{% else %}
repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
{% endif %}
{% if ref %}
ref: {{ref}}
{% else %}
ref: main
{% endif %}
pathToRobot: codebundles/k8s-litellm-spend-governance/sli.robot
intervalStrategy: intermezzo
intervalSeconds: 300
# PROXY_BASE_URL renders as an empty string unless the workspace defines
# custom.litellm_proxy_base_url. LITELLM_SERVICE_NAME is the matched Service name.
# NOTE(review): the codebundle README lists CONTEXT and NAMESPACE as required
# variables, but this template does not pass them — confirm sli.robot does not
# read them.
configProvided:
- name: PROXY_BASE_URL
value: "{{ custom.litellm_proxy_base_url | default('') }}"
- name: LITELLM_SERVICE_NAME
value: "{{match_resource.resource.metadata.name}}"
- name: RW_LOOKBACK_WINDOW
value: "24h"
- name: LITELLM_SPEND_THRESHOLD_USD
value: "0"
# Two secret layouts: when wb_version is truthy the shared kubernetes-auth.yaml
# include is used (silently skipped if that include is missing); otherwise a
# kubeconfig workspace secret is mapped explicitly. litellm_master_key is
# provided in both branches, with the workspace key name overridable through
# custom.litellm_master_key_secret_name.
secretsProvided:
{% if wb_version %}
{% include "kubernetes-auth.yaml" ignore missing %}
- name: litellm_master_key
workspaceKey: {{ custom.litellm_master_key_secret_name | default("litellm_master_key") }}
{% else %}
- name: kubeconfig
workspaceKey: {{ custom.kubeconfig_secret_name | default("kubeconfig") }}
- name: litellm_master_key
workspaceKey: {{ custom.litellm_master_key_secret_name | default("litellm_master_key") }}
{% endif %}

# NOTE(review): persona and sessionTTL are interpreted by the RunWhen platform;
# their exact semantics are not visible in this template.
alertConfig:
tasks:
persona: eager-edgar
sessionTTL: 10m
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# ServiceLevelX template (rendered with Jinja): the SLX record for a matched
# LiteLLM Service, carrying its alias, owner, statement, context and tags.
apiVersion: runwhen.com/v1
kind: ServiceLevelX
metadata:
name: {{slx_name}}
labels:
{% include "common-labels.yaml" %}
annotations:
{% include "common-annotations.yaml" %}
spec:
imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/kubernetes/resources/labeled/svc.svg
# The alias is the matched Service name followed by a fixed suffix.
alias: {{match_resource.resource.metadata.name}} LiteLLM Spend Governance
asMeasuredBy: LiteLLM Admin API checks for spend logs, global spend, and budget pressure.
# PROXY_BASE_URL renders as an empty string unless the workspace defines
# custom.litellm_proxy_base_url.
configProvided:
- name: PROXY_BASE_URL
value: "{{ custom.litellm_proxy_base_url | default('') }}"
owners:
- {{workspace.owner_email}}
statement: Monitors LiteLLM proxy spend, budgets, and blocked-request signals for this service.
# The hierarchy and tag includes are optional ("ignore missing"); the
# qualified_name entry and the access=read-only tag are always emitted.
additionalContext:
{% include "kubernetes-hierarchy.yaml" ignore missing %}
qualified_name: "{{ match_resource.qualified_name }}"
tags:
{% include "kubernetes-tags.yaml" ignore missing %}
- name: access
value: read-only
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Runbook template (rendered with Jinja) for the LiteLLM spend-governance
# codebundle. It points at runbook.robot and passes the matched Service's
# name, namespace and cluster context as configuration.
apiVersion: runwhen.com/v1
kind: Runbook
metadata:
name: {{slx_name}}
labels:
{% include "common-labels.yaml" %}
annotations:
{% include "common-annotations.yaml" %}
spec:
location: {{default_location}}
# repoUrl and ref use the repo_url / ref template variables when they are set;
# otherwise they fall back to the public rw-cli-codecollection repo on main.
codeBundle:
{% if repo_url %}
repoUrl: {{repo_url}}
{% else %}
repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
{% endif %}
{% if ref %}
ref: {{ref}}
{% else %}
ref: main
{% endif %}
pathToRobot: codebundles/k8s-litellm-spend-governance/runbook.robot
# KUBERNETES_DISTRIBUTION_BINARY defaults to kubectl and PROXY_BASE_URL to an
# empty string when the corresponding custom.* workspace values are not set.
# LITELLM_USER_IDS and LITELLM_TEAM_IDS are rendered empty; the codebundle
# README states that empty values skip the user and team checks, and that a
# LITELLM_SPEND_THRESHOLD_USD of 0 disables the threshold comparison.
configProvided:
- name: KUBERNETES_DISTRIBUTION_BINARY
value: "{{ custom.kubernetes_distribution_binary | default('kubectl') }}"
- name: NAMESPACE
value: "{{match_resource.resource.metadata.namespace}}"
- name: CONTEXT
value: "{{context}}"
- name: LITELLM_SERVICE_NAME
value: "{{match_resource.resource.metadata.name}}"
- name: PROXY_BASE_URL
value: "{{ custom.litellm_proxy_base_url | default('') }}"
- name: RW_LOOKBACK_WINDOW
value: "24h"
- name: LITELLM_SPEND_THRESHOLD_USD
value: "0"
- name: LITELLM_USER_IDS
value: ""
- name: LITELLM_TEAM_IDS
value: ""
# Two secret layouts: when wb_version is truthy the shared kubernetes-auth.yaml
# include is used (silently skipped if that include is missing); otherwise a
# kubeconfig workspace secret is mapped explicitly. litellm_master_key is
# provided in both branches.
secretsProvided:
{% if wb_version %}
{% include "kubernetes-auth.yaml" ignore missing %}
- name: litellm_master_key
workspaceKey: {{ custom.litellm_master_key_secret_name | default("litellm_master_key") }}
{% else %}
- name: kubeconfig
workspaceKey: {{ custom.kubeconfig_secret_name | default("kubeconfig") }}
- name: litellm_master_key
workspaceKey: {{ custom.litellm_master_key_secret_name | default("litellm_master_key") }}
{% endif %}

110 changes: 110 additions & 0 deletions codebundles/k8s-litellm-spend-governance/.test/Taskfile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
version: "3"

tasks:
# Default entry point: run the push check, then regenerate the RunWhen Local
# configuration and invoke the discovery task, in that order.
default:
  desc: 'Run/refresh config'
  cmds:
    - task: check-unpushed-commits
    - task: generate-rwl-config
    - task: run-rwl-discovery

# Tear-down: delete the test Kubernetes objects, then remove discovery output.
clean:
  desc: 'Run cleanup tasks'
  cmds:
    - task: remove-kubernetes-objects
    - task: clean-rwl-discovery

# Stand up the test fixture; currently this only delegates to the manifest apply.
build-infra:
  desc: 'Build test infrastructure'
  cmds:
    - task: create-kubernetes-objects

# Apply the fixture manifest with kubectl against the current kube context.
create-kubernetes-objects:
  desc: 'Apply manifests from kubernetes directory using kubectl'
  silent: true
  cmds:
    - kubectl apply -f kubernetes/manifest.yaml

# Delete the fixture objects; --ignore-not-found keeps repeated runs from failing.
remove-kubernetes-objects:
  desc: 'Delete kubernetes objects'
  silent: true
  cmds:
    - kubectl delete -f kubernetes/manifest.yaml --ignore-not-found=true

# Guard run before discovery: fail when the codebundle directory (the parent
# of this .test directory, including its .runwhen folder) has changes that are
# not yet on the remote branch. Files under .test are ignored.
check-unpushed-commits:
  desc: Check if outstanding commits or file updates need to be pushed before testing.
  vars:
    BASE_DIR: "../"
  cmds:
    # Task `vars` are template values and are not exported to the shell, so the
    # directory is injected through the BASE_DIR template reference rather than
    # a shell variable. It is handed to git as a pathspec, which git resolves
    # relative to the working directory; git still prints repo-root-relative
    # paths, so the "/.test/" filter keeps working.
    - |
      echo "Checking for uncommitted changes in {{.BASE_DIR}} and {{.BASE_DIR}}.runwhen, excluding '.test'..."
      UNCOMMITTED_FILES=$(git diff --name-only HEAD -- "{{.BASE_DIR}}" | grep -v "/\.test/" || true)
      if [ -n "$UNCOMMITTED_FILES" ]; then
        echo "Uncommitted changes found:"
        echo "$UNCOMMITTED_FILES"
        exit 1
      fi
    # A branch that does not exist on origin cannot be compared, and the
    # trailing "|| true" (needed for grep's no-match status) would otherwise
    # hide that error. Check for the remote ref explicitly first.
    - |
      git fetch origin
      UPSTREAM="origin/$(git rev-parse --abbrev-ref HEAD)"
      if ! git rev-parse --verify --quiet "$UPSTREAM" >/dev/null; then
        echo "Remote branch $UPSTREAM not found; push this branch before testing."
        exit 1
      fi
      UNPUSHED_FILES=$(git diff --name-only "$UPSTREAM" HEAD -- "{{.BASE_DIR}}" | grep -v "/\.test/" || true)
      if [ -n "$UNPUSHED_FILES" ]; then
        echo "Unpushed commits found:"
        echo "$UNPUSHED_FILES"
        exit 1
      fi
  silent: true

# Writes workspaceInfo.yaml for RunWhen Local, filled in from the local git
# checkout and the test manifest.
generate-rwl-config:
desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)"
# RW_WORKSPACE is exported to the script's environment; it falls back to
# "my-workspace" when no value is supplied.
env:
RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}'
# The script gathers four values and expands them into the heredoc, which is
# unquoted so the shell substitutes them as the file is written:
#   repo_url     origin remote URL of the current checkout
#   branch_name  currently checked-out branch
#   codebundle   name of the parent directory of the working directory
#   namespace    metadata.name of the Namespace document in kubernetes/manifest.yaml
cmds:
- |
repo_url=$(git config --get remote.origin.url)
branch_name=$(git rev-parse --abbrev-ref HEAD)
codebundle=$(basename "$(dirname "$PWD")")
namespace=$(yq e 'select(.kind == "Namespace") | .metadata.name' kubernetes/manifest.yaml -N)
cat <<EOF > workspaceInfo.yaml
workspaceName: "$RW_WORKSPACE"
workspaceOwnerEmail: authors@runwhen.com
defaultLocation: location-01
defaultLOD: none
cloudConfig:
kubernetes:
kubeconfigFile: /shared/kubeconfig
namespaceLODs:
$namespace: detailed
namespaces:
- $namespace
codeCollections:
- repoURL: "$repo_url"
branch: "$branch_name"
codeBundles: ["$codebundle"]
custom:
kubeconfig_secret_name: "kubeconfig"
kubernetes_distribution_binary: kubectl
litellm_proxy_base_url: "http://127.0.0.1:4000"
EOF
silent: true

# Placeholder step: it does not start discovery itself, it only prints a
# pointer to the manual instructions.
run-rwl-discovery:
  desc: 'Run RunWhen Local Discovery on test infrastructure (requires Docker + kubeconfig)'
  silent: true
  cmds:
    - |
      echo "See test-infra-kubernetes.md — start runwhen-local with kubeconfig mounted at /shared/kubeconfig when ready."

# Parse every generation-rules file with PyYAML's safe loader and fail the
# task if any of them does not load.
validate-generation-rules:
  desc: "Validate YAML files in .runwhen/generation-rules"
  cmds:
    # A bare "cmd && echo" inside the loop would leave the loop's exit status
    # equal to that of the last file only, hiding earlier failures. Record any
    # failure and exit non-zero after all files have been checked, so every
    # broken file is reported in a single run.
    - |
      status=0
      for yaml_file in ../.runwhen/generation-rules/*.yaml; do
        if python3 -c "import yaml,sys; yaml.safe_load(open(sys.argv[1]))" "$yaml_file"; then
          echo "OK $yaml_file"
        else
          echo "FAILED $yaml_file" >&2
          status=1
        fi
      done
      exit $status
  silent: true

# Remove the discovery output directory and the generated workspaceInfo.yaml.
clean-rwl-discovery:
  desc: 'Clean discovery output'
  silent: true
  cmds:
    - rm -rf output workspaceInfo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Namespace that holds the LiteLLM test fixture.
apiVersion: v1
kind: Namespace
metadata:
  name: test-litellm-spend-governance

---
# Fixture Service in the test namespace. Both its name and its
# app.kubernetes.io/name label value contain "litellm". It exposes port 4000
# and selects pods by the same label; no workload is defined in this manifest.
apiVersion: v1
kind: Service
metadata:
  name: litellm-proxy
  namespace: test-litellm-spend-governance
  labels:
    app.kubernetes.io/name: litellm
spec:
  selector:
    app.kubernetes.io/name: litellm
  ports:
    - name: http
      port: 4000
69 changes: 69 additions & 0 deletions codebundles/k8s-litellm-spend-governance/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Kubernetes LiteLLM Spend and Governance

This CodeBundle queries the LiteLLM proxy Admin and spend APIs (not container logs alone) to surface cost pressure, budget blocks, rate limits, and provider-side failures. Cluster access (`kubectl`) is used only for context and connectivity verification; the API checks themselves need `PROXY_BASE_URL` to point at a reachable proxy endpoint, such as an in-cluster Service URL or a local port-forward.

## Overview

- **Spend logs**: Scans `/spend/logs` for budget, rate-limit, and HTTP error heuristics in the lookback window.
- **Global spend**: Reads `/global/spend/report` and optionally compares estimated USD spend to `LITELLM_SPEND_THRESHOLD_USD`.
- **Keys**: When `/key/list` is available, flags keys near `max_budget` or past `expires`.
- **Users / teams**: Optional `/user/info` and `/team/info` checks for cooldowns and budget risk.
- **Aggregate triage**: Summarizes failure-mode counts from spend logs for quick review.

## Configuration

### Required variables

- `CONTEXT`: Kubernetes context for `kubectl` correlation and cluster verification.
- `NAMESPACE`: Namespace where the LiteLLM `Service` runs.
- `PROXY_BASE_URL`: Reachable LiteLLM base URL (for example `http://litellm.default.svc.cluster.local:4000` or a port-forward URL).
- `LITELLM_SERVICE_NAME`: Kubernetes `Service` name used in titles and reports.

### Optional variables

- `RW_LOOKBACK_WINDOW`: Lookback window used to compute the date range sent to the spend-log and spend-report queries (default: `24h`). Supports forms like `24h`, `7d`, `30m`.
- `LITELLM_SPEND_THRESHOLD_USD`: Alert when estimated global spend in the window exceeds this USD amount; `0` disables (default: `0`).
- `LITELLM_USER_IDS`: Comma-separated internal `user_id` values for `/user/info`; empty skips user checks.
- `LITELLM_TEAM_IDS`: Comma-separated team ids for `/team/info`; empty skips team checks.
- `KUBERNETES_DISTRIBUTION_BINARY`: `kubectl` or `oc` (default: `kubectl`).

### Secrets

- `litellm_master_key`: Bearer token with permission to call spend and admin routes (often the proxy master key or an admin key with spend scope).
- `kubeconfig`: Kubeconfig used only for optional cluster connectivity verification and standard RunWhen Kubernetes wiring.

## Tasks

### Review Recent Spend Logs for Failures

Calls `/spend/logs` with `summarize=false` for the computed date window and raises issues when heuristics match budget blocks, rate limits, or provider/HTTP failures.

### Check Global Spend Report Against Threshold

Calls `/global/spend/report` and, when `LITELLM_SPEND_THRESHOLD_USD` is greater than zero, compares estimated spend to the threshold.

### Inspect Virtual Key Spend and Remaining Budget

Uses `/key/list` when available to find keys near `max_budget` or expired keys.

### Review User Budget and Rate Limit Status

For each entry in `LITELLM_USER_IDS`, calls `/user/info` and surfaces `soft_budget_cooldown` when true.

### Summarize Team Budgets and Limits

For each entry in `LITELLM_TEAM_IDS`, calls `/team/info` and flags teams at or above 90% of `max_budget`.

### Aggregate Error and Blocked Request Signals

Produces triage counts (for example `budget_exceeded`, rate-limit, 429, 5xx patterns) from spend logs and raises an issue when the combined signal volume is high.

## SLI

`sli.robot` publishes a 0–1 score from three dimensions: proxy reachability (`/health` or `/`), global spend versus threshold, and spend-log failure heuristics. Generation rules emit an SLI template alongside the runbook.

## Notes

- Some routes are Enterprise-only or require specific key permissions; scripts emit clear issues on HTTP 403.
- Database-backed spend logs must be enabled on the proxy for full `/spend/logs` results.
- Set `custom.litellm_proxy_base_url` in workspace configuration when using discovery templates, or override `PROXY_BASE_URL` per SLX.
Loading