Python SDK for AISpendGuard — track AI API spend with tags-only observability. No prompts or outputs are ever sent.
pip install aispendguard

from aispendguard import AISpendGuard, create_openai_event
import openai, time
client = AISpendGuard(api_key="asg_your_key_here")
openai_client = openai.OpenAI()
start = time.time()
response = openai_client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "Classify: 'I want to cancel'"}],
)
event = create_openai_event(
model="gpt-4o-mini",
usage=response.usage,
latency_ms=int((time.time() - start) * 1000),
tags={
"task_type": "classify",
"feature": "ticket_triage",
"route": "POST /api/classify",
},
)
client.track(event)

from aispendguard import create_openai_event
event = create_openai_event(
model="gpt-4o-mini",
usage=response.usage, # auto-extracts tokens, cache hits, reasoning
latency_ms=420,
resolved_model=response.model, # e.g. "gpt-4o-mini-2024-07-18"
tags={"task_type": "classify", "feature": "triage", "route": "POST /api/classify"},
)

from aispendguard import create_anthropic_event
event = create_anthropic_event(
model="claude-sonnet-4-20250514",
usage=message.usage, # auto-extracts cache_read + cache_creation
latency_ms=800,
tags={"task_type": "summarize", "feature": "digest", "route": "POST /api/summarize"},
)

from aispendguard import create_gemini_event
event = create_gemini_event(
model="gemini-2.5-flash",
usage=response.usage_metadata, # auto-extracts thoughts_token_count
latency_ms=500,
tags={"task_type": "answer", "feature": "search", "route": "POST /api/search"},
)

With streaming, usage data arrives in the final chunk. Accumulate the stream first, then track:
# OpenAI streaming
stream = openai_client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "Hello"}],
stream=True,
stream_options={"include_usage": True}, # required for usage in stream
)
usage = None
for chunk in stream:
if chunk.usage:
usage = chunk.usage
# ... process chunk.choices
if usage:
event = create_openai_event(
model="gpt-4o-mini", usage=usage, latency_ms=latency, tags=tags
)
client.track(event)

# Anthropic streaming
with anthropic_client.messages.stream(
model="claude-sonnet-4-20250514",
messages=[{"role": "user", "content": "Hello"}],
max_tokens=200,
) as stream:
for text in stream.text_stream:
pass # process text
message = stream.get_final_message()
event = create_anthropic_event(
model="claude-sonnet-4-20250514", usage=message.usage, latency_ms=latency, tags=tags
)
client.track(event)

client = AISpendGuard(
api_key="asg_...",
endpoint="https://www.aispendguard.com/api/ingest", # default
batch_size=10, # events buffered before auto-send
flush_interval=5.0, # seconds between background flushes
timeout=10.0, # HTTP timeout
strict=False, # True = raise on errors instead of logging
default_tags={ # merged into every event
"environment": "prod",
"service": "api",
},
)

Every event must include these tags:
| Tag | Description |
|---|---|
| `task_type` | What the model does: classify, summarize, generate, code, etc. |
| `feature` | Your product feature name: ticket_triage, doc_search, etc. |
| `route` | API route or function: POST /api/classify, handle_support, etc. |
This SDK never sends prompt content, messages, or model outputs. Only metadata (tokens, model, latency) and your tags are transmitted. Forbidden tag keys (like prompt, messages, content) are rejected at validation time.
License: MIT