diff --git a/.claude/skills/opengradient/SKILL.md b/.claude/skills/opengradient/SKILL.md deleted file mode 100644 index 010a9bb..0000000 --- a/.claude/skills/opengradient/SKILL.md +++ /dev/null @@ -1,238 +0,0 @@ ---- -name: opengradient -description: > - Use when the user wants to write code using the OpenGradient SDK, including - LLM inference, chat completions, streaming, tool calling, on-chain model - inference, LangChain agents, model hub operations, or digital twins. - Also use when the user asks how to use a specific OpenGradient feature. -argument-hint: "[task description]" -allowed-tools: Read, Grep, Glob ---- - -You are an expert on the **OpenGradient Python SDK** (`opengradient`). Help the user write correct, idiomatic code using the SDK. - -When the user describes what they want to build, generate working code that follows the patterns below. Always prefer the simplest approach that satisfies the requirements. - -## Key Reference Files - -When you need more detail, read these files from the project: - -- **Examples**: `examples/` folder (runnable scripts for every feature) -- **Tutorials**: `tutorials/` folder (step-by-step walkthroughs) -- **Types & Enums**: `src/opengradient/types.py` -- **Client API**: `src/opengradient/client/client.py` -- **LLM API**: `src/opengradient/client/llm.py` -- **Alpha API**: `src/opengradient/client/alpha.py` -- **LangChain adapter**: `src/opengradient/agents/__init__.py` - -Also read the detailed API reference bundled with this skill at `api-reference.md` in this skill's directory. - -## SDK Overview - -OpenGradient is a decentralized AI inference platform. 
The SDK provides: - -- **Verified LLM inference** via TEE (Trusted Execution Environment) -- **x402 payment settlement** on Base Sepolia (on-chain receipts) -- **Multi-provider models** (OpenAI, Anthropic, Google, xAI) through a unified API -- **On-chain ONNX model inference** (alpha features) -- **LangChain integration** for building agents -- **Digital twins** chat - -## Initialization - -```python -import opengradient as og - -client = og.init( - private_key="0x...", # Required: Base Sepolia key with OPG tokens - alpha_private_key="0x...", # Optional: OpenGradient testnet key - email="...", # Optional: Model Hub auth - password="...", # Optional: Model Hub auth - twins_api_key="...", # Optional: Digital twins -) -``` - -Before the first LLM call, approve OPG token spending (idempotent): -```python -client.llm.ensure_opg_approval(opg_amount=5) -``` - -## Available Models (`og.TEE_LLM`) - -| Provider | Models | -|------------|--------| -| OpenAI | `GPT_4_1_2025_04_14`, `O4_MINI`, `GPT_5`, `GPT_5_MINI`, `GPT_5_2` | -| Anthropic | `CLAUDE_SONNET_4_5`, `CLAUDE_SONNET_4_6`, `CLAUDE_HAIKU_4_5`, `CLAUDE_OPUS_4_5`, `CLAUDE_OPUS_4_6` | -| Google | `GEMINI_2_5_FLASH`, `GEMINI_2_5_PRO`, `GEMINI_2_5_FLASH_LITE`, `GEMINI_3_PRO`, `GEMINI_3_FLASH` | -| xAI | `GROK_4`, `GROK_4_FAST`, `GROK_4_1_FAST`, `GROK_4_1_FAST_NON_REASONING` | - -## Settlement Modes (`og.x402SettlementMode`) - -- `SETTLE` — Hashes only (maximum privacy) -- `SETTLE_METADATA` — Full data on-chain (maximum transparency) -- `SETTLE_BATCH` — Aggregated hashes (most cost-efficient, default) - -## Core Patterns - -### Basic Chat - -```python -result = client.llm.chat( - model=og.TEE_LLM.GEMINI_2_5_FLASH, - messages=[{"role": "user", "content": "Hello!"}], - max_tokens=300, - temperature=0.0, -) -print(result.chat_output["content"]) -``` - -### Streaming - -```python -stream = client.llm.chat( - model=og.TEE_LLM.GPT_4_1_2025_04_14, - messages=[{"role": "user", "content": "Explain quantum computing"}], - 
max_tokens=500, - stream=True, -) -for chunk in stream: - if chunk.choices[0].delta.content: - print(chunk.choices[0].delta.content, end="", flush=True) -``` - -### Tool Calling - -```python -tools = [{ - "type": "function", - "function": { - "name": "get_weather", - "description": "Get current weather", - "parameters": { - "type": "object", - "properties": {"city": {"type": "string"}}, - "required": ["city"], - }, - }, -}] - -result = client.llm.chat( - model=og.TEE_LLM.GPT_5, - messages=[{"role": "user", "content": "Weather in NYC?"}], - tools=tools, - max_tokens=200, -) - -if result.finish_reason == "tool_calls": - for tc in result.chat_output["tool_calls"]: - print(f"Call: {tc['function']['name']}({tc['function']['arguments']})") -``` - -### Multi-Turn Tool Agent Loop - -```python -messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": user_query}, -] - -for _ in range(max_iterations): - result = client.llm.chat( - model=og.TEE_LLM.GPT_5, - messages=messages, - tools=tools, - tool_choice="auto", - ) - if result.finish_reason == "tool_calls": - messages.append(result.chat_output) - for tc in result.chat_output["tool_calls"]: - tool_result = execute_tool(tc["function"]["name"], tc["function"]["arguments"]) - messages.append({ - "role": "tool", - "tool_call_id": tc["id"], - "content": tool_result, - }) - else: - final_answer = result.chat_output["content"] - break -``` - -### LangChain ReAct Agent - -```python -from langchain_core.tools import tool -from langgraph.prebuilt import create_react_agent - -llm = og.agents.langchain_adapter( - private_key="0x...", - model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, - max_tokens=300, -) - -@tool -def lookup(query: str) -> str: - """Look up information.""" - return "result" - -agent = create_react_agent(llm, [lookup]) -result = agent.invoke({"messages": [("user", "Find info about X")]}) -print(result["messages"][-1].content) -``` - -### On-Chain ONNX Inference (Alpha) - 
-```python -result = client.alpha.infer( - model_cid="QmbUqS93oc4JTLMHwpVxsE39mhNxy6hpf6Py3r9oANr8aZ", - inference_mode=og.InferenceMode.VANILLA, - model_input={"input": [1.0, 2.0, 3.0]}, -) -print(result.model_output) -print(result.transaction_hash) -``` - -### Digital Twins - -```python -client = og.init(private_key="0x...", twins_api_key="your-key") - -result = client.twins.chat( - twin_id="0x1abd463fd6244be4a1dc0f69e0b70cd5", - model=og.TEE_LLM.GROK_4_1_FAST_NON_REASONING, - messages=[{"role": "user", "content": "What do you think about AI?"}], - max_tokens=1000, -) -print(result.chat_output["content"]) -``` - -### Model Hub: Upload a Model - -```python -repo = client.model_hub.create_model( - model_name="my-model", - model_desc="A prediction model", - version="1.0.0", -) -upload = client.model_hub.upload( - model_name=repo.name, - version=repo.initialVersion, - model_path="./model.onnx", -) -print(f"Model CID: {upload.modelCid}") -``` - -## Return Types - -- **`TextGenerationOutput`**: `chat_output` (dict), `finish_reason`, `transaction_hash`, `payment_hash` -- **`TextGenerationStream`**: iterable of `StreamChunk` objects -- **`StreamChunk`**: `choices[0].delta.content`, `choices[0].delta.tool_calls`, `usage` (final only), `is_final` -- **`InferenceResult`**: `model_output` (dict of np.ndarray), `transaction_hash` - -## Guidelines - -1. Always call `client.llm.ensure_opg_approval()` before the first LLM inference. -2. Handle `finish_reason`: `"stop"` / `"length"` = text response, `"tool_calls"` = function calls. -3. For streaming, check `chunk.choices[0].delta.content` is not None before printing. -4. In tool-calling loops, append `result.chat_output` as the assistant message, then append each tool result with `role: "tool"` and matching `tool_call_id`. -5. Use environment variables or config files for private keys — never hardcode them. -6. If you are unsure about a specific API detail, read the source files listed above. 
diff --git a/.claude/skills/opengradient/api-reference.md b/.claude/skills/opengradient/api-reference.md deleted file mode 100644 index b7db429..0000000 --- a/.claude/skills/opengradient/api-reference.md +++ /dev/null @@ -1,287 +0,0 @@ -# OpenGradient SDK — API Reference - -Detailed reference for the OpenGradient Python SDK. Use this alongside -the main SKILL.md when you need specifics about parameters, return types, -or less common features. - ---- - -## Client Initialization - -```python -import opengradient as og - -client = og.init( - private_key: str, # Required — Base Sepolia wallet (holds OPG tokens) - alpha_private_key: str = None, # Optional — OpenGradient testnet key - email: str = None, # Optional — Model Hub email - password: str = None, # Optional — Model Hub password - twins_api_key: str = None, # Optional — Digital twins API key -) -``` - -**Namespaces on `client`:** -- `client.llm` — LLM inference (chat, completions, streaming) -- `client.alpha` — On-chain ONNX model inference, workflows -- `client.model_hub` — Model repository CRUD -- `client.twins` — Digital twins chat (requires `twins_api_key`) - ---- - -## LLM API — `client.llm` - -### `ensure_opg_approval(opg_amount: int)` - -Approve OPG token spending for x402 payments. Idempotent — safe to call -multiple times. Must be called before the first `chat()` call. 
- -### `chat()` - -```python -client.llm.chat( - model: og.TEE_LLM, - messages: list[dict], # OpenAI-style message dicts - max_tokens: int = 300, - temperature: float = 0.0, - stream: bool = False, - tools: list[dict] = None, # Function definitions - tool_choice: str = None, # "auto", "none", or specific - x402_settlement_mode: og.x402SettlementMode = og.x402SettlementMode.SETTLE_BATCH, -) -> TextGenerationOutput | TextGenerationStream -``` - -**Messages format:** -```python -[ - {"role": "system", "content": "System prompt"}, - {"role": "user", "content": "User message"}, - {"role": "assistant", "content": "Previous response"}, - {"role": "tool", "tool_call_id": "call_123", "content": "Tool output"}, -] -``` - ---- - -## Return Types - -### `TextGenerationOutput` - -Returned by `client.llm.chat()` when `stream=False`. - -| Field | Type | Description | -|-------|------|-------------| -| `chat_output` | `dict` | `{"role": "assistant", "content": "...", "tool_calls": [...]}` | -| `completion_output` | `str` | Text (completions API only) | -| `finish_reason` | `str` | `"stop"`, `"length"`, or `"tool_calls"` | -| `transaction_hash` | `str` | Blockchain tx hash (usually `"external"` for TEE) | -| `payment_hash` | `str` | x402 settlement proof | - -### `TextGenerationStream` - -Returned by `client.llm.chat()` when `stream=True`. Iterable of `StreamChunk`. - -### `StreamChunk` - -| Field | Type | Description | -|-------|------|-------------| -| `choices` | `list[StreamChoice]` | Delta updates | -| `choices[0].delta.content` | `str or None` | Incremental text | -| `choices[0].delta.tool_calls` | `list or None` | Incremental tool calls | -| `model` | `str` | Model identifier | -| `usage` | `StreamUsage or None` | Token counts (final chunk only) | -| `is_final` | `bool` | `True` on last chunk | - -### `InferenceResult` - -Returned by `client.alpha.infer()`. 
- -| Field | Type | Description | -|-------|------|-------------| -| `model_output` | `dict[str, np.ndarray]` | Model outputs | -| `transaction_hash` | `str` | On-chain tx hash | - ---- - -## Models — `og.TEE_LLM` - -### OpenAI -- `GPT_4_1_2025_04_14` -- `O4_MINI` -- `GPT_5` -- `GPT_5_MINI` -- `GPT_5_2` - -### Anthropic -- `CLAUDE_SONNET_4_5` -- `CLAUDE_SONNET_4_6` -- `CLAUDE_HAIKU_4_5` -- `CLAUDE_OPUS_4_5` -- `CLAUDE_OPUS_4_6` - -### Google -- `GEMINI_2_5_FLASH` -- `GEMINI_2_5_PRO` -- `GEMINI_2_5_FLASH_LITE` -- `GEMINI_3_PRO` -- `GEMINI_3_FLASH` - -### xAI (Grok) -- `GROK_4` -- `GROK_4_FAST` -- `GROK_4_1_FAST` -- `GROK_4_1_FAST_NON_REASONING` - ---- - -## Settlement Modes — `og.x402SettlementMode` - -| Mode | Value | Description | -|------|-------|-------------| -| `SETTLE` | `"private"` | Hashes only — maximum privacy | -| `SETTLE_METADATA` | `"individual"` | Full data on-chain — maximum transparency | -| `SETTLE_BATCH` | `"batch"` | Aggregated hashes — most cost-efficient (default) | - ---- - -## Inference Modes — `og.InferenceMode` (Alpha) - -| Mode | Description | -|------|-------------| -| `VANILLA` | Standard execution | -| `TEE` | Trusted Execution Environment | -| `ZKML` | Zero-Knowledge ML | - ---- - -## Alpha API — `client.alpha` - -### `infer()` - -```python -client.alpha.infer( - model_cid: str, - inference_mode: og.InferenceMode, - model_input: dict, -) -> InferenceResult -``` - -### `new_workflow()` - -```python -client.alpha.new_workflow( - model_cid: str, - input_query: og.HistoricalInputQuery, - input_tensor_name: str, - scheduler_params: og.SchedulerParams, -) -> str # contract address -``` - -### `run_workflow(contract_address: str) -> InferenceResult` - -### `read_workflow_result(contract_address: str) -> InferenceResult` - -### `read_workflow_history(contract_address: str, num_results: int) -> list` - ---- - -## Workflow Input Queries - -### `og.HistoricalInputQuery` - -```python -og.HistoricalInputQuery( - base="ETH", - quote="USD", - 
total_candles=10, - candle_duration_in_mins=60, - order=og.CandleOrder.DESCENDING, - candle_types=[og.CandleType.CLOSE], -) -``` - -### `og.SchedulerParams` - -```python -og.SchedulerParams( - frequency=3600, # Seconds between runs - duration_hours=24, # Total duration -) -``` - ---- - -## Model Hub — `client.model_hub` - -### `create_model(model_name, model_desc, version) -> ModelRepository` - -### `upload(model_name, version, model_path) -> UploadResult` - -`UploadResult.modelCid` — the content-addressed identifier for the uploaded model. - ---- - -## Digital Twins — `client.twins` - -### `chat()` - -```python -client.twins.chat( - twin_id: str, - model: og.TEE_LLM, - messages: list[dict], - max_tokens: int, -) -> TextGenerationOutput -``` - ---- - -## LangChain Integration - -```python -llm = og.agents.langchain_adapter( - private_key: str, - model_cid: og.TEE_LLM, - max_tokens: int = 300, - x402_settlement_mode: og.x402SettlementMode = og.x402SettlementMode.SETTLE_BATCH, -) -``` - -Returns a LangChain-compatible `BaseChatModel` that can be used with -`create_react_agent()`, chains, or any LangChain component expecting an LLM. 
- ---- - -## Tools Format (for Tool Calling) - -```python -tools = [ - { - "type": "function", - "function": { - "name": "function_name", - "description": "What this function does", - "parameters": { - "type": "object", - "properties": { - "param1": {"type": "string", "description": "..."}, - "param2": {"type": "number"}, - }, - "required": ["param1"], - }, - }, - } -] -``` - ---- - -## CLI Commands - -```bash -opengradient config init # Interactive setup -opengradient config show # Display config -opengradient config clear # Reset config -opengradient create-account # Generate wallet -opengradient infer -m --input '' -opengradient chat --model --messages '' --max-tokens 100 -``` diff --git a/.claude/worktrees/tender-goldberg b/.claude/worktrees/tender-goldberg deleted file mode 160000 index f5e1b1b..0000000 --- a/.claude/worktrees/tender-goldberg +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f5e1b1be64760e0051f7f2b47e6b6ee201d8f2d3 diff --git a/.gitignore b/.gitignore index 2cd244e..b89f383 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,6 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-.idea/ \ No newline at end of file +.idea/ + +.claude/ \ No newline at end of file diff --git a/README.md b/README.md index 1e39d75..c979338 100644 --- a/README.md +++ b/README.md @@ -293,16 +293,16 @@ Manage, host, and execute models through the Model Hub with direct integration i OpenGradient supports multiple settlement modes through the x402 payment protocol: -- **SETTLE**: Records cryptographic hashes only (maximum privacy) -- **SETTLE_METADATA**: Records complete input/output data (maximum transparency) -- **SETTLE_BATCH**: Aggregates multiple inferences (most cost-efficient) +- **PRIVATE**: Payment only, no input/output data on-chain (maximum privacy) +- **BATCH_HASHED**: Aggregates inferences into a Merkle tree with input/output hashes and signatures (most cost-efficient, default) +- **INDIVIDUAL_FULL**: Records input, output, timestamp, and verification on-chain (maximum auditability) Specify settlement mode in your requests: ```python result = client.llm.chat( model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hello"}], - x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode=og.x402SettlementMode.BATCH_HASHED, ) ``` diff --git a/docs/CLAUDE_SDK_USERS.md b/docs/CLAUDE_SDK_USERS.md index 009b6f5..d80b912 100644 --- a/docs/CLAUDE_SDK_USERS.md +++ b/docs/CLAUDE_SDK_USERS.md @@ -59,7 +59,7 @@ result = client.llm.chat( tools: List[Dict] = [], # Optional: function calling tool_choice: str = None, # "auto", "none", or specific tool stop_sequence: List[str] = None, - x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, stream: bool = False, # Enable streaming responses ) # Returns: TextGenerationOutput (or TextGenerationStream if stream=True) @@ -78,7 +78,7 @@ result = client.llm.completion( max_tokens: int = 100, temperature: float = 0.0, stop_sequence: List[str] = None, - x402_settlement_mode: x402SettlementMode = 
x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, ) # Returns: TextGenerationOutput # - completion_output: str (raw text) @@ -272,9 +272,9 @@ og.InferenceMode.TEE # Trusted Execution Environment og.InferenceMode.ZKML # Zero-knowledge proof # x402 Payment Settlement Modes (for LLM calls) -og.x402SettlementMode.SETTLE # Input/output hashes only (most private) -og.x402SettlementMode.SETTLE_BATCH # Batch hashes (most cost-efficient, default) -og.x402SettlementMode.SETTLE_METADATA # Full data and metadata on-chain +og.x402SettlementMode.PRIVATE # Payment only, no input/output data on-chain (most private) +og.x402SettlementMode.BATCH_HASHED # Batch hashes (most cost-efficient, default) +og.x402SettlementMode.INDIVIDUAL_FULL # Full data and metadata on-chain # Workflow data types og.CandleType.OPEN, .HIGH, .LOW, .CLOSE, .VOLUME diff --git a/docs/opengradient/agents/index.md b/docs/opengradient/agents/index.md index 528a017..cbbf38e 100644 --- a/docs/opengradient/agents/index.md +++ b/docs/opengradient/agents/index.md @@ -23,7 +23,7 @@ def langchain_adapter( private_key: str, model_cid: `TEE_LLM`, max_tokens: int = 300, - x402_settlement_mode: `x402SettlementMode` = x402SettlementMode.SETTLE_BATCH + x402_settlement_mode: `x402SettlementMode` = x402SettlementMode.BATCH_HASHED ) ‑> [OpenGradientChatModel](./og_langchain) ``` Returns an OpenGradient LLM that implements LangChain's LLM interface diff --git a/docs/opengradient/alphasense/index.md b/docs/opengradient/alphasense/index.md index a29ec26..09baa07 100644 --- a/docs/opengradient/alphasense/index.md +++ b/docs/opengradient/alphasense/index.md @@ -97,7 +97,7 @@ runs inference using the specified OpenGradient model. This is required to ensure the output is compatible with the tool framework. - Default returns the InferenceResult object. + This argument is required and has no default formatter. 
InferenceResult has attributes: * transaction_hash (str): Blockchain hash for the transaction diff --git a/docs/opengradient/client/llm.md b/docs/opengradient/client/llm.md index 073e432..52f5ed6 100644 --- a/docs/opengradient/client/llm.md +++ b/docs/opengradient/client/llm.md @@ -49,7 +49,7 @@ def chat( temperature: float = 0.0, tools: Optional[List[Dict]] = None, tool_choice: Optional[str] = None, - x402_settlement_mode: Optional[`x402SettlementMode`] = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: Optional[`x402SettlementMode`] = x402SettlementMode.BATCH_HASHED, stream: bool = False ) ‑> Union[`TextGenerationOutput`, `TextGenerationStream`] ``` @@ -65,10 +65,10 @@ Perform inference on an LLM model using chat via TEE. * **`tools (List[dict], optional)`**: Set of tools for function calling. * **`tool_choice (str, optional)`**: Sets a specific tool to choose. * **`x402_settlement_mode (x402SettlementMode, optional)`**: Settlement mode for x402 payments. - - SETTLE: Records input/output hashes only (most privacy-preserving). - - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). - - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata. - Defaults to SETTLE_BATCH. + - PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving). + - BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient). + - INDIVIDUAL_FULL: Records input, output, timestamp, and verification on-chain (maximum auditability). + Defaults to BATCH_HASHED. * **`stream (bool, optional)`**: Whether to stream the response. Default is False. 
**Returns** @@ -118,7 +118,7 @@ def completion( max_tokens: int = 100, stop_sequence: Optional[List[str]] = None, temperature: float = 0.0, - x402_settlement_mode: Optional[`x402SettlementMode`] = x402SettlementMode.SETTLE_BATCH + x402_settlement_mode: Optional[`x402SettlementMode`] = x402SettlementMode.BATCH_HASHED ) ‑> `TextGenerationOutput` ``` Perform inference on an LLM model using completions via TEE. @@ -131,10 +131,10 @@ Perform inference on an LLM model using completions via TEE. * **`stop_sequence (List[str], optional)`**: List of stop sequences for LLM. Default is None. * **`temperature (float)`**: Temperature for LLM inference, between 0 and 1. Default is 0.0. * **`x402_settlement_mode (x402SettlementMode, optional)`**: Settlement mode for x402 payments. - - SETTLE: Records input/output hashes only (most privacy-preserving). - - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). - - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata. - Defaults to SETTLE_BATCH. + - PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving). + - BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient). + - INDIVIDUAL_FULL: Records input, output, timestamp, and verification on-chain (maximum auditability). + Defaults to BATCH_HASHED. **Returns** diff --git a/docs/opengradient/index.md b/docs/opengradient/index.md index b78d44d..0b02867 100644 --- a/docs/opengradient/index.md +++ b/docs/opengradient/index.md @@ -332,29 +332,24 @@ privacy, and transaction costs. **Attributes** -* **`SETTLE`**: Most private settlement method. - Only the payment is settled on-chain — no input or output hashes are posted to the chain. +* **`PRIVATE`**: Payment-only settlement. + Only the payment is settled on-chain — no input or output hashes are posted. Your inference data remains completely off-chain, ensuring maximum privacy. 
- Suitable for applications where payment settlement is required without any on-chain record of execution. - CLI usage: --settlement-mode settle -* **`SETTLE_METADATA`**: Individual settlement with full metadata. - Also known as SETTLE_INDIVIDUAL_WITH_METADATA in some documentation. - Records complete model information, full input and output data, - and all inference metadata on-chain. + Suitable when payment settlement is required without any on-chain record of execution. + CLI usage: --settlement-mode private +* **`BATCH_HASHED`**: Batch settlement with hashes (default). + Aggregates multiple inferences into a single settlement transaction + using a Merkle tree containing input hashes, output hashes, and signatures. + Most cost-efficient for high-volume applications. + CLI usage: --settlement-mode batch-hashed +* **`INDIVIDUAL_FULL`**: Individual settlement with full metadata. + Records input data, output data, timestamp, and verification on-chain. Provides maximum transparency and auditability. Higher gas costs due to larger data storage. - CLI usage: --settlement-mode settle-metadata -* **`SETTLE_BATCH`**: Batch settlement for multiple inferences. - Aggregates multiple inference requests into a single settlement transaction - using batch hashes. - Most cost-efficient for high-volume applications. - Reduced per-inference transaction overhead. - CLI usage: --settlement-mode settle-batch + CLI usage: --settlement-mode individual-full #### Variables -* static `SETTLE` -* static `SETTLE_BATCH` -* static `SETTLE_INDIVIDUAL` -* static `SETTLE_INDIVIDUAL_WITH_METADATA` -* static `SETTLE_METADATA` \ No newline at end of file +* static `BATCH_HASHED` +* static `INDIVIDUAL_FULL` +* static `PRIVATE` \ No newline at end of file diff --git a/docs/opengradient/types.md b/docs/opengradient/types.md index 8283bf0..bf437d0 100644 --- a/docs/opengradient/types.md +++ b/docs/opengradient/types.md @@ -532,29 +532,24 @@ privacy, and transaction costs. 
**Attributes** -* **`SETTLE`**: Most private settlement method. - Only the payment is settled on-chain — no input or output hashes are posted to the chain. +* **`PRIVATE`**: Payment-only settlement. + Only the payment is settled on-chain — no input or output hashes are posted. Your inference data remains completely off-chain, ensuring maximum privacy. - Suitable for applications where payment settlement is required without any on-chain record of execution. - CLI usage: --settlement-mode settle -* **`SETTLE_METADATA`**: Individual settlement with full metadata. - Also known as SETTLE_INDIVIDUAL_WITH_METADATA in some documentation. - Records complete model information, full input and output data, - and all inference metadata on-chain. + Suitable when payment settlement is required without any on-chain record of execution. + CLI usage: --settlement-mode private +* **`BATCH_HASHED`**: Batch settlement with hashes (default). + Aggregates multiple inferences into a single settlement transaction + using a Merkle tree containing input hashes, output hashes, and signatures. + Most cost-efficient for high-volume applications. + CLI usage: --settlement-mode batch-hashed +* **`INDIVIDUAL_FULL`**: Individual settlement with full metadata. + Records input data, output data, timestamp, and verification on-chain. Provides maximum transparency and auditability. Higher gas costs due to larger data storage. - CLI usage: --settlement-mode settle-metadata -* **`SETTLE_BATCH`**: Batch settlement for multiple inferences. - Aggregates multiple inference requests into a single settlement transaction - using batch hashes. - Most cost-efficient for high-volume applications. - Reduced per-inference transaction overhead. 
- CLI usage: --settlement-mode settle-batch + CLI usage: --settlement-mode individual-full #### Variables -* static `SETTLE` -* static `SETTLE_BATCH` -* static `SETTLE_INDIVIDUAL` -* static `SETTLE_INDIVIDUAL_WITH_METADATA` -* static `SETTLE_METADATA` \ No newline at end of file +* static `BATCH_HASHED` +* static `INDIVIDUAL_FULL` +* static `PRIVATE` \ No newline at end of file diff --git a/examples/langchain_react_agent.py b/examples/langchain_react_agent.py index d4b4a9a..6dc0a53 100644 --- a/examples/langchain_react_agent.py +++ b/examples/langchain_react_agent.py @@ -24,7 +24,7 @@ private_key=os.environ.get("OG_PRIVATE_KEY"), model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, max_tokens=300, - x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, + x402_settlement_mode=og.x402SettlementMode.INDIVIDUAL_FULL, ) diff --git a/examples/llm_chat.py b/examples/llm_chat.py index 9f7bed9..84ce7b9 100644 --- a/examples/llm_chat.py +++ b/examples/llm_chat.py @@ -15,6 +15,6 @@ model=og.TEE_LLM.GEMINI_2_5_FLASH, messages=messages, max_tokens=300, - x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, + x402_settlement_mode=og.x402SettlementMode.INDIVIDUAL_FULL, ) -print(result.chat_output['content']) +print(result.chat_output["content"]) diff --git a/examples/llm_chat_streaming.py b/examples/llm_chat_streaming.py index 21ce04d..f732059 100644 --- a/examples/llm_chat_streaming.py +++ b/examples/llm_chat_streaming.py @@ -16,7 +16,7 @@ stream = client.llm.chat( model=og.TEE_LLM.GPT_4_1_2025_04_14, messages=messages, - x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, + x402_settlement_mode=og.x402SettlementMode.INDIVIDUAL_FULL, stream=True, max_tokens=300, ) diff --git a/src/opengradient/agents/__init__.py b/src/opengradient/agents/__init__.py index a2dd27c..082f706 100644 --- a/src/opengradient/agents/__init__.py +++ b/src/opengradient/agents/__init__.py @@ -14,7 +14,7 @@ def langchain_adapter( private_key: str, model_cid: TEE_LLM, max_tokens: int = 300, - 
x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, ) -> OpenGradientChatModel: """ Returns an OpenGradient LLM that implements LangChain's LLM interface diff --git a/src/opengradient/agents/og_langchain.py b/src/opengradient/agents/og_langchain.py index b62e443..21920be 100644 --- a/src/opengradient/agents/og_langchain.py +++ b/src/opengradient/agents/og_langchain.py @@ -71,7 +71,7 @@ class OpenGradientChatModel(BaseChatModel): model_cid: str max_tokens: int = 300 - x402_settlement_mode: Optional[str] = x402SettlementMode.SETTLE_BATCH + x402_settlement_mode: Optional[str] = x402SettlementMode.BATCH_HASHED _client: Client = PrivateAttr() _tools: List[Dict] = PrivateAttr(default_factory=list) @@ -81,7 +81,7 @@ def __init__( private_key: str, model_cid: TEE_LLM, max_tokens: int = 300, - x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.BATCH_HASHED, **kwargs, ): super().__init__( diff --git a/src/opengradient/cli.py b/src/opengradient/cli.py index 28669db..ef46d33 100644 --- a/src/opengradient/cli.py +++ b/src/opengradient/cli.py @@ -69,9 +69,9 @@ def convert(self, value, param, ctx): } x402SettlementModes = { - "settle-batch": x402SettlementMode.SETTLE_BATCH, - "settle": x402SettlementMode.SETTLE, - "settle-metadata": x402SettlementMode.SETTLE_METADATA, + "batch-hashed": x402SettlementMode.BATCH_HASHED, + "private": x402SettlementMode.PRIVATE, + "individual-full": x402SettlementMode.INDIVIDUAL_FULL, } @@ -375,8 +375,8 @@ def infer(ctx, model_cid: str, inference_mode: str, input_data, input_file: Path "--x402-settlement-mode", "x402_settlement_mode", type=click.Choice(x402SettlementModes.keys()), - default="settle-batch", - help="Settlement mode for x402 payments: settle (payment only), settle-batch (batched, default), settle-metadata (full data)", + 
default="batch-hashed", + help="Settlement mode for x402 payments: private (payment only), batch-hashed (default), individual-full (full data with verification)", ) @click.pass_context def completion( @@ -468,8 +468,8 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_vanilla=True) @click.option( "--x402-settlement-mode", type=click.Choice(x402SettlementModes.keys()), - default="settle-batch", - help="Settlement mode for x402 payments: settle (payment only), settle-batch (batched, default), settle-metadata (full data)", + default="batch-hashed", + help="Settlement mode for x402 payments: private (payment only), batch-hashed (default), individual-full (full data with verification)", ) @click.option("--stream", is_flag=True, default=False, help="Stream the output from the LLM") @click.pass_context @@ -611,10 +611,7 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_van if value is not None and value not in ("", "[]", []): # Normalize list-of-blocks content (e.g. 
Gemini 3 thought signatures) if key == "content" and isinstance(value, list): - text = " ".join( - block.get("text", "") for block in value - if isinstance(block, dict) and block.get("type") == "text" - ).strip() + text = " ".join(block.get("text", "") for block in value if isinstance(block, dict) and block.get("type") == "text").strip() click.echo(f"{key}: {text}") else: click.echo(f"{key}: {value}") diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py index 83f8eb8..5e1176a 100644 --- a/src/opengradient/client/llm.py +++ b/src/opengradient/client/llm.py @@ -72,9 +72,7 @@ def _fetch_tls_cert_as_ssl_context(server_url: str) -> Optional[ssl.SSLContext]: return None # Write PEM to a temp file so we can load it into the SSLContext - cert_file = tempfile.NamedTemporaryFile( - prefix="og_tee_tls_", suffix=".pem", delete=False, mode="w" - ) + cert_file = tempfile.NamedTemporaryFile(prefix="og_tee_tls_", suffix=".pem", delete=False, mode="w") cert_file.write(pem_cert) cert_file.flush() cert_file.close() @@ -115,12 +113,8 @@ def __init__(self, wallet_account: LocalAccount, og_llm_server_url: str, og_llm_ self._og_llm_server_url = og_llm_server_url self._og_llm_streaming_server_url = og_llm_streaming_server_url - self._tls_verify: Union[ssl.SSLContext, bool] = ( - _fetch_tls_cert_as_ssl_context(self._og_llm_server_url) or True - ) - self._streaming_tls_verify: Union[ssl.SSLContext, bool] = ( - _fetch_tls_cert_as_ssl_context(self._og_llm_streaming_server_url) or True - ) + self._tls_verify: Union[ssl.SSLContext, bool] = _fetch_tls_cert_as_ssl_context(self._og_llm_server_url) or True + self._streaming_tls_verify: Union[ssl.SSLContext, bool] = _fetch_tls_cert_as_ssl_context(self._og_llm_streaming_server_url) or True signer = EthAccountSignerv2(self._wallet_account) self._x402_client = x402Clientv2() @@ -205,7 +199,7 @@ def completion( max_tokens: int = 100, stop_sequence: Optional[List[str]] = None, temperature: float = 0.0, - x402_settlement_mode: 
Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.BATCH_HASHED, ) -> TextGenerationOutput: """ Perform inference on an LLM model using completions via TEE. @@ -217,10 +211,10 @@ def completion( stop_sequence (List[str], optional): List of stop sequences for LLM. Default is None. temperature (float): Temperature for LLM inference, between 0 and 1. Default is 0.0. x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments. - - SETTLE: Records input/output hashes only (most privacy-preserving). - - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). - - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata. - Defaults to SETTLE_BATCH. + - PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving). + - BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient). + - INDIVIDUAL_FULL: Records input, output, timestamp, and verification on-chain (maximum auditability). + Defaults to BATCH_HASHED. Returns: TextGenerationOutput: Generated text results including: @@ -247,7 +241,7 @@ def _tee_llm_completion( max_tokens: int = 100, stop_sequence: Optional[List[str]] = None, temperature: float = 0.0, - x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.BATCH_HASHED, ) -> TextGenerationOutput: """ Route completion request to OpenGradient TEE LLM server with x402 payments. 
@@ -304,7 +298,7 @@ def chat( temperature: float = 0.0, tools: Optional[List[Dict]] = None, tool_choice: Optional[str] = None, - x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.BATCH_HASHED, stream: bool = False, ) -> Union[TextGenerationOutput, TextGenerationStream]: """ @@ -319,10 +313,10 @@ def chat( tools (List[dict], optional): Set of tools for function calling. tool_choice (str, optional): Sets a specific tool to choose. x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments. - - SETTLE: Records input/output hashes only (most privacy-preserving). - - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). - - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata. - Defaults to SETTLE_BATCH. + - PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving). + - BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient). + - INDIVIDUAL_FULL: Records input, output, timestamp, and verification on-chain (maximum auditability). + Defaults to BATCH_HASHED. stream (bool, optional): Whether to stream the response. Default is False. Returns: @@ -367,7 +361,7 @@ def _tee_llm_chat( temperature: float = 0.0, tools: Optional[List[Dict]] = None, tool_choice: Optional[str] = None, - x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, ) -> TextGenerationOutput: """ Route chat request to OpenGradient TEE LLM server with x402 payments. 
@@ -396,9 +390,7 @@ async def make_request_v2(): try: endpoint = "/v1/chat/completions" - response = await self._request_client.post( - self._og_llm_server_url + endpoint, json=payload, headers=headers, timeout=60 - ) + response = await self._request_client.post(self._og_llm_server_url + endpoint, json=payload, headers=headers, timeout=60) response.raise_for_status() content = await response.aread() @@ -412,8 +404,7 @@ async def make_request_v2(): content = message.get("content") if isinstance(content, list): message["content"] = " ".join( - block.get("text", "") for block in content - if isinstance(block, dict) and block.get("type") == "text" + block.get("text", "") for block in content if isinstance(block, dict) and block.get("type") == "text" ).strip() return TextGenerationOutput( @@ -443,7 +434,7 @@ def _tee_llm_chat_stream_sync( temperature: float = 0.0, tools: Optional[List[Dict]] = None, tool_choice: Optional[str] = None, - x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, ): """ Sync streaming using threading bridge - TRUE real-time streaming. @@ -499,7 +490,7 @@ async def _tee_llm_chat_stream_async( temperature: float = 0.0, tools: Optional[List[Dict]] = None, tool_choice: Optional[str] = None, - x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, ): """ Internal async streaming implementation for TEE LLM with x402 payments. diff --git a/src/opengradient/types.py b/src/opengradient/types.py index bc9c917..43a5060 100644 --- a/src/opengradient/types.py +++ b/src/opengradient/types.py @@ -19,41 +19,34 @@ class x402SettlementMode(str, Enum): privacy, and transaction costs. Attributes: - SETTLE: Most private settlement method. - Only the payment is settled on-chain — no input or output hashes are posted to the chain. + PRIVATE: Payment-only settlement. 
+ Only the payment is settled on-chain — no input or output hashes are posted. Your inference data remains completely off-chain, ensuring maximum privacy. - Suitable for applications where payment settlement is required without any on-chain record of execution. - CLI usage: --settlement-mode settle + Suitable when payment settlement is required without any on-chain record of execution. + CLI usage: --settlement-mode private - SETTLE_METADATA: Individual settlement with full metadata. - Also known as SETTLE_INDIVIDUAL_WITH_METADATA in some documentation. - Records complete model information, full input and output data, - and all inference metadata on-chain. + BATCH_HASHED: Batch settlement with hashes (default). + Aggregates multiple inferences into a single settlement transaction + using a Merkle tree containing input hashes, output hashes, and signatures. + Most cost-efficient for high-volume applications. + CLI usage: --settlement-mode batch-hashed + + INDIVIDUAL_FULL: Individual settlement with full metadata. + Records input data, output data, timestamp, and verification on-chain. Provides maximum transparency and auditability. Higher gas costs due to larger data storage. - CLI usage: --settlement-mode settle-metadata - - SETTLE_BATCH: Batch settlement for multiple inferences. - Aggregates multiple inference requests into a single settlement transaction - using batch hashes. - Most cost-efficient for high-volume applications. - Reduced per-inference transaction overhead. 
- CLI usage: --settlement-mode settle-batch + CLI usage: --settlement-mode individual-full Examples: >>> from opengradient import x402SettlementMode - >>> mode = x402SettlementMode.SETTLE + >>> mode = x402SettlementMode.PRIVATE >>> print(mode.value) - 'settle' + 'private' """ - SETTLE = "private" - SETTLE_METADATA = "individual" - SETTLE_BATCH = "batch" - - # Aliases for backward compatibility with glossary naming - SETTLE_INDIVIDUAL = SETTLE - SETTLE_INDIVIDUAL_WITH_METADATA = SETTLE_METADATA + PRIVATE = "private" + BATCH_HASHED = "batch" + INDIVIDUAL_FULL = "individual" class CandleOrder(IntEnum): @@ -288,6 +281,7 @@ def from_sse_data(cls, data: Dict) -> "StreamChunk": tee_timestamp=data.get("tee_timestamp"), ) + @dataclass class TextGenerationStream: """ @@ -480,6 +474,7 @@ class TEE_LLM(str, Enum): messages=[{"role": "user", "content": "Hello"}], ) """ + # OpenAI models via TEE GPT_4_1_2025_04_14 = "openai/gpt-4.1-2025-04-14" O4_MINI = "openai/o4-mini" diff --git a/tests/client_test.py b/tests/client_test.py index 822fc74..5077be5 100644 --- a/tests/client_test.py +++ b/tests/client_test.py @@ -311,11 +311,6 @@ def test_from_sse_data_with_usage(self): class TestX402SettlementMode: def test_settlement_modes_values(self): """Test settlement mode enum values.""" - assert x402SettlementMode.SETTLE == "private" - assert x402SettlementMode.SETTLE_BATCH == "batch" - assert x402SettlementMode.SETTLE_METADATA == "individual" - - def test_settlement_mode_aliases(self): - """Test settlement mode aliases.""" - assert x402SettlementMode.SETTLE_INDIVIDUAL == x402SettlementMode.SETTLE - assert x402SettlementMode.SETTLE_INDIVIDUAL_WITH_METADATA == x402SettlementMode.SETTLE_METADATA + assert x402SettlementMode.PRIVATE == "private" + assert x402SettlementMode.BATCH_HASHED == "batch" + assert x402SettlementMode.INDIVIDUAL_FULL == "individual" diff --git a/tests/langchain_adapter_test.py b/tests/langchain_adapter_test.py index 1671c7f..58af208 100644 --- 
a/tests/langchain_adapter_test.py +++ b/tests/langchain_adapter_test.py @@ -34,7 +34,7 @@ def test_initialization(self, model): """Test model initializes with correct fields.""" assert model.model_cid == TEE_LLM.GPT_5 assert model.max_tokens == 300 - assert model.x402_settlement_mode == x402SettlementMode.SETTLE_BATCH + assert model.x402_settlement_mode == x402SettlementMode.BATCH_HASHED assert model._llm_type == "opengradient" def test_initialization_custom_max_tokens(self, mock_client): @@ -47,9 +47,9 @@ def test_initialization_custom_settlement_mode(self, mock_client): model = OpenGradientChatModel( private_key="0x" + "a" * 64, model_cid=TEE_LLM.GPT_5, - x402_settlement_mode=x402SettlementMode.SETTLE, + x402_settlement_mode=x402SettlementMode.PRIVATE, ) - assert model.x402_settlement_mode == x402SettlementMode.SETTLE + assert model.x402_settlement_mode == x402SettlementMode.PRIVATE def test_identifying_params(self, model): """Test _identifying_params returns model name.""" @@ -215,7 +215,7 @@ def test_passes_correct_params_to_client(self, model, mock_client): stop_sequence=["END"], max_tokens=300, tools=[], - x402_settlement_mode=x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode=x402SettlementMode.BATCH_HASHED, ) diff --git a/tests/opg_token_test.py b/tests/opg_token_test.py index 57865bf..e035880 100644 --- a/tests/opg_token_test.py +++ b/tests/opg_token_test.py @@ -252,9 +252,7 @@ def test_default_tx_hash_is_none(self): assert result.tx_hash is None def test_fields(self): - result = Permit2ApprovalResult( - allowance_before=0, allowance_after=500, tx_hash="0xabc" - ) + result = Permit2ApprovalResult(allowance_before=0, allowance_after=500, tx_hash="0xabc") assert result.allowance_before == 0 assert result.allowance_after == 500 assert result.tx_hash == "0xabc" diff --git a/tutorials/01-verifiable-ai-agent.md b/tutorials/01-verifiable-ai-agent.md index 51fad5e..a323e60 100644 --- a/tutorials/01-verifiable-ai-agent.md +++ 
b/tutorials/01-verifiable-ai-agent.md @@ -58,7 +58,7 @@ llm = og.agents.langchain_adapter( private_key=private_key, model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, max_tokens=500, - x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode=og.x402SettlementMode.BATCH_HASHED, ) ``` @@ -223,9 +223,9 @@ is recorded on-chain: | Mode | What's Stored | Best For | |------|--------------|----------| -| `SETTLE` | Hashes of input and output only | **Privacy** -- proves execution happened without revealing content | -| `SETTLE_BATCH` | Batch hash of multiple inferences | **Cost efficiency** -- reduces per-call gas costs (default) | -| `SETTLE_METADATA` | Full model info, input, output, and metadata | **Transparency** -- complete auditability for compliance | +| `PRIVATE` | Payment only -- no input/output data | **Privacy** -- settles payment without any on-chain record of the inference | +| `BATCH_HASHED` | Batch hash of multiple inferences | **Cost efficiency** -- reduces per-call gas costs (default) | +| `INDIVIDUAL_FULL` | Full model info, input, output, and metadata | **Transparency** -- complete auditability for compliance | Choose based on your requirements: @@ -234,21 +234,21 @@ llm_dev = og.agents.langchain_adapter( private_key=os.environ["OG_PRIVATE_KEY"], model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, - x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode=og.x402SettlementMode.BATCH_HASHED, ) # For production financial applications -- full audit trail llm_prod = og.agents.langchain_adapter( private_key=os.environ["OG_PRIVATE_KEY"], model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, - x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, + x402_settlement_mode=og.x402SettlementMode.INDIVIDUAL_FULL, ) # For privacy-sensitive applications -- minimal on-chain footprint llm_private = og.agents.langchain_adapter( private_key=os.environ["OG_PRIVATE_KEY"], model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, -
x402_settlement_mode=og.x402SettlementMode.SETTLE, + x402_settlement_mode=og.x402SettlementMode.PRIVATE, ) ``` @@ -299,7 +299,7 @@ llm = og.agents.langchain_adapter( private_key=private_key, model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, max_tokens=500, - x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode=og.x402SettlementMode.BATCH_HASHED, ) # ── Standard tool ───────────────────────────────────────────────────────── @@ -356,5 +356,5 @@ if __name__ == "__main__": models deployed on OpenGradient. - **Read workflow results**: Use `og.alphasense.create_read_workflow_tool` to read from scheduled on-chain workflows that run models automatically. -- **Go to production**: Switch settlement mode to `SETTLE_METADATA` and store the +- **Go to production**: Switch settlement mode to `INDIVIDUAL_FULL` and store the payment hashes and transaction hashes for your compliance records. diff --git a/tutorials/02-streaming-multi-provider.md b/tutorials/02-streaming-multi-provider.md index fc4dafd..c588d17 100644 --- a/tutorials/02-streaming-multi-provider.md +++ b/tutorials/02-streaming-multi-provider.md @@ -163,9 +163,9 @@ privacy/cost/transparency trade-off: | Mode | On-Chain Data | Use Case | |------|--------------|----------| -| `SETTLE` | Input/output hashes only | **Privacy** -- prove execution without revealing content | -| `SETTLE_BATCH` | Batch digest of multiple calls | **Cost efficiency** -- lower gas per inference (default) | -| `SETTLE_METADATA` | Full model, input, output, metadata | **Transparency** -- complete audit trail | +| `PRIVATE` | Payment only -- no input/output data | **Privacy** -- settle payment without any on-chain record | +| `BATCH_HASHED` | Batch digest of multiple calls | **Cost efficiency** -- lower gas per inference (default) | +| `INDIVIDUAL_FULL` | Full model, input, output, metadata | **Transparency** -- complete audit trail | ```python # Privacy-first: only hashes stored on-chain @@ -173,7 +173,7 @@ result_private = client.llm.chat(
model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Sensitive query here."}], max_tokens=100, - x402_settlement_mode=og.x402SettlementMode.SETTLE, + x402_settlement_mode=og.x402SettlementMode.PRIVATE, ) print(f"Payment hash (SETTLE): {result_private.payment_hash}") @@ -182,18 +182,18 @@ result_batch = client.llm.chat( model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Regular query."}], max_tokens=100, - x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode=og.x402SettlementMode.BATCH_HASHED, ) -print(f"Payment hash (SETTLE_BATCH): {result_batch.payment_hash}") +print(f"Payment hash (BATCH_HASHED): {result_batch.payment_hash}") # Full transparency: everything on-chain result_transparent = client.llm.chat( model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Auditable query."}], max_tokens=100, - x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, + x402_settlement_mode=og.x402SettlementMode.INDIVIDUAL_FULL, ) -print(f"Payment hash (SETTLE_METADATA): {result_transparent.payment_hash}") +print(f"Payment hash (INDIVIDUAL_FULL): {result_transparent.payment_hash}") ``` All three calls return a `payment_hash` you can look up on-chain. 
The difference is @@ -308,9 +308,9 @@ print("\n") # ── Settlement modes ────────────────────────────────────────────────────── for mode_name, mode in [ - ("SETTLE", og.x402SettlementMode.SETTLE), - ("SETTLE_BATCH", og.x402SettlementMode.SETTLE_BATCH), - ("SETTLE_METADATA", og.x402SettlementMode.SETTLE_METADATA), + ("PRIVATE", og.x402SettlementMode.PRIVATE), + ("BATCH_HASHED", og.x402SettlementMode.BATCH_HASHED), + ("INDIVIDUAL_FULL", og.x402SettlementMode.INDIVIDUAL_FULL), ]: try: r = client.llm.chat( diff --git a/tutorials/03-verified-tool-calling.md b/tutorials/03-verified-tool-calling.md index c7caa49..685ffed 100644 --- a/tutorials/03-verified-tool-calling.md +++ b/tutorials/03-verified-tool-calling.md @@ -179,7 +179,7 @@ result = client.llm.chat( # "auto" lets the model decide whether to call a tool or respond with text. # "none" forces a text-only response. tool_choice="auto", - x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode=og.x402SettlementMode.BATCH_HASHED, ) ``` @@ -225,7 +225,7 @@ def run_agent(client: og.Client, user_query: str) -> str: temperature=0.0, tools=TOOLS, tool_choice="auto", - x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode=og.x402SettlementMode.BATCH_HASHED, ) except Exception as e: print(f" LLM call failed: {e}") @@ -376,7 +376,7 @@ def run_agent(user_query: str) -> str: result = client.llm.chat( model=og.TEE_LLM.GPT_5, messages=messages, max_tokens=600, temperature=0.0, tools=TOOLS, tool_choice="auto", - x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + x402_settlement_mode=og.x402SettlementMode.BATCH_HASHED, ) except Exception as e: return f"Error: {e}" @@ -409,5 +409,5 @@ if __name__ == "__main__": even during multi-turn tool loops. See **Tutorial 2** for streaming basics. - **Use different providers**: Swap `og.TEE_LLM.GPT_5` for `CLAUDE_SONNET_4_6` or `GEMINI_2_5_FLASH` -- tool calling works across all providers. 
-- **Add settlement transparency**: Switch to `SETTLE_METADATA` to store the full +- **Add settlement transparency**: Switch to `INDIVIDUAL_FULL` to store the full tool-calling reasoning chain on-chain for audit purposes.