From 127a307aacdb6cedaed10e1eaf44571ad76b18f7 Mon Sep 17 00:00:00 2001 From: Patrick Moorhead Date: Wed, 7 Jan 2026 09:17:08 -0600 Subject: [PATCH 1/2] Add AIDP OpenAI Demo example This example demonstrates the NVIDIA AI Data Platform (AIDP) Retrieval API following the OpenAI Vector Store Search specification, exposed via MCP. Features: - OpenAI-compatible search_vector_store tool via MCP - NVIDIA NIMs for embeddings and LLM reasoning - Milvus vector database integration - Synthetic data generation using Nemotron LLM - Full ReAct agent workflow with NeMo Agent Toolkit Signed-off-by: Patrick Moorhead --- examples/aidp_openai_demo/README.md | 323 +++++++++++++++ examples/aidp_openai_demo/pyproject.toml | 23 ++ .../scripts/load_support_tickets.py | 376 ++++++++++++++++++ .../src/nat_aidp_openai_demo/__init__.py | 3 + .../nat_aidp_openai_demo/configs/workflow.yml | 44 ++ .../src/nat_aidp_openai_demo/server.py | 221 ++++++++++ 6 files changed, 990 insertions(+) create mode 100644 examples/aidp_openai_demo/README.md create mode 100644 examples/aidp_openai_demo/pyproject.toml create mode 100644 examples/aidp_openai_demo/scripts/load_support_tickets.py create mode 100644 examples/aidp_openai_demo/src/nat_aidp_openai_demo/__init__.py create mode 100644 examples/aidp_openai_demo/src/nat_aidp_openai_demo/configs/workflow.yml create mode 100644 examples/aidp_openai_demo/src/nat_aidp_openai_demo/server.py diff --git a/examples/aidp_openai_demo/README.md b/examples/aidp_openai_demo/README.md new file mode 100644 index 0000000..b84341b --- /dev/null +++ b/examples/aidp_openai_demo/README.md @@ -0,0 +1,323 @@ + + +# AIDP Retrieval API Demo with NVIDIA NIMs + +## Overview + +### The Problem + +Enterprise AI applications need a standardized way to access enterprise data stored in storage platforms. Without a unified interface: + +- **Custom integrations** - Each AI agent requires custom code to access storage +- **Inconsistent APIs** - Different storage vendors expose different interfaces +- **Tool fragmentation** - Tools built for one platform don't work with others +- **Security complexity** - Each integration needs its own authentication handling + +### The Solution + +This demo implements the **NVIDIA AI Data Platform (AIDP) Retrieval API** following the [OpenAI Vector Store Search specification](https://platform.openai.com/docs/api-reference/vector_stores/search). By exposing the Retrieval API via **Model Context Protocol (MCP)**, any MCP-compatible AI agent can seamlessly search enterprise data with a standardized interface. + +### How It Works + +The demo implements an **Agentic RAG (Retrieval-Augmented Generation)** system for searching support tickets: + +1. **User asks a question** via the chat UI or CLI (for example, "Find GPU memory issues") +2. **ReAct Agent reasons** about which tools to use +3. **MCP Tool executes** - `search_vector_store` performs semantic search +4. **NVIDIA NIMs process** the request using GPU-accelerated embeddings +5. **Agent synthesizes** the results into a coherent response + +### Component Selection + +| Component | Technology | Why This Choice | +|-----------|------------|-----------------| +| **Protocol** | MCP (`Streamable HTTP`) | Open standard with auth support, works with any MCP client | +| **Agent Framework** | NeMo Agent Toolkit | Native MCP server/client, YAML config, production-ready | +| **Vector Database** | Milvus | GPU-accelerated, scales to billions of vectors | +| **Embeddings** | `nvidia/nv-embedqa-e5-v5` | High-quality 1024-dim embeddings optimized for Q&A retrieval | +| **LLM** | `meta/llama-3.1-70b-instruct` | Strong reasoning for agent orchestration and response generation | +| **API Spec** | OpenAI Vector Store Search | Industry standard for AI platform APIs | + +--- + +## Table of Contents + +- [Overview](#overview) +- [Key Features](#key-features) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Installation and Setup](#installation-and-setup) +- [Running the Demo](#running-the-demo) +- [NVIDIA NIMs Used](#nvidia-nims-used) +- [The Tool](#the-tool) +- [Sample Queries](#sample-queries) +- [OpenAI API Alignment](#openai-api-alignment) +- [Customization Guide](#customization-guide) + +--- + +## Key Features + +- **OpenAI-Compatible API**: Implements the OpenAI Vector Store Search specification +- **MCP Protocol**: Tools exposed via standardized Model Context Protocol for interoperability +- **NVIDIA NIMs Integration**: Uses NVIDIA NIMs for embedding and LLM reasoning +- **Agentic RAG**: ReAct agent orchestrating search operations with tool calling +- **Vector Search**: Semantic similarity search using Milvus vector database +- **YAML-based Configuration**: Fully configurable workflow through YAML files + +--- + +## Architecture + +This demo uses a 3-terminal architecture: + +1. **AIDP MCP Server** (`python src/nat_aidp_openai_demo/server.py`): Exposes `search_vector_store` via MCP +2. **NAT UI Server** (`nat serve`): Acts as MCP client, provides API for the UI +3. **NAT UI**: Frontend that users interact with + +``` +┌─────────────┐ REST ┌─────────────────┐ +│ NAT UI │ ◄──────────────────► │ NAT UI Server │ +│ (Browser) │ Port 3000 │ (MCP Client) │ +└─────────────┘ └────────┬────────┘ + │ Port 8000 + MCP Protocol + (Streamable-HTTP) + │ + ┌────────▼────────┐ + │ AIDP MCP Server│ + │ Port 8081 │ + │ search_vector_ │ + │ store │ + └────────┬────────┘ + │ + ┌────────────────┼────────────────┐ + │ │ │ + ┌───────▼──────┐ ┌──────▼──────┐ ┌──────▼──────┐ + │ Embedding NIM│ │ LLM NIM │ │ Milvus │ + │ (API.NVIDIA)│ │ (API.NVIDIA)│ │ Port 19530 │ + └──────────────┘ └─────────────┘ └─────────────┘ +``` + +--- + +## Prerequisites + +- Docker (for Milvus vector database) +- Python 3.11+ +- NVIDIA API key from [build.nvidia.com](https://build.nvidia.com) +- Node.js (for UI) + +--- + +## Installation and Setup + +### Set Up API Keys + +```bash +export NVIDIA_API_KEY= +``` + +### Start Milvus Vector Database + +```bash +# Download the Milvus standalone docker-compose file +curl -sfL https://github.com/milvus-io/milvus/releases/download/v2.4.0/milvus-standalone-docker-compose.yml -o docker-compose.yml + +# Start Milvus +docker compose up -d +``` + +### Load Sample Data + +```bash +python scripts/load_support_tickets.py +``` + +Expected output: +``` +Creating collection: support_tickets with explicit schema +Collection 'support_tickets' created successfully +Inserted 10 tickets with NIM embeddings +Test search for 'GPU memory' returned 3 results +``` + +--- + +## Running the Demo + +### Terminal 1: Start AIDP MCP Server + +```bash +export NVIDIA_API_KEY= +python src/nat_aidp_openai_demo/server.py +``` + +### Terminal 2: Start NAT UI Server + +```bash +export NVIDIA_API_KEY= +nat serve --config_file src/nat_aidp_openai_demo/configs/workflow.yml --port 8000 +``` + +### Terminal 3: Start UI + +```bash +cd external/nat-ui +npm run dev +``` + +### Open Browser + +Navigate to: http://localhost:3000 + +**Alternative: Command Line** + +```bash +nat run --config_file src/nat_aidp_openai_demo/configs/workflow.yml --input "Find GPU memory issues" +``` + +--- + +## NVIDIA NIMs Used + +| NIM | Purpose | Model | +|-----|---------|-------| +| **Embedding** | Generate vector embeddings for semantic search | `nvidia/nv-embedqa-e5-v5` | +| **LLM** | Agent reasoning and response generation | `meta/llama-3.1-70b-instruct` | + +--- + +## The Tool + +### `search_vector_store` + +Semantic search following the AIDP Retrieval API (OpenAI specification). + +| Parameter | Type | Description | Default | +|-----------|------|-------------|---------| +| `query` | string | Search query (required) | - | +| `vector_store_id` | string | Vector store name | `support_tickets` | +| `max_num_results` | integer | Results limit (1-50) | `10` | +| `filter_key` | string | Attribute to filter by | `null` | +| `filter_type` | string | Filter type: `eq`, `ne`, `contains` | `null` | +| `filter_value` | string | Value to match | `null` | +| `score_threshold` | float | Minimum similarity score | `null` | + +--- + +## Sample Queries + +Try these queries in the UI: + +- "Find GPU memory issues" +- "Show me critical severity tickets" +- "What CUDA errors have been reported?" +- "Find driver crash issues" +- "Show resolved tickets about performance" + +--- + +## OpenAI API Alignment + +The AIDP Retrieval API follows the [OpenAI Vector Store Search specification](https://platform.openai.com/docs/api-reference/vector_stores/search): + +### Endpoint + +``` +POST /v1/vector_stores/{vector_store_id}/search +``` + +### Response Format + +```json +{ + "object": "vector_store.search_results.page", + "search_query": "GPU memory issues", + "data": [ + { + "file_id": "d1649d77-e043-45e5-b426-9b8b7c2856f2", + "filename": "CUDA out of memory error.txt", + "score": 0.4976, + "attributes": { + "category": "Memory Problems", + "severity": "high", + "title": "CUDA out of memory error with large batch sizes" + }, + "content": [ + { + "type": "text", + "text": "Training transformer model with batch size 64..." + } + ] + } + ], + "has_more": false, + "next_page": null +} +``` + +### Alignment Table + +| OpenAI Spec | AIDP Implementation | +|-------------|---------------------| +| `query` (required) | ✅ Implemented | +| `filters` (key/type/value) | ✅ Implemented | +| `max_num_results` (1-50) | ✅ Implemented | +| `ranking_options` | ✅ Implemented | +| Response: `file_id`, `filename`, `score` | ✅ Identical | +| Response: `attributes`, `content[]` | ✅ Identical | +| Bearer token authentication | ✅ Implemented | + +--- + +## Customization Guide + +### Adding New Fields + +1. Update the Milvus schema in `scripts/load_support_tickets.py` +2. Add the field to `output_fields` in `src/nat_aidp_openai_demo/server.py` +3. Include the field in the response `attributes` object + +### Using Different Models + +Update `src/nat_aidp_openai_demo/configs/workflow.yml`: + +```yaml +llms: + nim_llm: + _type: nim + model_name: meta/llama-3.3-70b-instruct # Change model here + temperature: 0 + max_tokens: 512 +``` + +### Connecting to Different Vector Stores + +Set the environment variable: + +```bash +export MILVUS_URI="http://your-milvus-host:19530" +``` + +--- + +## Files + +| File | Purpose | +|------|---------| +| `src/nat_aidp_openai_demo/server.py` | MCP server exposing `search_vector_store` tool | +| `src/nat_aidp_openai_demo/configs/workflow.yml` | NeMo Agent Toolkit workflow configuration | +| `scripts/load_support_tickets.py` | Data loading script for Milvus | + +--- + +## References + +- [OpenAI Vector Store Search API](https://platform.openai.com/docs/api-reference/vector_stores/search) +- [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) +- [NeMo Agent Toolkit](https://github.com/NVIDIA/NeMo-Agent-Toolkit) + diff --git a/examples/aidp_openai_demo/pyproject.toml b/examples/aidp_openai_demo/pyproject.toml new file mode 100644 index 0000000..119ce9b --- /dev/null +++ b/examples/aidp_openai_demo/pyproject.toml @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[project] +name = "nat_aidp_openai_demo" +version = "0.1.0" +description = "AIDP Retrieval API Demo with NVIDIA NIMs - OpenAI Vector Store Search specification via MCP" +readme = "README.md" +requires-python = ">=3.11" + +dependencies = [ + "nvidia-nat[langchain,mcp]>=1.4.0a0,<1.5.0", + "pymilvus~=2.6", + "fastmcp", + "requests", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "ruff", +] + diff --git a/examples/aidp_openai_demo/scripts/load_support_tickets.py b/examples/aidp_openai_demo/scripts/load_support_tickets.py new file mode 100644 index 0000000..abae4f4 --- /dev/null +++ b/examples/aidp_openai_demo/scripts/load_support_tickets.py @@ -0,0 +1,376 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +#!/usr/bin/env python3 +""" +Seed Milvus with synthetic support tickets using NVIDIA NIMs. + +This script: +1. Generates synthetic support tickets using NVIDIA Nemotron LLM +2. Embeds the tickets using NVIDIA NIM embeddings +3. Stores them in a Milvus collection for RAG retrieval +""" + +import os +import json +import requests +from typing import List, Dict +from pymilvus import MilvusClient, DataType + +# Configuration +MILVUS_URI = os.getenv("MILVUS_URI", "http://localhost:19530") +NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY") +COLLECTION_NAME = "support_tickets" +EMBEDDING_MODEL = "nvidia/nv-embedqa-e5-v5" +LLM_MODEL = "nvidia/llama-3.1-nemotron-70b-instruct" +EMBEDDING_DIM = 1024 # Dimension for nv-embedqa-e5-v5 + +# NVIDIA API endpoints +NVIDIA_EMBED_URL = "https://integrate.api.nvidia.com/v1/embeddings" +NVIDIA_LLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions" + + +def get_nvidia_headers(): + """Get headers for NVIDIA API requests.""" + if not NVIDIA_API_KEY: + raise ValueError("NVIDIA_API_KEY environment variable is not set. " + "Get one from https://build.nvidia.com") + return { + "Authorization": f"Bearer {NVIDIA_API_KEY}", + "Content-Type": "application/json" + } + + +def generate_embeddings(texts: List[str]) -> List[List[float]]: + """Generate embeddings using NVIDIA NIM.""" + headers = get_nvidia_headers() + + payload = { + "input": texts, + "model": EMBEDDING_MODEL, + "input_type": "passage", + "encoding_format": "float", + "truncate": "END" + } + + response = requests.post(NVIDIA_EMBED_URL, headers=headers, json=payload) + response.raise_for_status() + + result = response.json() + embeddings = [item["embedding"] for item in result["data"]] + return embeddings + + +def generate_synthetic_tickets(num_tickets: int = 50) -> List[Dict]: + """Generate synthetic support tickets using NVIDIA Nemotron LLM.""" + headers = get_nvidia_headers() + + categories = [ + "GPU Hardware", "CUDA Software", "Driver Issues", "Memory Problems", + "Performance Optimization", "Container/Docker", "Multi-GPU Setup", + "TensorRT", "cuDNN", "NVIDIA NIM", "DeepStream", "Triton Inference" + ] + + prompt = f"""Generate {num_tickets} diverse technical support tickets for an NVIDIA data center platform. +Each ticket should be a realistic IT support issue that a data center administrator might encounter. + +For each ticket, provide: +- title: A concise summary of the issue (max 100 chars) +- content: Detailed description including symptoms, error messages, and context (200-500 chars) +- category: One of these categories: {', '.join(categories)} +- severity: critical, high, medium, or low +- status: open, in_progress, or resolved + +Return as a JSON array with exactly {num_tickets} tickets. Only output valid JSON, no markdown. + +Example format: +[ + {{ + "title": "GPU driver crash on reboot after kernel update", + "content": "After updating to kernel 5.15, the NVIDIA driver fails to load on system reboot. dmesg shows 'NVRM: GPU at 0000:01:00.0 has fallen off the bus'. System: DGX A100, Driver: 535.129.03. Issue occurs consistently after warm reboot but not after cold boot.", + "category": "Driver Issues", + "severity": "critical", + "status": "open" + }} +]""" + + payload = { + "model": LLM_MODEL, + "messages": [{"role": "user", "content": prompt}], + "temperature": 0.8, + "max_tokens": 8000 + } + + print(f"Generating {num_tickets} synthetic support tickets using {LLM_MODEL}...") + response = requests.post(NVIDIA_LLM_URL, headers=headers, json=payload) + response.raise_for_status() + + result = response.json() + content = result["choices"][0]["message"]["content"] + + # Parse the JSON response + try: + # Clean up potential markdown formatting + if "```json" in content: + content = content.split("```json")[1].split("```")[0] + elif "```" in content: + content = content.split("```")[1].split("```")[0] + + tickets = json.loads(content.strip()) + print(f"Successfully generated {len(tickets)} tickets") + return tickets + except json.JSONDecodeError as e: + print(f"Failed to parse LLM response as JSON: {e}") + print(f"Response content: {content[:500]}...") + # Return fallback tickets + return get_fallback_tickets() + + +def get_fallback_tickets() -> List[Dict]: + """Return fallback tickets if LLM generation fails.""" + return [ + { + "title": "GPU driver crash on reboot after kernel update", + "content": "After updating to kernel 5.15, the NVIDIA driver fails to load on system reboot. dmesg shows 'NVRM: GPU at 0000:01:00.0 has fallen off the bus'. System: DGX A100, Driver: 535.129.03. Issue occurs consistently after warm reboot but not after cold boot.", + "category": "Driver Issues", + "severity": "critical", + "status": "open" + }, + { + "title": "CUDA out of memory error with large batch sizes", + "content": "Training transformer model with batch size 64 causes 'CUDA out of memory' error on A100 80GB. nvidia-smi shows only 45GB used before crash. Suspecting memory fragmentation. Using PyTorch 2.1, CUDA 12.1. Issue started after upgrading from PyTorch 1.13.", + "category": "Memory Problems", + "severity": "high", + "status": "in_progress" + }, + { + "title": "Multi-GPU training hangs at NCCL initialization", + "content": "4-GPU training job freezes during NCCL backend initialization. No error messages, process just hangs. Using torchrun with NCCL 2.18.1. Single GPU training works fine. Network fabric is InfiniBand. Verified all GPUs visible via nvidia-smi.", + "category": "Multi-GPU Setup", + "severity": "high", + "status": "open" + }, + { + "title": "TensorRT engine build fails on INT8 quantization", + "content": "Building TensorRT engine with INT8 calibration fails with 'Calibration cache is not valid'. Using TensorRT 8.6.1, calibration dataset has 1000 samples. FP16 engine builds successfully. Model is ResNet-50 exported from PyTorch.", + "category": "TensorRT", + "severity": "medium", + "status": "open" + }, + { + "title": "Container GPU isolation not working in Kubernetes", + "content": "NVIDIA device plugin not properly isolating GPUs between pods. Pod A can see GPUs allocated to Pod B. Using k8s 1.28, nvidia-device-plugin 0.14.1. NVIDIA_VISIBLE_DEVICES environment variable is set correctly but nvidia-smi shows all 8 GPUs.", + "category": "Container/Docker", + "severity": "high", + "status": "in_progress" + }, + { + "title": "Triton Inference Server high latency spikes", + "content": "Periodic latency spikes of 500ms+ on Triton model serving. Normal latency is ~20ms. Happens every 30-60 seconds regardless of load. Using dynamic batching with max batch size 32. GPU utilization stays below 50%. No memory leaks detected.", + "category": "Triton Inference", + "severity": "medium", + "status": "open" + }, + { + "title": "cuDNN convolution algorithm selection crash", + "content": "Application crashes when cuDNN auto-tuner selects convolution algorithm. Error: 'CUDNN_STATUS_INTERNAL_ERROR'. Occurs randomly, not reproducible. cuDNN 8.9.5, CUDA 12.2. Workaround: setting CUDNN_BENCHMARK=0 but reduces performance.", + "category": "cuDNN", + "severity": "medium", + "status": "in_progress" + }, + { + "title": "DeepStream pipeline drops frames under load", + "content": "DeepStream 6.3 pipeline processing 16 RTSP streams drops frames when CPU load exceeds 60%. Using nvinfer with custom YOLOv8 model. GPU utilization is only 40%. Suspect bottleneck in demuxer or pre-processing stages.", + "category": "DeepStream", + "severity": "medium", + "status": "open" + }, + { + "title": "NIM endpoint returns 503 during peak hours", + "content": "Self-hosted NIM endpoint becomes unavailable (503 errors) during peak traffic. Deployed on 2x A100 GPUs. Request rate: ~100 req/s. No GPU memory issues visible. Kubernetes HPA not scaling as expected. Suspect connection pool exhaustion.", + "category": "NVIDIA NIM", + "severity": "critical", + "status": "open" + }, + { + "title": "Performance regression after driver update to 545", + "content": "Training throughput dropped 15% after updating driver from 535 to 545. Same code, same hardware (H100 SXM). Verified CUDA version unchanged at 12.2. Power consumption is similar. Issue affects both PyTorch and TensorFlow workloads.", + "category": "Performance Optimization", + "severity": "high", + "status": "open" + } + ] + + +def create_collection(client: MilvusClient): + """Create the support_tickets collection in Milvus with explicit schema.""" + from pymilvus import DataType + + # Drop existing collection if it exists + if client.has_collection(COLLECTION_NAME): + print(f"Dropping existing collection: {COLLECTION_NAME}") + client.drop_collection(COLLECTION_NAME) + + # Create schema with explicit fields (required for langchain-milvus retriever) + print(f"Creating collection: {COLLECTION_NAME}") + schema = client.create_schema(auto_id=False, enable_dynamic_field=True) + schema.add_field("pk", DataType.VARCHAR, max_length=128, is_primary=True) + schema.add_field("vector", DataType.FLOAT_VECTOR, dim=EMBEDDING_DIM) + schema.add_field("text", DataType.VARCHAR, max_length=8192) # Required field for retriever + schema.add_field("title", DataType.VARCHAR, max_length=512) + schema.add_field("category", DataType.VARCHAR, max_length=128) + schema.add_field("severity", DataType.VARCHAR, max_length=64) + schema.add_field("status", DataType.VARCHAR, max_length=64) + + # Create index params + index_params = client.prepare_index_params() + index_params.add_index(field_name="vector", metric_type="L2", index_type="AUTOINDEX") + + # Create collection with schema + client.create_collection( + collection_name=COLLECTION_NAME, + schema=schema, + index_params=index_params + ) + print(f"Collection '{COLLECTION_NAME}' created successfully with explicit schema") + + +def insert_tickets_with_random_embeddings(client: MilvusClient, tickets: List[Dict]): + """Insert tickets with random embeddings (for demo without API key).""" + import random + + print(f"Generating random embeddings for {len(tickets)} tickets (demo mode)...") + + # Prepare data for insertion with random vectors + import uuid + data = [] + for ticket in tickets: + # Generate random normalized vector for demo + random_vec = [random.gauss(0, 1) for _ in range(EMBEDDING_DIM)] + norm = sum(x*x for x in random_vec) ** 0.5 + normalized_vec = [x / norm for x in random_vec] + + data.append({ + "pk": str(uuid.uuid4()), # String primary key + "vector": normalized_vec, + "title": ticket["title"][:256], + "text": f"{ticket['title']}. {ticket['content']}"[:4096], # Use 'text' field for retriever + "category": ticket.get("category", "General")[:64], + "severity": ticket.get("severity", "medium")[:32], + "status": ticket.get("status", "open")[:32] + }) + + print(f"Inserting {len(data)} tickets into Milvus...") + result = client.insert(collection_name=COLLECTION_NAME, data=data) + print(f"Inserted {result['insert_count']} tickets successfully") + return result + + +def insert_tickets(client: MilvusClient, tickets: List[Dict]): + """Insert tickets with embeddings into Milvus.""" + # Combine title and content for embedding + texts = [f"{t['title']}. {t['content']}" for t in tickets] + + print(f"Generating embeddings for {len(texts)} tickets...") + embeddings = generate_embeddings(texts) + + # Prepare data for insertion + import uuid + data = [] + for i, (ticket, embedding) in enumerate(zip(tickets, embeddings)): + data.append({ + "pk": str(uuid.uuid4()), # String primary key + "vector": embedding, + "title": ticket["title"][:256], # Truncate to fit schema + "text": f"{ticket['title']}. {ticket['content']}"[:4096], # Use 'text' field for retriever + "category": ticket.get("category", "General")[:64], + "severity": ticket.get("severity", "medium")[:32], + "status": ticket.get("status", "open")[:32] + }) + + print(f"Inserting {len(data)} tickets into Milvus...") + result = client.insert(collection_name=COLLECTION_NAME, data=data) + print(f"Inserted {result['insert_count']} tickets successfully") + return result + + +def test_search(client: MilvusClient, query: str): + """Test searching the collection.""" + print(f"\nTesting search with query: '{query}'") + + # Generate query embedding + query_embedding = generate_embeddings([query])[0] + + # Search + results = client.search( + collection_name=COLLECTION_NAME, + data=[query_embedding], + limit=3, + output_fields=["title", "text", "category", "severity"] + ) + + print(f"Found {len(results[0])} results:") + for i, hit in enumerate(results[0]): + print(f"\n {i+1}. Score: {hit['distance']:.4f}") + print(f" Title: {hit['entity']['title']}") + print(f" Category: {hit['entity']['category']}") + print(f" Severity: {hit['entity']['severity']}") + + +def main(): + """Main function to seed Milvus with support tickets.""" + print("=" * 60) + print("AIDP Support Tickets Seeder") + print("=" * 60) + + # Check for API key + use_synthetic = True + if not NVIDIA_API_KEY: + print("\nWarning: NVIDIA_API_KEY environment variable is not set.") + print("Get an API key from https://build.nvidia.com") + print("\nProceeding with fallback data (no embeddings - using random vectors)...") + use_synthetic = False + + # Connect to Milvus + print(f"\nConnecting to Milvus at {MILVUS_URI}...") + client = MilvusClient(uri=MILVUS_URI) + print("Connected successfully!") + + # Create collection + create_collection(client) + + # Generate or load tickets + if use_synthetic: + try: + tickets = generate_synthetic_tickets(num_tickets=30) + except Exception as e: + print(f"LLM generation failed: {e}") + print("Using fallback tickets...") + tickets = get_fallback_tickets() + # Insert with real embeddings + insert_tickets(client, tickets) + else: + # Use fallback data with random embeddings for demo purposes + tickets = get_fallback_tickets() + insert_tickets_with_random_embeddings(client, tickets) + + # Test search (only works properly with real embeddings) + if use_synthetic: + test_search(client, "GPU driver issues after kernel update") + test_search(client, "memory problems during training") + test_search(client, "container isolation Kubernetes") + else: + print("\nNote: Search test skipped (requires NVIDIA_API_KEY for proper embeddings)") + print("Set NVIDIA_API_KEY and re-run to enable semantic search.") + + print("\n" + "=" * 60) + print("Seeding complete!") + print(f"Collection '{COLLECTION_NAME}' now has data ready for RAG queries.") + print("=" * 60) + + return 0 + + +if __name__ == "__main__": + exit(main()) + diff --git a/examples/aidp_openai_demo/src/nat_aidp_openai_demo/__init__.py b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/__init__.py new file mode 100644 index 0000000..5e3787e --- /dev/null +++ b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + diff --git a/examples/aidp_openai_demo/src/nat_aidp_openai_demo/configs/workflow.yml b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/configs/workflow.yml new file mode 100644 index 0000000..9c8bf7a --- /dev/null +++ b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/configs/workflow.yml @@ -0,0 +1,44 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# AIDP Retrieval API Demo - MCP Client Workflow +# +# This workflow demonstrates an AI agent using the AIDP Retrieval API +# via MCP (Model Context Protocol) following the AIDP Storage API Proposal. +# +# Usage: +# # First, start the AIDP MCP Server: +# python src/nat_aidp_openai_demo/server.py +# +# # Then run the agent: +# nat run --config_file src/nat_aidp_openai_demo/configs/workflow.yml \ +# --input "Find GPU memory issues" + +# MCP Client connection to AIDP server +function_groups: + aidp: + _type: mcp_client + server: + transport: streamable-http + url: "http://localhost:8081/mcp" + include: + - search_vector_store + +# LLM configuration - NVIDIA NIM +llms: + nim_llm: + _type: nim + model_name: meta/llama-3.1-70b-instruct + temperature: 0 + max_tokens: 512 + +# Agent workflow +workflow: + _type: react_agent + tool_names: + - aidp + llm_name: nim_llm + verbose: true + retry_parsing_errors: true + max_retries: 2 + diff --git a/examples/aidp_openai_demo/src/nat_aidp_openai_demo/server.py b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/server.py new file mode 100644 index 0000000..7566faa --- /dev/null +++ b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/server.py @@ -0,0 +1,221 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +#!/usr/bin/env python3 +""" +AIDP Retrieval API - MCP HTTP Server + +This MCP server exposes the AIDP Retrieval API over HTTP using the +streamable-http transport, allowing NeMo Agent Toolkit to connect +as an MCP client. + +Implements the OpenAI Vector Store Search specification: + POST /v1/vector_stores/{vector_store_id}/search + +MCP Tool: search_vector_store + +Usage: + python server.py + + Then configure NAT to connect: + function_groups: + aidp: + _type: mcp_client + server: + transport: streamable-http + url: "http://localhost:8081/mcp" +""" + +import os +import json +import logging +from typing import Optional + +from fastmcp import FastMCP + +from pymilvus import MilvusClient +import requests + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger("aidp-mcp-http-server") + +# Configuration +MILVUS_URI = os.getenv("MILVUS_URI", "http://localhost:19530") +NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "") +NVIDIA_EMBED_URL = "https://integrate.api.nvidia.com/v1/embeddings" +NVIDIA_EMBED_MODEL = "nvidia/nv-embedqa-e5-v5" +MCP_PORT = int(os.getenv("MCP_PORT", "8081")) + +# Initialize FastMCP server with port configuration +mcp = FastMCP( + "AIDP Retrieval API", + host="0.0.0.0", + port=MCP_PORT +) + +# Milvus client (lazy initialization) +_milvus_client = None + + +def get_milvus_client(): + """Get or create Milvus client.""" + global _milvus_client + if _milvus_client is None: + _milvus_client = MilvusClient(uri=MILVUS_URI) + logger.info(f"Connected to Milvus at {MILVUS_URI}") + return _milvus_client + + +def get_embedding(text: str) -> list[float]: + """Get embedding vector using NVIDIA NIM.""" + if not NVIDIA_API_KEY: + raise ValueError("NVIDIA_API_KEY not set") + + response = requests.post( + NVIDIA_EMBED_URL, + headers={ + "Authorization": f"Bearer {NVIDIA_API_KEY}", + "Content-Type": "application/json" + }, + json={ + "model": NVIDIA_EMBED_MODEL, + "input": [text], + "input_type": "query", + "encoding_format": "float", + "truncate": "END" + }, + timeout=30 + ) + response.raise_for_status() + return response.json()["data"][0]["embedding"] + + +@mcp.tool() +def search_vector_store( + query: str, + vector_store_id: str = "support_tickets", + max_num_results: int = 10, + filter_key: Optional[str] = None, + filter_type: Optional[str] = None, + filter_value: Optional[str] = None, + score_threshold: Optional[float] = None +) -> str: + """ + Search a vector store for relevant chunks based on a query. + + This tool implements the AIDP Retrieval API following the OpenAI specification. + Use this to search the NVIDIA AI Data Platform for support tickets or other content. + + Args: + query: A query string for search (required) + vector_store_id: The ID of the vector store to search (default: support_tickets) + max_num_results: Maximum number of results to return, 1-50 (default: 10) + filter_key: Optional filter attribute key (e.g., "severity", "category") + filter_type: Optional filter type: "eq", "ne", "contains" + filter_value: Optional filter value to match + score_threshold: Optional minimum score threshold for results + + Returns: + JSON string with search results in OpenAI format + """ + logger.info(f"search_vector_store called: query='{query}', store='{vector_store_id}'") + + try: + client = get_milvus_client() + + # Check if collection exists + if not client.has_collection(vector_store_id): + return json.dumps({ + "error": {"code": 404, "message": f"Vector store '{vector_store_id}' not found"} + }) + + # Get query embedding + query_embedding = get_embedding(query) + + # Validate max_num_results + max_num_results = max(1, min(50, max_num_results)) + + # Search Milvus + search_results = client.search( + collection_name=vector_store_id, + data=[query_embedding], + limit=max_num_results, + output_fields=["pk", "title", "text", "category", "severity", "status"] + ) + + # Transform to OpenAI format + data = [] + for hits in search_results: + for hit in hits: + entity = hit.get("entity", {}) + distance = hit.get("distance", 0) + similarity_score = 1 / (1 + distance) + + # Apply score threshold + if score_threshold and similarity_score < score_threshold: + continue + + # Apply filters + if filter_key and filter_type and filter_value: + attr_value = entity.get(filter_key, "") + if filter_type == "eq" and attr_value != filter_value: + continue + elif filter_type == "ne" and attr_value == filter_value: + continue + elif filter_type == "contains" and filter_value not in str(attr_value): + continue + + result = { + "file_id": entity.get("pk", ""), + "filename": f"{entity.get('title', 'untitled')}.txt", + "score": round(similarity_score, 4), + "attributes": { + "category": entity.get("category", ""), + "severity": entity.get("severity", ""), + "status": entity.get("status", ""), + "title": entity.get("title", "") + }, + "content": [ + { + "type": "text", + "text": entity.get("text", "") + } + ] + } + data.append(result) + + # Build OpenAI-compatible response + response = { + "object": "vector_store.search_results.page", + "search_query": query, + "data": data, + "has_more": False, + "next_page": None + } + + logger.info(f"Returning {len(data)} results") + return json.dumps(response, indent=2) + + except Exception as e: + logger.error(f"Search error: {e}") + return json.dumps({ + "error": {"code": 500, "message": str(e)} + }) + + +if __name__ == "__main__": + print(f""" +╔══════════════════════════════════════════════════════════════════════════════╗ +║ AIDP MCP Server (HTTP Transport) ║ +║ Following OpenAI Specification ║ +╠══════════════════════════════════════════════════════════════════════════════╣ +║ MCP Endpoint: http://localhost:{MCP_PORT}/mcp ║ +║ Tool: search_vector_store ║ +║ Milvus: {MILVUS_URI:<55} ║ +╚══════════════════════════════════════════════════════════════════════════════╝ +""") + + # Run the MCP server with HTTP transport + mcp.run(transport="streamable-http") + From 022d366a96f0995b64bc2b08ee42a570f9250a46 Mon Sep 17 00:00:00 2001 From: Patrick Moorhead Date: Wed, 7 Jan 2026 09:23:21 -0600 Subject: [PATCH 2/2] Add REST API server and examples - rest_api.py: OpenAI-compatible REST endpoint - examples.py: Comprehensive API usage examples - Updated pyproject.toml with fastapi/uvicorn deps - Updated README with complete file list Signed-off-by: Patrick Moorhead --- examples/aidp_openai_demo/README.md | 2 + examples/aidp_openai_demo/pyproject.toml | 2 + .../src/nat_aidp_openai_demo/examples.py | 390 ++++++++++++++++ .../src/nat_aidp_openai_demo/rest_api.py | 420 ++++++++++++++++++ 4 files changed, 814 insertions(+) create mode 100644 examples/aidp_openai_demo/src/nat_aidp_openai_demo/examples.py create mode 100644 examples/aidp_openai_demo/src/nat_aidp_openai_demo/rest_api.py diff --git a/examples/aidp_openai_demo/README.md b/examples/aidp_openai_demo/README.md index b84341b..279d93c 100644 --- a/examples/aidp_openai_demo/README.md +++ b/examples/aidp_openai_demo/README.md @@ -310,6 +310,8 @@ export MILVUS_URI="http://your-milvus-host:19530" | File | Purpose | |------|---------| | `src/nat_aidp_openai_demo/server.py` | MCP server exposing `search_vector_store` tool | +| `src/nat_aidp_openai_demo/rest_api.py` | REST API server (OpenAI-compatible endpoint) | +| `src/nat_aidp_openai_demo/examples.py` | Comprehensive API usage examples | | `src/nat_aidp_openai_demo/configs/workflow.yml` | NeMo Agent Toolkit workflow configuration | | `scripts/load_support_tickets.py` | Data loading script for Milvus | diff --git a/examples/aidp_openai_demo/pyproject.toml b/examples/aidp_openai_demo/pyproject.toml index 119ce9b..e1a49d5 100644 --- a/examples/aidp_openai_demo/pyproject.toml +++ b/examples/aidp_openai_demo/pyproject.toml @@ -12,6 +12,8 @@ dependencies = [ "nvidia-nat[langchain,mcp]>=1.4.0a0,<1.5.0", "pymilvus~=2.6", "fastmcp", + "fastapi", + "uvicorn", "requests", ] diff --git a/examples/aidp_openai_demo/src/nat_aidp_openai_demo/examples.py b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/examples.py new file mode 100644 index 0000000..f3c2ca2 --- /dev/null +++ b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/examples.py @@ -0,0 +1,390 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +#!/usr/bin/env python3 +""" +================================================================================ +AIDP Retrieval API Example +================================================================================ + +This example demonstrates the NVIDIA AI Data Platform (AIDP) Retrieval API +and its alignment with OpenAI-compliant search endpoints. + +AIDP API Specification: +----------------------- +The AIDP Retrieval API follows the OpenAI Vector Store Search API specification: +https://platform.openai.com/docs/api-reference/vector_stores/search + +Endpoint: + POST /v1/vector_stores/{vector_store_id}/search + +Request Body: + { + "query": "search query string", + "filters": {"key": "...", "type": "eq", "value": "..."}, + "max_num_results": 10, + "ranking_options": {"ranker": "auto", "score_threshold": 0.5}, + "rewrite_query": false + } + +Response Body (OpenAI Format): + { + "object": "vector_store.search_results.page", + "search_query": "...", + "data": [ + { + "file_id": "...", + "filename": "...", + "score": 0.95, + "attributes": {...}, + "content": [{"type": "text", "text": "..."}] + } + ], + "has_more": false, + "next_page": null + } + +Usage: + # Start the AIDP REST API server first: + python rest_api.py & + + # Then run this example: + python examples.py +""" + +import os +import json +import requests +from typing import Optional + +# Configuration +AIDP_API_BASE = os.getenv("AIDP_API_BASE", "http://localhost:8080") +API_KEY = os.getenv("NVIDIA_API_KEY", "demo-api-key") + + +def print_header(title: str): + """Print a formatted header.""" + print("\n" + "=" * 80) + print(f" {title}") + print("=" * 80) + + +def print_subheader(title: str): + """Print a formatted subheader.""" + print(f"\n{'─' * 40}") + print(f" {title}") + print(f"{'─' * 40}") + + +def search_vector_store( + vector_store_id: str, + query: str, + max_num_results: int = 10, + filters: Optional[dict] = None, + ranking_options: Optional[dict] = None +) -> dict: + """ + Call the AIDP Retrieval API following OpenAI specification. + + This function demonstrates the exact API call format as specified + in the AIDP Storage API Proposal. + + Endpoint: POST /v1/vector_stores/{vector_store_id}/search + + Args: + vector_store_id: The ID of the vector store (collection) to search + query: A query string for search (required) + max_num_results: Maximum results to return, 1-50 (default: 10) + filters: Optional filters based on file attributes + ranking_options: Optional ranking configuration + + Returns: + OpenAI-compatible search response + """ + url = f"{AIDP_API_BASE}/v1/vector_stores/{vector_store_id}/search" + + # Build request body (OpenAI format) + request_body = { + "query": query, + "max_num_results": max_num_results + } + + if filters: + request_body["filters"] = filters + + if ranking_options: + request_body["ranking_options"] = ranking_options + + # Make the API call with Bearer token authentication + response = requests.post( + url, + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {API_KEY}" # AIDP uses Bearer token auth + }, + json=request_body, + timeout=30 + ) + + response.raise_for_status() + return response.json() + + +def example_1_basic_search(): + """Example 1: Basic semantic search.""" + print_header("Example 1: Basic Semantic Search") + + print(""" +This example demonstrates a basic semantic search following the OpenAI API spec. + +API Call: + POST /v1/vector_stores/support_tickets/search + + Request Body: + { + "query": "GPU memory issues during training", + "max_num_results": 3 + } +""") + + result = search_vector_store( + vector_store_id="support_tickets", + query="GPU memory issues during training", + max_num_results=3 + ) + + print("Response (OpenAI Format):") + print(f" object: {result.get('object')}") + print(f" search_query: {result.get('search_query')}") + print(f" has_more: {result.get('has_more')}") + print(f"\n Results ({len(result.get('data', []))} found):") + + for i, item in enumerate(result.get("data", []), 1): + print(f"\n {i}. {item['attributes']['title']}") + print(f" file_id: {item['file_id']}") + print(f" score: {item['score']}") + print(f" category: {item['attributes']['category']}") + print(f" severity: {item['attributes']['severity']}") + + +def example_2_filtered_search(): + """Example 2: Search with attribute filters.""" + print_header("Example 2: Search with Attribute Filters") + + print(""" +This example demonstrates filtering by file attributes (severity=critical). + +API Call: + POST /v1/vector_stores/support_tickets/search + + Request Body: + { + "query": "GPU issues", + "max_num_results": 5, + "filters": { + "key": "severity", + "type": "eq", + "value": "critical" + } + } +""") + + result = search_vector_store( + vector_store_id="support_tickets", + query="GPU issues", + max_num_results=5, + filters={ + "key": "severity", + "type": "eq", + "value": "critical" + } + ) + + print("Response:") + print(f" Found {len(result.get('data', []))} critical issues:") + + for i, item in enumerate(result.get("data", []), 1): + print(f"\n {i}. {item['attributes']['title']}") + print(f" severity: {item['attributes']['severity']} ✓") + print(f" score: {item['score']}") + + +def example_3_category_filter(): + """Example 3: Filter by category.""" + print_header("Example 3: Filter by Category") + + print(""" +This example filters results by category. + +API Call: + POST /v1/vector_stores/support_tickets/search + + Request Body: + { + "query": "performance problems", + "filters": { + "key": "category", + "type": "eq", + "value": "Driver Issues" + } + } +""") + + result = search_vector_store( + vector_store_id="support_tickets", + query="performance problems", + max_num_results=5, + filters={ + "key": "category", + "type": "eq", + "value": "Driver Issues" + } + ) + + print("Response:") + print(f" Found {len(result.get('data', []))} Driver Issues:") + + for i, item in enumerate(result.get("data", []), 1): + print(f"\n {i}. {item['attributes']['title']}") + print(f" category: {item['attributes']['category']} ✓") + + +def example_4_full_response_format(): + """Example 4: Show complete OpenAI response format.""" + print_header("Example 4: Complete OpenAI Response Format") + + print(""" +This example shows the complete OpenAI-compatible response format +as specified in the AIDP Storage API Proposal. +""") + + result = search_vector_store( + vector_store_id="support_tickets", + query="container Kubernetes GPU", + max_num_results=2 + ) + + print("Full JSON Response:") + print(json.dumps(result, indent=2)) + + +def example_5_openai_alignment(): + """Example 5: OpenAI API alignment comparison.""" + print_header("Example 5: OpenAI API Alignment") + + print(""" +╔═══════════════════════════════════════════════════════════════════════════════╗ +║ AIDP API vs OpenAI API Alignment ║ +╠═══════════════════════════════════════════════════════════════════════════════╣ +║ ║ +║ OpenAI Specification AIDP Implementation ║ +║ ──────────────────────────────────── ──────────────────────────────── ║ +║ ║ +║ Endpoint: ║ +║ POST /v1/vector_stores/{id}/search ✓ Identical ║ +║ ║ +║ Request Parameters: ║ +║ • query (required) ✓ Implemented ║ +║ • max_num_results (1-50) ✓ Implemented ║ +║ • filters (key/type/value) ✓ Implemented ║ +║ • ranking_options ✓ Implemented ║ +║ • rewrite_query ✓ Implemented (placeholder) ║ +║ ║ +║ Response Format: ║ +║ • object: "vector_store.search..." ✓ Identical ║ +║ • search_query ✓ Identical ║ +║ • data[].file_id ✓ Identical ║ +║ • data[].filename ✓ Identical ║ +║ • data[].score ✓ Identical ║ +║ • data[].attributes ✓ Identical ║ +║ • data[].content[] ✓ Identical ║ +║ • has_more ✓ Identical ║ +║ • next_page ✓ Identical ║ +║ ║ +║ Authentication: ║ +║ Bearer token ✓ Implemented ║ +║ ║ +║ HTTP Status Codes: ║ +║ 200, 401, 404, 429, 500, 503 ✓ All implemented ║ +║ ║ +╚═══════════════════════════════════════════════════════════════════════════════╝ +""") + + +def main(): + """Run all examples.""" + print(""" +╔═══════════════════════════════════════════════════════════════════════════════╗ +║ ║ +║ NVIDIA AI Data Platform (AIDP) Retrieval API ║ +║ ║ +║ OpenAI-Compliant Search Endpoints ║ +║ ║ +╚═══════════════════════════════════════════════════════════════════════════════╝ + +This demonstration shows how the AIDP Retrieval API aligns with the OpenAI +Vector Store Search API specification, enabling seamless integration with +AI agents and enterprise applications. + +Reference: AIDP Storage API Proposal v0.2 +OpenAI Spec: https://platform.openai.com/docs/api-reference/vector_stores/search +""") + + try: + # Check if API is available + response = requests.get(f"{AIDP_API_BASE}/", timeout=5) + if response.status_code != 200: + raise Exception("API not available") + + print(f"✓ AIDP API Server running at {AIDP_API_BASE}") + + except Exception as e: + print(f""" +✗ AIDP API Server not running at {AIDP_API_BASE} + +Please start the server first: + python rest_api.py & + +Then run this example again. +""") + return + + # Run examples + try: + example_1_basic_search() + example_2_filtered_search() + example_3_category_filter() + example_4_full_response_format() + example_5_openai_alignment() + + print_header("Summary") + print(""" +The AIDP Retrieval API provides: + +1. ✓ OpenAI-Compatible Endpoint + POST /v1/vector_stores/{vector_store_id}/search + +2. ✓ Standard Request Format + query, filters, max_num_results, ranking_options + +3. ✓ OpenAI Response Schema + object, search_query, data[], has_more, next_page + +4. ✓ Bearer Token Authentication + As recommended in the AIDP Storage API Proposal + +5. ✓ MCP Integration + Exposes search_vector_store tool for AI agent discovery + +This alignment ensures that any AI application built for OpenAI's +Vector Store API can seamlessly integrate with NVIDIA AIDP storage. +""") + + except requests.exceptions.RequestException as e: + print(f"\n✗ API Error: {e}") + print("Please ensure the AIDP API server is running.") + + +if __name__ == "__main__": + main() + diff --git a/examples/aidp_openai_demo/src/nat_aidp_openai_demo/rest_api.py b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/rest_api.py new file mode 100644 index 0000000..48d5e96 --- /dev/null +++ b/examples/aidp_openai_demo/src/nat_aidp_openai_demo/rest_api.py @@ -0,0 +1,420 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +#!/usr/bin/env python3 +""" +AIDP Retrieval API - REST Server Implementation + +This REST server implements the NVIDIA AI Data Platform (AIDP) Retrieval API +following the OpenAI API specification exactly. + +Endpoint: POST /v1/vector_stores/{vector_store_id}/search + +Request Body: +{ + "query": "string", # Required + "filters": {...}, # Optional + "max_num_results": 10, # Optional (1-50) + "ranking_options": {...}, # Optional + "rewrite_query": false # Optional +} + +Response Body: +{ + "object": "vector_store.search_results.page", + "search_query": "...", + "data": [...], + "has_more": false, + "next_page": null +} + +Usage: + python rest_api.py + + # Then call: + curl -X POST http://localhost:8080/v1/vector_stores/support_tickets/search \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $NVIDIA_API_KEY" \ + -d '{"query": "GPU memory issues"}' +""" + +import os +import json +import logging +from typing import Optional, List, Any +from contextlib import asynccontextmanager + +from fastapi import FastAPI, HTTPException, Header, Path, Depends +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field +import uvicorn + +from pymilvus import MilvusClient +import requests + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger("aidp-rest-api") + +# Configuration +MILVUS_URI = os.getenv("MILVUS_URI", "http://localhost:19530") +NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "") +NVIDIA_EMBED_URL = "https://integrate.api.nvidia.com/v1/embeddings" +NVIDIA_EMBED_MODEL = "nvidia/nv-embedqa-e5-v5" +API_PORT = int(os.getenv("AIDP_API_PORT", "8080")) + + +# ============================================================================ +# Pydantic Models - Following OpenAI API Specification +# ============================================================================ + +class FilterSpec(BaseModel): + """Filter specification for attribute-based filtering.""" + key: str = Field(..., description="The attribute key to filter on") + type: str = Field("eq", description="Filter type: eq, ne, contains") + value: str = Field(..., description="The value to filter by") + + +class RankingOptions(BaseModel): + """Ranking options for search results.""" + ranker: str = Field("auto", description="Ranker to use") + score_threshold: Optional[float] = Field(None, description="Minimum score threshold") + + +class SearchRequest(BaseModel): + """ + Search request body following OpenAI API specification. + https://platform.openai.com/docs/api-reference/vector_stores/search + """ + query: str = Field(..., description="A query string for search") + filters: Optional[FilterSpec] = Field(None, description="Filters to apply based on file attributes") + max_num_results: int = Field(10, ge=1, le=50, description="Maximum number of results (1-50)") + ranking_options: Optional[RankingOptions] = Field(None, description="Ranking options for search") + rewrite_query: bool = Field(False, description="Whether to rewrite the query for vector search") + + +class ContentItem(BaseModel): + """Content item within a search result.""" + type: str = "text" + text: str + location: Optional[dict] = None + + +class SearchResultItem(BaseModel): + """Individual search result following OpenAI format.""" + file_id: str + filename: str + score: float + attributes: dict + content: List[ContentItem] + + +class SearchResponse(BaseModel): + """ + Search response body following OpenAI API specification. + https://platform.openai.com/docs/api-reference/vector_stores/search + """ + object: str = "vector_store.search_results.page" + search_query: str + data: List[SearchResultItem] + has_more: bool = False + next_page: Optional[str] = None + + +class ErrorResponse(BaseModel): + """Error response.""" + error: dict + + +# ============================================================================ +# Retrieval Service +# ============================================================================ + +class AIDPRetrievalService: + """AIDP Retrieval API service implementation.""" + + def __init__(self): + self.milvus_client = None + self.api_key = NVIDIA_API_KEY + + def connect(self): + """Connect to Milvus.""" + if self.milvus_client is None: + self.milvus_client = MilvusClient(uri=MILVUS_URI) + logger.info(f"Connected to Milvus at {MILVUS_URI}") + + def _get_embedding(self, text: str) -> List[float]: + """Get embedding vector using NVIDIA NIM.""" + if not self.api_key: + raise ValueError("NVIDIA_API_KEY not set. Get one from https://build.nvidia.com") + + response = requests.post( + NVIDIA_EMBED_URL, + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + }, + json={ + "model": NVIDIA_EMBED_MODEL, + "input": [text], + "input_type": "query", + "encoding_format": "float", + "truncate": "END" + }, + timeout=30 + ) + response.raise_for_status() + return response.json()["data"][0]["embedding"] + + def search( + self, + vector_store_id: str, + request: SearchRequest + ) -> SearchResponse: + """ + Search vector store following OpenAI API specification. + """ + self.connect() + + logger.info(f"Searching '{vector_store_id}' with query: {request.query}") + + # Check if collection exists + if not self.milvus_client.has_collection(vector_store_id): + raise HTTPException( + status_code=404, + detail=f"Vector store '{vector_store_id}' not found" + ) + + # Get query embedding + query_embedding = self._get_embedding(request.query) + + # Search Milvus + search_results = self.milvus_client.search( + collection_name=vector_store_id, + data=[query_embedding], + limit=request.max_num_results, + output_fields=["pk", "title", "text", "category", "severity", "status"] + ) + + # Transform to OpenAI format + data = [] + for hits in search_results: + for hit in hits: + entity = hit.get("entity", {}) + + # Calculate similarity score from L2 distance + distance = hit.get("distance", 0) + similarity_score = 1 / (1 + distance) + + # Apply score threshold if specified + if request.ranking_options and request.ranking_options.score_threshold: + if similarity_score < request.ranking_options.score_threshold: + continue + + # Apply filters if specified + if request.filters: + attr_value = None + if request.filters.key == "category": + attr_value = entity.get("category", "") + elif request.filters.key == "severity": + attr_value = entity.get("severity", "") + elif request.filters.key == "status": + attr_value = entity.get("status", "") + + if attr_value: + if request.filters.type == "eq" and attr_value != request.filters.value: + continue + elif request.filters.type == "ne" and attr_value == request.filters.value: + continue + elif request.filters.type == "contains" and request.filters.value not in attr_value: + continue + + result = SearchResultItem( + file_id=entity.get("pk", ""), + filename=f"{entity.get('title', 'untitled')}.txt", + score=round(similarity_score, 4), + attributes={ + "category": entity.get("category", ""), + "severity": entity.get("severity", ""), + "status": entity.get("status", ""), + "title": entity.get("title", "") + }, + content=[ + ContentItem( + type="text", + text=entity.get("text", "") + ) + ] + ) + data.append(result) + + logger.info(f"Found {len(data)} results") + + return SearchResponse( + object="vector_store.search_results.page", + search_query=request.query, + data=data, + has_more=False, + next_page=None + ) + + +# Initialize service +retrieval_service = AIDPRetrievalService() + + +# ============================================================================ +# FastAPI Application +# ============================================================================ + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Application lifespan handler.""" + logger.info("Starting AIDP Retrieval API Server...") + retrieval_service.connect() + yield + logger.info("Shutting down AIDP Retrieval API Server...") + + +app = FastAPI( + title="AIDP Retrieval API", + description="NVIDIA AI Data Platform Retrieval API following OpenAI specification", + version="0.2", + lifespan=lifespan +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +# ============================================================================ +# Authentication +# ============================================================================ + +async def verify_api_key(authorization: Optional[str] = Header(None)): + """ + Verify Bearer token authentication. + API keys provided as Bearer tokens are the recommended authentication method. + """ + if not authorization: + raise HTTPException( + status_code=401, + detail="Invalid Authentication. Please provide Authorization header with Bearer token." + ) + + if not authorization.startswith("Bearer "): + raise HTTPException( + status_code=401, + detail="Invalid Authentication. Use 'Bearer ' format." + ) + + # For demo purposes, we accept any non-empty token + # In production, validate against your auth system + token = authorization.replace("Bearer ", "") + if not token: + raise HTTPException( + status_code=401, + detail="Incorrect API Key provided." + ) + + return token + + +# ============================================================================ +# API Endpoints +# ============================================================================ + +@app.get("/") +async def root(): + """API root - health check.""" + return { + "name": "AIDP Retrieval API", + "version": "0.2", + "status": "healthy", + "spec": "OpenAI Vector Store Search API" + } + + +@app.get("/v1/vector_stores") +async def list_vector_stores(api_key: str = Depends(verify_api_key)): + """List available vector stores.""" + retrieval_service.connect() + collections = retrieval_service.milvus_client.list_collections() + return { + "object": "list", + "data": [ + {"id": name, "object": "vector_store"} + for name in collections + ] + } + + +@app.post( + "/v1/vector_stores/{vector_store_id}/search", + response_model=SearchResponse, + responses={ + 200: {"description": "Search results"}, + 401: {"description": "Invalid Authentication"}, + 404: {"description": "Vector store not found"}, + 429: {"description": "Rate limit exceeded"}, + 500: {"description": "Internal server error"}, + 503: {"description": "System overloaded"} + } +) +async def search_vector_store( + vector_store_id: str = Path(..., description="The ID of the vector store (collection) to search"), + request: SearchRequest = ..., + api_key: str = Depends(verify_api_key) +): + """ + Search a vector store for relevant chunks based on a query and file attributes filter. + + This endpoint implements the OpenAI Vector Store Search API specification + as defined in the AIDP Storage API Proposal. + + URL: POST /v1/vector_stores/{vector_store_id}/search + + See: https://platform.openai.com/docs/api-reference/vector_stores/search + """ + try: + return retrieval_service.search(vector_store_id, request) + except HTTPException: + raise + except Exception as e: + logger.error(f"Search error: {e}") + raise HTTPException( + status_code=500, + detail=f"Internal system error: {str(e)}" + ) + + +# ============================================================================ +# Main +# ============================================================================ + +if __name__ == "__main__": + print(f""" +╔══════════════════════════════════════════════════════════════════════════════╗ +║ AIDP Retrieval API Server ║ +║ Following OpenAI Specification ║ +╠══════════════════════════════════════════════════════════════════════════════╣ +║ Endpoint: POST /v1/vector_stores/{{vector_store_id}}/search ║ +║ Port: {API_PORT} ║ +║ Milvus: {MILVUS_URI:<55} ║ +╚══════════════════════════════════════════════════════════════════════════════╝ + +Example usage: + curl -X POST http://localhost:{API_PORT}/v1/vector_stores/support_tickets/search \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer your-api-key" \\ + -d '{{"query": "GPU memory issues", "max_num_results": 5}}' +""") + + uvicorn.run(app, host="0.0.0.0", port=API_PORT) +