Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"chat.tools.terminal.autoApprove": {
"/^cd H:\\\\Works\\\\Code-Migration\\\\Container-Migration-Solution-Accelerator\\\\src\\\\backend-api ; python -m ruff check src/ --fix 2>&1$/": {
"approve": true,
"matchCommandLine": true
},
"npx eslint": true
}
}
38 changes: 38 additions & 0 deletions infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ param aiModelVersion string = '2025-04-16'
@description('Optional. AI model deployment token capacity, in thousands of tokens per minute. Defaults to 500 (500K tokens per minute). Lower this if initial provisioning fails due to regional capacity limits.')
param aiModelCapacity int = 500

@minLength(1)
@description('Optional. Name of the embedding model to deploy. Defaults to text-embedding-3-large.')
param aiEmbeddingModelName string = 'text-embedding-3-large'

@description('Optional. Version of the embedding model. Defaults to 1.')
param aiEmbeddingModelVersion string = '1'

@description('Optional. Embedding model deployment token capacity. Defaults to 500.')
param aiEmbeddingModelCapacity int = 500

@description('Optional. The tags to apply to all deployed Azure resources.')
param tags resourceInput<'Microsoft.Resources/resourceGroups@2025-04-01'>.tags = {}

Expand Down Expand Up @@ -761,6 +771,18 @@ module existingAiFoundryAiServicesDeployments 'modules/ai-services-deployments.b
capacity: aiModelCapacity
}
}
{
name: aiEmbeddingModelName
model: {
format: 'OpenAI'
name: aiEmbeddingModelName
version: aiEmbeddingModelVersion
}
sku: {
name: 'Standard'
capacity: aiEmbeddingModelCapacity
}
}
]
roleAssignments: [
// Service Principal permissions
Expand Down Expand Up @@ -857,6 +879,18 @@ module aiFoundry 'br/public:avm/ptn/ai-ml/ai-foundry:0.4.0' = if(!useExistingAiF
capacity: aiModelCapacity
}
}
{
name: aiEmbeddingModelName
model: {
format: 'OpenAI'
name: aiEmbeddingModelName
version: aiEmbeddingModelVersion
}
sku: {
name: 'Standard'
capacity: aiEmbeddingModelCapacity
}
}
]
tags: allTags
enableTelemetry: enableTelemetry
Expand Down Expand Up @@ -905,6 +939,10 @@ module appConfiguration 'br/public:avm/res/app-configuration/configuration-store
name: 'AZURE_OPENAI_CHAT_DEPLOYMENT_NAME'
value: aiModelDeploymentName
}
{
name: 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME'
value: aiEmbeddingModelName
}
{
name: 'AZURE_OPENAI_ENDPOINT'
value: 'https://${aiServicesName}.cognitiveservices.azure.com/'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,14 +246,14 @@ class ContextTrimConfig:
"""

enabled: bool = True
# GPT-5.1 supports 272K input tokens (~800K chars). These defaults stay well
# within that budget while guarding against accidental large blob injection.
# Progressive trimming on retry will reduce these further if needed.
max_total_chars: int = 600_000
max_message_chars: int = 40_000
keep_last_messages: int = 50
keep_head_chars: int = 15_000
keep_tail_chars: int = 5_000
# GPT-5.1 supports 272K input tokens (~800K chars). With workspace context
# injected into system instructions (never trimmed) and Qdrant shared memory
# providing cross-step context, we can keep fewer conversation messages.
max_total_chars: int = 400_000
max_message_chars: int = 30_000
keep_last_messages: int = 30
keep_head_chars: int = 12_000
keep_tail_chars: int = 4_000
keep_system_messages: bool = True
retry_on_context_error: bool = True

Expand Down
30 changes: 18 additions & 12 deletions src/processor/src/libs/agent_framework/mem0_async_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

"""Lazy-initialized async wrapper around the Mem0 vector-store memory backend."""

import os

from mem0 import AsyncMemory


Expand All @@ -17,6 +19,13 @@ async def get_memory(self):
return self._memory_instance

async def _create_memory(self):
endpoint = os.getenv("AZURE_OPENAI_ENDPOINT", "")
chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-5.1")
embedding_deployment = os.getenv(
"AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME", "text-embedding-3-large"
)
api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")

config = {
"vector_store": {
"provider": "redis",
Expand All @@ -29,27 +38,24 @@ async def _create_memory(self):
"llm": {
"provider": "azure_openai",
"config": {
"model": "gpt-5.1",
"model": chat_deployment,
"temperature": 0.1,
"max_tokens": 100000,
"max_tokens": 4000,
"azure_kwargs": {
"azure_deployment": "gpt-5.1",
"api_version": "2024-12-01-preview",
"azure_endpoint": "https://aifappframework.cognitiveservices.azure.com/",
"azure_deployment": chat_deployment,
"api_version": api_version,
"azure_endpoint": endpoint,
},
},
},
"embedder": {
"provider": "azure_openai",
"config": {
"model": "text-embedding-3-large",
"model": embedding_deployment,
"azure_kwargs": {
"api_version": "2024-02-01",
"azure_deployment": "text-embedding-3-large",
"azure_endpoint": "https://aifappframework.openai.azure.com/",
"default_headers": {
"CustomHeader": "container migration",
},
"api_version": api_version,
"azure_deployment": embedding_deployment,
"azure_endpoint": endpoint,
},
},
},
Expand Down
Loading
Loading