microsoft · Dongbumlee · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026
@@ -0,0 +1,9 @@
+{
+    "chat.tools.terminal.autoApprove": {
+        "/^cd H:\\\\Works\\\\Code-Migration\\\\Container-Migration-Solution-Accelerator\\\\src\\\\backend-api ; python -m ruff check src/ --fix 2>&1$/": {
+            "approve": true,
+            "matchCommandLine": true
+        },
+        "npx eslint": true
+    }
+}
@@ -78,6 +78,16 @@ param aiModelVersion string = '2025-04-16'
 @description('Optional. AI model deployment token capacity, in units of 1,000 tokens per minute. Lower this if initial provisioning fails due to capacity. Defaults to 500.')
 param aiModelCapacity int = 500
 
+@minLength(1)
+@description('Optional. Name of the embedding model to deploy; also used as the deployment name. Defaults to text-embedding-3-large.')
+param aiEmbeddingModelName string = 'text-embedding-3-large'
+
+@description('Optional. Version of the embedding model. Defaults to 1.')
+param aiEmbeddingModelVersion string = '1'
+
+@description('Optional. Embedding model deployment token capacity, in units of 1,000 tokens per minute. Lower this if provisioning fails due to capacity. Defaults to 500.')
+param aiEmbeddingModelCapacity int = 500
+
 @description('Optional. The tags to apply to all deployed Azure resources.')
 param tags resourceInput<'Microsoft.Resources/resourceGroups@2025-04-01'>.tags = {}
 
@@ -761,6 +771,18 @@ module existingAiFoundryAiServicesDeployments 'modules/ai-services-deployments.b
           capacity: aiModelCapacity
         }
       }
+      {
+        name: aiEmbeddingModelName
+        model: {
+          format: 'OpenAI'
+          name: aiEmbeddingModelName
+          version: aiEmbeddingModelVersion
+        }
+        sku: {
+          name: 'Standard'
+          capacity: aiEmbeddingModelCapacity
+        }
+      }
     ]
     roleAssignments: [
       // Service Principal permissions
@@ -857,6 +879,18 @@ module aiFoundry 'br/public:avm/ptn/ai-ml/ai-foundry:0.4.0' = if(!useExistingAiF
           capacity: aiModelCapacity
         }
       }
+      {
+        name: aiEmbeddingModelName
+        model: {
+          format: 'OpenAI'
+          name: aiEmbeddingModelName
+          version: aiEmbeddingModelVersion
+        }
+        sku: {
+          name: 'Standard'
+          capacity: aiEmbeddingModelCapacity
+        }
+      }
     ]
     tags: allTags
     enableTelemetry: enableTelemetry
@@ -905,6 +939,10 @@ module appConfiguration 'br/public:avm/res/app-configuration/configuration-store
         name: 'AZURE_OPENAI_CHAT_DEPLOYMENT_NAME'
         value: aiModelDeploymentName
       }
+      {
+        name: 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME'
+        value: aiEmbeddingModelName
+      }
       {
         name: 'AZURE_OPENAI_ENDPOINT'
         value: 'https://${aiServicesName}.cognitiveservices.azure.com/'

@@ -246,14 +246,14 @@ class ContextTrimConfig:
     """
 
     enabled: bool = True
-    # GPT-5.1 supports 272K input tokens (~800K chars). These defaults stay well
-    # within that budget while guarding against accidental large blob injection.
-    # Progressive trimming on retry will reduce these further if needed.
-    max_total_chars: int = 600_000
-    max_message_chars: int = 40_000
-    keep_last_messages: int = 50
-    keep_head_chars: int = 15_000
-    keep_tail_chars: int = 5_000
+    # GPT-5.1 supports 272K input tokens (~800K chars). With workspace context
+    # injected into system instructions (never trimmed) and Qdrant shared memory
+    # providing cross-step context, we can keep fewer conversation messages.
+    max_total_chars: int = 400_000
+    max_message_chars: int = 30_000
+    keep_last_messages: int = 30
+    keep_head_chars: int = 12_000
+    keep_tail_chars: int = 4_000
     keep_system_messages: bool = True
     retry_on_context_error: bool = True
 

@@ -3,6 +3,8 @@
 
 """Lazy-initialized async wrapper around the Mem0 vector-store memory backend."""
 
+import os
+
 from mem0 import AsyncMemory
 
 
@@ -17,6 +19,13 @@ async def get_memory(self):
         return self._memory_instance
 
     async def _create_memory(self):
+        endpoint = os.getenv("AZURE_OPENAI_ENDPOINT", "")
+        chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-5.1")
+        embedding_deployment = os.getenv(
+            "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME", "text-embedding-3-large"
+        )
+        api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")
+
         config = {
             "vector_store": {
                 "provider": "redis",
@@ -29,27 +38,24 @@ async def _create_memory(self):
             "llm": {
                 "provider": "azure_openai",
                 "config": {
-                    "model": "gpt-5.1",
+                    "model": chat_deployment,
                     "temperature": 0.1,
-                    "max_tokens": 100000,
+                    "max_tokens": 4000,
                     "azure_kwargs": {
-                        "azure_deployment": "gpt-5.1",
-                        "api_version": "2024-12-01-preview",
-                        "azure_endpoint": "https://aifappframework.cognitiveservices.azure.com/",
+                        "azure_deployment": chat_deployment,
+                        "api_version": api_version,
+                        "azure_endpoint": endpoint,
                     },
                 },
             },
             "embedder": {
                 "provider": "azure_openai",
                 "config": {
-                    "model": "text-embedding-3-large",
+                    "model": embedding_deployment,
                     "azure_kwargs": {
-                        "api_version": "2024-02-01",
-                        "azure_deployment": "text-embedding-3-large",
-                        "azure_endpoint": "https://aifappframework.openai.azure.com/",
-                        "default_headers": {
-                            "CustomHeader": "container migration",
-                        },
+                        "api_version": api_version,
+                        "azure_deployment": embedding_deployment,
+                        "azure_endpoint": endpoint,
                     },
                 },
             },