From 3efd9a412c452be53fba1346c5756867e91d34c7 Mon Sep 17 00:00:00 2001 From: breakerh Date: Mon, 11 May 2026 12:40:44 +0200 Subject: [PATCH 1/2] Add context key support for strict model aliases and enhance logging --- .gitignore | 3 +++ local-llm/lib/00-settings.ps1 | 2 ++ local-llm/lib/20-models.ps1 | 12 ++++++++++-- local-llm/lib/40-parsers.ps1 | 4 ++-- local-llm/lib/50-modelfile.ps1 | 29 +++++++++++++++++++---------- local-llm/lib/60-catalog.ps1 | 10 ++++++---- local-llm/lib/65-claude-launch.ps1 | 13 ++++++++++--- local-llm/lib/80-init.ps1 | 2 +- local-llm/lib/85-shortcuts.ps1 | 12 ++++-------- local-llm/lib/90-wizard.ps1 | 27 +++++++-------------------- 10 files changed, 64 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index 90e0548..f45f129 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,6 @@ $RECYCLE.BIN/ .idea *.backup + +# Log files (dev / runtime temp logs) +*.log diff --git a/local-llm/lib/00-settings.ps1 b/local-llm/lib/00-settings.ps1 index fabad3d..a07e199 100644 --- a/local-llm/lib/00-settings.ps1 +++ b/local-llm/lib/00-settings.ps1 @@ -168,6 +168,8 @@ $script:ClaudeEnvNames = @( "MAX_THINKING_TOKENS", "CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING", "CLAUDE_CODE_MAX_OUTPUT_TOKENS", + "CLAUDE_CODE_MAX_CONTEXT_TOKENS", + "CLAUDE_CODE_AUTO_COMPACT_WINDOW", "CLAUDE_CODE_ATTRIBUTION_HEADER", "DISABLE_PROMPT_CACHING" ) diff --git a/local-llm/lib/20-models.ps1 b/local-llm/lib/20-models.ps1 index 5bd10e5..844f242 100644 --- a/local-llm/lib/20-models.ps1 +++ b/local-llm/lib/20-models.ps1 @@ -300,9 +300,17 @@ function Get-ModelStrictEnabled { } function Get-ModelStrictAliasName { - param([Parameter(Mandatory = $true)][System.Collections.IDictionary]$Def) + param( + [Parameter(Mandatory = $true)][System.Collections.IDictionary]$Def, + [AllowEmptyString()][string]$ContextKey = '' + ) + + if ([string]::IsNullOrWhiteSpace($ContextKey)) { + return "$($Def.Root)-strict" + } - return "$($Def.Root)-strict" + $baseAliasName = Get-ModelAliasName -Def 
$Def -ContextKey $ContextKey + return "$baseAliasName-strict" } function Get-ModelStrictBaseContextKey { diff --git a/local-llm/lib/40-parsers.ps1 b/local-llm/lib/40-parsers.ps1 index 5bb1654..2ea591b 100644 --- a/local-llm/lib/40-parsers.ps1 +++ b/local-llm/lib/40-parsers.ps1 @@ -65,8 +65,8 @@ function Get-ParserLines { # and only OVERRIDES sampling parameters and SYSTEM. Add new model families to # Get-ParserLines without touching this — strict keeps working. # -# num_ctx is intentionally omitted: it's set per-alias by the caller, mirroring -# the base model's default-context value (Get-ModelStrictBaseContextKey). +# num_ctx is intentionally omitted: it's set per-alias by the caller, matching +# the selected base context. function Get-StrictModelfileLines { $lines = New-Object System.Collections.Generic.List[string] diff --git a/local-llm/lib/50-modelfile.ps1 b/local-llm/lib/50-modelfile.ps1 index 8c17204..45bcb47 100644 --- a/local-llm/lib/50-modelfile.ps1 +++ b/local-llm/lib/50-modelfile.ps1 @@ -142,10 +142,10 @@ function Get-StaleModelAliases { } if (Get-ModelStrictEnabled -Def $def) { - $strictName = Get-ModelStrictAliasName -Def $def + foreach ($strictCtxKey in $def.Contexts.Keys) { + $strictName = Get-ModelStrictAliasName -Def $def -ContextKey $strictCtxKey - if (Test-OllamaModelExists -ModelName $strictName) { - $strictCtxKey = Get-ModelStrictBaseContextKey -Def $def + if (-not (Test-OllamaModelExists -ModelName $strictName)) { continue } $strictNumCtx = Get-ModelContextValue -Def $def -ContextKey $strictCtxKey if (-not (Test-StrictAliasFresh -StrictAliasName $strictName -NumCtx $strictNumCtx)) { @@ -207,12 +207,17 @@ function Ensure-ModelAlias { function Ensure-ModelStrictAlias { param( [Parameter(Mandatory = $true)][string]$Key, + [AllowEmptyString()][string]$ContextKey = '', [switch]$ForceRebuild ) $def = Get-ModelDef -Key $Key - $strictName = Get-ModelStrictAliasName -Def $def - $baseCtxKey = Get-ModelStrictBaseContextKey -Def $def + $baseCtxKey = if 
([string]::IsNullOrWhiteSpace($ContextKey)) { + Get-ModelStrictBaseContextKey -Def $def + } else { + Resolve-ModelContextKey -Def $def -ContextKey $ContextKey + } + $strictName = Get-ModelStrictAliasName -Def $def -ContextKey $baseCtxKey $baseName = Get-ModelAliasName -Def $def -ContextKey $baseCtxKey $numCtx = Get-ModelContextValue -Def $def -ContextKey $baseCtxKey @@ -241,7 +246,9 @@ function Ensure-ModelAllAliases { } if (Get-ModelStrictEnabled -Def $def) { - Ensure-ModelStrictAlias -Key $Key -ForceRebuild:$ForceRebuild | Out-Null + foreach ($contextKey in $def.Contexts.Keys) { + Ensure-ModelStrictAlias -Key $Key -ContextKey $contextKey -ForceRebuild:$ForceRebuild | Out-Null + } } } @@ -264,12 +271,14 @@ function Remove-ModelAliases { Remove-Item -Path $stampFile -Force -ErrorAction SilentlyContinue } - # Strict sibling — remove unconditionally, even if Strict is currently + # Strict siblings — remove unconditionally, even if Strict is currently # disabled, so leftovers from a previous build don't linger after the # user toggles Strict off. 
- $strictName = Get-ModelStrictAliasName -Def $def - & ollama rm $strictName 2>$null | Out-Null - Remove-Item -Path (Get-ProfileVersionFile -ModelName $strictName) -Force -ErrorAction SilentlyContinue + foreach ($contextKey in $def.Contexts.Keys) { + $strictName = Get-ModelStrictAliasName -Def $def -ContextKey $contextKey + & ollama rm $strictName 2>$null | Out-Null + Remove-Item -Path (Get-ProfileVersionFile -ModelName $strictName) -Force -ErrorAction SilentlyContinue + } } function Remove-ModelRemotePull { diff --git a/local-llm/lib/60-catalog.ps1 b/local-llm/lib/60-catalog.ps1 index cbb099d..007ce3c 100644 --- a/local-llm/lib/60-catalog.ps1 +++ b/local-llm/lib/60-catalog.ps1 @@ -588,12 +588,14 @@ function Get-AllManagedOllamaNames { $names.Add("${alias}:latest") | Out-Null } - # Strict sibling — managed regardless of the current Strict flag, so + # Strict siblings — managed regardless of the current Strict flag, so # leftovers from a previous build don't get classified as orphans # before the next 'init -Force' or 'removellm' rebuilds the entry. 
- $strictName = Get-ModelStrictAliasName -Def $def - $names.Add($strictName) | Out-Null - $names.Add("${strictName}:latest") | Out-Null + foreach ($contextKey in $def.Contexts.Keys) { + $strictName = Get-ModelStrictAliasName -Def $def -ContextKey $contextKey + $names.Add($strictName) | Out-Null + $names.Add("${strictName}:latest") | Out-Null + } if ($def.SourceType -eq 'remote' -and $def.RemoteModel) { $names.Add($def.RemoteModel) | Out-Null diff --git a/local-llm/lib/65-claude-launch.ps1 b/local-llm/lib/65-claude-launch.ps1 index 009f456..c318522 100644 --- a/local-llm/lib/65-claude-launch.ps1 +++ b/local-llm/lib/65-claude-launch.ps1 @@ -77,7 +77,8 @@ function Set-ClaudeLocalEnv { param( [Parameter(Mandatory = $true)][string]$BaseUrl, [Parameter(Mandatory = $true)][string]$Model, - [bool]$KeepThinking = $false + [bool]$KeepThinking = $false, + [int]$ContextTokens = 0 ) $env:ANTHROPIC_BASE_URL = $BaseUrl @@ -102,6 +103,10 @@ function Set-ClaudeLocalEnv { if ($maxOutputTokens -gt 0) { $env:CLAUDE_CODE_MAX_OUTPUT_TOKENS = [string]$maxOutputTokens } + if ($ContextTokens -gt 0) { + $env:CLAUDE_CODE_MAX_CONTEXT_TOKENS = [string]$ContextTokens + $env:CLAUDE_CODE_AUTO_COMPACT_WINDOW = [string]$ContextTokens + } $env:CLAUDE_CODE_ATTRIBUTION_HEADER = "0" $env:DISABLE_PROMPT_CACHING = "1" @@ -867,9 +872,10 @@ function Start-ClaudeWithLlamaCppModel { throw } + $contextTokens = Get-ModelContextValue -Def $def -ContextKey $ContextKey + if ($Codex) { try { - $contextTokens = Get-ModelContextValue -Def $def -ContextKey $ContextKey $maxOutputTokens = if ($script:Cfg.Contains("LocalModelMaxOutputTokens")) { try { [int]$script:Cfg.LocalModelMaxOutputTokens } catch { 0 } } else { @@ -930,7 +936,7 @@ function Start-ClaudeWithLlamaCppModel { } } - Set-ClaudeLocalEnv -BaseUrl $effectiveBaseUrl -Model $def.Root -KeepThinking:($thinkingPolicy -eq 'keep') + Set-ClaudeLocalEnv -BaseUrl $effectiveBaseUrl -Model $def.Root -KeepThinking:($thinkingPolicy -eq 'keep') -ContextTokens 
$contextTokens $backendLabel = if ($Unshackled) { "unshackled" } else { "claude" } $toolsLabel = if ($LimitTools) { "limited" } else { "all" } @@ -991,3 +997,4 @@ function Start-ClaudeWithLlamaCppModel { Stop-LlamaServer } } + diff --git a/local-llm/lib/80-init.ps1 b/local-llm/lib/80-init.ps1 index 02e286a..03b143c 100644 --- a/local-llm/lib/80-init.ps1 +++ b/local-llm/lib/80-init.ps1 @@ -35,7 +35,7 @@ function Initialize-LocalLLM { Write-Host " rebuilding: $($entry.AliasName)" -ForegroundColor DarkGray if ($entry.Kind -eq 'strict') { - Ensure-ModelStrictAlias -Key $entry.Key -ForceRebuild | Out-Null + Ensure-ModelStrictAlias -Key $entry.Key -ContextKey $entry.Context -ForceRebuild | Out-Null } else { Ensure-ModelAlias -Key $entry.Key -ContextKey $entry.Context -ForceRebuild | Out-Null } diff --git a/local-llm/lib/85-shortcuts.ps1 b/local-llm/lib/85-shortcuts.ps1 index a795344..ebac999 100644 --- a/local-llm/lib/85-shortcuts.ps1 +++ b/local-llm/lib/85-shortcuts.ps1 @@ -21,17 +21,13 @@ function Invoke-ModelShortcut { if (-not (Get-ModelStrictEnabled -Def $def)) { throw "Model '$Key' has Strict=false in the catalog; no strict sibling alias is built. Re-import via addllm and answer Yes to the strict prompt, or drop -Strict." } - - if (-not [string]::IsNullOrWhiteSpace($ContextKey)) { - throw "-Strict and -Ctx are mutually exclusive. Strict siblings are pinned to the model's strict-base context; drop -Ctx." - } } # Q8 KV check sizes against the context that will actually be used. Strict - # siblings derive their num_ctx from Get-ModelStrictBaseContextKey, not the - # caller-supplied -Ctx (which is rejected above). + # without -Ctx keeps the legacy strict-base context; strict with -Ctx uses + # that context-specific strict alias. 
if ($UseQ8) { - $q8CtxKey = if ($Strict) { Get-ModelStrictBaseContextKey -Def $def } else { $ContextKey } + $q8CtxKey = if ($Strict -and [string]::IsNullOrWhiteSpace($ContextKey)) { Get-ModelStrictBaseContextKey -Def $def } else { $ContextKey } $numCtx = Get-ModelContextValue -Def $def -ContextKey $q8CtxKey $maxQ8 = Get-Q8KvMaxContext @@ -45,7 +41,7 @@ function Invoke-ModelShortcut { } $modelName = if ($Strict) { - Ensure-ModelStrictAlias -Key $Key + Ensure-ModelStrictAlias -Key $Key -ContextKey $ContextKey } else { Ensure-ModelAlias -Key $Key -ContextKey $ContextKey } diff --git a/local-llm/lib/90-wizard.ps1 b/local-llm/lib/90-wizard.ps1 index edd603a..088e70e 100644 --- a/local-llm/lib/90-wizard.ps1 +++ b/local-llm/lib/90-wizard.ps1 @@ -1112,17 +1112,17 @@ function Invoke-LLMSelection { } "benchmark" { - $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey } + $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey -ContextKey $ContextKey } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey } Test-OllamaSpeed -Model $modelName -Runs 3 } "setup" { - $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey -ForceRebuild } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey -ForceRebuild } + $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey -ContextKey $ContextKey -ForceRebuild } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey -ForceRebuild } Write-Host "Created/rebuilt alias: $modelName" -ForegroundColor Green } "show" { - $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey } + $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey -ContextKey $ContextKey } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey } & ollama show $modelName } @@ -1195,15 +1195,7 @@ function Start-LLMWizardClassic { if ($null -eq 
$strict) { $step = 'backend'; break } # back $useStrict = [bool]$strict - if ($useStrict) { - # Strict pins context to Get-ModelStrictBaseContextKey via the - # alias build; the empty contextKey is correct here because the - # shortcut layer rejects -Strict + -Ctx together. - $contextKey = "" - $step = 'action' - } else { - $step = 'context' - } + $step = 'context' } 'context' { @@ -1219,7 +1211,7 @@ function Start-LLMWizardClassic { 'action' { $action = Select-LLMAction -Backend $backend if ([string]::IsNullOrWhiteSpace($action)) { - $step = if ($useStrict) { 'strict' } else { 'context' } + $step = 'context' break } @@ -1905,12 +1897,7 @@ function Start-LLMWizardSpectre { if ($null -eq $strict) { $step = 'backend'; break } $useStrict = [bool]$strict - if ($useStrict) { - $contextKey = "" - $step = 'action' - } else { - $step = 'context' - } + $step = 'context' } 'context' { @@ -1931,7 +1918,7 @@ function Start-LLMWizardSpectre { Select-LLMActionSpectre -Backend $captured } if ([string]::IsNullOrWhiteSpace($action)) { - $step = if ($useStrict) { 'strict' } else { 'context' } + $step = 'context' break } From 73e0e0f2dca2ee8dba99bb192d122ba171968dd1 Mon Sep 17 00:00:00 2001 From: breakerh Date: Wed, 13 May 2026 01:16:25 +0200 Subject: [PATCH 2/2] Add context key support for strict model aliases and enhance logging --- README.md | 5 +- local-llm/lib/10-helpers.ps1 | 10 + local-llm/lib/20-models.ps1 | 235 ++++++++++++++++ local-llm/lib/35-backend.ps1 | 4 +- local-llm/lib/41-llamacpp-args.ps1 | 16 ++ local-llm/lib/50-modelfile.ps1 | 31 +- local-llm/lib/55-huggingface.ps1 | 52 +++- local-llm/lib/60-catalog.ps1 | 56 +++- local-llm/lib/65-claude-launch.ps1 | 68 ++++- local-llm/lib/71-benchpilot-bridge.ps1 | 19 +- local-llm/lib/72-llamacpp-tuner.ps1 | 3 +- local-llm/lib/75-display.ps1 | 16 +- local-llm/lib/85-shortcuts.ps1 | 27 +- local-llm/lib/90-wizard.ps1 | 376 ++++++++++++++++++++++--- local-llm/lib/99-entrypoints.ps1 | 4 +- 15 files changed, 857 insertions(+), 65 
deletions(-) diff --git a/README.md b/README.md index a54fbf6..f478f1f 100644 --- a/README.md +++ b/README.md @@ -767,7 +767,9 @@ Each step has a Back option (`0`/Escape in native, `[[Back]]` in Spectre); the Spectre wizard wraps each prompt in `Invoke-LLMWizardStep` and logs the full exception trace to `~/.local-llm/wizard-errors.log` if anything throws, so a Spectre live-display refresh can't scroll the trace off screen. Inspect -with `llmlogerr [-Lines 80]`; reset with `llmlogerrclear`. +with `llmlogerr [-Lines 80]`; reset with `llmlogerrclear`. The launch debug +trace (vision, proxy, llama-server, Claude launches) is recorded in +`~/.local-llm/launch.log` and tailable with `llmlog [-Lines 80]`. After a model is selected, the Spectre wizard waits briefly before drawing the next prompt and retries one fast-empty transition. Tune that guard with @@ -809,6 +811,7 @@ setups, so they stay. - **Stale aliases after editing a parser** → `init -Stale` rebuilds only the aliases whose Modelfile content hash drifted. - **Spectre wizard crashed or stalls** → `llmlogerr` for the full trace; use + `llmlog` for launch/debug details (vision, proxy, llama-server, Claude); `llmc` for the native picker or set `$env:LOCAL_LLM_NO_SPECTRE=1` to disable Spectre everywhere. If the next prompt appears too slowly after selecting a model, raise `$env:LOCAL_LLM_SPECTRE_PROMPT_COOLDOWN_MS`. 
diff --git a/local-llm/lib/10-helpers.ps1 b/local-llm/lib/10-helpers.ps1
index f05fa85..ec14b0e 100644
--- a/local-llm/lib/10-helpers.ps1
+++ b/local-llm/lib/10-helpers.ps1
@@ -89,10 +89,12 @@ function Download-HuggingFaceFile {
         "uvx-hf" {
             $oldPythonUtf8 = $env:PYTHONUTF8
             $oldPythonIoEncoding = $env:PYTHONIOENCODING
+            $oldHfSsl = $env:HF_HUB_DISABLE_SSL_VERIFICATION
             try {
                 $env:PYTHONUTF8 = "1"
                 $env:PYTHONIOENCODING = "utf-8"
+                if ($env:LOCAL_LLM_INSECURE_TLS -eq '1') { $env:HF_HUB_DISABLE_SSL_VERIFICATION = "1" } # explicit opt-in only; TLS verification stays on by default
                 & uvx hf download $Repo $normalizedFileName --local-dir $DestinationFolder | Out-Host
@@ -119,6 +121,13 @@ function Download-HuggingFaceFile {
                 else {
                     Remove-Item Env:PYTHONIOENCODING -ErrorAction SilentlyContinue
                 }
+
+                if ($null -ne $oldHfSsl) {
+                    $env:HF_HUB_DISABLE_SSL_VERIFICATION = $oldHfSsl
+                }
+                else {
+                    Remove-Item Env:HF_HUB_DISABLE_SSL_VERIFICATION -ErrorAction SilentlyContinue
+                }
             }
         }
@@ -144,6 +153,7 @@ function Download-HuggingFaceFile {
     $request.Method = "GET"
     $request.AllowAutoRedirect = $true
     $request.UserAgent = "LocalLLMProfile/1.0"
+    if ($env:LOCAL_LLM_INSECURE_TLS -eq '1') { $request.ServerCertificateValidationCallback = { $true } } # explicit opt-in only; never accept unverified certificates by default
     if ($existingBytes -gt 0) {
         $request.AddRange($existingBytes)
diff --git a/local-llm/lib/20-models.ps1 b/local-llm/lib/20-models.ps1
index 844f242..4bf4170 100644
--- a/local-llm/lib/20-models.ps1
+++ b/local-llm/lib/20-models.ps1
@@ -354,3 +354,238 @@ function Get-ModelStrictBaseContextKey {
     return $ggufPath
 }
+
+function Get-ModelVisionModulePath {
+    # Resolves the full path to the mmproj.gguf (multimodal vision module) for a model.
+    # Uses $Def.VisionModule when set, else auto-detects mmproj*.gguf locally, else takes the first mmproj
+    # listed on HF; downloads on demand. Returns $null only when no mmproj is found locally or on HF.
+    param(
+        [Parameter(Mandatory = $true)][string]$Key,
+        [Parameter(Mandatory = $true)][System.Collections.IDictionary]$Def,
+        [ValidateSet('ollama', 'llamacpp')][string]$Backend = 'ollama'
+    )
+
+    $mmprojFile = $null
+    $autoDetected = $false
+
+    if ($Def.Contains('VisionModule') -and -not [string]::IsNullOrWhiteSpace($Def.VisionModule)) {
+        $mmprojFile = [string]$Def.VisionModule
+        Write-LaunchLog "VisionModule configured: $mmprojFile" 'VISION'
+    } else {
+        # Auto-detect: scan for mmproj*.gguf in the model folder
+        $folder = Get-ModelFolder -Key $Key -Def $Def -Backend $Backend
+        Write-LaunchLog "No VisionModule configured — scanning for mmproj*.gguf in $folder" 'VISION'
+        $localMmproj = Get-ChildItem -Path $folder -Filter 'mmproj*.gguf' -File -ErrorAction SilentlyContinue | Select-Object -First 1
+        if ($localMmproj) {
+            $mmprojFile = $localMmproj.Name
+            $autoDetected = $true
+            Write-LaunchLog "Auto-detected mmproj: $($localMmproj.Name)" 'VISION'
+        } else {
+            # Also check Ollama root for llama.cpp backend
+            if ($Backend -eq 'llamacpp') {
+                $ollamaFolder = Join-Path $script:Cfg.OllamaCommunityRoot $Def.Root
+                Write-LaunchLog "Scanning Ollama root for mmproj*.gguf: $ollamaFolder" 'VISION'
+                $localMmproj = Get-ChildItem -Path $ollamaFolder -Filter 'mmproj*.gguf' -File -ErrorAction SilentlyContinue | Select-Object -First 1
+                if ($localMmproj) {
+                    $mmprojFile = $localMmproj.Name
+                    $autoDetected = $true
+                    Write-LaunchLog "Auto-detected mmproj in Ollama root: $($localMmproj.Name)" 'VISION'
+                }
+            }
+        }
+        if (-not $mmprojFile) {
+            if ($Def.Contains('Repo') -and -not [string]::IsNullOrWhiteSpace($Def.Repo)) {
+                Write-LaunchLog "No local mmproj found, querying HF: $($Def.Repo)" 'VISION'
+                $hfFiles = Get-HuggingFaceMmprojFiles -Repo $Def.Repo
+                if ($null -eq $hfFiles) {
+                    Write-LaunchLog "HF query failed (network/SSL) — skipping HF fallback for $Key" 'WARN'
+                } elseif ($hfFiles.Count -gt 0) {
+                    $mmprojFile = @($hfFiles.Keys)[0]
+                    Write-LaunchLog "Found mmproj on HF: $mmprojFile" 'VISION'
+                }
+            }
+            if (-not $mmprojFile) {
+                Write-LaunchLog "No mmproj found locally or on HF for $Key" 'WARN'
+                return $null
+            }
+        }
+    }
+
+    $folder = Get-ModelFolder -Key $Key -Def $Def -Backend $Backend
+
+    # For llama.cpp, try to hardlink from Ollama root first.
+    if ($Backend -eq 'llamacpp') {
+        $llamaPath = Resolve-HuggingFaceLocalPath -DestinationFolder $folder -FileName $mmprojFile
+        if (Test-Path $llamaPath) {
+            Write-LaunchLog "Found existing mmproj in llama.cpp folder: $llamaPath" 'VISION'
+            return $llamaPath
+        }
+
+        $ollamaFolder = Join-Path $script:Cfg.OllamaCommunityRoot $Def.Root
+        $ollamaPath = Resolve-HuggingFaceLocalPath -DestinationFolder $ollamaFolder -FileName $mmprojFile
+        if (Test-Path $ollamaPath) {
+            Write-LaunchLog "Found mmproj in Ollama root, linking to llama.cpp: $ollamaPath -> $llamaPath" 'VISION'
+            try {
+                New-Item -ItemType HardLink -Path $llamaPath -Target $ollamaPath -ErrorAction Stop | Out-Null
+                Write-Host "Hardlinked existing mmproj: $llamaPath -> $ollamaPath" -ForegroundColor DarkGreen
+                return $llamaPath
+            } catch {
+                try {
+                    Copy-Item -LiteralPath $ollamaPath -Destination $llamaPath -ErrorAction Stop | Out-Null
+                    Write-Host "Copied existing mmproj (cross-volume): $llamaPath" -ForegroundColor DarkGreen
+                    return $llamaPath
+                } catch {
+                    Write-Warning "Could not reuse Ollama mmproj at $ollamaPath : $($_.Exception.Message)"
+                }
+            }
+        } else {
+            Write-LaunchLog "mmproj not in Ollama root, will download to $folder" 'VISION'
+        }
+    }
+
+    if ($autoDetected) {
+        Write-LaunchLog "Reusing auto-detected mmproj: $mmprojFile" 'VISION'
+        $localPath = Resolve-HuggingFaceLocalPath -DestinationFolder $folder -FileName $mmprojFile
+        if (Test-Path $localPath) {
+            return $localPath
+        }
+    }
+
+    Write-LaunchLog "Downloading mmproj from HF repo: $($Def.Repo), file: $mmprojFile" 'VISION'
+    $mmprojPath = Download-HuggingFaceFile -Repo $Def.Repo -FileName $mmprojFile -DestinationFolder $folder
+
+    if ($mmprojPath -is [array]) {
+        $mmprojPath = $mmprojPath[-1]
+    }
+
+    if (-not ($mmprojPath -is
[string])) {
+        throw "Expected mmproj path to be a string."
+    }
+
+    Write-LaunchLog "Resolved mmproj path: $mmprojPath" 'VISION'
+    return $mmprojPath
+}
+
+function Test-ModelVisionModuleAvailable {
+    # Checks whether the mmproj.gguf for a model exists locally, and if not,
+    # whether the HF repo lists one. Returns a hashtable with:
+    #   Local        : $true/$false (file exists in the model folder or Ollama root)
+    #   AvailableOnHF: $true/$false (the HF repo lists at least one mmproj file)
+    #   Filename     : '' (the mmproj filename, when known)
+    param(
+        [Parameter(Mandatory = $true)][string]$Key,
+        [Parameter(Mandatory = $true)][System.Collections.IDictionary]$Def,
+        [ValidateSet('ollama', 'llamacpp')][string]$Backend = 'ollama'
+    )
+
+    $result = @{
+        Local = $false
+        AvailableOnHF = $false
+        Filename = ''
+    }
+
+    # Determine which mmproj filename to look for. If VisionModule is configured, use that;
+    # otherwise leave it empty so the local scans / HF listing below pick the first mmproj found.
+    $mmprojFile = if ($Def.Contains('VisionModule') -and -not [string]::IsNullOrWhiteSpace($Def.VisionModule)) {
+        Write-LaunchLog "[vision/test] VisionModule configured: $($Def.VisionModule)" 'VISION'
+        [string]$Def.VisionModule
+    } else {
+        Write-LaunchLog "[vision/test] No VisionModule configured, will auto-detect" 'VISION'
+        ''
+    }
+
+    if ($mmprojFile) {
+        $result.Filename = $mmprojFile
+    }
+    $folder = Get-ModelFolder -Key $Key -Def $Def -Backend $Backend
+    Write-LaunchLog "[vision/test] Checking local mmproj for $Key (backend=$Backend, folder=$folder)" 'VISION'
+
+    # Check llama.cpp folder first.
+    if ($Backend -eq 'llamacpp') {
+        if ($mmprojFile) {
+            $llamaPath = Resolve-HuggingFaceLocalPath -DestinationFolder $folder -FileName $mmprojFile
+            Write-LaunchLog "[vision/test] llama.cpp: checking $($llamaPath) ..." 'VISION'
+            if (Test-Path $llamaPath) {
+                Write-LaunchLog "[vision/test] Found in llama.cpp folder" 'VISION'
+                $result.Local = $true
+                return $result
+            }
+        } else {
+            # No VisionModule configured — scan for any mmproj files locally
+            $localMmproj = Get-ChildItem -Path $folder -Filter 'mmproj*.gguf' -File -ErrorAction SilentlyContinue | Select-Object -First 1
+            if ($localMmproj) {
+                Write-LaunchLog "[vision/test] Auto-detected $($localMmproj.Name) in llama.cpp folder" 'VISION'
+                $result.Local = $true
+                $result.Filename = $localMmproj.Name
+                return $result
+            }
+        }
+
+        # Also check Ollama root as fallback.
+        $ollamaFolder = Join-Path $script:Cfg.OllamaCommunityRoot $Def.Root
+        if ($mmprojFile) {
+            $ollamaPath = Resolve-HuggingFaceLocalPath -DestinationFolder $ollamaFolder -FileName $mmprojFile
+            Write-LaunchLog "[vision/test] Ollama root: checking $($ollamaPath) ..." 'VISION'
+            if (Test-Path $ollamaPath) {
+                Write-LaunchLog "[vision/test] Found in Ollama root" 'VISION'
+                $result.Local = $true
+                return $result
+            }
+        } else {
+            $localMmproj = Get-ChildItem -Path $ollamaFolder -Filter 'mmproj*.gguf' -File -ErrorAction SilentlyContinue | Select-Object -First 1
+            if ($localMmproj) {
+                Write-LaunchLog "[vision/test] Auto-detected $($localMmproj.Name) in Ollama root" 'VISION'
+                $result.Local = $true
+                $result.Filename = $localMmproj.Name
+                return $result
+            }
+        }
+    }
+
+    # Check Ollama root for ollama backend.
+    if ($Backend -eq 'ollama') {
+        $ollamaFolder = Join-Path $script:Cfg.OllamaCommunityRoot $Def.Root
+        if ($mmprojFile) {
+            $ollamaPath = Resolve-HuggingFaceLocalPath -DestinationFolder $ollamaFolder -FileName $mmprojFile
+            Write-LaunchLog "[vision/test] Ollama: checking $($ollamaPath) ..." 'VISION'
+            if (Test-Path $ollamaPath) {
+                Write-LaunchLog "[vision/test] Found in Ollama folder" 'VISION'
+                $result.Local = $true
+                return $result
+            }
+        } else {
+            $localMmproj = Get-ChildItem -Path $ollamaFolder -Filter 'mmproj*.gguf' -File -ErrorAction SilentlyContinue | Select-Object -First 1
+            if ($localMmproj) {
+                Write-LaunchLog "[vision/test] Auto-detected $($localMmproj.Name) in Ollama root" 'VISION'
+                $result.Local = $true
+                $result.Filename = $localMmproj.Name
+                return $result
+            }
+        }
+    }
+
+    Write-LaunchLog "[vision/test] No local mmproj found for $Key, checking HuggingFace..." 'VISION'
+
+    # Not local — check HF for availability.
+    if ($Def.Contains('Repo') -and -not [string]::IsNullOrWhiteSpace($Def.Repo)) {
+        $mmprojFiles = Get-HuggingFaceMmprojFiles -Repo $Def.Repo
+        if ($null -eq $mmprojFiles) {
+            Write-LaunchLog "[vision/test] HF check skipped for $Key (network/SSL error)" 'WARN'
+        } elseif ($mmprojFiles.Count -gt 0) {
+            Write-LaunchLog "[vision/test] HF has $($mmprojFiles.Count) mmproj file(s): $($mmprojFiles.Keys -join ', ')" 'VISION'
+            $result.AvailableOnHF = $true
+            # No VisionModule configured: pick the first listed mmproj. The listing is an ordered dictionary, so test membership with Contains().
+            if (-not $mmprojFile) {
+                $mmprojFile = @($mmprojFiles.Keys)[0]
+                $result.Filename = $mmprojFile
+            } elseif (-not $mmprojFiles.Contains($mmprojFile)) {
+                Write-LaunchLog "[vision/test] Configured VisionModule '$mmprojFile' is not among the mmproj files listed on HF" 'WARN'
+            }
+        } else {
+            Write-LaunchLog "[vision/test] No mmproj files on HF for $($Def.Repo)" 'VISION'
+        }
+    }
+
+    Write-LaunchLog "[vision/test] Result for ${Key}: Local=$($result.Local), HF=$($result.AvailableOnHF), File='$($result.Filename)'" 'VISION'
+    return $result
+}
diff --git a/local-llm/lib/35-backend.ps1 b/local-llm/lib/35-backend.ps1
index befc6fa..a2fd621 100644
--- a/local-llm/lib/35-backend.ps1
+++ b/local-llm/lib/35-backend.ps1
@@ -54,6 +54,7 @@ function Invoke-Backend {
         [switch]$Unshackled,
         [switch]$Codex,
         [switch]$Strict,
+        [switch]$UseVision,
         [switch]$AutoBest,
         [ValidateSet('auto','pure','balanced','short','long')][string]$AutoBestProfile =
'auto', [string[]]$ExtraArgs, @@ -91,7 +92,7 @@ function Invoke-Backend { 'launch-claude' { switch ($Backend) { 'ollama' { - Invoke-ModelShortcut -Key $Key -ContextKey $ContextKey -UseQ8:$UseQ8 -Unshackled:$Unshackled -Codex:$Codex -Strict:$Strict -ExtraUnshackledArgs $ExtraUnshackledArgs + Invoke-ModelShortcut -Key $Key -ContextKey $ContextKey -UseQ8:$UseQ8 -Unshackled:$Unshackled -Codex:$Codex -Strict:$Strict -UseVision:$UseVision -ExtraUnshackledArgs $ExtraUnshackledArgs } 'llamacpp' { $mode = Resolve-LlamaCppMode -Mode $LlamaCppMode @@ -105,6 +106,7 @@ function Invoke-Backend { -Unshackled:$Unshackled ` -Codex:$Codex ` -Strict:$Strict ` + -UseVision:$UseVision ` -AutoBest:$AutoBest ` -AutoBestProfile $AutoBestProfile ` -ExtraArgs $ExtraArgs ` diff --git a/local-llm/lib/41-llamacpp-args.ps1 b/local-llm/lib/41-llamacpp-args.ps1 index 1823014..72e2d0d 100644 --- a/local-llm/lib/41-llamacpp-args.ps1 +++ b/local-llm/lib/41-llamacpp-args.ps1 @@ -69,10 +69,13 @@ function Build-LlamaServerArgs { [int]$Threads, [int]$ThreadsBatch, [Nullable[bool]]$FlashAttn, + [switch]$SwaFull, + [Nullable[bool]]$CachePrompt, [ValidateSet('none', 'layer', 'row')][string]$SplitMode, [string]$ChatTemplate, [string]$ThinkingPolicy, [switch]$Strict, + [string]$VisionModulePath, [string[]]$ExtraArgs ) @@ -172,6 +175,13 @@ function Build-LlamaServerArgs { $argList.Add($(if ($FlashAttn) { 'on' } else { 'off' })) | Out-Null } + if ($PSBoundParameters.ContainsKey('SwaFull') -and $SwaFull) { + $argList.Add('--swa-full') | Out-Null + } + if ($PSBoundParameters.ContainsKey('CachePrompt') -and $null -ne $CachePrompt -and $CachePrompt) { + $argList.Add('--cache-prompt') | Out-Null + } + # Multi-GPU split mode. Emitted only when caller passes a value. if ($PSBoundParameters.ContainsKey('SplitMode') -and -not [string]::IsNullOrWhiteSpace($SplitMode)) { $argList.Add('--split-mode') | Out-Null @@ -198,6 +208,12 @@ function Build-LlamaServerArgs { $argList.Add($a) | Out-Null } + # Vision / multimodal. 
--mmproj loads the projection layer for vision models. + if (-not [string]::IsNullOrWhiteSpace($VisionModulePath)) { + $argList.Add('--mmproj') | Out-Null + $argList.Add($VisionModulePath) | Out-Null + } + # Reasoning routing. if ([string]::IsNullOrWhiteSpace($ThinkingPolicy)) { $ThinkingPolicy = if ($Def.Contains('ThinkingPolicy') -and -not [string]::IsNullOrWhiteSpace($Def.ThinkingPolicy)) { [string]$Def.ThinkingPolicy } else { 'strip' } diff --git a/local-llm/lib/50-modelfile.ps1 b/local-llm/lib/50-modelfile.ps1 index 45bcb47..101bc26 100644 --- a/local-llm/lib/50-modelfile.ps1 +++ b/local-llm/lib/50-modelfile.ps1 @@ -7,7 +7,8 @@ function New-OllamaModelFromSource { [Parameter(Mandatory = $true)][string]$ModelName, [Parameter(Mandatory = $true)][string]$FromSource, [Parameter(Mandatory = $true)][string]$Parser, - [Nullable[int]]$NumCtx + [Nullable[int]]$NumCtx, + [string]$VisionModulePath ) $safeName = ($ModelName -replace '[:/\\]', '_') @@ -16,6 +17,10 @@ function New-OllamaModelFromSource { $content = New-Object System.Collections.Generic.List[string] $content.Add("FROM $FromSource") + if (-not [string]::IsNullOrWhiteSpace($VisionModulePath)) { + $content.Add("VISION $VisionModulePath") + } + foreach ($line in (Get-ParserLines -Parser $Parser)) { $content.Add($line) } @@ -46,7 +51,8 @@ function New-OllamaStrictAlias { param( [Parameter(Mandatory = $true)][string]$BaseAliasName, [Parameter(Mandatory = $true)][string]$StrictAliasName, - [Nullable[int]]$NumCtx + [Nullable[int]]$NumCtx, + [AllowEmptyString()][string]$VisionModulePath = '' ) $safeName = ($StrictAliasName -replace '[:/\\]', '_') @@ -55,6 +61,10 @@ function New-OllamaStrictAlias { $content = New-Object System.Collections.Generic.List[string] $content.Add("FROM ${BaseAliasName}:latest") + if (-not [string]::IsNullOrWhiteSpace($VisionModulePath)) { + $content.Add("VISION $VisionModulePath") + } + foreach ($line in (Get-StrictModelfileLines)) { $content.Add($line) } @@ -167,7 +177,8 @@ function 
Ensure-ModelAlias { param( [Parameter(Mandatory = $true)][string]$Key, [Parameter(Mandatory = $true)][AllowEmptyString()][string]$ContextKey, - [switch]$ForceRebuild + [switch]$ForceRebuild, + [string]$VisionModulePath ) $def = Get-ModelDef -Key $Key @@ -186,14 +197,14 @@ function Ensure-ModelAlias { throw "ollama pull failed for '$($def.RemoteModel)'" } - New-OllamaModelFromSource -ModelName $modelName -FromSource $def.RemoteModel -Parser $def.Parser -NumCtx $numCtx + New-OllamaModelFromSource -ModelName $modelName -FromSource $def.RemoteModel -Parser $def.Parser -NumCtx $numCtx -VisionModulePath $VisionModulePath } "gguf" { $ggufPath = Get-ModelGgufPath -Key $Key -Def $def $posixPath = Convert-ToPosixPath $ggufPath - New-OllamaModelFromSource -ModelName $modelName -FromSource $posixPath -Parser $def.Parser -NumCtx $numCtx + New-OllamaModelFromSource -ModelName $modelName -FromSource $posixPath -Parser $def.Parser -NumCtx $numCtx -VisionModulePath $VisionModulePath } default { @@ -221,15 +232,18 @@ function Ensure-ModelStrictAlias { $baseName = Get-ModelAliasName -Def $def -ContextKey $baseCtxKey $numCtx = Get-ModelContextValue -Def $def -ContextKey $baseCtxKey + # Resolve vision module so the strict alias gets an explicit VISION instruction. + $visionModulePath = Get-ModelVisionModulePath -Key $Key -Def $def -Backend ollama + if (-not $ForceRebuild -and (Test-StrictAliasFresh -StrictAliasName $strictName -NumCtx $numCtx)) { return $strictName } # The strict sibling derives FROM the base alias. Make sure the base # exists first; build it if missing (Ensure-ModelAlias is idempotent). 
Ensure-ModelAlias -Key $Key -ContextKey $baseCtxKey -VisionModulePath $visionModulePath | Out-Null
+    New-OllamaStrictAlias -BaseAliasName $baseName -StrictAliasName $strictName -NumCtx $numCtx -VisionModulePath $visionModulePath
     return $strictName
 }
@@ -240,9 +254,10 @@ function Ensure-ModelAllAliases {
     )
     $def = Get-ModelDef -Key $Key
+    $visionModulePath = Get-ModelVisionModulePath -Key $Key -Def $def -Backend ollama
     foreach ($contextKey in $def.Contexts.Keys) {
-        Ensure-ModelAlias -Key $Key -ContextKey $contextKey -ForceRebuild:$ForceRebuild | Out-Null
+        Ensure-ModelAlias -Key $Key -ContextKey $contextKey -ForceRebuild:$ForceRebuild -VisionModulePath $visionModulePath | Out-Null
     }
     if (Get-ModelStrictEnabled -Def $def) {
diff --git a/local-llm/lib/55-huggingface.ps1 b/local-llm/lib/55-huggingface.ps1
index c267992..162bd2b 100644
--- a/local-llm/lib/55-huggingface.ps1
+++ b/local-llm/lib/55-huggingface.ps1
@@ -23,7 +23,55 @@ function Get-HuggingFaceModelInfo {
     param([Parameter(Mandatory = $true)][string]$Repo)
     $url = "https://huggingface.co/api/models/$Repo`?blobs=true"
-    return Invoke-RestMethod -Uri $url -UseBasicParsing
+    return Invoke-RestMethod -Uri $url -UseBasicParsing -SkipCertificateCheck:($env:LOCAL_LLM_INSECURE_TLS -eq '1') # certificate checks are skipped only on explicit opt-in
+}
+
+function Get-HuggingFaceMmprojFiles {
+    # Lists the top-level files of a HF repo that match ^mmproj(-.*)?\.gguf$ (subdirectories are skipped).
+    # Returns an ordered dictionary of filename -> sizeGB (empty when none match; test keys with
+    # .Contains(), OrderedDictionary has no .ContainsKey()), or $null when the HF query throws.
+    param([Parameter(Mandatory = $true)][string]$Repo)
+
+    $map = [ordered]@{}
+
+    try {
+        Write-LaunchLog "Querying HuggingFace for mmproj files in repo: $Repo" 'INFO'
+        $info = Get-HuggingFaceModelInfo -Repo $Repo
+
+        if (-not $info.siblings) {
+            Write-LaunchLog "No siblings found in HF response for $Repo" 'INFO'
+            return $map
+        }
+
+        foreach ($s in $info.siblings) {
+            $name = $s.rfilename
+            if (-not $name) { continue } # nothing to log or match for an entry without a filename
+            Write-LaunchLog "Checking file: $name" 'INFO'
+            if ($name -match '/') { continue } # skip subdirs
+            if ($name -notmatch '^mmproj(?:-.*)?\.gguf$') {
+                Write-LaunchLog "Skipping non-mmproj file: $name" 'INFO'
+                continue
+            }
+
+            $bytes = 0L
+            if ($s.PSObject.Properties.Match('lfs').Count -gt 0 -and $s.lfs -and $s.lfs.size) {
+                try { $bytes = [long]$s.lfs.size } catch { }
+            }
+            if ($bytes -le 0 -and $s.PSObject.Properties.Match('size').Count -gt 0 -and $s.size) {
+                try { $bytes = [long]$s.size } catch { }
+            }
+
+            $sizeGB = if ($bytes -gt 0) { [math]::Round($bytes / 1000000000, 1) } else { 0 }
+            $map[$name] = $sizeGB
+            Write-LaunchLog "Found mmproj: $name$(if ($sizeGB -gt 0) { " ($sizeGB GB)" })" 'INFO'
+        }
+
+        Write-LaunchLog "Total mmproj files found for ${Repo}: $($map.Count)" 'INFO'
+        return $map
+    } catch {
+        Write-LaunchLog "HF mmproj query failed for ${Repo} (network/SSL): $($_.Exception.Message)" 'WARN'
+        return $null
+    }
+}
 function Get-HuggingFaceModelFiles {
@@ -76,7 +124,7 @@ function Get-HuggingFaceReadme {
     $url = "https://huggingface.co/$Repo/raw/main/README.md"
     try {
-        $resp = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 10
+        $resp = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 10 -SkipCertificateCheck:($env:LOCAL_LLM_INSECURE_TLS -eq '1') # certificate checks are skipped only on explicit opt-in
         return [string]$resp.Content
     } catch {
         return $null
diff --git a/local-llm/lib/60-catalog.ps1 b/local-llm/lib/60-catalog.ps1
index 007ce3c..7c4a73d 100644
--- a/local-llm/lib/60-catalog.ps1
+++ b/local-llm/lib/60-catalog.ps1
@@ -249,6 +249,36 @@ function Add-LocalLLMModel {
         $entry.LlamaCppCompatible =
[bool]$LlamaCppCompatible } + # Check for mmproj (multimodal vision module). Prompt the user to download. + if ($PSBoundParameters.ContainsKey('Mmproj')) { + $mmprojFile = [string]$Mmproj + if (-not [string]::IsNullOrWhiteSpace($mmprojFile)) { + $entry.VisionModule = $mmprojFile + } + } + else { + $mmprojFiles = Get-HuggingFaceMmprojFiles -Repo $repo + if ($mmprojFiles.Count -gt 0) { + $mmprojNames = @($mmprojFiles.Keys) -join ', ' + Write-Host "Vision modules (mmproj.gguf) found: $mmprojNames" -ForegroundColor DarkCyan + + $answer = (Read-Host "Download a vision module? [Y/n]").Trim().ToLowerInvariant() + if ($answer -notin @('n', 'no')) { + # Default to the first mmproj file name; the user may type a different one + $chosen = [string]($mmprojFiles.Keys | Select-Object -First 1) + $chosen = Read-Host "Which one? (default: $chosen)" + if ([string]::IsNullOrWhiteSpace($chosen)) { + $chosen = [string]($mmprojFiles.Keys | Select-Object -First 1) + } + + if ($mmprojFiles.Contains($chosen)) { + $entry.VisionModule = $chosen + Write-Host "Vision module set to: $chosen" -ForegroundColor DarkGray + } + } + } + } + if ($cfg.Models.Contains($Key)) { $cfg.Models.Remove($Key) } @@ -308,6 +338,7 @@ function addllm { [string[]]$Tags, [string[]]$ExtraArgs, [Nullable[bool]]$LlamaCppCompatible, + [string]$Mmproj, [switch]$Force ) @@ -406,7 +437,30 @@ function Update-LocalLLMModelQuants { if ($addedCount -eq 0) { Write-Host " (no new quants — entry already has every recognized GGUF in $repo)" -ForegroundColor DarkGray - return + } + + # Also check for mmproj files on updatellm. + $mmprojFiles = Get-HuggingFaceMmprojFiles -Repo $repo + if ($mmprojFiles.Count -gt 0) { + if (-not $entry.ContainsKey('VisionModule')) { + $mmprojNames = @($mmprojFiles.Keys) -join ', ' + Write-Host "Vision modules (mmproj.gguf) found: $mmprojNames" -ForegroundColor DarkCyan + + $answer = (Read-Host "Download a vision module? [Y/n]").Trim().ToLowerInvariant() + if ($answer -notin @('n', 'no')) { + $chosen = Read-Host "Which one? 
(default: $($mmprojFiles.Keys | Select-Object -First 1))" + if ([string]::IsNullOrWhiteSpace($chosen)) { + $chosen = [string]($mmprojFiles.Keys | Select-Object -First 1) + } + if ($mmprojFiles.Contains($chosen)) { + $entry.VisionModule = $chosen + Write-Host "Vision module set to: $chosen" -ForegroundColor DarkGray + } + } + } + else { + Write-Host "Vision module already configured: $($entry.VisionModule)" -ForegroundColor DarkGray + } } if ($DryRun) { diff --git a/local-llm/lib/65-claude-launch.ps1 b/local-llm/lib/65-claude-launch.ps1 index c318522..62cfbb5 100644 --- a/local-llm/lib/65-claude-launch.ps1 +++ b/local-llm/lib/65-claude-launch.ps1 @@ -65,7 +65,7 @@ function Restore-ClaudeEnvBackup { } $script:ClaudeEnvBackup = @{} - Write-Verbose "Claude env vars restored." + Write-LaunchLog "Claude env vars restored." 'INFO' } function Set-ClaudeLocalEnv { @@ -134,19 +134,24 @@ function Start-NoThinkProxy { ) $target = "${TargetHost}:${TargetPort}" + Write-LaunchLog "No-think proxy: target=$target port=$($script:NoThinkProxyPort)" 'PROXY' $targetMatches = Test-NoThinkProxyTarget -ListenPort $script:NoThinkProxyPort -TargetHost $TargetHost -TargetPort $TargetPort if ($targetMatches -eq $true) { + Write-LaunchLog "No-think proxy already running for target=$target" 'PROXY' return } if ($targetMatches -eq $false) { throw "No-think proxy port $($script:NoThinkProxyPort) is already in use by a proxy for a different or unverifiable target. Stop that process or change NoThinkProxyPort." 
} + $proxyScript = Join-Path $HOME ".ollama-proxy\no-think-proxy.py" + if ($script:NoThinkProxyProcess -and -not $script:NoThinkProxyProcess.HasExited) { + Write-LaunchLog "Reusing existing no-think proxy process (PID=$($script:NoThinkProxyProcess.Id))" 'PROXY' return } - $proxyScript = Join-Path $HOME ".ollama-proxy\no-think-proxy.py" + Write-LaunchLog "Starting no-think proxy: python $proxyScript $($script:NoThinkProxyPort) $target" 'PROXY' if (-not (Test-Path $proxyScript)) { throw "No-think proxy not found: $proxyScript. Re-run install.ps1 so Claude/Unshackled launches do not point at a dead proxy URL." @@ -607,7 +612,7 @@ function Start-ClaudeWithOllamaModel { $thinkingLabel = if ($keepThinking) { "kept (direct to Ollama)" } else { "disabled" } Write-Host "" - Write-Host "Launching $backendLabel with $Model via Ollama..." -ForegroundColor Cyan + Write-Host "Launching ${backendLabel} with $Model via Ollama..." -ForegroundColor Cyan Write-Host " Base URL : $($env:ANTHROPIC_BASE_URL)" -ForegroundColor DarkGray Write-Host " Model : $Model" -ForegroundColor DarkGray Write-Host " Thinking : $thinkingLabel" -ForegroundColor DarkGray @@ -689,6 +694,7 @@ function Start-ClaudeWithLlamaCppModel { [switch]$Unshackled, [switch]$Codex, [switch]$Strict, + [switch]$UseVision, [switch]$AutoBest, [switch]$AutoBestStrict, [ValidateSet('auto','pure','balanced','short','long')][string]$AutoBestProfile = 'auto', @@ -742,13 +748,42 @@ function Start-ClaudeWithLlamaCppModel { 256 } + # Resolve vision module (mmproj) on demand when user opts in; always log availability. 
+ $visionModulePath = if ($UseVision) { + Write-LaunchLog "Resolving vision module for llama.cpp launch (model=$($def.Root))" 'VISION' + $result = Get-ModelVisionModulePath -Key $Key -Def $def -Backend llamacpp + if ($result) { + Write-LaunchLog "Vision module resolved: $([System.IO.Path]::GetFileName($result))" 'VISION' + } else { + Write-LaunchLog "No vision module found for $Key" 'WARN' + } + $result + } else { + $avail = Test-ModelVisionModuleAvailable -Key $Key -Def $def -Backend llamacpp + if ($avail.Local) { + Write-LaunchLog "Vision available locally ($($avail.Filename)) — not loaded (no -UseVision)" 'VISION' + } elseif ($avail.AvailableOnHF) { + Write-LaunchLog "Vision available on HuggingFace ($($avail.Filename)) — not loaded (no -UseVision)" 'VISION' + } else { + Write-LaunchLog "No vision module available for $Key (llama.cpp)" 'VISION' + } + '' + } + + if ($UseVision -and $visionModulePath) { + Write-Host "Vision: loaded mmproj $([System.IO.Path]::GetFileName($visionModulePath))" -ForegroundColor DarkCyan + } elseif ($UseVision) { + Write-Warning "Vision requested but no mmproj found for $Key" + } + $buildParams = @{ - Def = $def - ContextKey = $ContextKey - Mode = $Mode - ModelArgPath = $modelArgPath - Port = $port - ThinkingPolicy = $thinkingPolicy + Def = $def + ContextKey = $ContextKey + Mode = $Mode + ModelArgPath = $modelArgPath + Port = $port + ThinkingPolicy = $thinkingPolicy + VisionModulePath = $(if ($visionModulePath) { $visionModulePath } else { '' }) } if ($agentParallel -gt 0) { $buildParams.Parallel = $agentParallel } if ($agentCacheReuse -gt 0) { $buildParams.CacheReuse = $agentCacheReuse } @@ -793,7 +828,7 @@ function Start-ClaudeWithLlamaCppModel { Write-Warning "AutoBest: $reason" } } - $tunable = @('KvK','KvV','NGpuLayers','NCpuMoe','UbatchSize','BatchSize','Threads','ThreadsBatch','Mlock','NoMmap','FlashAttn','SplitMode') + $tunable = 
@('KvK','KvV','NGpuLayers','NCpuMoe','UbatchSize','BatchSize','Threads','ThreadsBatch','Mlock','NoMmap','FlashAttn','SplitMode','SwaFull','CachePrompt','CacheReuse') foreach ($k in $tunable) { if ($buildParams.ContainsKey($k)) { continue } $val = $null @@ -844,10 +879,15 @@ function Start-ClaudeWithLlamaCppModel { Write-Host "Starting llama-server for $($def.Root) via llama.cpp ($Mode)..." -ForegroundColor Cyan Write-Host " Server : $serverPath" -ForegroundColor DarkGray Write-Host " GGUF : $ggufPath" -ForegroundColor DarkGray + if ($VisionModulePath) { + Write-Host " Vision : $([System.IO.Path]::GetFileName($VisionModulePath))" -ForegroundColor DarkCyan + } Write-Host " Port : $port" -ForegroundColor DarkGray Write-Host " Logs : $($logPaths.Out)" -ForegroundColor DarkGray Write-Host " $($logPaths.Err)" -ForegroundColor DarkGray + Write-LaunchLog "llama-server: path=$serverPath port=$port gguf=$ggufPath mode=$Mode" 'SERVER' + $proc = Start-LlamaServerNative -ServerPath $serverPath -ServerArgs $serverArgs -OutLogPath $logPaths.Out -ErrLogPath $logPaths.Err $session = @{ @@ -887,6 +927,9 @@ function Start-ClaudeWithLlamaCppModel { Write-Host " Base URL : http://localhost:$port/v1" -ForegroundColor DarkGray Write-Host " Model : $($def.Root)" -ForegroundColor DarkGray Write-Host " GGUF : $ggufPath" -ForegroundColor DarkGray + if ($VisionModulePath) { + Write-Host " Vision : $([System.IO.Path]::GetFileName($VisionModulePath))" -ForegroundColor DarkCyan + } Write-Host " Port : $port" -ForegroundColor DarkGray Write-Host " Strict : $([bool]$Strict)" -ForegroundColor DarkGray Write-Host "" @@ -953,6 +996,9 @@ function Start-ClaudeWithLlamaCppModel { Write-Host " Base URL : $effectiveBaseUrl" -ForegroundColor DarkGray Write-Host " Model : $($def.Root)" -ForegroundColor DarkGray Write-Host " GGUF : $ggufPath" -ForegroundColor DarkGray + if ($VisionModulePath) { + Write-Host " Vision : $([System.IO.Path]::GetFileName($VisionModulePath))" -ForegroundColor DarkCyan + } 
Write-Host " Port : $port" -ForegroundColor DarkGray $agentSlotsLabel = if ($agentParallel -gt 0) { [string]$agentParallel } else { 'auto' } $agentCacheReuseLabel = if ($agentCacheReuse -gt 0) { [string]$agentCacheReuse } else { 'default' } @@ -979,6 +1025,8 @@ function Start-ClaudeWithLlamaCppModel { ) } + Write-LaunchLog "Launching ${backendLabel}: model=$($def.Root) base=$effectiveBaseUrl unshackled=$Unshackled" 'LAUNCH' + if ($Unshackled) { $extras = Get-UnshackledExtraArgs -Param $ExtraUnshackledArgs Invoke-UnshackledCli @launchArgs @extras diff --git a/local-llm/lib/71-benchpilot-bridge.ps1 b/local-llm/lib/71-benchpilot-bridge.ps1 index dc55c94..c611534 100644 --- a/local-llm/lib/71-benchpilot-bridge.ps1 +++ b/local-llm/lib/71-benchpilot-bridge.ps1 @@ -157,7 +157,7 @@ function Test-BenchPilotIntegrationAvailable { catch { $result.Reason = $_.Exception.Message if (-not $Quiet) { - Write-Verbose $result.Reason + Write-LaunchLog "BenchPilot check failed: $($result.Reason)" 'WARN' } return [pscustomobject]$result } @@ -183,6 +183,7 @@ function Invoke-BenchPilotLauncherFindBest { [ValidateSet('pure','balanced','both')][string]$Profile = 'pure', [ValidateSet('greedy','beam')][string]$SearchStrategy, [int]$BeamWidth = 1, + [int[]]$NCpuMoeCandidates, [switch]$NoSave ) @@ -222,10 +223,26 @@ function Invoke-BenchPilotLauncherFindBest { if ($PSBoundParameters.ContainsKey('BeamWidth')) { $params.BeamWidth = $BeamWidth } + if ($PSBoundParameters.ContainsKey('NCpuMoeCandidates') -and $NCpuMoeCandidates -and $NCpuMoeCandidates.Count -gt 0) { + $params.NCpuMoeCandidates = $NCpuMoeCandidates + } Find-BenchPilotBestConfig @params } +function Get-BenchPilotTopNCpuMoeValues { + param( + [Parameter(Mandatory = $true)][string]$Key, + [AllowEmptyString()][string]$ContextKey = '', + [int]$TopN = 5 + ) + try { Import-BenchPilotModule | Out-Null } catch { return @() } + if (Get-Command Get-LlamaCppTopNCpuMoeFromCandidates -ErrorAction SilentlyContinue) { + return 
@(Get-LlamaCppTopNCpuMoeFromCandidates -Key $Key -ContextKey $ContextKey -TopN $TopN) + } + return @() +} + function Get-BenchPilotLauncherBestConfig { [CmdletBinding()] param( diff --git a/local-llm/lib/72-llamacpp-tuner.ps1 b/local-llm/lib/72-llamacpp-tuner.ps1 index b67c65b..c91a069 100644 --- a/local-llm/lib/72-llamacpp-tuner.ps1 +++ b/local-llm/lib/72-llamacpp-tuner.ps1 @@ -50,7 +50,7 @@ function Format-LlamaCppOverrides { param([Parameter(Mandatory = $true)]$Overrides) $parts = @() - foreach ($k in @('NGpuLayers','NCpuMoe','UbatchSize','BatchSize','Threads','FlashAttn','Mlock','NoMmap','KvK','KvV')) { + foreach ($k in @('NGpuLayers','NCpuMoe','UbatchSize','BatchSize','Threads','FlashAttn','Mlock','NoMmap','KvK','KvV','SwaFull','CachePrompt','CacheReuse')) { $value = $null $hasValue = $false @@ -458,6 +458,7 @@ function Find-BestLlamaCppConfig { [ValidateSet('pure','balanced','both')][string]$Profile = 'pure', [ValidateSet('greedy','beam')][string]$SearchStrategy, [int]$BeamWidth = 1, + [int[]]$NCpuMoeCandidates, [switch]$NoSave ) diff --git a/local-llm/lib/75-display.ps1 b/local-llm/lib/75-display.ps1 index d237f40..82e6f67 100644 --- a/local-llm/lib/75-display.ps1 +++ b/local-llm/lib/75-display.ps1 @@ -96,7 +96,7 @@ function Test-LocalLLMSpectreAvailable { $script:LocalLLMSpectreState = $true return $true } catch { - Write-Verbose "PwshSpectreConsole import failed: $($_.Exception.Message)" + Write-LaunchLog "PwshSpectreConsole import failed: $($_.Exception.Message)" 'WARN' $script:LocalLLMSpectreState = $false return $false } @@ -266,6 +266,10 @@ function Show-ModelDetailSpectre { $headerLines.Add(("[grey70]Source[/] : {0}" -f (ConvertTo-LocalLLMSpectreSafe $source))) | Out-Null $headerLines.Add(("[grey70]Parser[/] : {0} [grey70]LimitTools[/]: {1}" -f (ConvertTo-LocalLLMSpectreSafe $parser), $limitTools)) | Out-Null + if ($def.ContainsKey('VisionModule') -and -not [string]::IsNullOrWhiteSpace($def.VisionModule)) { + $headerLines.Add(("[grey70]Vision[/] : 
{0}" -f (ConvertTo-LocalLLMSpectreSafe $def.VisionModule))) | Out-Null + } + if ($def.ContainsKey('ParserNote') -and $def.ParserNote) { $headerLines.Add(("[grey50]Note[/] : {0}" -f (ConvertTo-LocalLLMSpectreSafe $def.ParserNote))) | Out-Null } @@ -508,6 +512,10 @@ function Show-ModelDetailFallback { Write-Host " Source : $source" -ForegroundColor DarkGray Write-Host " Parser : $($def.Parser) LimitTools: $([bool]$def.LimitTools)" -ForegroundColor DarkGray + if ($def.ContainsKey('VisionModule') -and -not [string]::IsNullOrWhiteSpace($def.VisionModule)) { + Write-Host " Vision : $($def.VisionModule)" -ForegroundColor DarkGray + } + if ($def.ContainsKey('ParserNote') -and $def.ParserNote) { Write-Host " Note : $($def.ParserNote)" -ForegroundColor DarkGray } @@ -586,7 +594,7 @@ function Show-LocalBoxCommandReference { } Write-Host "LocalBox model commands" -ForegroundColor Green - Write-Host " One function is generated for each configured model. Use -Ctx, -Chat, -Codex, -Q8, -Strict, -Unshackled, and -Quant where supported." -ForegroundColor DarkGray + Write-Host " One function is generated for each configured model. Use -Ctx, -Chat, -Codex, -Q8, -Strict, -Unshackled, -UseVision, and -Quant where supported." -ForegroundColor DarkGray foreach ($key in (@(Get-ModelKeys) | Sort-Object)) { $def = Get-ModelDef -Key $key $name = Get-ModelShortcutName -Def $def @@ -614,6 +622,7 @@ function Show-LocalBoxCommandReference { Write-CommandRow -Command "llmdefaultcodex" -Description "Launch the default model through Codex." Write-CommandRow -Command "llmdefaultchat" -Description "Launch the default model as plain Ollama chat." Write-CommandRow -Command "llmlogerr, llmlogerrclear" -Description "Show or clear wizard error logs." + Write-CommandRow -Command "llmlog" -Description "Show launch debug log (~/.local-llm/launch.log)." Write-Host "" Write-Host "LocalBox model setup and catalog" -ForegroundColor Green @@ -791,6 +800,8 @@ One function per model — flags select what to do. 
q36p -Chat Raw ollama chat, no Claude Code q36p -Q8 Use q8 KV cache for higher quality q36p -Quant q6kp Switch the GGUF quant (rebuilds aliases) + q36p -UseVision Launch with vision (mmproj) support + llm -UseVision Same, via wizard (skips vision prompt if mmproj available) llmdefault Launch the configured Default model llm Guided wizard (Spectre when available) llmc Native selectable wizard @@ -804,6 +815,7 @@ Flags Refused above $q8MaxLabel tokens — q8 KV at long context OOMs a 24GB card. Override the threshold with: Set-LocalLLMSetting Q8KvMaxContext 262144 -Quant Switch the model's selected quant (no launch). GGUF models only. + -UseVision Enable vision/multimodal support (loads mmproj module). Requires a model with VisionModule configured or an mmproj-*.gguf file present. Tradeoffs / sizes Per-quant and per-context tradeoffs (file size, KV pressure, when to pick what) diff --git a/local-llm/lib/85-shortcuts.ps1 b/local-llm/lib/85-shortcuts.ps1 index ebac999..b6bc8f3 100644 --- a/local-llm/lib/85-shortcuts.ps1 +++ b/local-llm/lib/85-shortcuts.ps1 @@ -12,6 +12,7 @@ function Invoke-ModelShortcut { [switch]$Codex, [switch]$Chat, [switch]$Strict, + [switch]$UseVision, [string[]]$ExtraUnshackledArgs ) @@ -40,10 +41,32 @@ function Invoke-ModelShortcut { } } + # Resolve vision module (mmproj) on demand when user opts in; always log availability. 
+ $visionModulePath = if ($UseVision) { + Write-LaunchLog "Resolving vision module for Ollama launch (model=$Key)" 'VISION' + $result = Get-ModelVisionModulePath -Key $Key -Def $def -Backend ollama + if ($result) { + Write-LaunchLog "Vision module resolved: $([System.IO.Path]::GetFileName($result))" 'VISION' + } else { + Write-LaunchLog "No vision module found for $Key (Ollama)" 'WARN' + } + $result + } else { + $avail = Test-ModelVisionModuleAvailable -Key $Key -Def $def -Backend ollama + if ($avail.Local) { + Write-LaunchLog "Vision available locally ($($avail.Filename)) — not loaded (no -UseVision)" 'VISION' + } elseif ($avail.AvailableOnHF) { + Write-LaunchLog "Vision available on HuggingFace ($($avail.Filename)) — not loaded (no -UseVision)" 'VISION' + } else { + Write-LaunchLog "No vision module available for $Key (Ollama)" 'VISION' + } + '' + } + $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $Key -ContextKey $ContextKey } else { - Ensure-ModelAlias -Key $Key -ContextKey $ContextKey + Ensure-ModelAlias -Key $Key -ContextKey $ContextKey -VisionModulePath $(if ($visionModulePath) { $visionModulePath } else { '' }) } if ($Chat) { @@ -63,6 +86,8 @@ function Invoke-ModelShortcut { "strip" } + Write-LaunchLog "Ollama launch: model=$modelName vision=[$visionModulePath] unshackled=$Unshackled codex=$Codex strict=$Strict" 'LAUNCH' + $startArgs = @{ Model = $modelName Tools = $toolsList diff --git a/local-llm/lib/90-wizard.ps1 b/local-llm/lib/90-wizard.ps1 index 088e70e..7a2db68 100644 --- a/local-llm/lib/90-wizard.ps1 +++ b/local-llm/lib/90-wizard.ps1 @@ -363,6 +363,29 @@ function Set-ModelQuantForSelectedLaunch { Write-Host "$ModelKey session quant set to $resolvedQuant -> $($def.Quants[$resolvedQuant])" -ForegroundColor Green } +function Read-LLMVisionToggle { + # Returns $true (vision on), $false (vision off), or $null (back). 
+ $items = @( + [pscustomobject]@{ Key = $false; Label = 'No'; Description = 'Launch without vision support' }, + [pscustomobject]@{ Key = $true; Label = 'Yes'; Description = 'Load multimodal module (mmproj.gguf) for vision' } + ) + + $idx = Read-LLMChoiceIndex ` + -Title "Use vision (multimodal)?" ` + -Items $items ` + -ZeroLabel "Back" ` + -Label { + param($item, $i) + "$($item.Label) - $($item.Description)" + } + + if ($idx -lt 0) { + return $null + } + + return [bool]$items[$idx].Key +} + function Read-LLMQ8Toggle { # Returns $true (q8 on), $false (q8 off), or $null (back). $items = @( @@ -488,6 +511,106 @@ function Read-LLMTuneOptimize { return [string]$items[$idx].Key } +function Read-LLMTuneKvVariation { + param([Parameter(Mandatory = $true)][string]$Mode) + + $items = @( + [pscustomobject]@{ Key = 'no'; Label = 'No'; Description = 'Keep current KV type only' }, + [pscustomobject]@{ Key = 'yes'; Label = 'Yes'; Description = 'Widen within the quality class' } + ) + if ($Mode -eq 'turboquant') { + $items += [pscustomobject]@{ Key = 'turbo-only'; Label = 'Turbo'; Description = 'Test turbo3 and turbo4 only' } + } + + $idx = Read-LLMChoiceIndex ` + -Title "Allow KV cache variation?" 
` + -Items $items ` + -ZeroLabel "Back" ` + -Label { + param($item, $i) + "$($item.Label) - $($item.Description)" + } + + if ($idx -lt 0) { return $null } + return [string]$items[$idx].Key +} + +function Read-LLMTuneNCpuMoeRange { + param( + [Parameter(Mandatory = $true)][string]$ModelKey, + [AllowEmptyString()][string]$ContextKey = '' + ) + + $topValues = @() + if (Get-Command Get-BenchPilotTopNCpuMoeValues -ErrorAction SilentlyContinue) { + $topValues = @(Get-BenchPilotTopNCpuMoeValues -Key $ModelKey -ContextKey $ContextKey -TopN 5) + } + + $parseRange = { + param([string]$text) + $trimmed = $text.Trim() + if ($trimmed -match '^(\d+)-(\d+):(\d+)$') { + $start = [int]$Matches[1]; $end = [int]$Matches[2]; $step = [int]$Matches[3] + if ($step -lt 1) { $step = 1 } + $vals = @() + for ($v = $start; $v -le $end; $v += $step) { $vals += $v } + return @($vals | Select-Object -Unique) + } + return $null + } + + if ($topValues.Count -gt 0) { + $topStr = (($topValues | Sort-Object | ForEach-Object { [string]$_ }) -join ', ') + $minVal = ($topValues | Measure-Object -Minimum).Minimum + $maxVal = ($topValues | Measure-Object -Maximum).Maximum + $items = @( + [pscustomobject]@{ Key = 'step1'; Label = 'Yes — step=1'; Description = "Test NCpuMoe $minVal..$maxVal in steps of 1 (previous best: $topStr)" }, + [pscustomobject]@{ Key = 'default'; Label = 'No — defaults'; Description = 'Use auto-generated candidate range' }, + [pscustomobject]@{ Key = 'custom'; Label = 'Custom'; Description = 'Enter own range (format: start-end:step, e.g. 15-40:2)' } + ) + $idx = Read-LLMChoiceIndex ` + -Title "Refine NCpuMoe around previous best?" ` + -Items $items -ZeroLabel "Back" ` + -Label { param($item, $i) "$($item.Label) - $($item.Description)" } + + if ($idx -lt 0) { return $null } + $choice = $items[$idx].Key + + if ($choice -eq 'step1') { + return @([int]$minVal..[int]$maxVal) + } + if ($choice -eq 'custom') { + $raw = Read-Host "NCpuMoe range (start-end:step, e.g. 
20-40:1)" + if ([string]::IsNullOrWhiteSpace($raw)) { return $null } + $parsed = & $parseRange $raw + if ($parsed -and $parsed.Count -gt 0) { return $parsed } + Write-Warning "Could not parse '$raw' as start-end:step range. Using defaults." + return $null + } + return $null + } + + # No history: offer defaults or custom entry + $items = @( + [pscustomobject]@{ Key = 'default'; Label = 'Use defaults'; Description = 'Auto-generate NCpuMoe candidates from model catalog' }, + [pscustomobject]@{ Key = 'custom'; Label = 'Custom range'; Description = 'Enter your own range (format: start-end:step, e.g. 15-40:2)' } + ) + $idx = Read-LLMChoiceIndex ` + -Title "NCpuMoe expert offload range" ` + -Items $items -ZeroLabel "Back" ` + -Label { param($item, $i) "$($item.Label) - $($item.Description)" } + + if ($idx -lt 0) { return $null } + if ($items[$idx].Key -eq 'custom') { + $raw = Read-Host "NCpuMoe range (start-end:step, e.g. 20-40:1)" + if ([string]::IsNullOrWhiteSpace($raw)) { return $null } + $parsed = & $parseRange $raw + if ($parsed -and $parsed.Count -gt 0) { return $parsed } + Write-Warning "Could not parse '$raw' as start-end:step range. Using defaults." + } + return $null +} + function Read-LLMTuneProfile { $items = @( [pscustomobject]@{ Key = 'pure'; Label = 'Pure'; Description = 'Fastest measured LLM throughput' }, @@ -539,25 +662,42 @@ function Invoke-LlamaCppTunerWizardFlow { if ([string]::IsNullOrWhiteSpace($selectionProfile)) { return } $allowKv = if ($UseSpectrePrompts) { - Read-LLMTuneKvVariationSpectre + Read-LLMTuneKvVariationSpectre -Mode $Mode } else { - Write-Host "" - Write-Host "Widens the search to other types in your quality class." -ForegroundColor DarkGray - Read-LLMYesNo -Prompt "Allow KV cache variation?" 
-DefaultYes:$false + Read-LLMTuneKvVariation -Mode $Mode } + if ([string]::IsNullOrWhiteSpace($allowKv)) { return } - $allowedKvTypes = $null - if ($allowKv) { - $allowedKvTypes = if ($Mode -eq 'turboquant') { - @('q8_0', 'f16', 'turbo3', 'turbo4') - } else { - @('q8_0', 'f16') - } + $allowedKvTypes = switch ($allowKv) { + 'yes' { if ($Mode -eq 'turboquant') { @('q8_0', 'f16', 'turbo3', 'turbo4') } else { @('q8_0', 'f16') } } + 'turbo-only' { @('turbo3', 'turbo4') } + default { $null } } $def = Get-ModelDef -Key $ModelKey $quant = if ($def.Contains('Quant')) { [string]$def.Quant } else { '' } - $result = Find-BestLlamaCppConfig -Key $ModelKey -ContextKey $ContextKey -Mode $Mode -Quant $quant -AllowedKvTypes $allowedKvTypes -Deep:$useDeep -Optimize $optimize -Profile $selectionProfile -NoSave + + $ncpuMoeCandidates = $null + $isMoE = $def.Contains('NCpuMoe') -and $null -ne $def.NCpuMoe + if ($isMoE -and -not $UseSpectrePrompts) { + $ncpuMoeCandidates = Read-LLMTuneNCpuMoeRange -ModelKey $ModelKey -ContextKey $ContextKey + } + + $findParams = @{ + Key = $ModelKey + ContextKey = $ContextKey + Mode = $Mode + Quant = $quant + AllowedKvTypes = $allowedKvTypes + Deep = $useDeep + Optimize = $optimize + Profile = $selectionProfile + NoSave = $true + } + if ($ncpuMoeCandidates -and $ncpuMoeCandidates.Count -gt 0) { + $findParams.NCpuMoeCandidates = [int[]]$ncpuMoeCandidates + } + $result = Find-BestLlamaCppConfig @findParams $results = @($result | Where-Object { $_ }) Write-Host "" @@ -1042,12 +1182,13 @@ function Invoke-LLMSelection { [string]$KvCacheV, [switch]$UseQ8, [switch]$Strict, + [switch]$UseVision, [switch]$UseAutoBest, [ValidateSet('auto','pure','balanced','short','long')][string]$AutoBestProfile = 'auto', [switch]$UseSpectrePrompts ) - if ($Backend -eq 'llamacpp') { + if ($Backend -eq 'llamacpp') { $def = Get-ModelDef -Key $ModelKey switch ($Action) { @@ -1055,21 +1196,21 @@ function Invoke-LLMSelection { Invoke-Backend -Action launch-claude -Backend llamacpp ` 
-Key $ModelKey -ContextKey $ContextKey ` -LlamaCppMode $LlamaCppMode -KvCacheK $KvCacheK -KvCacheV $KvCacheV ` - -LimitTools:([bool]$def.LimitTools) -Strict:$Strict -AutoBest:$UseAutoBest -AutoBestProfile $AutoBestProfile + -LimitTools:([bool]$def.LimitTools) -Strict:$Strict -UseVision:$UseVision -AutoBest:$UseAutoBest -AutoBestProfile $AutoBestProfile } "codex" { Start-ClaudeWithLlamaCppModel ` -Key $ModelKey -ContextKey $ContextKey -Mode $LlamaCppMode ` -KvCacheK $KvCacheK -KvCacheV $KvCacheV ` - -LimitTools:([bool]$def.LimitTools) -Strict:$Strict -AutoBest:$UseAutoBest -AutoBestProfile $AutoBestProfile -Codex + -LimitTools:([bool]$def.LimitTools) -Strict:$Strict -UseVision:$UseVision -AutoBest:$UseAutoBest -AutoBestProfile $AutoBestProfile -Codex } "unshackled" { Invoke-Backend -Action launch-claude -Backend llamacpp ` -Key $ModelKey -ContextKey $ContextKey ` -LlamaCppMode $LlamaCppMode -KvCacheK $KvCacheK -KvCacheV $KvCacheV ` - -LimitTools:([bool]$def.LimitTools) -Unshackled -Strict:$Strict -AutoBest:$UseAutoBest -AutoBestProfile $AutoBestProfile + -LimitTools:([bool]$def.LimitTools) -Unshackled -Strict:$Strict -UseVision:$UseVision -AutoBest:$UseAutoBest -AutoBestProfile $AutoBestProfile } "setup" { @@ -1096,33 +1237,39 @@ function Invoke-LLMSelection { # Ollama backend. 
switch ($Action) { "chat" { - Invoke-ModelShortcut -Key $ModelKey -ContextKey $ContextKey -Chat -UseQ8:$UseQ8 -Strict:$Strict + Invoke-ModelShortcut -Key $ModelKey -ContextKey $ContextKey -Chat -UseQ8:$UseQ8 -Strict:$Strict -UseVision:$UseVision } "unshackled" { - Invoke-ModelShortcut -Key $ModelKey -ContextKey $ContextKey -Unshackled -UseQ8:$UseQ8 -Strict:$Strict + Invoke-ModelShortcut -Key $ModelKey -ContextKey $ContextKey -Unshackled -UseQ8:$UseQ8 -Strict:$Strict -UseVision:$UseVision } "codex" { - Invoke-ModelShortcut -Key $ModelKey -ContextKey $ContextKey -Codex -UseQ8:$UseQ8 -Strict:$Strict + Invoke-ModelShortcut -Key $ModelKey -ContextKey $ContextKey -Codex -UseQ8:$UseQ8 -Strict:$Strict -UseVision:$UseVision } "claude" { - Invoke-ModelShortcut -Key $ModelKey -ContextKey $ContextKey -UseQ8:$UseQ8 -Strict:$Strict + Invoke-ModelShortcut -Key $ModelKey -ContextKey $ContextKey -UseQ8:$UseQ8 -Strict:$Strict -UseVision:$UseVision } "benchmark" { - $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey -ContextKey $ContextKey } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey } + $defBenchmark = Get-ModelDef -Key $ModelKey + $visionBM = if ($UseVision) { Get-ModelVisionModulePath -Key $ModelKey -Def $defBenchmark -Backend ollama } else { '' } + $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey -VisionModulePath $(if ($visionBM) { $visionBM } else { '' }) } Test-OllamaSpeed -Model $modelName -Runs 3 } "setup" { - $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey -ContextKey $ContextKey -ForceRebuild } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey -ForceRebuild } + $defSetup = Get-ModelDef -Key $ModelKey + $visionSetup = if ($UseVision) { Get-ModelVisionModulePath -Key $ModelKey -Def $defSetup -Backend ollama } else { '' } + $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey -ForceRebuild } else { 
Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey -ForceRebuild -VisionModulePath $(if ($visionSetup) { $visionSetup } else { '' }) } Write-Host "Created/rebuilt alias: $modelName" -ForegroundColor Green } "show" { - $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey -ContextKey $ContextKey } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey } + $defShow = Get-ModelDef -Key $ModelKey + $visionShow = if ($UseVision) { Get-ModelVisionModulePath -Key $ModelKey -Def $defShow -Backend ollama } else { '' } + $modelName = if ($Strict) { Ensure-ModelStrictAlias -Key $ModelKey } else { Ensure-ModelAlias -Key $ModelKey -ContextKey $ContextKey -VisionModulePath $(if ($visionShow) { $visionShow } else { '' }) } & ollama show $modelName } @@ -1133,11 +1280,17 @@ function Invoke-LLMSelection { } function Start-LLMWizardClassic { + [CmdletBinding()] + param( + [switch]$UseVision + ) + $modelKey = $null $contextKey = $null $action = $null $useQ8 = $false $useStrict = $false + $useVisionFlag = [bool]$UseVision $backend = 'ollama' $llamaCppMode = $null $kvK = $null @@ -1161,6 +1314,7 @@ function Start-LLMWizardClassic { $useAutoBest = $false $autoBestProfile = 'auto' $saveAsDefault = $false + $useVisionFlag = $false $step = 'quant' } @@ -1187,6 +1341,39 @@ function Start-LLMWizardClassic { 'llamacpp-native' { $backend = 'llamacpp'; $llamaCppMode = 'native' } 'llamacpp-turboquant' { $backend = 'llamacpp'; $llamaCppMode = 'turboquant' } } + + # Check if this model has vision support: configured VisionModule, local mmproj file, or HF available. + $visionAvail = Test-ModelVisionModuleAvailable -Key $modelKey -Def $def -Backend $backend + $hasVision = $visionAvail.Local -or $visionAvail.AvailableOnHF + if (-not $hasVision) { + $step = if (Get-ModelStrictEnabled -Def $def) { 'strict' } else { 'context' } + } elseif ($useVisionFlag) { + # Pre-set by -UseVision flag; skip vision prompt. 
+ $step = if (Get-ModelStrictEnabled -Def $def) { 'strict' } else { 'context' } + } else { + $step = 'vision' + } + } + + 'vision' { + # Re-check availability after potential download from previous step + if (-not $visionAvail) { + $visionAvail = Test-ModelVisionModuleAvailable -Key $modelKey -Def $def -Backend $backend + } + if (-not $visionAvail.Local -and $visionAvail.AvailableOnHF) { + Write-Host "Downloading vision module '$($visionAvail.Filename)' from HuggingFace..." -ForegroundColor Yellow + try { + $ollamaFolder = Join-Path $script:Cfg.OllamaCommunityRoot $def.Root + Download-HuggingFaceFile -Repo $def.Repo -FileName $visionAvail.Filename -DestinationFolder $ollamaFolder | Out-Null + Write-Host "Downloaded '$($visionAvail.Filename)'." -ForegroundColor Green + $visionAvail.Local = $true + } catch { + Write-Warning "Failed to download vision module: $_" + } + } + $useVision = Read-LLMVisionToggle + if ($null -eq $useVision) { $step = 'backend'; break } # back + $useVisionFlag = [bool]$useVision $step = if (Get-ModelStrictEnabled -Def $def) { 'strict' } else { 'context' } } @@ -1195,7 +1382,15 @@ function Start-LLMWizardClassic { if ($null -eq $strict) { $step = 'backend'; break } # back $useStrict = [bool]$strict - $step = 'context' + if ($useStrict) { + # Strict pins context to Get-ModelStrictBaseContextKey via the + # alias build; the empty contextKey is correct here because the + # shortcut layer rejects -Strict + -Ctx together. 
+ $contextKey = "" + $step = 'action' + } else { + $step = 'context' + } } 'context' { @@ -1211,7 +1406,7 @@ function Start-LLMWizardClassic { 'action' { $action = Select-LLMAction -Backend $backend if ([string]::IsNullOrWhiteSpace($action)) { - $step = 'context' + $step = if ($useStrict) { 'strict' } else { 'context' } break } @@ -1286,7 +1481,7 @@ function Start-LLMWizardClassic { } else { Invoke-LLMSelection -ModelKey $modelKey -ContextKey $contextKey -Action $action ` -Backend $backend -LlamaCppMode $llamaCppMode ` - -KvCacheK $kvK -KvCacheV $kvV -UseQ8:$useQ8 -Strict:$useStrict -UseAutoBest:$useAutoBest -AutoBestProfile $autoBestProfile + -KvCacheK $kvK -KvCacheV $kvV -UseQ8:$useQ8 -Strict:$useStrict -UseVision:$useVisionFlag -UseAutoBest:$useAutoBest -AutoBestProfile $autoBestProfile } } catch { @@ -1604,6 +1799,19 @@ function Select-LLMKvCacheSpectre { return @{ K = $value; V = $value } } +function Read-LLMVisionToggleSpectre { + # Returns $true (vision on), $false (vision off), or $null (back). + $choices = [ordered]@{ + 'No - launch without vision support' = $false + 'Yes - load multimodal module (mmproj.gguf)' = $true + '[[Back]]' = '__back__' + } + $chosen = Read-SpectreSelection -Message "Use vision (multimodal)?" -Choices @($choices.Keys) -PageSize 5 + if ($null -eq $chosen) { return $null } + if ($chosen -eq '[[Back]]') { return $null } + return [bool]$choices[$chosen] +} + function Read-LLMQ8ToggleSpectre { # Returns $true (q8 on), $false (q8 off), or $null (back). 
# Important: check the label string for back BEFORE looking up the boolean @@ -1660,13 +1868,18 @@ function Read-LLMYesNoSpectre { } function Read-LLMTuneKvVariationSpectre { + param([Parameter(Mandatory = $true)][string]$Mode) + $choices = [ordered]@{ - 'No - keep current KV type only' = $false - 'Yes - widen within the quality class' = $true + 'No - keep current KV type only' = 'no' + 'Yes - widen within the quality class' = 'yes' + } + if ($Mode -eq 'turboquant') { + $choices['Turbo - test turbo3 and turbo4 only'] = 'turbo-only' } $chosen = Read-SpectreSelection -Message "Allow KV cache variation? Widens the search to other types in your quality class." -Choices @($choices.Keys) -PageSize 4 - if ($null -eq $chosen) { return $false } - return [bool]$choices[$chosen] + if ($null -eq $chosen) { return 'no' } + return [string]$choices[$chosen] } function Read-LLMTuneDepthSpectre { @@ -1819,11 +2032,17 @@ function Invoke-LLMSpectreTransitionCooldown { } function Start-LLMWizardSpectre { + [CmdletBinding()] + param( + [switch]$UseVision + ) + $modelKey = $null $contextKey = $null $action = $null $useQ8 = $false $useStrict = $false + $useVisionFlag = [bool]$UseVision $backend = 'ollama' $llamaCppMode = $null $kvK = $null @@ -1887,6 +2106,41 @@ function Start-LLMWizardSpectre { 'llamacpp-native' { $backend = 'llamacpp'; $llamaCppMode = 'native' } 'llamacpp-turboquant' { $backend = 'llamacpp'; $llamaCppMode = 'turboquant' } } + # Check if this model has vision support: configured VisionModule, local mmproj file, or HF available. + $visionAvail = Test-ModelVisionModuleAvailable -Key $modelKey -Def $def -Backend $backend + $hasVision = $visionAvail.Local -or $visionAvail.AvailableOnHF + if (-not $hasVision) { + $step = if (Get-ModelStrictEnabled -Def $def) { 'strict' } else { 'context' } + } elseif ($useVisionFlag) { + # Pre-set by -UseVision flag; skip vision prompt. 
+ $step = if (Get-ModelStrictEnabled -Def $def) { 'strict' } else { 'context' } + } else { + $step = 'vision' + } + } + + 'vision' { + # Re-check availability after potential download from previous step + if (-not $visionAvail) { + $visionAvail = Test-ModelVisionModuleAvailable -Key $modelKey -Def $def -Backend $backend + } + if (-not $visionAvail.Local -and $visionAvail.AvailableOnHF) { + Write-Host "Downloading vision module '$($visionAvail.Filename)' from HuggingFace..." -ForegroundColor Yellow + try { + $ollamaFolder = Join-Path $script:Cfg.OllamaCommunityRoot $def.Root + Download-HuggingFaceFile -Repo $def.Repo -FileName $visionAvail.Filename -DestinationFolder $ollamaFolder | Out-Null + Write-Host "Downloaded '$($visionAvail.Filename)'." -ForegroundColor Green + $visionAvail.Local = $true + } catch { + Write-Warning "Failed to download vision module: $_" + } + } + $capturedBackend = $backend + $useVision = Invoke-LLMWizardStep -Context 'vision-toggle' -Default $null -Action { + Read-LLMVisionToggleSpectre + } + if ($null -eq $useVision) { $step = 'backend'; break } + $useVisionFlag = [bool]$useVision $step = if (Get-ModelStrictEnabled -Def $def) { 'strict' } else { 'context' } } @@ -1897,7 +2151,12 @@ function Start-LLMWizardSpectre { if ($null -eq $strict) { $step = 'backend'; break } $useStrict = [bool]$strict - $step = 'context' + if ($useStrict) { + $contextKey = "" + $step = 'action' + } else { + $step = 'context' + } } 'context' { @@ -1918,7 +2177,7 @@ function Start-LLMWizardSpectre { Select-LLMActionSpectre -Backend $captured } if ([string]::IsNullOrWhiteSpace($action)) { - $step = 'context' + $step = if ($useStrict) { 'strict' } else { 'context' } break } @@ -2006,7 +2265,7 @@ function Start-LLMWizardSpectre { } else { Invoke-LLMSelection -ModelKey $modelKey -ContextKey $contextKey -Action $action ` -Backend $backend -LlamaCppMode $llamaCppMode ` - -KvCacheK $kvK -KvCacheV $kvV -UseQ8:$useQ8 -Strict:$useStrict -UseAutoBest:$useAutoBest 
-AutoBestProfile $autoBestProfile -UseSpectrePrompts + -KvCacheK $kvK -KvCacheV $kvV -UseQ8:$useQ8 -Strict:$useStrict -UseVision:$useVisionFlag -UseAutoBest:$useAutoBest -AutoBestProfile $autoBestProfile -UseSpectrePrompts } } catch { @@ -2020,6 +2279,48 @@ function Start-LLMWizardSpectre { } } +function Get-LocalLLMLaunchLogPath { + $dir = Join-Path $HOME ".local-llm" + if (-not (Test-Path $dir)) { + New-Item -ItemType Directory -Force -Path $dir | Out-Null + } + return (Join-Path $dir "launch.log") +} + +function Write-LaunchLog { + # Appends a timestamped line to ~/.local-llm/launch.log for debugging + # launch flows (vision, proxy, llama-server, claude). Follow with: + # Get-Content -Tail 50 -Wait (Join-Path $HOME '.local-llm' 'launch.log') + param( + [Parameter(Mandatory = $true)][string]$Message, + [ValidateSet('INFO', 'WARN', 'ERROR', 'VISION', 'PROXY', 'SERVER', 'LAUNCH')][string]$Level = 'INFO' + ) + + $logPath = Get-LocalLLMLaunchLogPath + $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss' + $line = "[$stamp] [$Level] $Message" + + try { + Add-Content -LiteralPath $logPath -Value $line -ErrorAction Stop + } catch { + Write-Verbose "Failed to write launch log: $_" + } +} + +function llmlog { + # Print the tail of ~/.local-llm/launch.log (launch debug trace). + param([int]$Lines = 80) + + $logPath = Get-LocalLLMLaunchLogPath + if (-not (Test-Path $logPath)) { + Write-Host "No launch log yet ($logPath does not exist)." -ForegroundColor DarkGray + return + } + + Write-Host "Tail of $logPath (last $Lines lines):" -ForegroundColor Cyan + Get-Content -LiteralPath $logPath -Tail $Lines +} + function llmlogerr { # Print the tail of ~/.local-llm/wizard-errors.log so a captured trace is easy to grab. 
param([int]$Lines = 80) @@ -2057,10 +2358,15 @@ function Start-LLMWizardSpectreExplicit { } function Start-LLMWizard { + [CmdletBinding()] + param( + [switch]$UseVision + ) + if (Test-LocalLLMWizardSpectreEnabled) { - Start-LLMWizardSpectre + Start-LLMWizardSpectre -UseVision:$UseVision return } - Start-LLMWizardClassic + Start-LLMWizardClassic -UseVision:$UseVision } diff --git a/local-llm/lib/99-entrypoints.ps1 b/local-llm/lib/99-entrypoints.ps1 index cbf7be9..3aa04c2 100644 --- a/local-llm/lib/99-entrypoints.ps1 +++ b/local-llm/lib/99-entrypoints.ps1 @@ -1,7 +1,7 @@ # Top-level command surface (one-liners that wrap into the wizard / reload). -function llm { Start-LLMWizard } -function llmmenu { Start-LLMWizard } +function llm { Start-LLMWizard @args } +function llmmenu { Start-LLMWizard @args } function llmc { Start-LLMWizardClassic } function llms { Start-LLMWizardSpectreExplicit } function reloadllm { Reload-LocalLLMConfig }