diff --git a/src/kimi_cli/llm.py b/src/kimi_cli/llm.py
index 0197ef5c..fb542e5d 100644
--- a/src/kimi_cli/llm.py
+++ b/src/kimi_cli/llm.py
@@ -20,6 +20,7 @@
     "google_genai",  # for backward-compatibility, equals to `gemini`
     "gemini",
     "vertexai",
+    "lm_studio",  # LM Studio and similar local servers
    "_echo",
     "_chaos",
 ]
@@ -125,6 +126,21 @@ def create_llm(
                 base_url=provider.base_url,
                 api_key=provider.api_key.get_secret_value(),
             )
+        case "lm_studio":
+            import httpx
+            from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
+
+            # An explicit transport avoids httpx defaults (e.g. env-proxy handling) that can break localhost requests
+            http_client = httpx.AsyncClient(
+                transport=httpx.AsyncHTTPTransport(),
+                timeout=httpx.Timeout(timeout=600.0, connect=10.0),
+            )
+            chat_provider = OpenAILegacy(
+                model=model.model,
+                base_url=provider.base_url,
+                api_key=provider.api_key.get_secret_value(),
+                http_client=http_client,
+            )
         case "openai_responses":
             from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
 
diff --git a/src/kimi_cli/ui/shell/setup.py b/src/kimi_cli/ui/shell/setup.py
index 184f777f..f6b07278 100644
--- a/src/kimi_cli/ui/shell/setup.py
+++ b/src/kimi_cli/ui/shell/setup.py
@@ -16,10 +16,15 @@
     load_config,
     save_config,
 )
+from kimi_cli.soul.kimisoul import RESERVED_TOKENS
 from kimi_cli.ui.shell.console import console
 from kimi_cli.ui.shell.slash import registry
 from kimi_cli.utils.aiohttp import new_client_session
 
+# The configured context size must at least cover RESERVED_TOKENS
+MIN_CONTEXT_SIZE = RESERVED_TOKENS
+DEFAULT_LOCAL_CONTEXT_SIZE = 131072  # 128k default for local models
+
 if TYPE_CHECKING:
     from kimi_cli.ui.shell import Shell
 
@@ -27,32 +32,41 @@
 class _Platform(NamedTuple):
     id: str
     name: str
-    base_url: str
+    default_base_url: str
     search_url: str | None = None
     fetch_url: str | None = None
     allowed_prefixes: list[str] | None = None
+    provider_type: str = "kimi"
+    is_local: bool = False
 
 
 _PLATFORMS = [
     _Platform(
         id="kimi-for-coding",
         name="Kimi for Coding",
-        base_url="https://api.kimi.com/coding/v1",
+        default_base_url="https://api.kimi.com/coding/v1",
         search_url="https://api.kimi.com/coding/v1/search",
         fetch_url="https://api.kimi.com/coding/v1/fetch",
     ),
     _Platform(
         id="moonshot-cn",
         name="Moonshot AI 开放平台 (moonshot.cn)",
-        base_url="https://api.moonshot.cn/v1",
+        default_base_url="https://api.moonshot.cn/v1",
         allowed_prefixes=["kimi-k2-"],
     ),
     _Platform(
         id="moonshot-ai",
         name="Moonshot AI Open Platform (moonshot.ai)",
-        base_url="https://api.moonshot.ai/v1",
+        default_base_url="https://api.moonshot.ai/v1",
         allowed_prefixes=["kimi-k2-"],
     ),
+    _Platform(
+        id="lm-studio",
+        name="LM Studio (Local)",
+        default_base_url="http://localhost:1234/v1",
+        provider_type="lm_studio",
+        is_local=True,
+    ),
 ]
 
 
@@ -66,8 +80,8 @@ async def setup(app: Shell, args: list[str]):
 
     config = load_config()
     config.providers[result.platform.id] = LLMProvider(
-        type="kimi",
-        base_url=result.platform.base_url,
+        type=result.platform.provider_type,  # type: ignore[arg-type]
+        base_url=result.base_url,
         api_key=result.api_key,
     )
     config.models[result.model_id] = LLMModel(
@@ -101,6 +115,7 @@ async def setup(app: Shell, args: list[str]):
 
 class _SetupResult(NamedTuple):
     platform: _Platform
+    base_url: str
     api_key: SecretStr
     model_id: str
     max_context_size: int
@@ -118,13 +133,17 @@ async def _setup() -> _SetupResult | None:
 
     platform = next(platform for platform in _PLATFORMS if platform.name == platform_name)
 
+    # For local servers (LM Studio), use a dedicated setup flow
+    if platform.is_local:
+        return await _setup_local(platform)
+
     # enter the API key
     api_key = await _prompt_text("Enter your API key", is_password=True)
     if not api_key:
         return None
 
     # list models
-    models_url = f"{platform.base_url}/models"
+    models_url = f"{platform.default_base_url}/models"
     try:
         async with (
             new_client_session() as session,
@@ -168,12 +187,77 @@ async def _setup() -> _SetupResult | None:
 
     return _SetupResult(
         platform=platform,
+        base_url=platform.default_base_url,
         api_key=SecretStr(api_key),
         model_id=model_id,
         max_context_size=model["context_length"],
     )
 
 
+async def _setup_local(platform: _Platform) -> _SetupResult | None:
+    """Setup flow for local servers like LM Studio."""
+    import httpx
+
+    # Ask for the base URL, falling back to the platform default
+    base_url = await _prompt_text(f"Enter server URL (default: {platform.default_base_url})")
+    if not base_url:
+        base_url = platform.default_base_url
+    base_url = base_url.rstrip("/")  # tolerate a trailing slash in user input
+
+    # Probe the local server for available models
+    console.print(f"[dim]Connecting to {base_url}...[/dim]")
+    try:
+        # An explicit transport avoids env-proxy handling that can break localhost requests
+        transport = httpx.AsyncHTTPTransport()
+        async with httpx.AsyncClient(transport=transport, timeout=10.0) as client:
+            resp = await client.get(f"{base_url}/models")
+            resp.raise_for_status()
+            resp_json = resp.json()
+    except Exception as e:
+        console.print(f"[red]Failed to connect to {base_url}: {e}[/red]")
+        console.print("[yellow]Make sure LM Studio is running and a model is loaded.[/yellow]")
+        return None
+
+    model_ids: list[str] = [model["id"] for model in resp_json.get("data", [])]
+    if not model_ids:
+        console.print("[red]No models found. Make sure a model is loaded in LM Studio.[/red]")
+        return None
+
+    # Select a model
+    model_id = await _prompt_choice(
+        header="Select the model",
+        choices=model_ids,
+    )
+    if not model_id:
+        console.print("[red]No model selected[/red]")
+        return None
+
+    # Ask for the context size
+    context_size_str = await _prompt_text(
+        f"Enter max context size (default: {DEFAULT_LOCAL_CONTEXT_SIZE}, min: {MIN_CONTEXT_SIZE})"
+    )
+    try:
+        max_context_size = int(context_size_str) if context_size_str else DEFAULT_LOCAL_CONTEXT_SIZE
+    except ValueError:
+        max_context_size = DEFAULT_LOCAL_CONTEXT_SIZE
+
+    # Clamp to the minimum the agent needs to operate
+    if max_context_size < MIN_CONTEXT_SIZE:
+        console.print(
+            f"[yellow]Context size {max_context_size} is below minimum ({MIN_CONTEXT_SIZE}). "
+            f"Using {MIN_CONTEXT_SIZE} instead.[/yellow]"
+        )
+        max_context_size = MIN_CONTEXT_SIZE
+
+    return _SetupResult(
+        platform=platform,
+        base_url=base_url,
+        api_key=SecretStr("lm-studio"),  # LM Studio doesn't require an API key
+        model_id=model_id,
+        max_context_size=max_context_size,
+    )
+
+
 async def _prompt_choice(*, header: str, choices: list[str]) -> str | None:
     if not choices:
         return None
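
Reviewer note, not part of the diff: the sketch below replays the same `GET {base_url}/models` probe that `_setup_local` performs, as a standalone script for checking that an LM Studio server is reachable before running the setup flow. The default URL, explicit transport, and 10-second timeout mirror the values in the change; the script structure and names are illustrative only.

```python
# Standalone sketch mirroring _setup_local's model probe (illustrative, not part of the diff).
import asyncio

import httpx

BASE_URL = "http://localhost:1234/v1"  # LM Studio's default, as in _PLATFORMS


async def main() -> None:
    # Explicit transport, as in the diff: httpx then skips env-proxy settings
    transport = httpx.AsyncHTTPTransport()
    async with httpx.AsyncClient(transport=transport, timeout=10.0) as client:
        resp = await client.get(f"{BASE_URL}/models")
        resp.raise_for_status()
        for model in resp.json().get("data", []):
            print(model["id"])  # the ids offered to _prompt_choice during setup


asyncio.run(main())
```

If this prints at least one model id, the interactive flow will reach the model-selection prompt.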