From 74c1e5b1a66cf3cf754d794f4818cad81e7d1f1b Mon Sep 17 00:00:00 2001
From: Dani <gonnotda@uoc.edu>
Date: Tue, 2 Jun 2026 09:17:18 +0200
Subject: [PATCH] Initial release

---
 README.md                  | 543 ++++++++++++++++++----
 config.example.json        |  35 +-
 gemini_web2api.py          | 893 ++++++++++++++++---------------------
 gemini_web2api/__main__.py |  35 +-
 gemini_web2api/config.py   |  18 +-
 gemini_web2api/gemini.py   | 117 ++---
 gemini_web2api/server.py   |  44 +-
 7 files changed, 971 insertions(+), 714 deletions(-)

diff --git a/README.md b/README.md
index 8f5a21e..8ba87e6 100644
--- a/README.md
+++ b/README.md
@@ -6,28 +6,144 @@
 
 [中文文档](README_CN.md)
 
-Convert Google Gemini's web interface into an OpenAI-compatible API. Zero authentication, zero cost, cross-platform.
+Convert Google Gemini's web interface into an OpenAI-compatible API. Multi-account load balancing, mandatory Bearer token authentication, cross-platform.
 
 ## Features
 
-- **Optional API Keys**: no auth when `api_keys` is empty, OpenAI-style Bearer auth when configured
+- **Multi-account**: Distribute requests across multiple Google accounts with automatic round-robin load balancing
+- **Mandatory Auth**: Every request requires a Bearer token — the server refuses to start without configured keys
 - **OpenAI Compatible**: Drop-in replacement for `/v1/chat/completions` and `/v1/models`
 - **Tool Calling**: Full function calling support (OpenAI format)
 - **Multiple Models**: Flash, Flash Thinking (20k+ char output), Pro, Auto, Lite
 - **Thinking Depth**: Adjustable via `@think=N` suffix (0=deepest, 4=shallowest)
 - **Web Search**: Built-in internet access (Gemini's native search)
-- **Cross-Platform**: Pure Python, no dependencies beyond stdlib
-- **Streaming**: SSE streaming support
+- **Streaming**: SSE streaming support (true incremental streaming when `httpx` is installed)
 - **Codex CLI**: Responses API (`/v1/responses`) for OpenAI Codex integration
 - **Gemini CLI**: Google native API (`/v1beta/models`) for Gemini CLI compatibility
+- **Localhost-only by default**: Binds to `127.0.0.1` out of the box for safety
 
-## Quick Start
+---
+
+## Quick Start (local)
+
+### 1. Install dependencies
+
+```bash
+pip install httpx
+```
+
+### 2. Generate a secret token
 
 ```bash
-python gemini_web2api.py
+python -c "import secrets; print(secrets.token_urlsafe(32))"
+# Example output: Xk3mP9qR2vLn8wJdFtYuCbHsAeZoGiNx4KlMpQrWvUy
 ```
 
-Server starts at `http://localhost:8081/v1`.
+### 3. Create `config.json`
+
+```json
+{
+  "port": 8081,
+  "host": "127.0.0.1",
+  "api_keys": ["YOUR_TOKEN_HERE"],
+  "accounts": [
+    {
+      "name": "account1",
+      "cookie_file": "cookies/account1.json",
+      "auth_user": null
+    }
+  ],
+  "default_model": "gemini-3.5-flash",
+  "log_requests": true
+}
+```
+
+### 4. Run
+
+```bash
+python gemini_web2api.py --config config.json
+# or
+python -m gemini_web2api --config config.json
+```
+
+The server will print the number of accounts loaded and API keys configured, then listen at `http://127.0.0.1:8081`.
+
+---
+
+## Authentication
+
+Every request **must** include one of the configured API keys, sent either as a Bearer token or in the `x-api-key` header. The server returns `401` without one.
+
+```bash
+# Header (preferred)
+Authorization: Bearer YOUR_TOKEN_HERE
+
+# Alternative header
+x-api-key: YOUR_TOKEN_HERE
+```
+
+The server **refuses to start** if `api_keys` is empty or missing. You can configure multiple keys (one per user/application):
+
+```json
+"api_keys": [
+  "token-for-cherry-studio",
+  "token-for-my-scripts",
+  "token-for-colleague"
+]
+```
+
+---
+
+## Multi-account Setup
+
+Add all your Google accounts to the `accounts` list. Requests are distributed in round-robin order.
+
+```json
+{
+  "api_keys": ["YOUR_TOKEN"],
+  "accounts": [
+    {
+      "name": "personal",
+      "cookie_file": "cookies/personal.json",
+      "auth_user": null,
+      "proxy": null
+    },
+    {
+      "name": "work",
+      "cookie_file": "cookies/work.json",
+      "auth_user": 1,
+      "proxy": null
+    },
+    {
+      "name": "via-proxy",
+      "cookie_file": "cookies/account3.json",
+      "auth_user": null,
+      "proxy": "http://127.0.0.1:7890"
+    }
+  ]
+}
+```
+
+Each account has its own `cookie_file`, `auth_user` index, and optional `proxy`. If you only have one account, you can still use the legacy flat fields (`cookie_file`, `auth_user`, `proxy`) at the top level.
+
+### How to obtain cookies
+
+1. Open Chrome and go to [gemini.google.com](https://gemini.google.com). Sign in with a Google account.
+2. Open DevTools (`F12`) → **Network** tab → click any request to `gemini.google.com`.
+3. In the **Headers** panel, find the `Cookie` request header and copy its full value.
+4. Find the `SAPISID` cookie inside that string and note its value — it goes into the `sapisid` field of the cookie file.
+5. Create a file like `cookies/account1.json`:
+
+```json
+{
+  "cookie": "SID=xxx; HSID=xxx; SSID=xxx; APISID=xxx; SAPISID=xxx; __Secure-1PSID=xxx",
+  "sapisid": "XXXXXXXX/YYYYYYYYYYYYYYYY"
+}
+```
+
+> **`auth_user`**: If the signed-in Gemini URL contains `/u/1/`, `/u/2/`, etc., set `auth_user` to that number. For the primary account it is `null`.
+
+---
 
 ## Client Configuration
 
@@ -35,16 +151,16 @@ Server starts at `http://localhost:8081/v1`.
 
 | Field | Value |
 |-------|-------|
-| Base URL | `http://localhost:8081/v1` |
-| API Key | any `api_keys` value from `config.json`; anything if not configured |
+| Base URL | `http://127.0.0.1:8081/v1` |
+| API Key | One of the tokens in `api_keys` |
 | Model | `gemini-3.5-flash-thinking` |
 
 ### curl
 
 ```bash
-curl http://localhost:8081/v1/chat/completions \
+curl http://127.0.0.1:8081/v1/chat/completions \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer sk-your-key" \
+  -H "Authorization: Bearer YOUR_TOKEN_HERE" \
   -d '{"model":"gemini-3.5-flash","messages":[{"role":"user","content":"Hello!"}]}'
 ```
 
@@ -52,7 +168,7 @@ curl http://localhost:8081/v1/chat/completions \
 
 ```python
 from openai import OpenAI
-client = OpenAI(base_url="http://localhost:8081/v1", api_key="sk-your-key")
+client = OpenAI(base_url="http://127.0.0.1:8081/v1", api_key="YOUR_TOKEN_HERE")
 resp = client.chat.completions.create(
     model="gemini-3.5-flash-thinking",
     messages=[{"role": "user", "content": "Explain quantum computing"}]
@@ -63,15 +179,12 @@ print(resp.choices[0].message.content)
 ### Gemini CLI
 
 ```bash
-export GEMINI_API_KEY=none
-export GOOGLE_GEMINI_BASE_URL=http://localhost:8081
+export GEMINI_API_KEY=YOUR_TOKEN_HERE
+export GOOGLE_GEMINI_BASE_URL=http://127.0.0.1:8081
 gemini
 ```
 
-Supports Google native API endpoints:
-- `GET /v1beta/models` — list models
-- `POST /v1beta/models/{model}:generateContent` — non-streaming
-- `POST /v1beta/models/{model}:streamGenerateContent` — streaming (SSE)
+---
 
 ## Available Models
 
@@ -81,6 +194,7 @@ Supports Google native API endpoints:
 | `gemini-3.5-flash-thinking` | Deep thinking, longest output | **~20k chars** |
 | `gemini-3.5-flash-thinking-lite` | Adaptive thinking depth | ~15k chars |
 | `gemini-3.1-pro` | Pro (needs cookie for real routing) | ~12k chars |
+| `gemini-3.1-pro-enhanced` | Pro with enhanced output | ~12k chars |
 | `gemini-auto` | Auto model selection | varies |
 | `gemini-flash-lite` | Lightweight fast | ~10k chars |
 
@@ -91,127 +205,371 @@ Append `@think=N` to any model name:
 ```
 gemini-3.5-flash-thinking@think=0   # deepest (default)
 gemini-3.5-flash-thinking@think=2   # medium
-gemini-3.5-flash-thinking@think=4   # shallowest
+gemini-3.5-flash-thinking@think=4   # shallowest / fastest
 ```
 
-## Optional: Cookie for Pro
+---
+
+## Proxy
+
+If you cannot reach `gemini.google.com` directly, configure a proxy per-account or globally:
 
-Anonymous access works for all models, but `gemini-3.1-pro` routes to Flash without authentication. To get real Pro routing, provide a cookie file:
+```json
+{
+  "proxy": "http://127.0.0.1:7890",
+  "accounts": [
+    { "name": "account1", "cookie_file": "cookies/a1.json", "proxy": "http://127.0.0.1:7891" }
+  ]
+}
+```
+
+Account-level proxy takes precedence over the global one. Also works via environment variable:
 
 ```bash
-python gemini_web2api.py --cookie-file cookie.txt
+export HTTPS_PROXY=http://127.0.0.1:7890
 ```
 
-### How to get cookies
+---
+
+## Linux Server Deployment
+
+This section covers deploying gemini-web2api on a Linux server (Ubuntu/Debian) with automatic startup via **systemd**.
 
-1. Open Chrome, go to [gemini.google.com](https://gemini.google.com) and sign in with any free Google account
-2. Open DevTools (F12) → Application → Cookies → `https://gemini.google.com`
-3. Copy these cookie values: `SID`, `HSID`, `SSID`, `APISID`, `SAPISID`, `__Secure-1PSID`
-4. Create `cookie.txt` in this format:
+### Step 1 — Connect to your server and install Python
 
+```bash
+ssh user@your-server-ip
+sudo apt update && sudo apt install -y python3 python3-pip git
+pip3 install httpx
 ```
-SID=your_sid_value; HSID=your_hsid_value; SSID=your_ssid_value; APISID=your_apisid_value; SAPISID=your_sapisid_value; __Secure-1PSID=your_1psid_value
+
+### Step 2 — Create a dedicated user (recommended)
+
+Running the service as a non-root user limits the damage if something goes wrong.
+
+```bash
+sudo useradd -r -m -d /opt/gemini-web2api -s /bin/bash gemini
 ```
 
-Or use the JSON format:
-```json
-{"cookie": "SID=xxx; HSID=xxx; SSID=xxx; APISID=xxx; SAPISID=xxx; __Secure-1PSID=xxx", "sapisid": "your_sapisid_value"}
+### Step 3 — Upload the project files
+
+**Option A — Copy from your local machine:**
+
+```bash
+# Run this on your LOCAL machine
+scp gemini-web2api-multicuenta.zip user@your-server-ip:/tmp/
 ```
 
-**Alternative (browser extension)**: Use any "Export Cookies" extension to export cookies for `gemini.google.com` in Netscape format, then convert to the single-line format above.
+Then on the server:
 
-### Authenticated account path and XSRF token
+```bash
+sudo -u gemini bash -c "
+  cd /opt/gemini-web2api
+  unzip /tmp/gemini-web2api-multicuenta.zip
+  mv gemini-web2api-main/* .
+  rmdir gemini-web2api-main
+"
+```
 
-If the signed-in Gemini page URL contains an account index, such as:
+**Option B — Clone from GitHub (if you push the repo):**
 
+```bash
+sudo -u gemini git clone https://github.com/YOUR_USER/gemini-web2api.git /opt/gemini-web2api
 ```
-https://gemini.google.com/u/1/app/...
+
+### Step 4 — Create the cookies directory and cookie files
+
+```bash
+sudo -u gemini mkdir -p /opt/gemini-web2api/cookies
 ```
 
-set `auth_user` to that index. Authenticated web requests may also require the page XSRF token. In the rendered Gemini page source, this token is exposed as `SNlM0e`; pass it as `xsrf_token` in `config.json`. The server sends it as the `at` form field.
+Create each account's cookie file. Replace the placeholder values with the real cookies you copied from the browser:
+
+```bash
+sudo -u gemini nano /opt/gemini-web2api/cookies/account1.json
+```
 
-Example:
+Paste and save:
 
 ```json
 {
-  "cookie_file": "/app/cookie.txt",
-  "auth_user": "1",
-  "xsrf_token": "AOOh0P...",
-  "gemini_bl": "boq_assistant-bard-web-server_YYYYMMDD.xx_p0"
+  "cookie": "SID=xxx; HSID=xxx; SSID=xxx; APISID=xxx; SAPISID=xxx; __Secure-1PSID=xxx",
+  "sapisid": "XXXXXXXX/YYYYYYYYYYYYYYYY"
 }
 ```
 
-If authenticated requests return HTTP 400 with an `xsrf` error, refresh Gemini Web, update `xsrf_token`, and make sure `auth_user` matches the `/u/<index>/` part of the browser URL.
+Restrict permissions so only the `gemini` user can read them:
 
-No paid subscription needed — a free Google account is sufficient.
+```bash
+sudo chmod 600 /opt/gemini-web2api/cookies/*.json
+sudo chown gemini:gemini /opt/gemini-web2api/cookies/*.json
+```
+
+### Step 5 — Generate a secure token and create `config.json`
+
+```bash
+python3 -c "import secrets; print(secrets.token_urlsafe(32))"
+```
 
-## Configuration
+Copy the output, then create the config:
 
-Create `config.json` in the same directory:
+```bash
+sudo -u gemini nano /opt/gemini-web2api/config.json
+```
 
 ```json
 {
   "port": 8081,
-  "host": "0.0.0.0",
+  "host": "127.0.0.1",
+  "api_keys": ["PASTE_YOUR_TOKEN_HERE"],
+  "accounts": [
+    {
+      "name": "account1",
+      "cookie_file": "/opt/gemini-web2api/cookies/account1.json",
+      "auth_user": null,
+      "proxy": null
+    }
+  ],
+  "default_model": "gemini-3.5-flash",
   "retry_attempts": 3,
   "retry_delay_sec": 2,
   "request_timeout_sec": 180,
-  "gemini_bl": "boq_assistant-bard-web-server_20260525.09_p0",
-  "auth_user": null,
-  "xsrf_token": null,
-  "api_keys": ["sk-your-key"],
-  "cookie_file": null,
-  "proxy": null,
   "log_requests": true
 }
 ```
 
-When `api_keys` is `[]`, authentication is disabled. When one or more keys are set, `/v1/*` endpoints require `Authorization: Bearer <key>` or `x-api-key: <key>`.
+```bash
+sudo chmod 640 /opt/gemini-web2api/config.json
+sudo chown gemini:gemini /opt/gemini-web2api/config.json
+```
+
+### Step 6 — Test the server manually
 
-## Docker
+Before creating the service, confirm it starts correctly:
 
 ```bash
-cp config.example.json config.json
-docker build -t gemini-web2api .
-docker run -d --name gemini-web2api -p 8081:8081 -v ./config.json:/app/config.json gemini-web2api
+sudo -u gemini python3 /opt/gemini-web2api/gemini_web2api.py --config /opt/gemini-web2api/config.json
+```
+
+Expected output:
+
+```
+gemini-web2api v1.2.0
+  Listening: http://127.0.0.1:8081
+  Accounts:  1 (account1)
+  API keys:  1 configured
 ```
 
-Or use Docker Compose:
+Press `Ctrl+C` to stop, then continue to the next step.
+
+### Step 7 — Create a systemd service
 
 ```bash
-cp config.example.json config.json
-docker compose up -d
+sudo nano /etc/systemd/system/gemini-web2api.service
 ```
 
-To mount a cookie file:
+Paste the following:
+
+```ini
+[Unit]
+Description=gemini-web2api — Gemini Web to OpenAI API proxy
+Documentation=https://github.com/YOUR_USER/gemini-web2api
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=simple
+User=gemini
+Group=gemini
+WorkingDirectory=/opt/gemini-web2api
+ExecStart=/usr/bin/python3 /opt/gemini-web2api/gemini_web2api.py --config /opt/gemini-web2api/config.json
+Restart=on-failure
+RestartSec=10
+StandardOutput=journal
+StandardError=journal
+SyslogIdentifier=gemini-web2api
+
+# Security hardening
+NoNewPrivileges=true
+PrivateTmp=true
+ProtectSystem=strict
+ReadWritePaths=/opt/gemini-web2api
+
+[Install]
+WantedBy=multi-user.target
+```
+
+### Step 8 — Enable and start the service
 
 ```bash
-docker run -d --name gemini-web2api -p 8081:8081 -v ./config.json:/app/config.json -v ./cookie.txt:/app/cookie.txt gemini-web2api
+# Reload systemd so it picks up the new file
+sudo systemctl daemon-reload
+
+# Enable the service to start automatically on boot
+sudo systemctl enable gemini-web2api
+
+# Start it now
+sudo systemctl start gemini-web2api
+
+# Verify it is running
+sudo systemctl status gemini-web2api
 ```
 
-Set `"cookie_file": "/app/cookie.txt"` in `config.json`.
+You should see `Active: active (running)`.
 
-## Proxy
+### Step 9 — Check the logs
+
+```bash
+# Follow live logs
+sudo journalctl -u gemini-web2api -f
 
-If you cannot access `gemini.google.com` directly (connection timeout), configure a proxy:
+# Last 50 lines
+sudo journalctl -u gemini-web2api -n 50
+
+# Logs since last boot
+sudo journalctl -u gemini-web2api -b
+```
+
+### Step 10 — Test the API from the server
 
-**Method 1: CLI argument**
 ```bash
-python gemini_web2api.py --proxy http://127.0.0.1:7890
+curl http://127.0.0.1:8081/v1/models \
+  -H "Authorization: Bearer YOUR_TOKEN_HERE"
 ```
 
-**Method 2: config.json**
-```json
-{"proxy": "http://127.0.0.1:7890"}
+You should receive a JSON list of available models.
+
+---
+
+## Exposing the API over HTTPS (optional but recommended)
+
+If you want to use the API from outside the server, **do not expose port 8081 directly**. Instead, put it behind a reverse proxy with TLS. Below is a minimal nginx + Certbot setup.
+
+### Install nginx and Certbot
+
+```bash
+sudo apt install -y nginx certbot python3-certbot-nginx
 ```
 
-**Method 3: Environment variable** (auto-detected)
+### Create the nginx site config
+
 ```bash
-export HTTPS_PROXY=http://127.0.0.1:7890
-python gemini_web2api.py
+sudo nano /etc/nginx/sites-available/gemini-api
+```
+
+```nginx
+server {
+    listen 80;
+    server_name api.yourdomain.com;
+
+    location / {
+        proxy_pass http://127.0.0.1:8081;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+
+        # Required for SSE streaming
+        proxy_buffering off;
+        proxy_cache off;
+        proxy_read_timeout 300s;
+        chunked_transfer_encoding on;
+    }
+}
 ```
 
-Works with Clash, V2Ray, Shadowsocks, or any HTTP proxy.
+```bash
+sudo ln -s /etc/nginx/sites-available/gemini-api /etc/nginx/sites-enabled/
+sudo nginx -t && sudo systemctl reload nginx
+```
+
+### Get a free TLS certificate
+
+```bash
+sudo certbot --nginx -d api.yourdomain.com
+```
+
+Certbot will automatically edit your nginx config to enable HTTPS and set up automatic certificate renewal. After this, your API is accessible at `https://api.yourdomain.com/v1`.
+
+In your client, update the base URL to `https://api.yourdomain.com/v1`.
+
+---
+
+## Docker
+
+```bash
+cp config.example.json config.json
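+# Edit config.json with your tokens and cookie paths; set "host" to "0.0.0.0" so the server is reachable through the published port (it is still exposed only on the Docker host's 127.0.0.1)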
+docker build -t gemini-web2api .
+docker run -d \
+  --name gemini-web2api \
+  -p 127.0.0.1:8081:8081 \
+  -v ./config.json:/app/config.json \
+  -v ./cookies:/app/cookies \
+  gemini-web2api
+```
+
+Or with Docker Compose:
+
+```bash
+docker compose up -d
+```
+
+---
+
+## Service Management (quick reference)
+
+```bash
+# Start / stop / restart
+sudo systemctl start gemini-web2api
+sudo systemctl stop gemini-web2api
+sudo systemctl restart gemini-web2api
+
+# Enable / disable autostart on boot
+sudo systemctl enable gemini-web2api
+sudo systemctl disable gemini-web2api
+
+# Current status
+sudo systemctl status gemini-web2api
+
+# Live logs
+sudo journalctl -u gemini-web2api -f
+
+# Update cookies without full restart
+# (just edit the cookie file — it is reloaded on next request automatically)
+
+# Apply a config.json change
+sudo systemctl restart gemini-web2api
+```
+
+---
+
+## Configuration Reference
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `port` | `8081` | Port to listen on |
+| `host` | `127.0.0.1` | Bind address. Use `0.0.0.0` only behind a reverse proxy |
+| `api_keys` | `[]` | **Required.** List of accepted Bearer tokens. Server won't start if empty |
+| `accounts` | `[]` | List of Google account objects (see below). Falls back to legacy flat fields |
+| `default_model` | `gemini-3.5-flash` | Model used when none is specified |
+| `retry_attempts` | `3` | Number of retries on upstream error |
+| `retry_delay_sec` | `2` | Seconds between retries |
+| `request_timeout_sec` | `180` | HTTP timeout for Gemini upstream requests |
+| `gemini_bl` | *(built-in)* | Gemini build label. Update if requests start failing |
+| `log_requests` | `true` | Log requests and errors to stderr |
+| `proxy` | `null` | Global HTTP proxy. Overridden per-account |
+
+### Account object fields
+
+| Key | Description |
+|-----|-------------|
+| `name` | Label shown in logs |
+| `cookie_file` | Path to the JSON cookie file for this account |
+| `auth_user` | Account index from the Gemini URL (`/u/N/`). `null` for primary account |
+| `proxy` | Per-account proxy, overrides global `proxy` |
+| `xsrf_token` | XSRF token (`SNlM0e` field in page source). Required if requests return HTTP 400 |
+
+---
 
 ## Tool Calling
 
@@ -224,34 +582,43 @@ resp = client.chat.completions.create(
         "function": {
             "name": "get_weather",
             "description": "Get weather for a city",
-            "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}
+            "parameters": {
+                "type": "object",
+                "properties": {"city": {"type": "string"}},
+                "required": ["city"]
+            }
         }
     }]
 )
 ```
 
+---
+
 ## Limitations
 
-- **No image/multimodal input**: Gemini's image upload requires a proprietary streaming RPC protocol (WIZ/ProcessFile) that cannot be replicated in a standard HTTP proxy. Image inputs in messages will be ignored with a note.
-- **Not real Pro/Ultra**: Without a paid subscription cookie, `gemini-3.1-pro` routes to the same Flash model. The "Pro" label is a UI preference, not a backend model switch.
-- **Single-turn only**: Each request is an independent conversation. Multi-turn context is simulated by including previous messages in the prompt.
-- **Rate limits**: Google may throttle high-frequency requests. The server retries automatically but sustained heavy use may be blocked.
+- **No image/multimodal input**: Image inputs in messages will be ignored with a note.
+- **Not real Pro/Ultra**: Without a paid subscription cookie, `gemini-3.1-pro` routes to Flash.
+- **Single-turn only**: Multi-turn context is simulated by including previous messages in the prompt.
+- **Rate limits**: Google may throttle high-frequency requests. The server retries automatically.
+- **Cookie expiry**: Google session cookies expire periodically (typically every few weeks). Update them by repeating the export steps and overwriting the cookie file; the file is reloaded automatically on the next request, so no restart is needed.
+
+---
 
 ## Requirements
 
 - Python 3.8+
-- No external dependencies (stdlib only)
-- Network access to `gemini.google.com` (proxy/VPN may be needed in some regions)
+- `httpx` (optional but recommended for true streaming; install with `pip install httpx`)
+- Network access to `gemini.google.com`
+
+---
 
 ## How It Works
 
-This tool reverse-engineers Google Gemini's web StreamGenerate protocol. It sends requests to the same endpoint that the Gemini web app uses, converting between OpenAI's API format and Gemini's internal protobuf-like format.
+This tool reverse-engineers Google Gemini's web StreamGenerate protocol. It converts between OpenAI's API format and Gemini's internal protobuf-like format, sending requests to the same endpoint the Gemini web app uses.
 
-The model selection is controlled by field `[79]` in the request payload, mapped from Gemini's frontend JavaScript source (`MODE_CATEGORY` enum).
+Model selection is controlled by field `[79]` in the request payload, mapped from Gemini's frontend JavaScript source (`MODE_CATEGORY` enum). Multi-account load balancing distributes requests in round-robin order across all configured cookie sessions.
 
-## Acknowledgments
-
-- Inspired by the open-source API proxy ecosystem
+---
 
 ## License
 
@@ -259,9 +626,9 @@ MIT
 
 ---
 
-## 致谢
+## Acknowledgments
 
-本项目的开发 agent 能力由 [GenericAgent](https://github.com/lsdefine/GenericAgent) 提供。
+- Inspired by the open-source API proxy ecosystem
 
 ### 🚩 友情链接
 
diff --git a/config.example.json b/config.example.json
index 93df330..04a8f15 100644
--- a/config.example.json
+++ b/config.example.json
@@ -1,17 +1,38 @@
 {
   "port": 8081,
-  "host": "0.0.0.0",
+  "host": "127.0.0.1",
   "retry_attempts": 3,
   "retry_delay_sec": 2,
   "request_timeout_sec": 180,
   "gemini_bl": "boq_assistant-bard-web-server_20260525.09_p0",
-  "auth_user": null,
-  "xsrf_token": null,
   "default_model": "gemini-3.5-flash",
+  "log_requests": true,
+
+  "_comment_api_keys": "Required. Generate with: python -c \"import secrets; print(secrets.token_urlsafe(32))\"",
   "api_keys": [
-    "sk-gemini"
+    "REPLACE_ME_WITH_A_SECURE_TOKEN"
   ],
-  "cookie_file": null,
-  "proxy": null,
-  "log_requests": true
+
+  "_comment_accounts": "Multi-account list. Each entry is one Google account.",
+  "accounts": [
+    {
+      "name": "account1",
+      "cookie_file": "cookies/account1.json",
+      "auth_user": null,
+      "proxy": null,
+      "xsrf_token": null
+    },
+    {
+      "name": "account2",
+      "cookie_file": "cookies/account2.json",
+      "auth_user": 1,
+      "proxy": null,
+      "xsrf_token": null
+    }
+  ],
+
+  "_comment_single_account": "If you only have one account, remove 'accounts' and use these legacy fields instead:",
+  "_legacy_cookie_file": "cookies/my_account.json",
+  "_legacy_auth_user": null,
+  "_legacy_proxy": null
 }
diff --git a/gemini_web2api.py b/gemini_web2api.py
index 7b1903d..4c3f14d 100644
--- a/gemini_web2api.py
+++ b/gemini_web2api.py
@@ -1,24 +1,19 @@
 #!/usr/bin/env python3
 """
-gemini-web2api - Gemini Web to OpenAI API proxy.
-
-Converts Google Gemini's web interface into an OpenAI-compatible API server.
-Zero authentication required. Works on any platform (Windows/macOS/Linux).
+gemini-web2api - Gemini Web to OpenAI API proxy (multi-account + auth).
 
 Usage:
     pip install httpx
     python gemini_web2api.py [--port 8081] [--config config.json]
 
-Client configuration (Cherry Studio, ChatBox, etc.):
-    Base URL: http://localhost:8081/v1
-    API Key: (anything or empty)
+Authentication (REQUIRED):
+    Every request must include a Bearer token:
+        Authorization: Bearer <your-token>
+    Configure tokens in config.json under "api_keys".
 
-How it works:
-    Sends requests directly to Gemini's public StreamGenerate endpoint.
-    The backend does not verify authentication for basic text generation.
-    Model selection via MODE_CATEGORY field [79] in the request payload.
-    This is NOT a user-tier spoofing attack - the endpoint simply doesn't
-    require auth for anonymous access.
+Multi-account:
+    Add an "accounts" list in config.json. Requests are distributed
+    round-robin across all configured accounts.
 """
 import json
 import urllib.request
@@ -32,6 +27,7 @@
 import hashlib
 import argparse
 import base64
+import threading
 from http.server import HTTPServer, BaseHTTPRequestHandler
 from socketserver import ThreadingMixIn
 
@@ -41,114 +37,157 @@
 except ImportError:
     HAS_HTTPX = False
 
-__version__ = "1.1.0"
+__version__ = "1.2.0"
 
-# ─── Configuration ───────────────────────────────────────────────────────────
+# ─── Configuration ────────────────────────────────────────────────────────────
 
 DEFAULT_CONFIG = {
     "port": 8081,
-    "host": "0.0.0.0",
+    "host": "127.0.0.1",           # localhost-only by default
     "retry_attempts": 3,
     "retry_delay_sec": 2,
     "request_timeout_sec": 180,
     "gemini_bl": "boq_assistant-bard-web-server_20260525.09_p0",
-    "auth_user": None,
-    "xsrf_token": None,
     "default_model": "gemini-3.5-flash",
     "log_requests": True,
+    # ── Security ──────────────────────────────────────────────────────────────
+    # Required. Generate: python -c "import secrets; print(secrets.token_urlsafe(32))"
+    "api_keys": [],
+    # ── Legacy single-account (still supported) ───────────────────────────────
     "cookie_file": None,
+    "auth_user": None,
     "proxy": None,
+    "xsrf_token": None,
+    # ── Multi-account list (takes precedence over legacy fields) ──────────────
+    "accounts": [],
 }
 
 CONFIG = dict(DEFAULT_CONFIG)
 
-# ─── Models ──────────────────────────────────────────────────────────────────
-# Mapping from JS source: MODE_CATEGORY enum (028-6eb337387583.js)
-#   1=FAST, 2=THINKING, 3=PRO, 4=AUTO, 5=FAST_DYNAMIC_THINKING, 6=FLASH_LITE
+# ─── Models ───────────────────────────────────────────────────────────────────
 
 MODELS = {
-    "gemini-3.5-flash": {
-        "mode": 1, "think": 4,
-        "desc": "Fast general-purpose model",
-    },
-    "gemini-3.5-flash-thinking": {
-        "mode": 2, "think": 0,
-        "desc": "Deep thinking mode, longest output (~20k chars)",
-    },
-    "gemini-3.1-pro": {
-        "mode": 3, "think": 4,
-        "desc": "Pro model (requires cookie for real routing)",
-    },
-    "gemini-auto": {
-        "mode": 4, "think": 4,
-        "desc": "Auto model selection",
-    },
-    "gemini-3.5-flash-thinking-lite": {
-        "mode": 5, "think": 0,
-        "desc": "Dynamic thinking with adaptive depth",
-    },
-    "gemini-flash-lite": {
-        "mode": 6, "think": 4,
-        "desc": "Lightweight fast model",
-    },
+    "gemini-3.5-flash":              {"mode": 1, "think": 4, "desc": "Fast general-purpose model"},
+    "gemini-3.5-flash-thinking":     {"mode": 2, "think": 0, "desc": "Deep thinking mode"},
+    "gemini-3.1-pro":                {"mode": 3, "think": 4, "desc": "Pro model"},
+    "gemini-3.1-pro-enhanced":       {"mode": 3, "think": 4, "extra": {31: 2, 80: 3}, "desc": "Pro enhanced"},
+    "gemini-auto":                   {"mode": 4, "think": 4, "desc": "Auto model selection"},
+    "gemini-3.5-flash-thinking-lite":{"mode": 5, "think": 0, "desc": "Dynamic thinking"},
+    "gemini-flash-lite":             {"mode": 6, "think": 4, "desc": "Lightweight fast model"},
 }
 
-# ─── Utilities ───────────────────────────────────────────────────────────────
+# ─── Logging ──────────────────────────────────────────────────────────────────
 
 def log(msg: str):
     if CONFIG["log_requests"]:
         sys.stderr.write(f"[{time.strftime('%H:%M:%S')}] {msg}\n")
         sys.stderr.flush()
 
+# ─── Multi-account pool ───────────────────────────────────────────────────────
 
-def load_cookie() -> tuple:
-    """Load cookie from file. Returns (cookie_str, sapisid)."""
-    cookie_file = CONFIG.get("cookie_file")
-    if not cookie_file:
-        return "", None
-    if not os.path.exists(cookie_file):
-        return "", None
-    try:
-        with open(cookie_file, "r") as f:
-            content = f.read().strip()
-        if content.startswith("{"):
-            data = json.loads(content)
-            cookie_str = data.get("cookie", "")
-            sapisid = data.get("sapisid", "")
+class Account:
+    def __init__(self, cfg: dict, index: int):
+        self.name        = cfg.get("name") or f"account-{index}"
+        self.cookie_file = cfg.get("cookie_file", "")
+        self.auth_user   = cfg.get("auth_user", None)
+        self.proxy       = cfg.get("proxy", None)
+        self.xsrf_token  = cfg.get("xsrf_token", None)
+        self._cache      = {"str": "", "sapisid": None, "mtime": 0}
+
+    def load_cookie(self):
+        if not self.cookie_file or not os.path.exists(self.cookie_file):
+            return "", None
+        try:
+            mtime = os.path.getmtime(self.cookie_file)
+            if mtime == self._cache["mtime"] and self._cache["str"]:
+                return self._cache["str"], self._cache["sapisid"]
+            with open(self.cookie_file) as f:
+                content = f.read().strip()
+            if content.startswith("{"):
+                data = json.loads(content)
+                cookie_str = data.get("cookie", "")
+                sapisid = data.get("sapisid", "")
+            else:
+                cookie_str = content
+                pairs = dict(p.split("=", 1) for p in cookie_str.split("; ") if "=" in p)
+                sapisid = pairs.get("SAPISID", "")
+            self._cache.update({"str": cookie_str, "sapisid": sapisid or None, "mtime": mtime})
+            return cookie_str, sapisid if sapisid else None
+        except Exception as e:
+            log(f"Cookie load error ({self.name}): {e}")
+            return self._cache["str"], self._cache["sapisid"]
+
+    def prefix(self):
+        return f"/u/{self.auth_user}" if self.auth_user not in (None, "") else ""
+
+
+class AccountPool:
+    def __init__(self):
+        self._accounts = []
+        self._index = 0
+        self._lock = threading.Lock()
+
+    def load(self, config: dict):
+        accounts_cfg = config.get("accounts")
+        if accounts_cfg:
+            self._accounts = [Account(a, i) for i, a in enumerate(accounts_cfg)]
         else:
-            cookie_str = content
-            pairs = dict(p.split("=", 1) for p in cookie_str.split("; ") if "=" in p)
-            sapisid = pairs.get("SAPISID", "")
-        return cookie_str, sapisid if sapisid else None
-    except Exception as e:
-        log(f"Cookie load error: {e}")
-        return "", None
+            self._accounts = [Account({
+                "name": "default",
+                "cookie_file": config.get("cookie_file", ""),
+                "auth_user":   config.get("auth_user", None),
+                "proxy":       config.get("proxy", None),
+                "xsrf_token":  config.get("xsrf_token", None),
+            }, 0)]
+
+    def next(self) -> Account:
+        with self._lock:
+            if not self._accounts:
+                return Account({}, 0)
+            acc = self._accounts[self._index % len(self._accounts)]
+            self._index += 1
+            return acc
+
+    def count(self): return len(self._accounts)
+    def names(self):  return [a.name for a in self._accounts]
+
+
+POOL = AccountPool()
 
+# ─── Gemini protocol ──────────────────────────────────────────────────────────
 
-def make_sapisidhash(sapisid: str) -> str:
+def _make_sapisidhash(sapisid: str) -> str:
     ts = int(time.time())
     h = hashlib.sha1(f"{ts} {sapisid} https://gemini.google.com".encode()).hexdigest()
     return f"SAPISIDHASH {ts}_{h}"
 
 
-def account_prefix() -> str:
-    """Return the Gemini account path prefix for non-default Google accounts."""
-    auth_user = CONFIG.get("auth_user")
-    if auth_user is None or auth_user == "":
-        return ""
-    return f"/u/{auth_user}"
-
+def _build_headers(acc: Account) -> dict:
+    prefix = acc.prefix()
+    hdrs = {
+        "Content-Type": "application/x-www-form-urlencoded",
+        "Origin": "https://gemini.google.com",
+        "Referer": f"https://gemini.google.com{prefix}/app",
+        "X-Same-Domain": "1",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
+    }
+    if prefix:
+        hdrs["X-Goog-AuthUser"] = str(acc.auth_user)
+    cookie_str, sapisid = acc.load_cookie()
+    if cookie_str:
+        hdrs["Cookie"] = cookie_str
+    if sapisid:
+        hdrs["Authorization"] = _make_sapisidhash(sapisid)
+    return hdrs
 
-# ─── Gemini Protocol ─────────────────────────────────────────────────────────
 
-def gemini_stream_generate(prompt: str, model_id: int, think_mode: int) -> str:
-    """Send prompt to Gemini StreamGenerate with retry."""
-    inner = [None] * 80
-    inner[0] = [prompt, 0, None, None, None, None, 0]
-    inner[1] = ["en"]
-    inner[2] = ["", "", "", None, None, None, None, None, None, ""]
-    inner[6] = [0]
-    inner[7] = 1
+def _build_payload(prompt: str, model_id: int, think_mode: int, acc: Account) -> str:
+    inner = [None] * 102
+    inner[0]  = [prompt, 0, None, None, None, None, 0]
+    inner[1]  = ["en"]
+    inner[2]  = ["", "", "", None, None, None, None, None, None, ""]
+    inner[6]  = [0]
+    inner[7]  = 1
     inner[10] = 1
     inner[11] = 0
     inner[17] = [[think_mode]]
@@ -161,189 +200,130 @@ def gemini_stream_generate(prompt: str, model_id: int, think_mode: int) -> str:
     inner[61] = []
     inner[68] = 1
     inner[79] = model_id
-
     outer = [None, json.dumps(inner)]
     params = {"f.req": json.dumps(outer)}
-    if CONFIG.get("xsrf_token"):
-        params["at"] = CONFIG["xsrf_token"]
-    body = urllib.parse.urlencode(params).encode()
+    xsrf = acc.xsrf_token or CONFIG.get("xsrf_token")
+    if xsrf:
+        params["at"] = xsrf
+    return urllib.parse.urlencode(params)
+
+
+def _get_url(acc: Account) -> str:  # StreamGenerate endpoint URL under this account's path prefix
     reqid = int(time.time()) % 1000000
-    prefix = account_prefix()
-    url = (
+    prefix = acc.prefix()
+    return (
         f"https://gemini.google.com{prefix}/_/BardChatUi/data/"
         "assistant.lamda.BardFrontendService/StreamGenerate"
         f"?bl={CONFIG['gemini_bl']}&hl=en&_reqid={reqid}&rt=c"
     )
-    headers = {
-        "Content-Type": "application/x-www-form-urlencoded",
-        "Origin": "https://gemini.google.com",
-        "Referer": f"https://gemini.google.com{prefix}/app",
-        "X-Same-Domain": "1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
-    }
-    if prefix:
-        headers["X-Goog-AuthUser"] = str(CONFIG["auth_user"])
 
-    cookie_str, sapisid = load_cookie()
-    if cookie_str:
-        headers["Cookie"] = cookie_str
-    if sapisid:
-        headers["Authorization"] = make_sapisidhash(sapisid)
 
+def _clean(text: str) -> str:
+    """Remove code-execution fences and card_content placeholder URLs, then trim whitespace."""
+    fence = r'```(?:python|javascript|text)\?code_(?:reference|stdout)&code_event_index=\d+\n.*?```\n?'
+    without_fences = re.sub(fence, '', text, flags=re.DOTALL)
+    without_cards = re.sub(r'http://googleusercontent\.com/card_content/\d+\n?', '', without_fences)
+    return without_cards.strip()
+
+
+def _texts_from_line(line: str) -> list:
+    """Return every non-empty text candidate carried by one raw "wrb.fr" response line.
+
+    Lines that are not a parsable wrb.fr frame yield an empty list instead of raising.
+    """
+    if '"wrb.fr"' not in line or len(line) < 200:
+        return []
+    try:
+        inner_str = json.loads(line)[0][2]
+        if not inner_str or len(inner_str) < 50:
+            return []
+        inner = json.loads(inner_str)
+        if not (isinstance(inner, list) and len(inner) > 4 and inner[4]):
+            return []
+        return [t for part in inner[4]
+                if isinstance(part, list) and len(part) > 1 and part[1] and isinstance(part[1], list)
+                for t in part[1] if isinstance(t, str) and t]
+    except (json.JSONDecodeError, IndexError, KeyError, TypeError):
+        # KeyError: the line decoded to a JSON object instead of the expected array.
+        return []
+
+
+def _extract_text(raw: str) -> str:
+    """Return the cleaned, longest text candidate found in a full raw response body.
+
+    Ties keep the earliest candidate; an empty string results when nothing matches.
+    """
+    candidates = (t for row in raw.split("\n") for t in _texts_from_line(row))
+    return _clean(max(candidates, key=len, default=""))
+
+
+def gemini_generate(prompt: str, model_id: int, think_mode: int) -> str:
+    """Send one prompt through the next pooled account and return the cleaned reply text."""
+    acc = POOL.next()
+    log(f"generate: account={acc.name}")
+    body = _build_payload(prompt, model_id, think_mode, acc).encode()
+    url, hdrs = _get_url(acc), _build_headers(acc)
+    ctx = ssl.create_default_context()
+    proxy = acc.proxy or CONFIG.get("proxy")
     last_err = None
     for attempt in range(CONFIG["retry_attempts"]):
         try:
-            req = urllib.request.Request(url, data=body, headers=headers, method="POST")
-            ctx = ssl.create_default_context()
-            proxy = CONFIG.get("proxy")
+            req = urllib.request.Request(url, data=body, headers=hdrs, method="POST")
             if proxy:
                 opener = urllib.request.build_opener(
                     urllib.request.ProxyHandler({"http": proxy, "https": proxy}),
-                    urllib.request.HTTPSHandler(context=ctx)
-                )
+                    urllib.request.HTTPSHandler(context=ctx))
                 resp = opener.open(req, timeout=CONFIG["request_timeout_sec"])
             else:
                 resp = urllib.request.urlopen(req, context=ctx, timeout=CONFIG["request_timeout_sec"])
-            return resp.read().decode("utf-8", errors="replace")
+            with resp: return _extract_text(resp.read().decode("utf-8", errors="replace"))  # always close resp
         except Exception as e:
             last_err = e
             if attempt < CONFIG["retry_attempts"] - 1:
-                log(f"Retry {attempt+1}/{CONFIG['retry_attempts']}: {e}")
+                log(f"Retry {attempt+1}/{CONFIG['retry_attempts']} (account={acc.name}): {e}")
                 time.sleep(CONFIG["retry_delay_sec"])
     raise last_err
 
 
-def gemini_stream_generate_iter(prompt: str, model_id: int, think_mode: int):
-    """Send prompt and yield incremental text deltas using httpx streaming."""
-    inner = [None] * 80
-    inner[0] = [prompt, 0, None, None, None, None, 0]
-    inner[1] = ["en"]
-    inner[2] = ["", "", "", None, None, None, None, None, None, ""]
-    inner[6] = [0]
-    inner[7] = 1
-    inner[10] = 1
-    inner[11] = 0
-    inner[17] = [[think_mode]]
-    inner[18] = 0
-    inner[27] = 1
-    inner[30] = [4]
-    inner[41] = [2]
-    inner[53] = 0
-    inner[59] = str(uuid.uuid4())
-    inner[61] = []
-    inner[68] = 1
-    inner[79] = model_id
-
-    outer = [None, json.dumps(inner)]
-    params = {"f.req": json.dumps(outer)}
-    if CONFIG.get("xsrf_token"):
-        params["at"] = CONFIG["xsrf_token"]
-    body = urllib.parse.urlencode(params)
-    reqid = int(time.time()) % 1000000
-    prefix = account_prefix()
-    url = (
-        f"https://gemini.google.com{prefix}/_/BardChatUi/data/"
-        "assistant.lamda.BardFrontendService/StreamGenerate"
-        f"?bl={CONFIG['gemini_bl']}&hl=en&_reqid={reqid}&rt=c"
-    )
-    headers = {
-        "Content-Type": "application/x-www-form-urlencoded",
-        "Origin": "https://gemini.google.com",
-        "Referer": f"https://gemini.google.com{prefix}/app",
-        "X-Same-Domain": "1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
-    }
-    if prefix:
-        headers["X-Goog-AuthUser"] = str(CONFIG["auth_user"])
-    cookie_str, sapisid = load_cookie()
-    if cookie_str:
-        headers["Cookie"] = cookie_str
-    if sapisid:
-        headers["Authorization"] = make_sapisidhash(sapisid)
-
-    proxy = CONFIG.get("proxy")
-
+def gemini_generate_stream(prompt: str, model_id: int, think_mode: int):
+    """Yield cleaned text deltas for one prompt; without httpx, yield one non-streamed reply."""
     if not HAS_HTTPX:
-        # Fallback: non-streaming with urllib
-        raw = gemini_stream_generate(prompt, model_id, think_mode)
-        text = extract_response_text(raw)
-        if text:
-            yield text
+        text = gemini_generate(prompt, model_id, think_mode)
+        if text: yield text
         return
-
-    prev_text = ""
+    acc = POOL.next()  # taken after the fallback check: gemini_generate() picks its own account
+    log(f"generate_stream: account={acc.name}")
+    body = _build_payload(prompt, model_id, think_mode, acc)
+    url, hdrs = _get_url(acc), _build_headers(acc)
+    proxy = acc.proxy or CONFIG.get("proxy")
     transport = httpx.HTTPTransport(proxy=proxy) if proxy else None
-    with httpx.Client(transport=transport, timeout=CONFIG["request_timeout_sec"], verify=True) as client:
-        with client.stream("POST", url, content=body, headers=headers) as resp:
-            buf = ""
-            for chunk in resp.iter_text():
-                buf += chunk
-                while "\n" in buf:
-                    line, buf = buf.split("\n", 1)
-                    if '"wrb.fr"' not in line or len(line) < 200:
-                        continue
-                    try:
-                        arr = json.loads(line)
-                        inner_str = arr[0][2]
-                        if not inner_str or len(inner_str) < 50:
-                            continue
-                        inner2 = json.loads(inner_str)
-                        if isinstance(inner2, list) and len(inner2) > 4 and inner2[4]:
-                            for part in inner2[4]:
-                                if isinstance(part, list) and len(part) > 1 and part[1] and isinstance(part[1], list):
-                                    for t in part[1]:
-                                        if isinstance(t, str) and len(t) > len(prev_text):
-                                            delta = t[len(prev_text):]
-                                            delta = clean_gemini_text(delta)
-                                            if delta:
-                                                yield delta
-                                            prev_text = t
-                    except (json.JSONDecodeError, IndexError, TypeError):
-                        pass
-
-
-def clean_gemini_text(text: str) -> str:
-    """Remove internal code execution artifacts."""
-    text = re.sub(
-        r'```(?:python|javascript|text)\?code_(?:reference|stdout)&code_event_index=\d+\n.*?```\n?',
-        '', text, flags=re.DOTALL
-    )
-    return text.strip()
-
-
-def extract_response_text(raw: str) -> str:
-    """Parse StreamGenerate response to extract final text."""
-    texts = []
-    for line in raw.split("\n"):
-        if '"wrb.fr"' not in line or len(line) < 200:
-            continue
+    last_err = None
+    with httpx.Client(transport=transport, timeout=CONFIG["request_timeout_sec"], verify=True) as client:
+        for attempt in range(CONFIG["retry_attempts"]):
+            prev, buf = "", ""
-        try:
-            arr = json.loads(line)
-            inner_str = arr[0][2]
-            if not inner_str or len(inner_str) < 50:
-                continue
-            inner = json.loads(inner_str)
-            if isinstance(inner, list) and len(inner) > 4 and inner[4]:
-                for part in inner[4]:
-                    if isinstance(part, list) and len(part) > 1 and part[1]:
-                        if isinstance(part[1], list):
-                            for t in part[1]:
-                                if isinstance(t, str) and len(t) > 0:
-                                    texts.append(t)
-        except (json.JSONDecodeError, IndexError, TypeError):
-            pass
-    text = ""
-    for t in reversed(texts):
-        if t.strip():
-            text = t
-            break
-    return clean_gemini_text(text)
-
+            try:
+                with client.stream("POST", url, content=body, headers=hdrs) as resp:
+                    for chunk in resp.iter_text():
+                        buf += chunk
+                        while "\n" in buf:
+                            line, buf = buf.split("\n", 1)
+                            for t in _texts_from_line(line):
+                                if len(t) > len(prev):
+                                    delta = _clean(t[len(prev):])  # NOTE(review): _clean() strips, so edge whitespace of a delta is lost
+                                    if delta: yield delta
+                                    prev = t
+                return
+            except Exception as e:
+                last_err = e
+                # Retrying after output was yielded would replay it to the consumer.
+                if prev or attempt >= CONFIG["retry_attempts"] - 1: break
+                log(f"Stream retry {attempt+1} (account={acc.name}): {e}")
+                time.sleep(CONFIG["retry_delay_sec"])
+    raise last_err
 
-# ─── OpenAI Format Helpers ───────────────────────────────────────────────────
+# ─── Prompt / tool helpers ────────────────────────────────────────────────────
 
 def messages_to_prompt(messages: list, tools: list = None) -> str:
-    """Convert OpenAI messages to prompt string."""
     parts = []
     if tools:
         tool_defs = []
@@ -359,17 +339,13 @@ def messages_to_prompt(messages: list, tools: list = None) -> str:
                 "[System instruction]: You have access to tools. "
                 "To call a tool, respond with:\n"
                 '```tool_call\n{"name": "func_name", "arguments": {...}}\n```\n'
-                "Only use tool_call blocks when needed.\n\n"
-                f"Available tools:\n{json.dumps(tool_defs, indent=2)}"
+                f"Only use tool_call blocks when needed.\n\nAvailable tools:\n{json.dumps(tool_defs, indent=2)}"
             )
     for msg in messages:
-        role = msg.get("role", "user")
+        role    = msg.get("role", "user")
         content = msg.get("content", "")
         if isinstance(content, list):
-            content = " ".join(
-                c.get("text", "") for c in content
-                if c.get("type") in ("text", "input_text")
-            )
+            content = " ".join(c.get("text", "") for c in content if c.get("type") in ("text", "input_text"))
         if role == "system":
             parts.append(f"[System instruction]: {content}")
         elif role == "assistant":
@@ -377,10 +353,7 @@ def messages_to_prompt(messages: list, tools: list = None) -> str:
                 tc_strs = []
                 for tc in msg["tool_calls"]:
                     fn = tc.get("function", {})
-                    tc_strs.append(
-                        f'```tool_call\n{{"name": "{fn.get("name")}", '
-                        f'"arguments": {fn.get("arguments", "{}")}}}\n```'
-                    )
+                    tc_strs.append(f'```tool_call\n{{"name": "{fn.get("name")}", "arguments": {fn.get("arguments", "{}")}}}\n```')
                 parts.append(f"[Assistant]: {content or ''}\n" + "\n".join(tc_strs))
             else:
                 parts.append(f"[Assistant]: {content}")
@@ -392,27 +365,23 @@ def messages_to_prompt(messages: list, tools: list = None) -> str:
 
 
 def parse_tool_calls(text: str) -> tuple:
-    """Extract tool_call blocks. Returns (clean_text, tool_calls_list)."""
+    """Strip ```tool_call fences from `text`; return (remaining_text, OpenAI-style tool calls)."""
-    tool_calls = []
     pattern = r'```tool_call\s*\n(.*?)\n```'
-    for match in re.findall(pattern, text, re.DOTALL):
+    tool_calls, clean_parts, last_end = [], [], 0
+    for m in re.finditer(pattern, text, re.DOTALL):
+        clean_parts.append(text[last_end:m.start()])
+        last_end = m.end()
         try:
-            data = json.loads(match.strip())
-            tool_calls.append({
-                "id": f"call_{uuid.uuid4().hex[:8]}",
-                "type": "function",
-                "function": {
-                    "name": data["name"],
-                    "arguments": json.dumps(data.get("arguments", {}), ensure_ascii=False),
-                },
-            })
+            data = json.loads(m.group(1).strip())
+            name, args = data["name"], data.get("arguments", {})
-        except (json.JSONDecodeError, KeyError):
-            pass
-    clean = re.sub(pattern, '', text, flags=re.DOTALL).strip()
-    return clean, tool_calls
+        except (json.JSONDecodeError, KeyError, TypeError):
+            continue  # malformed or non-object payload: fence is dropped, no call is emitted
+        tool_calls.append({"id": f"call_{uuid.uuid4().hex[:8]}", "type": "function",
+                           "function": {"name": name, "arguments": json.dumps(args, ensure_ascii=False)}})
+    clean_parts.append(text[last_end:])
+    return "".join(clean_parts).strip(), tool_calls
 
-
-# ─── HTTP Handler ────────────────────────────────────────────────────────────
+# ─── HTTP handler ─────────────────────────────────────────────────────────────
 
 class GeminiHandler(BaseHTTPRequestHandler):
     def log_message(self, fmt, *args):
@@ -427,6 +396,20 @@ def send_json(self, data, status=200):
         self.end_headers()
         self.wfile.write(body)
 
+    def _authorized(self) -> bool:
+        """Return True only for a non-empty token that exactly matches a configured API key."""
+        keys = CONFIG.get("api_keys") or []
+        keys = [keys] if isinstance(keys, str) else keys  # a bare string must not become a substring match
+        auth = self.headers.get("Authorization", "")
+        token = auth[7:] if auth[:7].lower() == "bearer " else self.headers.get("x-api-key", "")
+        return bool(token) and token in keys  # empty token or no configured keys => deny
+
+    def _require_auth(self) -> bool:
+        """Send a 401 and return True when the request is NOT authorized; otherwise return False."""
+        if self._authorized(): return False
+        self.send_json({"error": {"message": "Unauthorized. Provide a valid Bearer token.", "type": "auth_error"}}, 401)
+        return True
+
     def do_OPTIONS(self):
         self.send_response(204)
         self.send_header("Access-Control-Allow-Origin", "*")
@@ -434,123 +417,105 @@ def do_OPTIONS(self):
         self.send_header("Access-Control-Allow-Headers", "*")
         self.end_headers()
 
+    def _resolve_model(self, model_name: str):
+        """Resolve "name[@think=N]" to (name, mode, think, error); unknown names use the default."""
+        forced_think = None
+        if "@think=" in model_name:
+            model_name, level = model_name.rsplit("@think=", 1)
+            try: forced_think = int(level)
+            except ValueError: return None, None, None, f"Invalid think level: {level}"
+        if not MODELS.get(model_name):
+            log(f"Unknown model '{model_name}', falling back to default")
+            model_name = CONFIG["default_model"]
+        spec = MODELS[model_name]
+        return model_name, spec["mode"], (spec["think"] if forced_think is None else forced_think), None
+
     def do_GET(self):
         try:
+            if self._require_auth(): return  # _require_auth() has already sent the 401 response
             if self.path == "/v1/models":
                 self.send_json({"object": "list", "data": [
-                    {"id": n, "object": "model", "created": 1700000000,
-                     "owned_by": "google", "description": c["desc"]}
+                    {"id": n, "object": "model", "created": 1700000000, "owned_by": "google", "description": c["desc"]}
                     for n, c in MODELS.items()
                 ]})
             elif self.path.startswith("/v1beta/models"):
-                self._handle_google_models_list()
+                self.send_json({"models": [  # model listing built inline from MODELS
+                    {"name": f"models/{n}", "displayName": n, "description": c["desc"],
+                     "supportedGenerationMethods": ["generateContent", "streamGenerateContent"]}
+                    for n, c in MODELS.items()
+                ]})
             elif self.path == "/":
-                self.send_json({"status": "ok", "version": __version__,
-                                "models": list(MODELS.keys())})
+                self.send_json({"status": "ok", "version": __version__, "models": list(MODELS.keys()),
+                                "accounts": POOL.count(), "account_names": POOL.names()})
             else:
                 self.send_json({"error": "not found"}, 404)
-        except (BrokenPipeError, ConnectionResetError):
-            pass
-        except Exception as e:
-            log(f"GET error: {e}")
+        except (BrokenPipeError, ConnectionResetError): pass  # client disconnected; nothing left to send
 
     def do_POST(self):
         try:
+            if self._require_auth(): return  # _require_auth() has already sent the 401 response
             length = int(self.headers.get("Content-Length", 0))
-            body = self.rfile.read(length) if length else b""
+            body   = self.rfile.read(length) if length > 0 else b""  # a negative length would make read() block until EOF
             if self.path == "/v1/chat/completions":
-                self.handle_chat(body)
+                self._handle_chat(body)
             elif self.path == "/v1/responses":
-                self.handle_responses(body)
+                self._handle_responses(body)
             elif ":generateContent" in self.path:
                 self._handle_google_generate(body, stream=False)
             elif ":streamGenerateContent" in self.path:
                 self._handle_google_generate(body, stream=True)
             else:
                 self.send_json({"error": "not found"}, 404)
-        except (BrokenPipeError, ConnectionResetError):
-            pass
+        except (BrokenPipeError, ConnectionResetError): pass  # client disconnected; nothing left to send
         except Exception as e:
             log(f"POST error: {e}")
-            try:
-                self.send_json({"error": {"message": str(e)}}, 500)
-            except:
-                pass
+            try: self.send_json({"error": {"message": str(e)}}, 500)
+            except Exception: pass  # best effort only; never swallow KeyboardInterrupt/SystemExit
 
-    def _resolve_model(self, model_name):
-        think_override = None
-        if "@think=" in model_name:
-            model_name, think_str = model_name.rsplit("@think=", 1)
-            think_override = int(think_str)
-        cfg = MODELS.get(model_name)
-        if not cfg:
-            return None, None, None, f"Unknown model: {model_name}"
-        return model_name, cfg["mode"], (think_override if think_override is not None else cfg["think"]), None
+    def _handle_chat(self, body: bytes):
+        try: req = json.loads(body)
+        except Exception: self.send_json({"error": {"message": "invalid JSON"}}, 400); return
 
-    def _call_gemini(self, prompt, model_id, think_mode, tools):
-        raw = gemini_stream_generate(prompt, model_id, think_mode)
-        text = extract_response_text(raw)
-        tool_calls = None
-        if tools and text:
-            text, tool_calls = parse_tool_calls(text)
-        return text or "", tool_calls
-
-    def handle_chat(self, body: bytes):
-        req = json.loads(body)
-        model_name, model_id, think_mode, err = self._resolve_model(
-            req.get("model", CONFIG["default_model"]))
-        if err:
-            self.send_json({"error": {"message": err}}, 400)
-            return
+        model_name, model_id, think_mode, err = self._resolve_model(req.get("model", CONFIG["default_model"]))
+        if err: self.send_json({"error": {"message": err}}, 400); return
 
-        tools = req.get("tools")
+        tools  = req.get("tools")
         prompt = messages_to_prompt(req.get("messages", []), tools)
-        if not prompt.strip():
-            self.send_json({"error": {"message": "empty prompt"}}, 400)
-            return
+        if not prompt.strip(): self.send_json({"error": {"message": "empty prompt"}}, 400); return
 
         stream = req.get("stream", False)
-        cid = f"chatcmpl-{uuid.uuid4().hex[:12]}"
+        cid    = f"chatcmpl-{uuid.uuid4().hex[:12]}"
 
         if stream and not tools:
-            # True streaming: forward chunks as they arrive
             try:
                 self.send_response(200)
                 self.send_header("Content-Type", "text/event-stream")
                 self.send_header("Cache-Control", "no-cache")
                 self.send_header("Access-Control-Allow-Origin", "*")
                 self.end_headers()
-                for delta_text in gemini_stream_generate_iter(prompt, model_id, think_mode):
+                for delta in gemini_generate_stream(prompt, model_id, think_mode):
                     chunk = {"id": cid, "object": "chat.completion.chunk", "created": int(time.time()),
-                             "model": model_name, "choices": [{"index": 0, "delta": {"content": delta_text}, "finish_reason": None}]}
+                             "model": model_name, "choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}]}
                     self.wfile.write(f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n".encode())
                     self.wfile.flush()
-                # Final chunk
-                chunk = {"id": cid, "object": "chat.completion.chunk", "created": int(time.time()),
-                         "model": model_name, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
-                self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode())
-                self.wfile.write(b"data: [DONE]\n\n")
+                end = {"id": cid, "object": "chat.completion.chunk", "created": int(time.time()),
+                       "model": model_name, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
+                self.wfile.write(f"data: {json.dumps(end)}\n\ndata: [DONE]\n\n".encode())
                 self.wfile.flush()
-            except (BrokenPipeError, ConnectionResetError):
-                pass
-            except Exception as e:
-                log(f"Stream error: {e}")
+            except (BrokenPipeError, ConnectionResetError): pass
             return
 
-        # Non-streaming (or tool calling which needs full response)
-        try:
-            text, tool_calls = self._call_gemini(prompt, model_id, think_mode, tools)
-        except Exception as e:
-            self.send_json({"error": {"message": f"upstream error: {e}"}}, 502)
-            return
+        try: text = gemini_generate(prompt, model_id, think_mode)
+        except Exception as e: self.send_json({"error": {"message": f"upstream error: {e}"}}, 502); return
 
-        msg = {"role": "assistant", "content": text or None}
-        if tool_calls:
-            msg["tool_calls"] = tool_calls
+        tool_calls = None
+        if tools and text:
+            text, tool_calls = parse_tool_calls(text)
+        msg    = {"role": "assistant", "content": text or None}
+        if tool_calls: msg["tool_calls"] = tool_calls
         finish = "tool_calls" if tool_calls else "stop"
 
         if stream:
-            # Stream mode with tools: send as single chunk (need full parse for tool_calls)
             self.send_response(200)
             self.send_header("Content-Type", "text/event-stream")
             self.send_header("Cache-Control", "no-cache")
@@ -558,30 +523,24 @@ def handle_chat(self, body: bytes):
             self.end_headers()
             chunk = {"id": cid, "object": "chat.completion.chunk", "created": int(time.time()),
                      "model": model_name, "choices": [{"index": 0, "delta": msg, "finish_reason": finish}]}
-            self.wfile.write(f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n".encode())
-            self.wfile.write(b"data: [DONE]\n\n")
+            self.wfile.write(f"data: {json.dumps(chunk, ensure_ascii=False)}\n\ndata: [DONE]\n\n".encode())
             self.wfile.flush()
         else:
-            self.send_json({
-                "id": cid, "object": "chat.completion", "created": int(time.time()),
-                "model": model_name,
-                "choices": [{"index": 0, "message": msg, "finish_reason": finish}],
-                "usage": {"prompt_tokens": len(prompt)//4, "completion_tokens": len(text)//4,
-                          "total_tokens": (len(prompt)+len(text))//4},
-            })
+            self.send_json({"id": cid, "object": "chat.completion", "created": int(time.time()),
+                            "model": model_name,
+                            "choices": [{"index": 0, "message": msg, "finish_reason": finish}],
+                            "usage": {"prompt_tokens": len(prompt)//4, "completion_tokens": len(text or "")//4,
+                                      "total_tokens": (len(prompt)+len(text or ""))//4}})
 
-    def handle_responses(self, body: bytes):
-        """OpenAI Responses API for Codex CLI compatibility."""
-        req = json.loads(body)
-        model_name, model_id, think_mode, err = self._resolve_model(
-            req.get("model", CONFIG["default_model"]))
-        if err:
-            self.send_json({"error": {"message": err}}, 400)
-            return
+    def _handle_responses(self, body: bytes):
+        try: req = json.loads(body)
+        except Exception: self.send_json({"error": {"message": "invalid JSON"}}, 400); return
+
+        model_name, model_id, think_mode, err = self._resolve_model(req.get("model", CONFIG["default_model"]))
+        if err: self.send_json({"error": {"message": err}}, 400); return
 
         input_items = req.get("input", [])
         tools = req.get("tools")
-
         messages = []
         if req.get("instructions"):
             messages.append({"role": "system", "content": req["instructions"]})
@@ -592,49 +551,22 @@ def handle_responses(self, body: bytes):
                 if isinstance(item, str):
                     messages.append({"role": "user", "content": item})
                 elif isinstance(item, dict):
-                    if item.get("type") == "function_call_output":
-                        messages.append({"role": "tool", "tool_call_id": item.get("call_id", ""),
-                                         "name": item.get("name", ""), "content": item.get("output", "")})
-                    elif item.get("role") == "assistant" or (item.get("type") == "message" and item.get("role") == "assistant"):
-                        cp = item.get("content", [])
-                        text_acc, tc_list = "", []
-                        if isinstance(cp, list):
-                            for c in cp:
-                                if isinstance(c, dict):
-                                    if c.get("type") == "output_text": text_acc += c.get("text", "")
-                                    elif c.get("type") == "function_call": tc_list.append(c)
-                        elif isinstance(cp, str):
-                            text_acc = cp
-                        m = {"role": "assistant", "content": text_acc or None}
-                        if tc_list:
-                            m["tool_calls"] = [{"id": tc.get("call_id", f"call_{i}"), "type": "function",
-                                                "function": {"name": tc.get("name",""), "arguments": tc.get("arguments","{}")}}
-                                               for i, tc in enumerate(tc_list)]
-                        messages.append(m)
-                    else:
-                        role = item.get("role", "user")
-                        content = item.get("content", "")
-                        if isinstance(content, list):
-                            content = " ".join(c.get("text", "") for c in content if c.get("type") in ("text", "input_text"))
-                        messages.append({"role": role, "content": content})
-
-        if tools:
-            tools = [{"type": "function", "function": {"name": t["name"], "description": t.get("description", ""), "parameters": t.get("parameters", {})}}
-                     if t.get("type") == "function" and "function" not in t else t for t in tools]
-
+                    role    = item.get("role", "user")
+                    content = item.get("content", "")
+                    if isinstance(content, list):
+                        content = " ".join(c.get("text","") for c in content if c.get("type") in ("text","input_text"))
+                    messages.append({"role": role, "content": content})
         prompt = messages_to_prompt(messages, tools)
-        if not prompt.strip():
-            self.send_json({"error": {"message": "empty input"}}, 400)
-            return
+        if not prompt.strip(): self.send_json({"error": {"message": "empty input"}}, 400); return
 
-        try:
-            text, tool_calls = self._call_gemini(prompt, model_id, think_mode, tools)
-        except Exception as e:
-            self.send_json({"error": {"message": f"upstream error: {e}"}}, 502)
-            return
+        try: text = gemini_generate(prompt, model_id, think_mode)
+        except Exception as e: self.send_json({"error": {"message": f"upstream error: {e}"}}, 502); return
+
+        tool_calls = None
+        if tools and text:
+            text, tool_calls = parse_tool_calls(text)
 
-        rid = f"resp_{uuid.uuid4().hex[:16]}"
-        mid = f"msg_{uuid.uuid4().hex[:12]}"
+        rid, mid = f"resp_{uuid.uuid4().hex[:16]}", f"msg_{uuid.uuid4().hex[:12]}"
         output = []
         if tool_calls:
             for tc in tool_calls:
@@ -643,130 +575,51 @@ def handle_responses(self, body: bytes):
         if text or not tool_calls:
             output.append({"type": "message", "id": mid, "role": "assistant", "status": "completed",
                            "content": [{"type": "output_text", "text": text or "", "annotations": []}]})
+        self.send_json({"id": rid, "object": "response", "created_at": int(time.time()), "status": "completed",
+                        "model": model_name, "output": output,
+                        "usage": {"input_tokens": len(prompt)//4, "output_tokens": len(text or "")//4,
+                                  "total_tokens": (len(prompt)+len(text or ""))//4}})
 
-        if req.get("stream"):
-            self.send_response(200)
-            self.send_header("Content-Type", "text/event-stream")
-            self.send_header("Cache-Control", "no-cache")
-            self.send_header("Access-Control-Allow-Origin", "*")
-            self.end_headers()
-            ev = {"type": "response.created", "response": {"id": rid, "object": "response", "status": "in_progress", "model": model_name, "output": []}}
-            self.wfile.write(f"event: response.created\ndata: {json.dumps(ev)}\n\n".encode())
-            for item in output:
-                if item["type"] == "function_call":
-                    ev = {"type": "response.function_call_arguments.done", "item_id": item["id"], "call_id": item["call_id"], "name": item["name"], "arguments": item["arguments"]}
-                    self.wfile.write(f"event: response.function_call_arguments.done\ndata: {json.dumps(ev)}\n\n".encode())
-                elif item["type"] == "message":
-                    for ci, cp in enumerate(item["content"]):
-                        ev = {"type": "response.output_text.done", "item_id": item["id"], "content_index": ci, "text": cp["text"]}
-                        self.wfile.write(f"event: response.output_text.done\ndata: {json.dumps(ev)}\n\n".encode())
-            resp_obj = {"id": rid, "object": "response", "status": "completed", "model": model_name, "output": output,
-                        "usage": {"input_tokens": len(prompt)//4, "output_tokens": len(text)//4, "total_tokens": (len(prompt)+len(text))//4}}
-            self.wfile.write(f"event: response.completed\ndata: {json.dumps({'type': 'response.completed', 'response': resp_obj})}\n\n".encode())
-            self.wfile.flush()
-        else:
-            self.send_json({"id": rid, "object": "response", "created_at": int(time.time()), "status": "completed",
-                            "model": model_name, "output": output,
-                            "usage": {"input_tokens": len(prompt)//4, "output_tokens": len(text)//4, "total_tokens": (len(prompt)+len(text))//4}})
-
-
-    # ─── Google Native API (Gemini CLI compatible) ────────────────────────────
-
-    def _parse_google_model_from_path(self):
-        """Extract model name from /v1beta/models/{model}:method path."""
+    def _handle_google_generate(self, body: bytes, stream: bool):
+        try: req = json.loads(body)
+        except Exception: self.send_json({"error": {"message": "invalid JSON"}}, 400); return
         m = re.match(r'/v1beta/models/([^:?]+)', self.path)
-        if m:
-            return m.group(1)
-        return None
-
-    def _handle_google_models_list(self):
-        """GET /v1beta/models — Google AI format model list."""
-        models = []
-        for name, cfg in MODELS.items():
-            models.append({
-                "name": f"models/{name}",
-                "displayName": name,
-                "description": cfg["desc"],
-                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
-            })
-        self.send_json({"models": models})
+        model_raw = m.group(1) if m else CONFIG["default_model"]
+        model_name, model_id, think_mode, err = self._resolve_model(model_raw)
+        if err: self.send_json({"error": {"message": err}}, 400); return
 
-    def _google_contents_to_prompt(self, req: dict) -> str:
-        """Convert Google API contents format to prompt string."""
-        parts = []
-        sys_inst = req.get("systemInstruction")
+        parts, sys_inst = [], req.get("systemInstruction")
         if sys_inst:
-            sys_parts = sys_inst.get("parts", [])
-            sys_text = " ".join(p.get("text", "") for p in sys_parts if p.get("text"))
-            if sys_text:
-                parts.append(f"[System instruction]: {sys_text}")
-
+            sys_text = " ".join(p.get("text","") for p in sys_inst.get("parts",[]) if p.get("text"))
+            if sys_text: parts.append(f"[System instruction]: {sys_text}")
         for content in req.get("contents", []):
             role = content.get("role", "user")
-            text_parts = []
-            for p in content.get("parts", []):
-                if p.get("text"):
-                    text_parts.append(p["text"])
-            text = " ".join(text_parts)
-            if role == "model":
-                parts.append(f"[Assistant]: {text}")
-            else:
-                parts.append(text)
-        return "\n\n".join(p for p in parts if p)
-
-    def _handle_google_generate(self, body: bytes, stream: bool):
-        """Handle Google native generateContent / streamGenerateContent."""
-        req = json.loads(body)
-        model_name = self._parse_google_model_from_path()
-        if not model_name:
-            self.send_json({"error": {"message": "model not specified in path"}}, 400)
-            return
-
-        model_name, model_id, think_mode, err = self._resolve_model(model_name)
-        if err:
-            self.send_json({"error": {"message": err}}, 400)
-            return
-
-        prompt = self._google_contents_to_prompt(req)
-        if not prompt.strip():
-            self.send_json({"error": {"message": "empty content"}}, 400)
-            return
-
-        try:
-            text, _ = self._call_gemini(prompt, model_id, think_mode, None)
-        except Exception as e:
-            self.send_json({"error": {"message": f"upstream error: {e}"}}, 502)
-            return
-
-        candidate = {
-            "content": {"parts": [{"text": text or ""}], "role": "model"},
-            "finishReason": "STOP",
-            "index": 0,
-        }
-        usage = {
-            "promptTokenCount": len(prompt) // 4,
-            "candidatesTokenCount": len(text) // 4,
-            "totalTokenCount": (len(prompt) + len(text)) // 4,
-        }
-        response_obj = {
-            "candidates": [candidate],
-            "usageMetadata": usage,
+            text = " ".join(p.get("text","") for p in content.get("parts",[]) if p.get("text"))
+            parts.append(f"[Assistant]: {text}" if role == "model" else text)
+        prompt = "\n\n".join(p for p in parts if p)
+        if not prompt.strip(): self.send_json({"error": {"message": "empty content"}}, 400); return
+
+        try: text = gemini_generate(prompt, model_id, think_mode)
+        except Exception as e: self.send_json({"error": {"message": f"upstream error: {e}"}}, 502); return
+
+        resp_obj = {
+            "candidates": [{"content": {"parts": [{"text": text or ""}], "role": "model"}, "finishReason": "STOP", "index": 0}],
+            "usageMetadata": {"promptTokenCount": len(prompt)//4, "candidatesTokenCount": len(text or "")//4,
+                              "totalTokenCount": (len(prompt)+len(text or ""))//4},
             "modelVersion": model_name,
         }
-
         if stream:
             self.send_response(200)
             self.send_header("Content-Type", "text/event-stream")
             self.send_header("Cache-Control", "no-cache")
             self.send_header("Access-Control-Allow-Origin", "*")
             self.end_headers()
-            self.wfile.write(f"data: {json.dumps(response_obj)}\n\n".encode())
+            self.wfile.write(f"data: {json.dumps(resp_obj)}\n\n".encode())
             self.wfile.flush()
         else:
-            self.send_json(response_obj)
-
+            self.send_json(resp_obj)
 
-# ─── Main ────────────────────────────────────────────────────────────────────
+# ─── Main ─────────────────────────────────────────────────────────────────────
 
 def load_config(path: str):
     if path and os.path.exists(path):
@@ -776,42 +629,48 @@ def load_config(path: str):
 
 
 def main():
-    parser = argparse.ArgumentParser(description="Gemini Web to OpenAI API")
-    parser.add_argument("--port", type=int, default=None)
-    parser.add_argument("--config", type=str, default=None)
-    parser.add_argument("--cookie-file", type=str, default=None, help="Path to cookie file")
-    parser.add_argument("--proxy", type=str, default=None, help="HTTP proxy, e.g. http://127.0.0.1:7890")
-    parser.add_argument("--version", action="version", version=f"gemini-web2api {__version__}")
+    parser = argparse.ArgumentParser(description="Gemini Web to OpenAI API (multi-account + auth)")
+    parser.add_argument("--port",        type=int, default=None)
+    parser.add_argument("--host",        type=str, default=None)
+    parser.add_argument("--config",      type=str, default=None)
+    parser.add_argument("--cookie-file", type=str, default=None)
+    parser.add_argument("--proxy",       type=str, default=None)
+    parser.add_argument("--version",     action="version", version=f"gemini-web2api {__version__}")
     args = parser.parse_args()
 
     config_path = args.config or os.environ.get("GEMINI_WEB2API_CONFIG")
     if not config_path:
         for p in ["./config.json", os.path.expanduser("~/.config/gemini-web2api/config.json")]:
-            if os.path.exists(p):
-                config_path = p
-                break
+            if os.path.exists(p): config_path = p; break
     load_config(config_path)
 
-    if args.port:
-        CONFIG["port"] = args.port
-    if args.cookie_file:
-        CONFIG["cookie_file"] = args.cookie_file
-    if args.proxy:
-        CONFIG["proxy"] = args.proxy
+    if args.port:        CONFIG["port"]        = args.port
+    if args.host:        CONFIG["host"]        = args.host
+    if args.cookie_file: CONFIG["cookie_file"] = args.cookie_file
+    if args.proxy:       CONFIG["proxy"]       = args.proxy
+
+    POOL.load(CONFIG)
+
+    if not CONFIG.get("api_keys"):
+        print("ERROR: No api_keys configured. Add tokens to config.json:")
+        print('  "api_keys": ["<your-secret-token>"]')
+        print()
+        print('  Generate: python -c "import secrets; print(secrets.token_urlsafe(32))"')
+        sys.exit(1)
 
     class ThreadedServer(ThreadingMixIn, HTTPServer):
         daemon_threads = True
         allow_reuse_address = True
 
-    port = CONFIG["port"]
-    server = ThreadedServer((CONFIG["host"], port), GeminiHandler)
+    host, port = CONFIG["host"], CONFIG["port"]
+    server = ThreadedServer((host, port), GeminiHandler)
     print(f"gemini-web2api v{__version__}")
-    print(f"  Listening: http://0.0.0.0:{port}")
-    print(f"  Base URL:  http://localhost:{port}/v1")
+    print(f"  Listening: http://{host}:{port}")
+    print(f"  Base URL:  http://{host}:{port}/v1")
     print(f"  Models:    {', '.join(MODELS.keys())}")
-    print(f"  Cookie:    {'yes (' + CONFIG['cookie_file'] + ')' if CONFIG.get('cookie_file') else 'none (anonymous)'}")
-    print(f"  Proxy:     {CONFIG.get('proxy') or 'none (uses system env HTTP_PROXY/HTTPS_PROXY)'}")
-    print(f"  Retry:     {CONFIG['retry_attempts']}x / {CONFIG['retry_delay_sec']}s")
+    print(f"  Accounts:  {POOL.count()} ({', '.join(POOL.names())})")
+    print(f"  API keys:  {len(CONFIG['api_keys'])} configured")
+    print(f"  Proxy:     {CONFIG.get('proxy') or 'system env'}")
     print()
     try:
         server.serve_forever()
diff --git a/gemini_web2api/__main__.py b/gemini_web2api/__main__.py
index e9b9ff7..4c3b56a 100644
--- a/gemini_web2api/__main__.py
+++ b/gemini_web2api/__main__.py
@@ -4,16 +4,18 @@
 
 from .config import CONFIG, load_config, find_config
 from .models import MODELS
-from .gemini import HAS_HTTPX
+from .gemini import HAS_HTTPX, log
+from .accounts import POOL
 from .server import GeminiHandler, ThreadedServer
 from . import __version__
 
 
 def main():
-    parser = argparse.ArgumentParser(description="Gemini Web to OpenAI API")
+    parser = argparse.ArgumentParser(description="Gemini Web to OpenAI API (multi-account)")
     parser.add_argument("--port", type=int, default=None)
+    parser.add_argument("--host", type=str, default=None, help="Bind host (default: 127.0.0.1)")
     parser.add_argument("--config", type=str, default=None)
-    parser.add_argument("--cookie-file", type=str, default=None)
+    parser.add_argument("--cookie-file", type=str, default=None, help="Cookie file for single-account mode")
     parser.add_argument("--proxy", type=str, default=None, help="HTTP proxy, e.g. http://127.0.0.1:7890")
     parser.add_argument("--version", action="version", version=f"gemini-web2api {__version__}")
     args = parser.parse_args()
@@ -24,21 +26,40 @@ def main():
 
     if args.port:
         CONFIG["port"] = args.port
+    if args.host:
+        CONFIG["host"] = args.host
     if args.cookie_file:
         CONFIG["cookie_file"] = args.cookie_file
     if args.proxy:
         CONFIG["proxy"] = args.proxy
 
+    # Initialise account pool from config
+    POOL.load_from_config(CONFIG)
+
+    # Safety check: refuse to start without api_keys
+    if not CONFIG.get("api_keys"):
+        print("ERROR: No api_keys configured.")
+        print("  Add at least one token to config.json:")
+        print('  "api_keys": ["<your-secret-token>"]')
+        print()
+        print("  Generate a secure token with:")
+        print('  python -c "import secrets; print(secrets.token_urlsafe(32))"')
+        raise SystemExit(1)
+
     port = CONFIG["port"]
-    server = ThreadedServer((CONFIG["host"], port), GeminiHandler)
+    host = CONFIG["host"]
+    server = ThreadedServer((host, port), GeminiHandler)
+
     print(f"gemini-web2api v{__version__}")
-    print(f"  Listening: http://0.0.0.0:{port}")
-    print(f"  Base URL:  http://localhost:{port}/v1")
+    print(f"  Listening: http://{host}:{port}")
+    print(f"  Base URL:  http://{host}:{port}/v1")
     print(f"  Models:    {', '.join(MODELS.keys())}")
-    print(f"  Cookie:    {'yes' if CONFIG.get('cookie_file') else 'none (anonymous)'}")
+    print(f"  Accounts:  {POOL.count()} ({', '.join(POOL.names())})")
+    print(f"  API keys:  {len(CONFIG['api_keys'])} configured")
     print(f"  Proxy:     {CONFIG.get('proxy') or 'system env'}")
     print(f"  Streaming: {'httpx (true streaming)' if HAS_HTTPX else 'urllib (buffered)'}")
     print()
+
     try:
         server.serve_forever()
     except KeyboardInterrupt:
diff --git a/gemini_web2api/config.py b/gemini_web2api/config.py
index 9faa94d..4930941 100644
--- a/gemini_web2api/config.py
+++ b/gemini_web2api/config.py
@@ -4,18 +4,28 @@
 
 DEFAULT_CONFIG = {
     "port": 8081,
-    "host": "0.0.0.0",
+    "host": "127.0.0.1",          # Default: listen on localhost only (previously 0.0.0.0) for security
     "retry_attempts": 3,
     "retry_delay_sec": 2,
     "request_timeout_sec": 180,
     "gemini_bl": "boq_assistant-bard-web-server_20260525.09_p0",
-    "auth_user": None,
-    "xsrf_token": None,
     "default_model": "gemini-3.5-flash",
     "log_requests": True,
+    # ── Security ──────────────────────────────────────────────────────────────
+    # List of accepted Bearer tokens. REQUIRED — the server refuses to start and rejects every
+    # request if this list is empty or missing. Generate with: python -c "import secrets; print(secrets.token_urlsafe(32))"
+    "api_keys": [],
+    # ── Legacy single-account fields (still supported) ────────────────────────
     "cookie_file": None,
+    "auth_user": None,
     "proxy": None,
-    "api_keys": [],
+    "xsrf_token": None,
+    # ── Multi-account list (takes precedence over legacy fields) ──────────────
+    # "accounts": [
+    #   {"name": "account1", "cookie_file": "cookies/account1.json", "auth_user": null, "proxy": null},
+    #   {"name": "account2", "cookie_file": "cookies/account2.json", "auth_user": 1,    "proxy": null}
+    # ]
+    "accounts": [],
 }
 
 CONFIG = dict(DEFAULT_CONFIG)
diff --git a/gemini_web2api/gemini.py b/gemini_web2api/gemini.py
index 8f08ff9..25d76cf 100644
--- a/gemini_web2api/gemini.py
+++ b/gemini_web2api/gemini.py
@@ -6,7 +6,6 @@
 import urllib.request
 import urllib.parse
 import ssl
-import os
 import hashlib
 
 try:
@@ -16,10 +15,9 @@
     HAS_HTTPX = False
 
 from .config import CONFIG
+from .accounts import POOL, Account
 
 _ssl_ctx = None
-_cookie_cache = {"str": "", "sapisid": None, "mtime": 0}
-_httpx_client = None
 
 
 def log(msg: str):
@@ -36,67 +34,24 @@ def _get_ssl_ctx():
     return _ssl_ctx
 
 
-def _get_httpx_client():
-    global _httpx_client
-    if _httpx_client is None and HAS_HTTPX:
-        proxy = CONFIG.get("proxy")
-        transport = httpx.HTTPTransport(proxy=proxy) if proxy else None
-        _httpx_client = httpx.Client(transport=transport, timeout=CONFIG["request_timeout_sec"], verify=True)
-    return _httpx_client
-
-
-def load_cookie() -> tuple:
-    """Load cookie from file with mtime-based caching."""
-    cookie_file = CONFIG.get("cookie_file")
-    if not cookie_file or not os.path.exists(cookie_file):
-        return "", None
-    try:
-        mtime = os.path.getmtime(cookie_file)
-        if mtime == _cookie_cache["mtime"] and _cookie_cache["str"]:
-            return _cookie_cache["str"], _cookie_cache["sapisid"]
-        with open(cookie_file, "r") as f:
-            content = f.read().strip()
-        if content.startswith("{"):
-            data = json.loads(content)
-            cookie_str = data.get("cookie", "")
-            sapisid = data.get("sapisid", "")
-        else:
-            cookie_str = content
-            pairs = dict(p.split("=", 1) for p in cookie_str.split("; ") if "=" in p)
-            sapisid = pairs.get("SAPISID", "")
-        _cookie_cache.update({"str": cookie_str, "sapisid": sapisid or None, "mtime": mtime})
-        return cookie_str, sapisid if sapisid else None
-    except Exception as e:
-        log(f"Cookie load error: {e}")
-        return _cookie_cache["str"], _cookie_cache["sapisid"]
-
-
 def make_sapisidhash(sapisid: str) -> str:
     ts = int(time.time())
     h = hashlib.sha1(f"{ts} {sapisid} https://gemini.google.com".encode()).hexdigest()
     return f"SAPISIDHASH {ts}_{h}"
 
 
-def _account_prefix() -> str:
-    """Return the Gemini account path prefix for non-default Google accounts."""
-    auth_user = CONFIG.get("auth_user")
-    if auth_user is None or auth_user == "":
-        return ""
-    return f"/u/{auth_user}"
-
-
-def _build_headers() -> dict:
-    account_prefix = _account_prefix()
+def _build_headers(account: Account) -> dict:
+    prefix = account.account_prefix()
     headers = {
         "Content-Type": "application/x-www-form-urlencoded",
         "Origin": "https://gemini.google.com",
-        "Referer": f"https://gemini.google.com{account_prefix}/app",
+        "Referer": f"https://gemini.google.com{prefix}/app",
         "X-Same-Domain": "1",
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
     }
-    if account_prefix:
-        headers["X-Goog-AuthUser"] = str(CONFIG["auth_user"])
-    cookie_str, sapisid = load_cookie()
+    if prefix:
+        headers["X-Goog-AuthUser"] = str(account.auth_user)
+    cookie_str, sapisid = account.load_cookie()
     if cookie_str:
         headers["Cookie"] = cookie_str
     if sapisid:
@@ -104,7 +59,9 @@ def _build_headers() -> dict:
     return headers
 
 
-def _build_payload(prompt: str, model_id: int, think_mode: int, file_refs: list = None, extra_fields: dict = None) -> str:
+def _build_payload(prompt: str, model_id: int, think_mode: int,
+                   file_refs: list = None, extra_fields: dict = None,
+                   account: Account = None) -> str:
     inner = [None] * 102
     if file_refs:
         refs = [[None, None, ref] for ref in file_refs]
@@ -132,16 +89,17 @@ def _build_payload(prompt: str, model_id: int, think_mode: int, file_refs: list
             inner[k] = v
     outer = [None, json.dumps(inner)]
     params = {"f.req": json.dumps(outer)}
-    if CONFIG.get("xsrf_token"):
-        params["at"] = CONFIG["xsrf_token"]
+    xsrf = (account.xsrf_token if account else None) or CONFIG.get("xsrf_token")
+    if xsrf:
+        params["at"] = xsrf
     return urllib.parse.urlencode(params)
 
 
-def _get_url() -> str:
+def _get_url(account: Account) -> str:
     reqid = int(time.time()) % 1000000
-    account_prefix = _account_prefix()
+    prefix = account.account_prefix()
     return (
-        f"https://gemini.google.com{account_prefix}/_/BardChatUi/data/"
+        f"https://gemini.google.com{prefix}/_/BardChatUi/data/"
         "assistant.lamda.BardFrontendService/StreamGenerate"
         f"?bl={CONFIG['gemini_bl']}&hl=en&_reqid={reqid}&rt=c"
     )
@@ -157,7 +115,6 @@ def clean_text(text: str) -> str:
 
 
 def _extract_texts_from_line(line: str) -> list:
-    """Parse a single wrb.fr line and return list of text strings found."""
     if '"wrb.fr"' not in line or len(line) < 200:
         return []
     try:
@@ -180,7 +137,6 @@ def _extract_texts_from_line(line: str) -> list:
 
 
 def extract_response_text(raw: str) -> str:
-    """Parse full response to get final text."""
     last_text = ""
     for line in raw.split("\n"):
         for t in _extract_texts_from_line(line):
@@ -189,13 +145,16 @@ def extract_response_text(raw: str) -> str:
     return clean_text(last_text)
 
 
-def generate(prompt: str, model_id: int, think_mode: int, file_refs: list = None, extra_fields: dict = None) -> str:
-    """Non-streaming generation with retry."""
-    body = _build_payload(prompt, model_id, think_mode, file_refs, extra_fields).encode()
-    url = _get_url()
-    headers = _build_headers()
+def generate(prompt: str, model_id: int, think_mode: int,
+             file_refs: list = None, extra_fields: dict = None) -> str:
+    """Non-streaming generation with retry. Picks next account via round-robin."""
+    account = POOL.next()
+    log(f"generate: account={account.name} model_id={model_id}")
+    body = _build_payload(prompt, model_id, think_mode, file_refs, extra_fields, account).encode()
+    url = _get_url(account)
+    headers = _build_headers(account)
     ctx = _get_ssl_ctx()
-    proxy = CONFIG.get("proxy")
+    proxy = account.proxy or CONFIG.get("proxy")
 
     last_err = None
     for attempt in range(CONFIG["retry_attempts"]):
@@ -214,23 +173,29 @@ def generate(prompt: str, model_id: int, think_mode: int, file_refs: list = None
         except Exception as e:
             last_err = e
             if attempt < CONFIG["retry_attempts"] - 1:
-                log(f"Retry {attempt+1}/{CONFIG['retry_attempts']}: {e}")
-                time.sleep(CONFIG["retry_delay_sec"])
+                log(f"Retry {attempt+1}/{CONFIG['retry_attempts']} (account={account.name}): {e}")
+                time.sleep(CONFIG["retry_delay_sec"])
     raise last_err
 
 
-def generate_stream(prompt: str, model_id: int, think_mode: int, file_refs: list = None, extra_fields: dict = None):
-    """Streaming generation via httpx with retry on connection failure."""
+def generate_stream(prompt: str, model_id: int, think_mode: int,
+                    file_refs: list = None, extra_fields: dict = None):
+    """Streaming generation via httpx with retry on failure. Picks next account via round-robin."""
+    account = POOL.next()
+    log(f"generate_stream: account={account.name} model_id={model_id}")
+
     if not HAS_HTTPX:
         text = generate(prompt, model_id, think_mode, file_refs, extra_fields)
         if text:
             yield text
         return
 
-    body = _build_payload(prompt, model_id, think_mode, file_refs, extra_fields)
-    url = _get_url()
-    headers = _build_headers()
-    client = _get_httpx_client()
+    body = _build_payload(prompt, model_id, think_mode, file_refs, extra_fields, account)
+    url = _get_url(account)
+    headers = _build_headers(account)
+    proxy = account.proxy or CONFIG.get("proxy")
+    transport = httpx.HTTPTransport(proxy=proxy) if proxy else None
+    client = httpx.Client(transport=transport, timeout=CONFIG["request_timeout_sec"], verify=True)
 
     last_err = None
     for attempt in range(CONFIG["retry_attempts"]):
@@ -252,6 +217,6 @@ def generate_stream(prompt: str, model_id: int, think_mode: int, file_refs: list
         except Exception as e:
             last_err = e
             if attempt < CONFIG["retry_attempts"] - 1:
-                log(f"Stream retry {attempt+1}/{CONFIG['retry_attempts']}: {e}")
-                time.sleep(CONFIG["retry_delay_sec"])
+                log(f"Stream retry {attempt+1}/{CONFIG['retry_attempts']} (account={account.name}): {e}")
+                time.sleep(CONFIG["retry_delay_sec"])
     raise last_err
diff --git a/gemini_web2api/server.py b/gemini_web2api/server.py
index 3b85a8c..201f3a2 100644
--- a/gemini_web2api/server.py
+++ b/gemini_web2api/server.py
@@ -11,6 +11,7 @@
 from .gemini import generate, generate_stream, log
 from .tools import messages_to_prompt, parse_tool_calls, google_contents_to_prompt, parse_google_function_calls
 from .multimodal import upload_image, fetch_image_bytes
+from .accounts import POOL
 from . import __version__
 
 
@@ -21,7 +22,6 @@ def _usage(prompt: str, text: str) -> dict:
 
 
 def _upload_images(images: list) -> list:
-    """Upload images and return list of file references. Returns None if no images."""
     if not images:
         return None
     file_refs = []
@@ -66,13 +66,25 @@ def _parse_body(self, body: bytes) -> dict:
         except (json.JSONDecodeError, ValueError):
             return None
 
-    def _authorized(self):
+    def _authorized(self) -> bool:
+        """Validate the Bearer token (or x-api-key header). Rejects ALL requests if no api_keys are configured."""
         keys = CONFIG.get("api_keys") or []
         if not keys:
-            return True
+            # No keys configured → deny everything to avoid accidentally open servers
+            return False
         auth = self.headers.get("Authorization", "")
-        key = auth[7:] if auth.startswith("Bearer ") else self.headers.get("x-api-key", "")
-        return key in keys
+        token = auth[7:] if auth.startswith("Bearer ") else self.headers.get("x-api-key", "")
+        return token in keys
+
+    def _require_auth(self) -> bool:
+        """Return True (after sending a 401 response) if the request is unauthorized; otherwise return False."""
+        if not self._authorized():
+            self.send_json(
+                {"error": {"message": "Unauthorized. Provide a valid Bearer token.", "type": "auth_error"}},
+                401
+            )
+            return True
+        return False
 
     def do_OPTIONS(self):
         self.send_response(204)
@@ -83,8 +95,7 @@ def do_OPTIONS(self):
 
     def do_GET(self):
         try:
-            if self.path.startswith("/v1/") and not self._authorized():
-                self.send_json({"error": {"message": "invalid api key"}}, 401)
+            if self._require_auth():
                 return
             if self.path == "/v1/models":
                 self.send_json({"object": "list", "data": [
@@ -99,7 +110,14 @@ def do_GET(self):
                     for n, c in MODELS.items()
                 ]})
             elif self.path == "/":
-                self.send_json({"status": "ok", "version": __version__, "models": list(MODELS.keys())})
+                # Health check — also requires auth
+                self.send_json({
+                    "status": "ok",
+                    "version": __version__,
+                    "models": list(MODELS.keys()),
+                    "accounts": POOL.count(),
+                    "account_names": POOL.names(),
+                })
             else:
                 self.send_json({"error": "not found"}, 404)
         except (BrokenPipeError, ConnectionResetError):
@@ -107,8 +125,7 @@ def do_GET(self):
 
     def do_POST(self):
         try:
-            if self.path.startswith("/v1/") and not self._authorized():
-                self.send_json({"error": {"message": "invalid api key"}}, 401)
+            if self._require_auth():
                 return
             length = int(self.headers.get("Content-Length", 0))
             body = self.rfile.read(length) if length else b""
@@ -201,7 +218,7 @@ def _handle_chat(self, body: bytes):
                           "total_tokens": (len(prompt)+len(text or ""))//4},
             })
 
-    # ─── /v1/responses (Codex CLI) ───────────────────────────────────────────
+    # ─── /v1/responses ────────────────────────────────────────────────────────
 
     def _handle_responses(self, body: bytes):
         req = self._parse_body(body)
@@ -242,7 +259,7 @@ def _handle_responses(self, body: bytes):
                         m = {"role": "assistant", "content": text_acc or None}
                         if tc_list:
                             m["tool_calls"] = [{"id": tc.get("call_id", f"call_{i}"), "type": "function",
-                                                "function": {"name": tc.get("name",""), "arguments": tc.get("arguments","{}")}}
+                                                "function": {"name": tc.get("name",""), "arguments": tc.get("arguments","{}") }}
                                                for i, tc in enumerate(tc_list)]
                         messages.append(m)
                     else:
@@ -368,9 +385,6 @@ def _handle_google_generate(self, body: bytes, stream: bool):
             self.send_json({"error": {"message": f"upstream error: {e}"}}, 502)
             return
 
-        if not text:
-            log("Warning: empty response from Gemini")
-
         response_parts = []
         if has_tools and text:
             clean_text, function_calls = parse_google_function_calls(text)