From 9848767e4deac87f4dcf41a8bbf7e324f1c728c0 Mon Sep 17 00:00:00 2001 From: Huan-zhaojun <111294954+Huan-zhaojun@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:11:29 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20search=20source=20passthrough=20?= =?UTF-8?q?=E2=80=94=20web=20+=20X=20post=20sources=20appended=20as=20##?= =?UTF-8?q?=20Sources?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Grok SSE 流中的 webSearchResults 和 xSearchResults 透传给下游消费者。 采集层(StreamAdapter): - webSearchResults: 直接使用原始 url + title - xSearchResults: postId+username 拼接 URL,text 前 50 字构造 title, 空白归一化,共享 set 跨类型去重 - references_suffix() 统一转义 Markdown 特殊字符后输出 多轮剥离(_extract_message): - 标记行 [grok2api-sources]: # (CommonMark link ref def,渲染器不显示) - 正则覆盖 string content + block list content,CRLF 兼容 - 仅匹配含标记行的段落,用户自写 ## Sources 不受影响 配置: - features.show_search_sources(默认 false),管理面板可开关 - 管理面板 + 6 语言 i18n (zh/en/de/es/fr/ja) --- app/dataplane/reverse/protocol/xai_chat.py | 49 ++++++++++++++++++++-- app/products/openai/chat.py | 17 +++++++- app/statics/admin/config.html | 1 + app/statics/i18n/de.json | 1 + app/statics/i18n/en.json | 4 ++ app/statics/i18n/es.json | 1 + app/statics/i18n/fr.json | 1 + app/statics/i18n/ja.json | 1 + app/statics/i18n/zh.json | 4 ++ config.defaults.toml | 2 + 10 files changed, 77 insertions(+), 4 deletions(-) diff --git a/app/dataplane/reverse/protocol/xai_chat.py b/app/dataplane/reverse/protocol/xai_chat.py index ba775c51..b5a0c903 100644 --- a/app/dataplane/reverse/protocol/xai_chat.py +++ b/app/dataplane/reverse/protocol/xai_chat.py @@ -181,6 +181,8 @@ class StreamAdapter: "_summary_mode", "_last_rollout", "_content_started", + "_web_search_results", + "_web_search_urls_seen", "thinking_buf", "text_buf", "image_urls", @@ -197,14 +199,28 @@ def __init__(self) -> None: self._last_rollout: str = "" self._content_started: bool = False self._reasoning = ReasoningAggregator() if self._summary_mode else None + self._web_search_results: list[dict] = [] + self._web_search_urls_seen: set[str] = set() self.thinking_buf: list[str] = [] self.text_buf: list[str] = [] self.image_urls: list[tuple[str, str]] = [] # [(url, imageUuid), ...] - # 引用已内联为 [[N]](url) 格式,无需末尾附录 + # 搜索信源追加:当配置启用且有 webSearchResults 时,格式化为 ## Sources 段落 + # 标记行 [grok2api-sources]: # 是 markdown link reference definition,渲染器不显示, + # 用于 _extract_message() 在多轮对话中精确识别并剥离前轮的 Sources 段落 def references_suffix(self) -> str: - """No-op — citations are now inlined as ``[[N]](url)`` markdown links.""" - return "" + """当有搜索信源且配置启用时,格式化为 ## Sources markdown 段落。""" + if not self._web_search_results: + return "" + if not get_config().get_bool("features.show_search_sources", False): + return "" + lines = ["\n\n## Sources", "[grok2api-sources]: #"] + for item in self._web_search_results: + title = item.get("title") or item.get("url", "") + # 转义 Markdown 链接文本中的特殊字符,防止 []\ 打坏语法 + title = title.replace("\\", "\\\\").replace("[", "\\[").replace("]", "\\]") + lines.append(f"- [{title}]({item['url']})") + return "\n".join(lines) + "\n" # ------------------------------------------------------------------ # Public API @@ -231,6 +247,33 @@ def feed(self, data: str) -> list[FrameEvent]: if card_raw: events.extend(self._handle_card(card_raw)) + # ── 采集 webSearchResults(搜索信源,多帧累积去重)─────── + wsr = resp.get("webSearchResults") + if wsr and isinstance(wsr, dict): + for item in wsr.get("results", []): + if isinstance(item, dict) and item.get("url"): + url = item["url"] + if url not in self._web_search_urls_seen: + self._web_search_urls_seen.add(url) + self._web_search_results.append(item) + + # ── 采集 xSearchResults(X/Twitter 帖子信源,多帧累积去重)── + xsr = resp.get("xSearchResults") + if xsr and isinstance(xsr, dict): + for item in xsr.get("results", []): + if isinstance(item, dict) and item.get("postId") and item.get("username"): + url = f"https://x.com/{item['username']}/status/{item['postId']}" + if url not in self._web_search_urls_seen: + self._web_search_urls_seen.add(url) + # 构造 title:归一化空白,text 为空退回 @username + # Markdown 转义统一在 references_suffix() 中处理 + raw = re.sub(r"\s+", " ", (item.get("text") or "")).strip() + if raw: + title = f"𝕏/@{item['username']}: {raw[:50]}{'...' if len(raw) > 50 else ''}" + else: + title = f"𝕏/@{item['username']}" + self._web_search_results.append({"url": url, "title": title}) + token = resp.get("token") think = resp.get("isThinking") tag = resp.get("messageTag") diff --git a/app/products/openai/chat.py b/app/products/openai/chat.py index 5e5db437..32c65aca 100644 --- a/app/products/openai/chat.py +++ b/app/products/openai/chat.py @@ -2,6 +2,7 @@ import asyncio import base64 +import re from typing import Any, AsyncGenerator import orjson @@ -188,6 +189,12 @@ def _normalize_image_format(value: str | None) -> str: return fmt +# 精确匹配 grok2api 注入的 Sources 段落(含标记行),用于多轮对话剥离 +_SOURCES_STRIP_RE = re.compile( + r"(?:^|\r?\n\r?\n)## Sources\r?\n\[grok2api-sources\]: #\r?\n[\s\S]*$" +) + + def _extract_message(messages: list[dict]) -> tuple[str, list[str]]: """Flatten OpenAI messages into a single prompt string + file attachments.""" parts: list[str] = [] @@ -220,6 +227,10 @@ def _extract_message(messages: list[dict]) -> tuple[str, list[str]]: parts.append(f"[assistant]:\n{xml}") continue + # ── 剥离前轮 assistant 消息中 grok2api 注入的 Sources 段落 ──────────── + if role == "assistant" and isinstance(content, str): + content = _SOURCES_STRIP_RE.sub("", content) + # ── normal content handling ─────────────────────────────────────────── if isinstance(content, str): if content.strip(): @@ -230,7 +241,11 @@ def _extract_message(messages: list[dict]) -> tuple[str, list[str]]: continue btype = block.get("type") if btype == "text": - text = (block.get("text") or "").strip() + text = (block.get("text") or "") + # 块列表中的 assistant text 也需剥离 Sources(先 regex 再 strip,与 str 路径对齐) + if role == "assistant": + text = _SOURCES_STRIP_RE.sub("", text) + text = text.strip() if text: parts.append(f"[{role}]: {text}") elif btype == "image_url": diff --git a/app/statics/admin/config.html b/app/statics/admin/config.html index bcf2df6a..0c3d98f5 100644 --- a/app/statics/admin/config.html +++ b/app/statics/admin/config.html @@ -385,6 +385,7 @@ { key: 'thinking_summary', label: '思考精简输出', labelKey: 'config.schema.fields.thinkingSummary.label', type: 'bool', desc: '启用后将思考过程提炼为结构化摘要。关闭时输出完整的原始推理过程,支持多 Agent 模型的协作详情与工具调用展示。', descKey: 'config.schema.fields.thinkingSummary.desc' }, { key: 'dynamic_statsig', label: '动态 Statsig', labelKey: 'config.schema.fields.dynamicStatsig.label', type: 'bool', desc: '为每次请求动态生成 Statsig 设备指纹,以降低风控拦截概率。', descKey: 'config.schema.fields.dynamicStatsig.desc' }, { key: 'enable_nsfw', label: '允许 NSFW 生成', labelKey: 'config.schema.fields.enableNsfw.label', type: 'bool', desc: '允许图像生成接口绕过 NSFW 内容过滤。', descKey: 'config.schema.fields.enableNsfw.desc' }, + { key: 'show_search_sources', label: '搜索信源', labelKey: 'config.schema.fields.showSearchSources.label', type: 'bool', desc: '当 Grok 执行网络搜索时,在响应末尾追加信源链接(## Sources 段落)。', descKey: 'config.schema.fields.showSearchSources.desc' }, { key: 'custom_instruction', label: '全局附加指令', labelKey: 'config.schema.fields.customInstruction.label', type: 'textarea', desc: '为每次请求注入统一的 system 消息,用于约束模型行为或固定角色设定。', descKey: 'config.schema.fields.customInstruction.desc' }, ] }, diff --git a/app/statics/i18n/de.json b/app/statics/i18n/de.json index 281a6434..047bda8e 100644 --- a/app/statics/i18n/de.json +++ b/app/statics/i18n/de.json @@ -375,6 +375,7 @@ "thinkingSummary": { "label": "Kompakte Reasoning-Ausgabe" }, "dynamicStatsig": { "label": "Dynamisches Statsig" }, "enableNsfw": { "label": "NSFW-Erzeugung zulassen" }, + "showSearchSources": { "label": "Suchquellen" }, "customInstruction": { "label": "Globale Zusatzanweisung" }, "imageFormat": { "label": "Bildausgabeformat" }, "videoFormat": { "label": "Videoausgabeformat" }, diff --git a/app/statics/i18n/en.json b/app/statics/i18n/en.json index fd54dfd1..3a6c625e 100644 --- a/app/statics/i18n/en.json +++ b/app/statics/i18n/en.json @@ -411,6 +411,10 @@ "label": "Allow NSFW Generation", "desc": "Permits the image generation endpoint to bypass NSFW content filtering." }, + "showSearchSources": { + "label": "Search Sources", + "desc": "Append search source links (## Sources section) at the end of responses when Grok performs web search." + }, "customInstruction": { "label": "Global Supplemental Instruction", "desc": "Injects a consistent system message into every request to enforce model behavior or establish a fixed role." diff --git a/app/statics/i18n/es.json b/app/statics/i18n/es.json index 1e60228d..fd7aef10 100644 --- a/app/statics/i18n/es.json +++ b/app/statics/i18n/es.json @@ -375,6 +375,7 @@ "thinkingSummary": { "label": "Razonamiento condensado" }, "dynamicStatsig": { "label": "Statsig dinámico" }, "enableNsfw": { "label": "Permitir generación NSFW" }, + "showSearchSources": { "label": "Fuentes de búsqueda" }, "customInstruction": { "label": "Instrucción suplementaria global" }, "imageFormat": { "label": "Formato de salida de imagen" }, "videoFormat": { "label": "Formato de salida de video" }, diff --git a/app/statics/i18n/fr.json b/app/statics/i18n/fr.json index 0ff1d376..1cb60566 100644 --- a/app/statics/i18n/fr.json +++ b/app/statics/i18n/fr.json @@ -375,6 +375,7 @@ "thinkingSummary": { "label": "Raisonnement condensé" }, "dynamicStatsig": { "label": "Statsig dynamique" }, "enableNsfw": { "label": "Autoriser la génération NSFW" }, + "showSearchSources": { "label": "Sources de recherche" }, "customInstruction": { "label": "Instruction globale supplémentaire" }, "imageFormat": { "label": "Format de sortie image" }, "videoFormat": { "label": "Format de sortie vidéo" }, diff --git a/app/statics/i18n/ja.json b/app/statics/i18n/ja.json index 94ccacea..1554ee08 100644 --- a/app/statics/i18n/ja.json +++ b/app/statics/i18n/ja.json @@ -375,6 +375,7 @@ "thinkingSummary": { "label": "思考要約出力" }, "dynamicStatsig": { "label": "動的 Statsig" }, "enableNsfw": { "label": "NSFW 生成を許可" }, + "showSearchSources": { "label": "検索ソース" }, "customInstruction": { "label": "グローバル補助指示" }, "imageFormat": { "label": "画像出力形式" }, "videoFormat": { "label": "動画出力形式" }, diff --git a/app/statics/i18n/zh.json b/app/statics/i18n/zh.json index 052dac57..7664049d 100644 --- a/app/statics/i18n/zh.json +++ b/app/statics/i18n/zh.json @@ -411,6 +411,10 @@ "label": "允许 NSFW 生成", "desc": "允许图像生成接口绕过 NSFW 内容过滤。" }, + "showSearchSources": { + "label": "搜索信源", + "desc": "当 Grok 执行网络搜索时,在响应末尾追加搜索信源链接(## Sources 段落)。" + }, "customInstruction": { "label": "全局附加指令", "desc": "为每次请求注入统一的 system 消息,用于约束模型行为或固定角色设定。" diff --git a/config.defaults.toml b/config.defaults.toml index 639cf26d..1f5bebd7 100644 --- a/config.defaults.toml +++ b/config.defaults.toml @@ -36,6 +36,8 @@ thinking_summary = false dynamic_statsig = true # 是否允许生成 NSFW 图片 enable_nsfw = true +# 当 Grok 执行网络搜索时,在响应末尾追加搜索信源链接(## Sources 段落) +show_search_sources = false # 全局附加指令 custom_instruction = ""