From a87f8d9e70ca24471c40168204d33ba1ac37af4c Mon Sep 17 00:00:00 2001 From: zhangzhenghao Date: Tue, 7 Apr 2026 18:42:24 +0800 Subject: [PATCH 1/6] feat: add AI topic to navigation tabs --- frontend/src/layouts/MainLayout.vue | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/layouts/MainLayout.vue b/frontend/src/layouts/MainLayout.vue index 84f321b..4e2b194 100644 --- a/frontend/src/layouts/MainLayout.vue +++ b/frontend/src/layouts/MainLayout.vue @@ -85,6 +85,7 @@ export default { drawer: false, topics: [ "all", + "ai", "python", "java", "cpp", From 33aa7ff2711215b5fa91a96f2cb90c06634b27ff Mon Sep 17 00:00:00 2001 From: zhangzhenghao Date: Tue, 7 Apr 2026 19:38:23 +0800 Subject: [PATCH 2/6] feat: add isai function to detect AI-related repositories - Add AIRelevance Pydantic model for structured output - Add isai() function using chat completion with structured outputs - Determines if a repository is related to AI/ML/NLP/Computer Vision --- utils.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/utils.py b/utils.py index 0441ea7..518093b 100644 --- a/utils.py +++ b/utils.py @@ -14,6 +14,7 @@ from sqlalchemy import Column, String, Integer, DateTime, JSON from sqlalchemy.orm import declarative_base from openai import OpenAI +from pydantic import BaseModel MAX_COMMENT_LENGTH = 512 @@ -186,6 +187,43 @@ def tldr(text: str) -> str: return resp.choices[0].message.content +class AIRelevance(BaseModel): + """Model for AI relevance detection.""" + is_ai_related: bool + + +def isai(text: str) -> bool: + """ + Determine if a repository is related to AI based on its description. + + Args: + text: The repository description or README content. + + Returns: + True if the repository is AI-related, False otherwise. 
+ """ + prompt = ( + "Determine if this GitHub repository is related to Artificial Intelligence (AI), " + "Machine Learning (ML), Deep Learning, Natural Language Processing (NLP), " + "Computer Vision, or other AI/ML fields. " + "Consider libraries, frameworks, models, and tools for AI/ML development.\n\n" + f"Repository description/README:\n{text}" + ) + + resp = openai_client.beta.chat.completions.parse( + model="qwen-turbo", + messages=[ + { + "role": "user", + "content": prompt, + } + ], + response_format=AIRelevance, + ) + + return resp.choices[0].message.parsed.is_ai_related + + def get_repo_info(github_client: Github, full_name: str) -> Optional[Dict]: """ Get GitHub repository information. From e5e2f2ec51999ea1b128600ca61d14ef703b6297 Mon Sep 17 00:00:00 2001 From: zhangzhenghao Date: Tue, 7 Apr 2026 19:40:57 +0800 Subject: [PATCH 3/6] feat: add 'ai' category for AI-related repositories - Call isai() to check if repository is AI-related - Add 'ai' to categories if isai() returns true --- utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/utils.py b/utils.py index 518093b..dedbbcc 100644 --- a/utils.py +++ b/utils.py @@ -242,6 +242,14 @@ def get_repo_info(github_client: Github, full_name: str) -> Optional[Dict]: if description is None: description = tldr(repo.get_readme().decoded_content.decode("utf-8")) print("QWEN:", description) + + # Check if repository is AI-related and add "ai" category + if isai(description): + if categories is None: + categories = ["ai"] + elif "ai" not in categories: + categories.append("ai") + description_embedding = embedding(description) item = { "ItemId": full_name.replace("/", ":").lower(), From 3b784283b97c791ab17e0aa58a2eb41d0069f4e1 Mon Sep 17 00:00:00 2001 From: zhangzhenghao Date: Tue, 7 Apr 2026 19:43:24 +0800 Subject: [PATCH 4/6] feat: add upgrade_isai command to process existing repos - Add upgrade_isai command to x.py - Iterate through all items in Gorse - Check if repository is AI-related using isai() 
- Add 'ai' category if AI-related - Skip items that already have 'ai' category --- x.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/x.py b/x.py index 2d6577d..1d82169 100644 --- a/x.py +++ b/x.py @@ -317,5 +317,45 @@ def dump_playground(database: str, username: Optional[str], password: Optional[s f"Dump complete: {num_users} users, {num_items} items, {num_feedback} feedback.") + +@command.command() +def upgrade_isai(): + """Upgrade items with AI category detection.""" + cursor = "" + updated_count = 0 + while True: + items, cursor = gorse_client.get_items(1000, cursor) + if cursor == "": + break + for item in tqdm(items): + item_id = item["ItemId"] + categories = item.get("Categories") or [] + + # Skip if already has "ai" category + if "ai" in categories: + continue + + # Get description from comment + description = item.get("Comment", "") + if not description: + continue + + # Check if AI-related + try: + if isai(description): + categories.append("ai") + gorse_client.update_item( + item_id, + categories=categories, + ) + updated_count += 1 + print(f"UPDATE {item_id} -> ai") + except Exception as e: + print(f"FAIL {item_id}: {e}") + continue + + print(f"Upgrade complete: {updated_count} items updated with 'ai' category.") + + if __name__ == "__main__": command() From bcc3fb60f33ab630b213fb8cc63c7813f57e8ca8 Mon Sep 17 00:00:00 2001 From: zhangzhenghao Date: Tue, 7 Apr 2026 20:12:36 +0800 Subject: [PATCH 5/6] feat: add OPENAI_MODEL environment variable support - Add OPENAI_MODEL env variable with default value 'qwen-turbo' - Update tldr() and isai() to use OPENAI_MODEL - Update README.md with OPENAI_MODEL configuration --- README.md | 1 + utils.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 56c0d24..dd30952 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,7 @@ S3_HOST_BUCKET=%(bucket)s.s3.amazonaws.com # Open AI (Aliyun) 
OPENAI_API_BASE=https://dashscope.aliyuncs.com/compatible-mode/v1 OPENAI_API_KEY=******** +OPENAI_MODEL=qwen-turbo ``` - Generate a [personal access token](https://github.com/settings/tokens) from GitHub and fill the `GITHUB_ACCESS_TOKEN` diff --git a/utils.py b/utils.py index dedbbcc..e42dfb8 100644 --- a/utils.py +++ b/utils.py @@ -23,6 +23,9 @@ api_key=os.getenv("OPENAI_API_KEY"), base_url=os.getenv("OPENAI_API_BASE") ) +# OpenAI model for chat completions (default: qwen-turbo) +OPENAI_MODEL = os.getenv("OPENAI_MODEL", "qwen-turbo") + class LogFormatter(logging.Formatter): @@ -176,7 +179,7 @@ def tldr(text: str) -> str: + f"The README of the repository is: \n\n{text}" ) resp = openai_client.chat.completions.create( - model="qwen-turbo", + model=OPENAI_MODEL, messages=[ { "role": "user", @@ -211,7 +214,7 @@ def isai(text: str) -> bool: ) resp = openai_client.beta.chat.completions.parse( - model="qwen-turbo", + model=OPENAI_MODEL, messages=[ { "role": "user", From d226fe5667845e9120a8231b5e03cc5c02ab46fa Mon Sep 17 00:00:00 2001 From: zhenghaoz Date: Tue, 7 Apr 2026 21:26:02 +0800 Subject: [PATCH 6/6] Update prompt --- utils.py | 25 ++++++++++++++++--------- x.py | 12 +++++++----- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/utils.py b/utils.py index e42dfb8..bc69499 100644 --- a/utils.py +++ b/utils.py @@ -192,27 +192,29 @@ def tldr(text: str) -> str: class AIRelevance(BaseModel): """Model for AI relevance detection.""" + is_ai_related: bool def isai(text: str) -> bool: """ Determine if a repository is related to AI based on its description. - + Args: text: The repository description or README content. - + Returns: True if the repository is AI-related, False otherwise. """ prompt = ( "Determine if this GitHub repository is related to Artificial Intelligence (AI), " - "Machine Learning (ML), Deep Learning, Natural Language Processing (NLP), " - "Computer Vision, or other AI/ML fields. 
" - "Consider libraries, frameworks, models, and tools for AI/ML development.\n\n" + "Large Language Models (LLMs), Vision Language Model (VLM), World Model, " + "Retrieval-Augmented Generation (RAG), Vector Database, Embedding, Agent, " + "Vibe Coding, Harness Engineering, or other AI fields. " + "Consider libraries, frameworks, models, and tools for AI development.\n\n" f"Repository description/README:\n{text}" ) - + resp = openai_client.beta.chat.completions.parse( model=OPENAI_MODEL, messages=[ @@ -222,8 +224,13 @@ def isai(text: str) -> bool: } ], response_format=AIRelevance, + extra_body={ + "chat_template_kwargs": { + "enable_thinking": False, + } + }, ) - + return resp.choices[0].message.parsed.is_ai_related @@ -245,14 +252,14 @@ def get_repo_info(github_client: Github, full_name: str) -> Optional[Dict]: if description is None: description = tldr(repo.get_readme().decoded_content.decode("utf-8")) print("QWEN:", description) - + # Check if repository is AI-related and add "ai" category if isai(description): if categories is None: categories = ["ai"] elif "ai" not in categories: categories.append("ai") - + description_embedding = embedding(description) item = { "ItemId": full_name.replace("/", ":").lower(), diff --git a/x.py b/x.py index 1d82169..51922cc 100644 --- a/x.py +++ b/x.py @@ -226,8 +226,6 @@ def upgrade_embedding(): cursor = "" while True: items, cursor = gorse_client.get_items(1000, cursor) - if cursor == "": - break for item in tqdm(items): if len(item["Comment"]) > 0: item["Labels"]["embedding"] = embedding(item["Comment"]) @@ -235,6 +233,9 @@ def upgrade_embedding(): item["ItemId"], labels=item["Labels"], ) + + if cursor == "": + break def write_dump(f, data: message.Message): @@ -319,14 +320,12 @@ def dump_playground(database: str, username: Optional[str], password: Optional[s @command.command() -def upgrade_isai(): +def upgrade_ai(): """Upgrade items with AI category detection.""" cursor = "" updated_count = 0 while True: items, cursor = 
gorse_client.get_items(1000, cursor) - if cursor == "": - break for item in tqdm(items): item_id = item["ItemId"] categories = item.get("Categories") or [] @@ -353,6 +352,9 @@ def upgrade_isai(): except Exception as e: print(f"FAIL {item_id}: {e}") continue + + if cursor == "": + break print(f"Upgrade complete: {updated_count} items updated with 'ai' category.")