Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ S3_HOST_BUCKET=%(bucket)s.s3.amazonaws.com
# Open AI (Aliyun)
OPENAI_API_BASE=https://dashscope.aliyuncs.com/compatible-mode/v1
OPENAI_API_KEY=********
OPENAI_MODEL=qwen-turbo
```

- Generate a [personal access token](https://github.com/settings/tokens) from GitHub and fill the `GITHUB_ACCESS_TOKEN`
Expand Down
1 change: 1 addition & 0 deletions frontend/src/layouts/MainLayout.vue
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ export default {
drawer: false,
topics: [
"all",
"ai",
"python",
"java",
"cpp",
Expand Down
58 changes: 57 additions & 1 deletion utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from sqlalchemy import Column, String, Integer, DateTime, JSON
from sqlalchemy.orm import declarative_base
from openai import OpenAI
from pydantic import BaseModel

MAX_COMMENT_LENGTH = 512

Expand All @@ -22,6 +23,9 @@
api_key=os.getenv("OPENAI_API_KEY"), base_url=os.getenv("OPENAI_API_BASE")
)

# OpenAI model for chat completions (default: qwen-turbo)
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "qwen-turbo")


class LogFormatter(logging.Formatter):

Expand Down Expand Up @@ -175,7 +179,7 @@ def tldr(text: str) -> str:
+ f"The README of the repository is: \n\n{text}"
)
resp = openai_client.chat.completions.create(
model="qwen-turbo",
model=OPENAI_MODEL,
messages=[
{
"role": "user",
Expand All @@ -186,6 +190,50 @@ def tldr(text: str) -> str:
return resp.choices[0].message.content


class AIRelevance(BaseModel):
    """Structured-output schema for AI relevance detection.

    Passed as ``response_format`` to the chat-completions ``parse`` call so
    the model's verdict comes back as validated JSON with a single boolean.
    """

    # True when the model judges the repository to be AI-related.
    is_ai_related: bool


def isai(text: str) -> bool:
    """
    Determine if a repository is related to AI based on its description.

    Args:
        text: The repository description or README content.

    Returns:
        True if the repository is AI-related, False otherwise.
    """
    # NOTE: each string fragment must end with a space (or start with one) so
    # the concatenated prompt reads correctly; the fragment after "Agent,"
    # previously lacked one, yielding "Agent,Vibe Coding".
    prompt = (
        "Determine if this GitHub repository is related to Artificial Intelligence (AI), "
        "Large Language Models (LLMs), Vision Language Model (VLM), World Model, "
        "Retrieval-Augmented Generation (RAG), Vector Database, Embedding, Agent, "
        "Vibe Coding, Harness Engineering, or other AI fields. "
        "Consider libraries, frameworks, models, and tools for AI development.\n\n"
        f"Repository description/README:\n{text}"
    )

    # Structured output: the SDK parses the response into AIRelevance, so a
    # malformed model reply surfaces as an exception instead of a bad string.
    resp = openai_client.beta.chat.completions.parse(
        model=OPENAI_MODEL,
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        response_format=AIRelevance,
        # Qwen-specific knob: disable "thinking" mode so the reply is plain
        # structured JSON; backends that don't know the kwarg ignore it.
        extra_body={
            "chat_template_kwargs": {
                "enable_thinking": False,
            }
        },
    )

    return resp.choices[0].message.parsed.is_ai_related


def get_repo_info(github_client: Github, full_name: str) -> Optional[Dict]:
"""
Get GitHub repository information.
Expand All @@ -204,6 +252,14 @@ def get_repo_info(github_client: Github, full_name: str) -> Optional[Dict]:
if description is None:
description = tldr(repo.get_readme().decoded_content.decode("utf-8"))
print("QWEN:", description)

# Check if repository is AI-related and add "ai" category
if isai(description):
if categories is None:
categories = ["ai"]
elif "ai" not in categories:
categories.append("ai")

description_embedding = embedding(description)
item = {
"ItemId": full_name.replace("/", ":").lower(),
Expand Down
46 changes: 44 additions & 2 deletions x.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,15 +226,16 @@ def upgrade_embedding():
cursor = ""
while True:
items, cursor = gorse_client.get_items(1000, cursor)
if cursor == "":
break
for item in tqdm(items):
if len(item["Comment"]) > 0:
item["Labels"]["embedding"] = embedding(item["Comment"])
gorse_client.update_item(
item["ItemId"],
labels=item["Labels"],
)

if cursor == "":
break


def write_dump(f, data: message.Message):
Expand Down Expand Up @@ -317,5 +318,46 @@ def dump_playground(database: str, username: Optional[str], password: Optional[s
f"Dump complete: {num_users} users, {num_items} items, {num_feedback} feedback.")



@command.command()
def upgrade_ai():
    """Upgrade items with AI category detection."""
    next_cursor = ""
    total_tagged = 0
    while True:
        batch, next_cursor = gorse_client.get_items(1000, next_cursor)
        for entry in tqdm(batch):
            entry_id = entry["ItemId"]
            labels = entry.get("Categories") or []
            description = entry.get("Comment", "")

            # Nothing to classify, or already tagged — move on.
            if not description or "ai" in labels:
                continue

            # Classification and update are best-effort per item: one bad
            # item must not abort the whole batch run.
            try:
                if not isai(description):
                    continue
                labels.append("ai")
                gorse_client.update_item(
                    entry_id,
                    categories=labels,
                )
                total_tagged += 1
                print(f"UPDATE {entry_id} -> ai")
            except Exception as e:
                print(f"FAIL {entry_id}: {e}")

        # Empty cursor after the fetch means this was the last page.
        if next_cursor == "":
            break

    print(f"Upgrade complete: {total_tagged} items updated with 'ai' category.")


# Script entry point: dispatch to the registered CLI subcommands
# (e.g. upgrade_embedding, dump_playground, upgrade_ai).
if __name__ == "__main__":
    command()
Loading