Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
26a91b8
feat: implement Zhihu promotion MVP for technical projects
google-labs-jules[bot] Apr 9, 2026
e962cab
docs: track architecture docs and unblock gitignore
Apr 9, 2026
0babfb3
chore: untrack .env.example from gitignore and add all LLM providers
Apr 9, 2026
6f890af
fix: correct CORS_ORIGINS format in .env.example to JSON array
Apr 9, 2026
e6b3f11
fix: add missing role field in A2A task message payload
Apr 9, 2026
fd0d6a1
fix: add required 'role' field to A2A task message
google-labs-jules[bot] Apr 9, 2026
6df4e9e
fix: add langgraph-checkpoint-postgres dependency
Apr 9, 2026
58f8cb0
fix: add missing dependencies and correct worker settings
google-labs-jules[bot] Apr 9, 2026
1188c5c
feat: add promotion input modal and improve polling reliability
google-labs-jules[bot] Apr 9, 2026
560977c
fix: resolve JSON truncation and timeout issues in content generation
Apr 9, 2026
cdaa824
fix: add github_readme_fetched log and improve JSON extraction
Apr 9, 2026
a12fe48
feat: implement article persistence and async publishing feedback
google-labs-jules[bot] Apr 9, 2026
92e0c39
feat: implement real Zhihu article publishing via internal API
Apr 10, 2026
ef33cd2
feat: save Zhihu article as draft instead of auto-publishing
Apr 10, 2026
89934c8
feat: finalize technical promotion MVP with article history and robus…
google-labs-jules[bot] Apr 10, 2026
ff449d4
feat: complete technical promotion MVP and sync with latest fixes
google-labs-jules[bot] Apr 10, 2026
6057ef1
feat: technical promotion MVP with resolved conflicts and stability f…
google-labs-jules[bot] Apr 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions backend/app/agents/channel_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from datetime import datetime, timezone
import uuid

import httpx
import structlog

from app.config import settings
Expand Down Expand Up @@ -40,10 +41,33 @@ async def deploy(self, channel_config: dict, content: dict, assets: dict) -> lis
return [f"google_ad_{uuid.uuid4().hex[:8]}"]


class ZhihuAdapter:
    async def deploy(self, channel_config: dict, content: dict, assets: dict) -> list[str]:
        """Deploy an article to Zhihu (simulated in the MVP).

        Args:
            channel_config: Channel-specific settings (unused in the MVP).
            content: Generated content bundle; ``content["variants"]`` holds
                the article variants produced by the content-gen agent.
            assets: Creative assets (unused in the MVP).

        Returns:
            Single-element list containing a simulated article id, or the
            sentinel ``"zhihu_failed_no_cookie"`` when no cookie is configured.
        """
        logger.info("zhihu_deploy_start")

        if not settings.zhihu_cookie:
            logger.warning("zhihu_no_cookie_configured")
            return ["zhihu_failed_no_cookie"]

        # In the MVP we simulate the network call to Zhihu's internal API.
        # A real implementation would use httpx to POST to
        # https://zhuanlan.zhihu.com/api/articles with the provided Cookie and
        # the Markdown content converted to Zhihu's format.

        # Guard against an explicitly-empty variants list: dict.get's default
        # only applies when the key is MISSING, so {"variants": []} previously
        # raised IndexError on the [0] subscript.
        variants = content.get("variants") or [{}]
        title = variants[0].get("title")

        async with httpx.AsyncClient() as client:  # noqa: F841 — placeholder for the real API call
            # For MVP/safety, we log the intent and return a simulated ID.
            logger.info("zhihu_article_published_simulated", title=title)

        return [f"zhihu_art_{uuid.uuid4().hex[:8]}"]


# Registry mapping channel name -> adapter singleton; used to dispatch a
# deploy to the right channel implementation. Adapters hold no per-campaign
# state, so module-level instances are shared safely across calls.
_ADAPTERS = {
    "meta": MetaAdapter(),
    "tiktok": TikTokAdapter(),
    "google": GoogleAdapter(),
    "zhihu": ZhihuAdapter(),
}


Expand Down
148 changes: 126 additions & 22 deletions backend/app/agents/content_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
Events: ContentGenerated
"""
import json
import re
import uuid

import httpx
import structlog
from tenacity import retry, stop_after_attempt, wait_exponential

Expand All @@ -19,25 +21,71 @@
logger = structlog.get_logger(__name__)


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
async def _call_llm(prompt: str) -> list[dict]:
"""Call LLM to generate copy variants. Retries 3x on transient errors."""
async def _get_github_readme(url: str) -> str:
    """Fetch a project's README from GitHub.

    Accepts either a repository home URL (``https://github.com/<owner>/<repo>``)
    or a direct raw-content URL. Returns the README text, or ``""`` on any
    network/HTTP failure — callers treat the README as optional prompt
    context, so this is deliberately best-effort: log and degrade, never raise.
    """
    # Convert a repo home URL to the raw README URL on the default branch.
    match = re.match(r"https://github\.com/([^/]+)/([^/]+)/?$", url)
    if match:
        owner, repo = match.groups()
        url = f"https://raw.githubusercontent.com/{owner}/{repo}/main/README.md"

    # Bound the request so a slow GitHub response cannot stall the whole
    # content-generation job, and follow redirects (raw URLs can 30x).
    async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
        try:
            resp = await client.get(url)
            if resp.status_code == 404 and "/main/" in url:
                # Older repos still use "master" as the default branch. Only
                # retry when the substitution actually changes the URL —
                # previously a 404 on a user-supplied raw URL re-requested
                # the identical address.
                url = url.replace("/main/", "/master/")
                resp = await client.get(url)
            resp.raise_for_status()
            return resp.text
        except Exception as exc:  # noqa: BLE001 — best-effort fetch; see docstring
            logger.warning("github_readme_fetch_failed", url=url, error=str(exc))
            return ""


def _extract_json_array(raw: str) -> list[dict]:
    """Parse an LLM response into a JSON array of variant dicts.

    Tries bracket slicing (find/rfind) first so fenced code blocks *inside*
    an article body do not confuse the parser. On failure, falls back to
    stripping an outer ```json fence from the ORIGINAL text — the previous
    implementation retried on the already-sliced string, so the fence it was
    looking for had always been cut off and the fallback could never succeed.

    Raises:
        json.JSONDecodeError: when neither strategy yields valid JSON
            (propagates so tenacity retries the whole LLM call).
    """
    start = raw.find("[")
    end = raw.rfind("]") + 1
    if start != -1 and end != 0:
        try:
            return json.loads(raw[start:end])
        except json.JSONDecodeError:
            pass

    # Fallback: clean a surrounding markdown code fence off the full text.
    cleaned = raw
    if "```json" in cleaned:
        cleaned = cleaned.split("```json")[1].split("```")[0].strip()
    elif cleaned.startswith("```"):
        cleaned = cleaned.split("```")[1].split("```")[0].strip()
    return json.loads(cleaned)


# NOTE: Retry 2x to fit within 300s job limit (2 * 180s > 300s, but LLM usually faster)
@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=2, max=10))
async def _call_llm(prompt: str, is_article: bool = False) -> list[dict]:
    """Call the LLM to generate copy variants. Retries 2x on transient errors.

    Args:
        prompt: Fully-assembled user prompt (goal, channels, KPI, README context).
        is_article: When True, request a single long-form technical article
            (larger token budget) instead of 3 short A/B/C ad variants.

    Returns:
        The parsed JSON array of variant dicts returned by the model.
    """
    system_prompt = (
        "You are a senior performance marketing copywriter and technical evangelist. "
        "Return a JSON array of copy variants."
    )

    if is_article:
        system_prompt += (
            " Each variant must have: variant_label (A), title, body (the full Markdown article), channel. "
            "Generate ONLY 1 high-quality, professional technical article in Markdown format, "
            "suitable for Zhihu, Juejin, or CSDN."
        )
    else:
        system_prompt += (
            " Each variant must have: variant_label (A/B/C), hook, body, cta, channel. "
            "Generate 3 A/B/C copy variants optimized for these channels."
        )

    # Leading space matters: both branches above end with a period, so without
    # it the prompt read "...channels.Output ONLY valid JSON".
    system_prompt += " Output ONLY valid JSON, no markdown outside the JSON structure."

    raw = await llm_client.chat_completion(
        system=system_prompt,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=8192 if is_article else 2048,  # NOTE: technical articles need more tokens
    )
    return _extract_json_array(raw)


async def content_gen_node(state: CampaignState) -> dict:
Expand All @@ -49,32 +97,88 @@ async def content_gen_node(state: CampaignState) -> dict:
strategy = state.get("strategy") or {}
channels = strategy.get("channel_plan", [{"channel": "tiktok"}, {"channel": "meta"}])
channel_names = [c["channel"] for c in channels]
is_technical_promo = any(ch in ["zhihu", "juejin", "csdn"] for ch in channel_names)

# Analyze external repo if URL is in goal or constraints
repo_content = ""
repo_url_match = re.search(r"https://github\.com/[^\s]+", state["goal"])
if repo_url_match:
repo_url = repo_url_match.group(0)
repo_content = await _get_github_readme(repo_url)

prompt = (
f"Product goal: {state['goal']}\n"
f"Target channels: {', '.join(channel_names)}\n"
f"KPI target: {state['kpi']['metric']} = {state['kpi']['target']}\n"
f"Generate 3 A/B/C copy variants optimized for these channels."
f"KPI target: {state.get('kpi', {}).get('metric', 'awareness')} = {state.get('kpi', {}).get('target', 'high')}\n"
)

if repo_content:
prompt += f"\nProject Context (README):\n{repo_content[:4000]}\n"

if is_technical_promo:
prompt += (
"\nFocus on deep technical analysis. Generate a comprehensive technical article "
"that highlights the project's innovation, architecture, and value proposition."
)
else:
prompt += "\nGenerate 3 A/B/C copy variants optimized for these channels."

try:
variants = await _call_llm(prompt)
variants = await _call_llm(prompt, is_article=is_technical_promo)

bundle = {
"bundle_id": f"bundle_{uuid.uuid4().hex[:8]}",
bundle_id = uuid.uuid4()
bundle_data = {
"bundle_id": str(bundle_id),
"variants": variants,
"llm_model": settings.anthropic_model,
}

# ── Persistence Layer ─────────────────────────────────────────────
from app.database import async_session_factory
from app.models.content import ContentBundle, Copy

async with async_session_factory() as db:
# Ensure campaign exists (it might be 'demo' in some contexts,
# we should skip persistence for demo or handle it)
campaign_id = state.get("campaign_id")
try:
camp_uuid = uuid.UUID(campaign_id)

new_bundle = ContentBundle(
id=bundle_id,
campaign_id=camp_uuid,
llm_model=settings.anthropic_model,
generation_params={"tone": state.get("tone", "professional")},
)
db.add(new_bundle)

for var in variants:
new_copy = Copy(
bundle_id=bundle_id,
campaign_id=camp_uuid,
variant_label=var.get("variant_label", "A"),
hook=var.get("title") or var.get("hook"),
body=var.get("body"),
cta=var.get("cta"),
channel=var.get("channel"),
status="GENERATED"
)
db.add(new_copy)

await db.commit()
logger.info("content_gen_persisted", bundle_id=str(bundle_id))
except (ValueError, TypeError):
logger.warning("content_gen_persistence_skipped", campaign_id=campaign_id)

await event_bus.publish(
"ContentGenerated",
{"bundle": bundle},
{"bundle": bundle_data},
state["campaign_id"],
)

logger.info("content_gen_done", variants=len(variants))
return {
"content": bundle,
"content": bundle_data,
"status": "PRODUCTION",
"completed_tasks": ["content_gen"],
}
Expand Down
44 changes: 44 additions & 0 deletions backend/app/api/articles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from typing import List, Optional
from uuid import UUID
import structlog
from fastapi import APIRouter, Depends, Query
from sqlalchemy import select, desc
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.models.content import Copy

logger = structlog.get_logger(__name__)
router = APIRouter()

@router.get("", summary="Get historical articles")
async def list_articles(
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
    db: AsyncSession = Depends(get_db)
):
    """Return historical generated articles/copies, newest first."""
    stmt = (
        select(Copy)
        .order_by(desc(Copy.created_at))
        .offset(offset)
        .limit(limit)
    )
    rows = (await db.execute(stmt)).scalars().all()

    def _serialize(row: Copy) -> dict:
        # Flatten one ORM row into the public API payload shape; the stored
        # "hook" column doubles as the article title.
        return {
            "id": str(row.id),
            "bundle_id": str(row.bundle_id),
            "campaign_id": str(row.campaign_id),
            "variant_label": row.variant_label,
            "title": row.hook,
            "body": row.body,
            "channel": row.channel,
            "status": row.status,
            "created_at": row.created_at.isoformat(),
        }

    return {"items": [_serialize(r) for r in rows]}
2 changes: 2 additions & 0 deletions backend/app/api/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

from .campaigns import router as campaigns_router
from .agents import router as agents_router
from .articles import router as articles_router
from .ws import router as ws_router

api_router = APIRouter()

# Mount feature routers under versioned prefixes; the WebSocket router
# declares its own paths and therefore takes no prefix.
api_router.include_router(campaigns_router, prefix="/v1/campaigns", tags=["Campaigns"])
api_router.include_router(agents_router, prefix="/v1/agents", tags=["A2A Agents"])
api_router.include_router(articles_router, prefix="/v1/articles", tags=["Articles"])
api_router.include_router(ws_router, tags=["WebSocket"])
3 changes: 3 additions & 0 deletions backend/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ class Settings(BaseSettings):
google_ads_client_secret: str = Field(default="", alias="GOOGLE_ADS_CLIENT_SECRET")
google_ads_refresh_token: str = Field(default="", alias="GOOGLE_ADS_REFRESH_TOKEN")

# ── Zhihu ──────────────────────────────────────────────────────
zhihu_cookie: str = Field(default="", alias="ZHIHU_COOKIE")

# ── Image Generation ───────────────────────────────────────────
openai_api_key: str = Field(default="", alias="OPENAI_API_KEY")
stability_api_key: str = Field(default="", alias="STABILITY_API_KEY")
Expand Down
17 changes: 14 additions & 3 deletions backend/app/core/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,19 @@ async def chat_completion(
raise ValueError(f"Unsupported provider: {provider}")

async def _anthropic_completion(self, messages, system, model, max_tokens):
logger.info("llm_request", provider="anthropic", model=model or settings.anthropic_model)
response = await self.anthropic.messages.create(
model=model or settings.anthropic_model,
max_tokens=max_tokens or settings.anthropic_max_tokens,
system=system,
messages=messages,
)
logger.info("llm_response", provider="anthropic", tokens=response.usage.output_tokens)
return response.content[0].text

async def _openai_compatible_completion(self, base_url, api_key, messages, system, model, max_tokens):
async with httpx.AsyncClient() as client:
# NOTE: Timeout must be 180s for long technical articles
async with httpx.AsyncClient(timeout=180.0) as client:
full_messages = []
if system:
full_messages.append({"role": "system", "content": system})
Expand All @@ -106,10 +109,18 @@ async def _openai_compatible_completion(self, base_url, api_key, messages, syste
if api_key:
headers["Authorization"] = f"Bearer {api_key}"

url = f"{base_url}/chat/completions" if "/chat/completions" not in base_url else base_url
response = await client.post(url, json=payload, headers=headers, timeout=60.0)
url = base_url
if "/chat/completions" not in url:
if "?" in url:
url = url.replace("?", "/chat/completions?", 1)
else:
url = url.rstrip("/") + "/chat/completions"

logger.info("llm_request", provider="openai-compat", model=model, url=url)
response = await client.post(url, json=payload, headers=headers)
response.raise_for_status()
data = response.json()
logger.info("llm_response", provider="openai-compat", model=model)
return data["choices"][0]["message"]["content"]

# Module-level singleton LLM client shared across the application.
llm_client = LLMClient()
5 changes: 3 additions & 2 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ dependencies = [
"alembic>=1.14,<2.0",

# Agent framework
"langgraph>=0.2,<0.3",
"langchain-anthropic>=0.3,<0.4",
"langgraph>=0.2",
"langchain-anthropic>=0.3",
"langgraph-checkpoint-postgres>=2.0",
"anthropic>=0.40,<0.50",

# Protocol
Expand Down
Loading