JefferyHcool · iorireal · May 6, 2026
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,15 @@ yarn-error.log*
 lerna-debug.log*
 .pnpm-debug.log*
 .DS_Store
+
+# Local AI agent files
+.claude/
+.codex/
+AGENTS.md
+CLAUDE.md
+MODEL_ROUTING.md
+REFLECTION_LOG.md
+
 # Diagnostic reports (https://nodejs.org/api/report.html)
 report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 BiliNote/pnpm-lock.yaml
@@ -324,6 +333,9 @@ cython_debug/
 /BiliNote_frontend/.idea/*
 /BiliNote_frontend/src-tauri/bin/
 
+# Local platform cookies
+**/cookies.txt
+
 # FFmpeg 构建文件（不应该提交到仓库）
 ffmpeg*/
-ffmpg*/
+ffmpg*/
diff --git a/backend/app/downloaders/bilibili_downloader.py b/backend/app/downloaders/bilibili_downloader.py
@@ -1,28 +1,42 @@
 import os
 import json
-import logging
 import tempfile
 from abc import ABC
+from pathlib import Path
 from typing import Union, Optional, List
 
 import yt_dlp
 
 from app.downloaders.base import Downloader, DownloadQuality, QUALITY_MAP
 from app.models.notes_model import AudioDownloadResult
 from app.models.transcriber_model import TranscriptResult, TranscriptSegment
+from app.utils.logger import get_logger
 from app.utils.path_helper import get_data_dir
 from app.utils.url_parser import extract_video_id
 from app.services.cookie_manager import CookieConfigManager
 
-logger = logging.getLogger(__name__)
+logger = get_logger(__name__)
+
+BILIBILI_COOKIES_FILE = os.getenv("BILIBILI_COOKIES_FILE") or "cookies.txt"  # an empty value also falls back to the default
+BILIBILI_HTTP_HEADERS = {  # baseline request headers merged into yt-dlp's http_headers
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
+    'Accept-Encoding': 'gzip, deflate, br',  # NOTE(review): advertises brotli; confirm the HTTP stack in use can decode "br"
+    'Referer': 'https://www.bilibili.com/',
+    'Origin': 'https://www.bilibili.com',
+    'Sec-Fetch-Dest': 'empty',
+    'Sec-Fetch-Mode': 'cors',
+    'Sec-Fetch-Site': 'same-site',
+}
 
 
 class BilibiliDownloader(Downloader, ABC):
     def __init__(self):
         super().__init__()
         self._cookie_mgr = CookieConfigManager()
         self._cookie = self._cookie_mgr.get('bilibili')
-        self._cookiefile = self._write_netscape_cookie_file()
+        self._cookiefile = self._write_netscape_cookie_file() or self._resolve_cookies_file()  # fall back to a cookies.txt on disk when no temp cookie file is returned
 
     def _write_netscape_cookie_file(self) -> Optional[str]:
         """将 Cookie 写入 Netscape 格式临时文件，返回文件路径（供 yt-dlp cookiefile 使用）"""
@@ -40,12 +54,54 @@ def _write_netscape_cookie_file(self) -> Optional[str]:
         logger.info("已生成 B站 Netscape Cookie 文件: %s (条目: %d)", tmp.name, len(lines) - 1)
         return tmp.name
 
+    def _resolve_cookies_file(self) -> Optional[str]:
+        """Locate a Netscape-format cookies.txt file for Bilibili.
+
+        An absolute BILIBILI_COOKIES_FILE is used as-is; a relative one is tried
+        under the backend root, the working directory and /app (working
+        directory first when the environment variable is set).
+
+        Returns the first existing regular file as a string, else None.
+        """
+        # An empty env value would become Path("."), so fall back to the default.
+        configured = Path(BILIBILI_COOKIES_FILE or "cookies.txt").expanduser()
+        backend_root = Path(__file__).resolve().parents[2]
+        bases = [backend_root, Path.cwd(), Path("/app")]
+        if os.getenv("BILIBILI_COOKIES_FILE"):
+            bases.insert(0, Path.cwd())  # explicit config: prefer the working dir
+        # Joining onto an absolute path yields that path; `seen` drops repeats.
+        candidates = [base / configured for base in bases]
+
+        seen: set[Path] = set()
+        for candidate in candidates:
+            candidate = candidate.resolve()
+            if candidate in seen:
+                continue
+            seen.add(candidate)
+            if candidate.is_file():
+                logger.info("使用 B站 cookies 文件: %s", candidate)
+                return str(candidate)
+            if candidate.exists():
+                logger.warning("忽略非文件 cookies 路径: %s", candidate)
+
+        logger.warning("B站 Cookie 未配置且 cookies.txt 不存在，下载可能失败")
+        return None
+
+    def _apply_common_ydl_opts(self, ydl_opts: dict) -> dict:
+        """Add shared headers, retry count and cookie file to ydl_opts in place and return it; caller-set headers/retries win."""
+        ydl_opts['http_headers'] = {**BILIBILI_HTTP_HEADERS, **(ydl_opts.get('http_headers') or {})}  # tolerate an explicit None
+        ydl_opts.setdefault('extractor_retries', 5)
+        if self._cookiefile:
+            ydl_opts['cookiefile'] = self._cookiefile
+        return ydl_opts
+
     def download(
         self,
         video_url: str,
         output_dir: Union[str, None] = None,
         quality: DownloadQuality = "fast",
-        need_video:Optional[bool]=False
+        need_video: Optional[bool] = False,
+        skip_download: bool = False,
     ) -> AudioDownloadResult:
         if output_dir is None:
             output_dir = get_data_dir()
@@ -58,7 +114,6 @@ def download(
         ydl_opts = {
             'format': 'bestaudio[ext=m4a]/bestaudio/best',
             'outtmpl': output_path,
-            'http_headers': {'Referer': 'https://www.bilibili.com'},
             'postprocessors': [
                 {
                     'key': 'FFmpegExtractAudio',
@@ -69,11 +124,12 @@ def download(
             'noplaylist': True,
             'quiet': False,
         }
-        if self._cookiefile:
-            ydl_opts['cookiefile'] = self._cookiefile
+        if skip_download:
+            ydl_opts['skip_download'] = True
+        self._apply_common_ydl_opts(ydl_opts)
 
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(video_url, download=True)
+            info = ydl.extract_info(video_url, download=not skip_download)
             video_id = info.get("id")
             title = info.get("title")
             duration = info.get("duration", 0)
@@ -117,13 +173,11 @@ def download_video(
         ydl_opts = {
             'format': 'bv*[ext=mp4]/bestvideo+bestaudio/best',
             'outtmpl': output_path,
-            'http_headers': {'Referer': 'https://www.bilibili.com'},
             'noplaylist': True,
             'quiet': False,
             'merge_output_format': 'mp4',  # 确保合并成 mp4
         }
-        if self._cookiefile:
-            ydl_opts['cookiefile'] = self._cookiefile
+        self._apply_common_ydl_opts(ydl_opts)
 
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info = ydl.extract_info(video_url, download=True)
@@ -175,11 +229,7 @@ def download_subtitles(self, video_url: str, output_dir: str = None,
             'outtmpl': os.path.join(output_dir, f'{video_id}.%(ext)s'),
             'quiet': True,
         }
-
-        # 通过 CookieConfigManager 注入 B站 Cookie（Netscape cookiefile）
-        if self._cookiefile:
-            ydl_opts['cookiefile'] = self._cookiefile
-            ydl_opts['http_headers'] = {'Referer': 'https://www.bilibili.com'}
+        self._apply_common_ydl_opts(ydl_opts)
 
         try:
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
@@ -330,4 +380,4 @@ def _parse_json3_subtitle(self, subtitle_file: str, language: str) -> Optional[T
 
         except Exception as e:
             logger.warning(f"解析字幕文件失败: {e}")
-            return None
+            return None
diff --git a/backend/app/downloaders/youtube_downloader.py b/backend/app/downloaders/youtube_downloader.py
@@ -1,5 +1,4 @@
 import os
-import logging
 from abc import ABC
 from typing import Union, Optional, List
 
@@ -9,10 +8,11 @@
 from app.downloaders.youtube_subtitle import YouTubeSubtitleFetcher
 from app.models.notes_model import AudioDownloadResult
 from app.models.transcriber_model import TranscriptResult
+from app.utils.logger import get_logger
 from app.utils.path_helper import get_data_dir
 from app.utils.url_parser import extract_video_id
 
-logger = logging.getLogger(__name__)
+logger = get_logger(__name__)
 
 
 class YoutubeDownloader(Downloader, ABC):

diff --git a/backend/app/routers/note.py b/backend/app/routers/note.py
@@ -175,11 +175,38 @@ def generate_note(data: VideoRequest, background_tasks: BackgroundTasks):
 def get_task_status(task_id: str):
     status_path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}.status.json")
     result_path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}.json")
+    pending_status = {
+        "status": TaskStatus.PENDING.value,
+        "message": "任务排队中",
+        "task_id": task_id,
+    }
 
     # 优先读状态文件
     if os.path.exists(status_path):
-        with open(status_path, "r", encoding="utf-8") as f:
-            status_content = json.load(f)
+        try:
+            with open(status_path, "r", encoding="utf-8") as f:
+                content = f.read()
+                if content.strip():
+                    status_content = json.loads(content)
+                elif os.path.exists(result_path):
+                    logger.warning(f"状态文件为空但结果文件已存在: {status_path}")
+                    status_content = {"status": TaskStatus.SUCCESS.value, "task_id": task_id}
+                else:
+                    logger.warning(f"状态文件为空: {status_path}")
+                    status_content = pending_status
+        except (json.JSONDecodeError, OSError) as e:
+            logger.warning(f"读取状态文件失败: {status_path}, {e}")
+            if os.path.exists(result_path):
+                status_content = {"status": TaskStatus.SUCCESS.value, "task_id": task_id}
+            else:
+                status_content = pending_status
+
+        if status_content == pending_status:
+            try:
+                with open(status_path, "w", encoding="utf-8") as wf:
+                    json.dump(status_content, wf, ensure_ascii=False)
+            except OSError as e:
+                logger.warning(f"重建状态文件失败: {status_path}, {e}")
 
         status = status_content.get("status")
         message = status_content.get("message", "")

diff --git a/backend/app/services/note.py b/backend/app/services/note.py
@@ -321,31 +321,15 @@ def _update_status(self, task_id: Optional[str], status: Union[str, TaskStatus],
 
         NOTE_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
         status_file = NOTE_OUTPUT_DIR / f"{task_id}.status.json"
-        print(f"写入状态文件: {status_file} 当前状态: {status}")
         data = {"status": status.value if isinstance(status, TaskStatus) else status}
         if message:
             data["message"] = message
 
         try:
-            # First create a temporary file
-            temp_file = status_file.with_suffix('.tmp')
-
-            # Write to temporary file
-            with temp_file.open('w', encoding='utf-8') as f:
+            with status_file.open('w', encoding='utf-8') as f:
                 json.dump(data, f, ensure_ascii=False, indent=2)
-
-            # Atomic rename operation
-            temp_file.replace(status_file)
-
-            print(f"状态文件写入成功: {status_file}")
         except Exception as e:
             logger.error(f"写入状态文件失败 (task_id={task_id})：{e}")
-            # Try to write error to file directly as fallback
-            try:
-                with status_file.open('w', encoding='utf-8') as f:
-                    f.write(f"Error writing status: {str(e)}")
-            except:
-                logger.error(f"写入错误  {e}")
 
     def _handle_exception(self, task_id, exc):
         logger.error(f"任务异常 (task_id={task_id})", exc_info=True)

diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -125,5 +125,5 @@ webencodings==0.5.1
 websockets==15.0.1
 yarl==1.19.0
 youtube-transcript-api>=1.0.0
-yt-dlp==2025.3.31
+yt-dlp==2026.3.17
 zopfli==0.2.3.post1
diff --git a/backend/tests/test_note_status_resilience.py b/backend/tests/test_note_status_resilience.py
@@ -0,0 +1,40 @@
+import inspect
+import json
+
+from app.downloaders.bilibili_downloader import BilibiliDownloader
+from app.enmus.task_status_enums import TaskStatus
+from app.routers import note as note_router
+
+
+def _response_payload(response):
+    return json.loads(str(response.body, encoding="utf-8"))  # decode the raw body as UTF-8 JSON
+
+
+def test_empty_status_file_returns_pending_and_rewrites_json(tmp_path, monkeypatch):
+    """An empty status file reads as PENDING and is rewritten as valid JSON."""
+    monkeypatch.setattr(note_router, "NOTE_OUTPUT_DIR", str(tmp_path))
+    task_id = "empty-status"
+    status_file = tmp_path / f"{task_id}.status.json"
+    status_file.write_text("", encoding="utf-8")
+
+    response = note_router.get_task_status(task_id)
+    assert _response_payload(response)["data"]["status"] == TaskStatus.PENDING.value
+    assert json.loads(status_file.read_text(encoding="utf-8"))["status"] == TaskStatus.PENDING.value
+
+
+def test_invalid_status_file_returns_existing_result(tmp_path, monkeypatch):
+    """Malformed status JSON plus an existing result file is reported as SUCCESS."""
+    monkeypatch.setattr(note_router, "NOTE_OUTPUT_DIR", str(tmp_path))
+    task_id = "invalid-status"
+    (tmp_path / f"{task_id}.json").write_text('{"markdown": "done"}', encoding="utf-8")
+    (tmp_path / f"{task_id}.status.json").write_text("{", encoding="utf-8")
+
+    data = _response_payload(note_router.get_task_status(task_id))["data"]
+    assert data["status"] == TaskStatus.SUCCESS.value
+    assert data["result"] == {"markdown": "done"}
+
+
+def test_bilibili_downloader_accepts_skip_download_argument():
+    # skip_download must exist and default to False so existing callers still download.
+    skip_download = inspect.signature(BilibiliDownloader.download).parameters.get("skip_download")
+    assert skip_download is not None and skip_download.default is False