Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*
.DS_Store

# Local AI agent files
.claude/
.codex/
AGENTS.md
CLAUDE.md
MODEL_ROUTING.md
REFLECTION_LOG.md

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
BiliNote/pnpm-lock.yaml
Expand Down Expand Up @@ -324,6 +333,9 @@ cython_debug/
/BiliNote_frontend/.idea/*
/BiliNote_frontend/src-tauri/bin/

# Local platform cookies
**/cookies.txt

# FFmpeg 构建文件(不应该提交到仓库)
ffmpeg*/
ffmpg*/
ffmpg*/
84 changes: 67 additions & 17 deletions backend/app/downloaders/bilibili_downloader.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,42 @@
import os
import json
import logging
import tempfile
from abc import ABC
from pathlib import Path
from typing import Union, Optional, List

import yt_dlp

from app.downloaders.base import Downloader, DownloadQuality, QUALITY_MAP
from app.models.notes_model import AudioDownloadResult
from app.models.transcriber_model import TranscriptResult, TranscriptSegment
from app.utils.logger import get_logger
from app.utils.path_helper import get_data_dir
from app.utils.url_parser import extract_video_id
from app.services.cookie_manager import CookieConfigManager

# Module-level logger, obtained from the project's logging helper so this
# module follows the application's logging configuration.
logger = get_logger(__name__)

# Path of the Bilibili cookies file. Read once, at import time, from the
# BILIBILI_COOKIES_FILE environment variable; defaults to "cookies.txt".
BILIBILI_COOKIES_FILE = os.getenv("BILIBILI_COOKIES_FILE", "cookies.txt")
# Default browser-like request headers for Bilibili (desktop Chrome User-Agent,
# Bilibili Referer/Origin, Sec-Fetch-* hints). `_apply_common_ydl_opts` merges
# them into the yt-dlp options, letting caller-supplied headers override them.
BILIBILI_HTTP_HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
'Accept-Encoding': 'gzip, deflate, br',
'Referer': 'https://www.bilibili.com/',
'Origin': 'https://www.bilibili.com',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
}


class BilibiliDownloader(Downloader, ABC):
def __init__(self):
    """Initialise the downloader and choose the cookie file passed to yt-dlp.

    The Bilibili cookie stored in ``CookieConfigManager`` is preferred: it is
    written to a Netscape-format file by ``_write_netscape_cookie_file``.
    When that yields nothing, ``_resolve_cookies_file`` looks for a
    ``cookies.txt`` on disk instead. ``self._cookiefile`` ends up as a path
    string or ``None``.
    """
    super().__init__()
    self._cookie_mgr = CookieConfigManager()
    self._cookie = self._cookie_mgr.get('bilibili')
    # Assign exactly once: calling the writer twice would generate a second
    # cookie file and leave the first one orphaned.
    self._cookiefile = self._write_netscape_cookie_file() or self._resolve_cookies_file()

def _write_netscape_cookie_file(self) -> Optional[str]:
"""将 Cookie 写入 Netscape 格式临时文件,返回文件路径(供 yt-dlp cookiefile 使用)"""
Expand All @@ -40,12 +54,54 @@ def _write_netscape_cookie_file(self) -> Optional[str]:
logger.info("已生成 B站 Netscape Cookie 文件: %s (条目: %d)", tmp.name, len(lines) - 1)
return tmp.name

def _resolve_cookies_file(self) -> Optional[str]:
    """Locate a Netscape ``cookies.txt`` file in the conventional places.

    An absolute ``BILIBILI_COOKIES_FILE`` is checked as-is. A relative one is
    tried, in order, under: the current working directory (first only when
    the environment variable is explicitly set), the third ancestor directory
    of this module (``parents[2]``), the current working directory, and
    ``/app``. Each candidate is resolved and checked at most once.

    Returns:
        The resolved path of the first candidate that is a regular file, as a
        string, or ``None`` when no candidate qualifies. Paths that exist but
        are not regular files are skipped with a warning.
    """
    cookie_path = Path(BILIBILI_COOKIES_FILE)
    backend_root = Path(__file__).resolve().parents[2]

    if cookie_path.is_absolute():
        search_order = [cookie_path]
    else:
        search_order = []
        # An explicitly configured relative path is tried against the working
        # directory before any of the built-in locations.
        if os.getenv("BILIBILI_COOKIES_FILE"):
            search_order.append(Path.cwd() / cookie_path)
        search_order += [
            backend_root / cookie_path,
            Path.cwd() / cookie_path,
            Path("/app") / cookie_path,
        ]

    visited = set()
    for entry in search_order:
        resolved = entry.resolve()
        if resolved not in visited:
            visited.add(resolved)
            if resolved.is_file():
                logger.info("使用 B站 cookies 文件: %s", resolved)
                return str(resolved)
            if resolved.exists():
                # Exists but is a directory or other non-regular file.
                logger.warning("忽略非文件 cookies 路径: %s", resolved)

    logger.warning("B站 Cookie 未配置且 cookies.txt 不存在,下载可能失败")
    return None

def _apply_common_ydl_opts(self, ydl_opts: dict) -> dict:
    """Add the options shared by every Bilibili yt-dlp call to ``ydl_opts``.

    The dict is modified in place and also returned. ``http_headers`` becomes
    the module defaults overlaid with any headers already present in
    ``ydl_opts``, ``extractor_retries`` is set to 5, and ``cookiefile`` is set
    only when this instance has a cookie file.
    """
    overrides = ydl_opts.get('http_headers', {})
    # Caller-provided headers are unpacked last so they win over the defaults.
    merged_headers = {**BILIBILI_HTTP_HEADERS, **overrides}
    ydl_opts['http_headers'] = merged_headers
    ydl_opts['extractor_retries'] = 5

    cookie_path = self._cookiefile
    if cookie_path:
        ydl_opts['cookiefile'] = cookie_path
    return ydl_opts

def download(
self,
video_url: str,
output_dir: Union[str, None] = None,
quality: DownloadQuality = "fast",
need_video:Optional[bool]=False
need_video: Optional[bool] = False,
skip_download: bool = False,
) -> AudioDownloadResult:
if output_dir is None:
output_dir = get_data_dir()
Expand All @@ -58,7 +114,6 @@ def download(
ydl_opts = {
'format': 'bestaudio[ext=m4a]/bestaudio/best',
'outtmpl': output_path,
'http_headers': {'Referer': 'https://www.bilibili.com'},
'postprocessors': [
{
'key': 'FFmpegExtractAudio',
Expand All @@ -69,11 +124,12 @@ def download(
'noplaylist': True,
'quiet': False,
}
if self._cookiefile:
ydl_opts['cookiefile'] = self._cookiefile
if skip_download:
ydl_opts['skip_download'] = True
self._apply_common_ydl_opts(ydl_opts)

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(video_url, download=True)
info = ydl.extract_info(video_url, download=not skip_download)
video_id = info.get("id")
title = info.get("title")
duration = info.get("duration", 0)
Expand Down Expand Up @@ -117,13 +173,11 @@ def download_video(
ydl_opts = {
'format': 'bv*[ext=mp4]/bestvideo+bestaudio/best',
'outtmpl': output_path,
'http_headers': {'Referer': 'https://www.bilibili.com'},
'noplaylist': True,
'quiet': False,
'merge_output_format': 'mp4', # 确保合并成 mp4
}
if self._cookiefile:
ydl_opts['cookiefile'] = self._cookiefile
self._apply_common_ydl_opts(ydl_opts)

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(video_url, download=True)
Expand Down Expand Up @@ -175,11 +229,7 @@ def download_subtitles(self, video_url: str, output_dir: str = None,
'outtmpl': os.path.join(output_dir, f'{video_id}.%(ext)s'),
'quiet': True,
}

# 通过 CookieConfigManager 注入 B站 Cookie(Netscape cookiefile)
if self._cookiefile:
ydl_opts['cookiefile'] = self._cookiefile
ydl_opts['http_headers'] = {'Referer': 'https://www.bilibili.com'}
self._apply_common_ydl_opts(ydl_opts)

try:
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
Expand Down Expand Up @@ -330,4 +380,4 @@ def _parse_json3_subtitle(self, subtitle_file: str, language: str) -> Optional[T

except Exception as e:
logger.warning(f"解析字幕文件失败: {e}")
return None
return None
4 changes: 2 additions & 2 deletions backend/app/downloaders/youtube_downloader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import logging
from abc import ABC
from typing import Union, Optional, List

Expand All @@ -9,10 +8,11 @@
from app.downloaders.youtube_subtitle import YouTubeSubtitleFetcher
from app.models.notes_model import AudioDownloadResult
from app.models.transcriber_model import TranscriptResult
from app.utils.logger import get_logger
from app.utils.path_helper import get_data_dir
from app.utils.url_parser import extract_video_id

# Module-level logger, obtained from the project's logging helper so this
# module follows the application's logging configuration.
logger = get_logger(__name__)


class YoutubeDownloader(Downloader, ABC):
Expand Down
31 changes: 29 additions & 2 deletions backend/app/routers/note.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,38 @@ def generate_note(data: VideoRequest, background_tasks: BackgroundTasks):
def get_task_status(task_id: str):
status_path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}.status.json")
result_path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}.json")
pending_status = {
"status": TaskStatus.PENDING.value,
"message": "任务排队中",
"task_id": task_id,
}

# 优先读状态文件
if os.path.exists(status_path):
with open(status_path, "r", encoding="utf-8") as f:
status_content = json.load(f)
try:
with open(status_path, "r", encoding="utf-8") as f:
content = f.read()
if content.strip():
status_content = json.loads(content)
elif os.path.exists(result_path):
logger.warning(f"状态文件为空但结果文件已存在: {status_path}")
status_content = {"status": TaskStatus.SUCCESS.value, "task_id": task_id}
else:
logger.warning(f"状态文件为空: {status_path}")
status_content = pending_status
except (json.JSONDecodeError, OSError) as e:
logger.warning(f"读取状态文件失败: {status_path}, {e}")
if os.path.exists(result_path):
status_content = {"status": TaskStatus.SUCCESS.value, "task_id": task_id}
else:
status_content = pending_status

if status_content == pending_status:
try:
with open(status_path, "w", encoding="utf-8") as wf:
json.dump(status_content, wf, ensure_ascii=False)
except OSError as e:
logger.warning(f"重建状态文件失败: {status_path}, {e}")

status = status_content.get("status")
message = status_content.get("message", "")
Expand Down
18 changes: 1 addition & 17 deletions backend/app/services/note.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,31 +321,15 @@ def _update_status(self, task_id: Optional[str], status: Union[str, TaskStatus],

NOTE_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
status_file = NOTE_OUTPUT_DIR / f"{task_id}.status.json"
print(f"写入状态文件: {status_file} 当前状态: {status}")
data = {"status": status.value if isinstance(status, TaskStatus) else status}
if message:
data["message"] = message

try:
# First create a temporary file
temp_file = status_file.with_suffix('.tmp')

# Write to temporary file
with temp_file.open('w', encoding='utf-8') as f:
with status_file.open('w', encoding='utf-8') as f:
json.dump(data, f, ensure_ascii=False, indent=2)

# Atomic rename operation
temp_file.replace(status_file)

print(f"状态文件写入成功: {status_file}")
except Exception as e:
logger.error(f"写入状态文件失败 (task_id={task_id}):{e}")
# Try to write error to file directly as fallback
try:
with status_file.open('w', encoding='utf-8') as f:
f.write(f"Error writing status: {str(e)}")
except:
logger.error(f"写入错误 {e}")

def _handle_exception(self, task_id, exc):
logger.error(f"任务异常 (task_id={task_id})", exc_info=True)
Expand Down
2 changes: 1 addition & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,5 @@ webencodings==0.5.1
websockets==15.0.1
yarl==1.19.0
youtube-transcript-api>=1.0.0
yt-dlp==2025.3.31
yt-dlp==2026.3.17
zopfli==0.2.3.post1
40 changes: 40 additions & 0 deletions backend/tests/test_note_status_resilience.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import inspect
import json

from app.downloaders.bilibili_downloader import BilibiliDownloader
from app.enmus.task_status_enums import TaskStatus
from app.routers import note as note_router


def _response_payload(response):
return json.loads(response.body.decode("utf-8"))


def test_empty_status_file_returns_pending_and_rewrites_json(tmp_path, monkeypatch):
    """An empty status file is reported as pending and rewritten as valid JSON."""
    task_id = "empty-status"
    status_file = tmp_path / f"{task_id}.status.json"
    # Point the router at the temporary directory for this test.
    monkeypatch.setattr(note_router, "NOTE_OUTPUT_DIR", str(tmp_path))
    status_file.write_text("", encoding="utf-8")

    response = note_router.get_task_status(task_id)
    reported_status = _response_payload(response)["data"]["status"]
    assert reported_status == TaskStatus.PENDING.value

    # The endpoint should have replaced the empty file with parseable JSON.
    rewritten = json.loads(status_file.read_text(encoding="utf-8"))
    assert rewritten["status"] == TaskStatus.PENDING.value


def test_invalid_status_file_returns_existing_result(tmp_path, monkeypatch):
    """A corrupt status file falls back to the result file that already exists."""
    task_id = "invalid-status"
    monkeypatch.setattr(note_router, "NOTE_OUTPUT_DIR", str(tmp_path))

    broken_status = tmp_path / f"{task_id}.status.json"
    finished_result = tmp_path / f"{task_id}.json"
    broken_status.write_text("{", encoding="utf-8")  # deliberately truncated JSON
    finished_result.write_text('{"markdown": "done"}', encoding="utf-8")

    response = note_router.get_task_status(task_id)
    data = _response_payload(response)["data"]

    assert data["status"] == TaskStatus.SUCCESS.value
    assert data["result"] == {"markdown": "done"}


def test_bilibili_downloader_accepts_skip_download_argument():
    """`BilibiliDownloader.download` exposes a `skip_download` parameter."""
    parameters = inspect.signature(BilibiliDownloader.download).parameters

    assert "skip_download" in parameters