From 6defe262f6a3cf1ed32ca4f5814d91ab28a395d6 Mon Sep 17 00:00:00 2001 From: Loong Loong Date: Sun, 14 Sep 2025 00:37:18 +0800 Subject: [PATCH 1/5] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=8C=96=E8=84=9A=E6=9C=AC,=20=20=E5=BD=93=E7=94=A8?= =?UTF-8?q?=E6=88=B7=E6=8F=90=E4=BA=A4=E6=96=87=E6=A1=A3=E7=9A=84=E6=97=B6?= =?UTF-8?q?=E5=80=99=E4=BC=98=E5=85=88=E6=8A=8A=E5=9B=BE=E7=89=87=E6=94=BE?= =?UTF-8?q?=E5=88=B0=E5=92=8CMDX=E5=90=8C=E4=B8=80=E7=BA=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .husky/pre-commit | 13 ++ README.md | 9 + .../multimodal/llava/images}/word-img-03.png | Bin .../multimodal/llava/images}/word-img-04.png | Bin .../multimodal/llava/images}/word-img-05.png | Bin app/docs/ai/multimodal/llava/index.mdx | 136 ++++++------ package.json | 4 +- scripts/check-images.mjs | 198 +++++++++++++++++ scripts/move-doc-images.mjs | 199 ++++++++++++++++++ 9 files changed, 490 insertions(+), 69 deletions(-) rename {public/images/word => app/docs/ai/multimodal/llava/images}/word-img-03.png (100%) rename {public/images/word => app/docs/ai/multimodal/llava/images}/word-img-04.png (100%) rename {public/images/word => app/docs/ai/multimodal/llava/images}/word-img-05.png (100%) create mode 100644 scripts/check-images.mjs create mode 100644 scripts/move-doc-images.mjs diff --git a/.husky/pre-commit b/.husky/pre-commit index 5ee7abd..abcd1e0 100644 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1 +1,14 @@ +#!/bin/sh +. "$(dirname -- "$0")/_/husky.sh" + +# 1) 将 /images/* 文章图片就近复制并更新引用 +pnpm migrate:images || exit 1 + +# 将迁移后的变更加入暂存,确保本次提交包含更新 +git add -A + +# 2) 校验图片路径与命名(不合规则阻止提交) +pnpm lint:images || exit 1 + +# 3) 其余按 lint-staged 处理(如 Prettier) pnpm exec lint-staged diff --git a/README.md b/README.md index d6fe8c0..815b3ca 100644 --- a/README.md +++ b/README.md @@ -97,3 +97,12 @@ pnpm start # 启动生产服务器 # 导出 pnpm export # 导出静态站点到 /out 目录 ``` + +## 🖼️ 图片管理规范 + +- 存放位置:与文档同目录(推荐子目录 `images/`)。 + - 示例:`app/docs/ai/multimodal/llava/index.mdx` 的图片放 `app/docs/ai/multimodal/llava/images/…`。 +- 引用方式:在 MDX 中使用相对路径,如 `![](./images/training-loop.png)` 或 `![](./diagram.png)`。 +- 命名规范:`kebab-case`,语义清晰,可选顺序前缀(如 `fig-01-architecture.png`)。 +- 站点级资源:全站共享资产仍放 `public/images/site/…`;组件演示放 `public/images/components//…`。 +- 校验脚本:`pnpm lint:images` 检查图片路径是否就近引用、命名是否合规、文件是否存在。 diff --git a/public/images/word/word-img-03.png b/app/docs/ai/multimodal/llava/images/word-img-03.png similarity index 100% rename from public/images/word/word-img-03.png rename to app/docs/ai/multimodal/llava/images/word-img-03.png diff --git a/public/images/word/word-img-04.png b/app/docs/ai/multimodal/llava/images/word-img-04.png similarity index 100% rename from public/images/word/word-img-04.png rename to app/docs/ai/multimodal/llava/images/word-img-04.png diff --git a/public/images/word/word-img-05.png b/app/docs/ai/multimodal/llava/images/word-img-05.png similarity index 100% rename from public/images/word/word-img-05.png rename to app/docs/ai/multimodal/llava/images/word-img-05.png diff --git a/app/docs/ai/multimodal/llava/index.mdx b/app/docs/ai/multimodal/llava/index.mdx index b89da28..b25cce4 100644 --- a/app/docs/ai/multimodal/llava/index.mdx +++ b/app/docs/ai/multimodal/llava/index.mdx @@ -1,6 +1,6 @@ ---- +--- title: "LLaVA" -description: "LLaVA多模态大模型框架:架构解析、CLIP基础、论文精读、复现实践" +description: "LLaVAģ̬ģͿܣܹCLIPľʵ" date: "2025-01-27" tags: - llava @@ -10,109 +10,109 @@ tags: - visual-instruction-tuning --- -LLaVA (Large Language and Vision Assistant) 是多模态大模型的开创性框架,开启了视觉指令调优的新范式。 +LLaVA (Large Language and Vision Assistant) Ƕģ̬ģ͵ĿԿܣӾָŵ·ʽ -![](/images/word/word-img-03.png) +![](./images/word-img-03.png) -## 核心架构 +## ļܹ -### 基本结构 +### ṹ ``` -ViT视觉编码器 → 投影层跨模态对齐 → LLM语言生成 +ViTӾ ͶӰģ̬ LLM ``` -![](/images/word/word-img-04.png) +![](./images/word-img-04.png) -![](/images/word/word-img-05.png) +![](./images/word-img-05.png) -### 技术特点 +### ص -- **视觉编码**: 使用预训练的Vision Transformer处理图像 -- **跨模态对齐**: 通过投影层将视觉特征映射到语言空间 -- **语言生成**: 基于LLM进行多模态理解和生成 -- **指令调优**: 开创了视觉指令调优的新范式 +- **Ӿ**: ʹԤѵVision Transformerͼ +- **ģ̬**: ͨͶӰ㽫Ӿӳ䵽Կռ +- ****: LLMжģ̬ +- **ָ**: Ӿָŵ·ʽ -## 学习资源 +## ѧϰԴ -### 核心论文 +### -- **论文**: [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485) -- **代码**: [LLaVA GitHub](https://github.com/haotian-liu/LLaVA) -- **特色**: 首次提出视觉指令调优概念 +- ****: [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485) +- ****: [LLaVA GitHub](https://github.com/haotian-liu/LLaVA) +- **ɫ**: ״ӾָŸ -### CLIP基础 +### CLIP -**CLIP (Contrastive Language-Image Pre-training)** 是多模态学习的重要基础技术。 +**CLIP (Contrastive Language-Image Pre-training)** Ƕģ̬ѧϰҪ -**架构设计**: +**ܹ**: -- **双塔结构**: Text Encoder + Image Encoder -- **对比学习**: 通过(image, text)数据对进行预训练 -- **零样本能力**: 强大的图文匹配和分类能力 +- **˫ṹ**: Text Encoder + Image Encoder +- **Աѧϰ**: ͨ(image, text)ݶԽԤѵ +- ****: ǿͼƥͷ -**学习资源**: +**ѧϰԴ**: -- **论文**: [Learning Transferable Visual Representations](https://arxiv.org/abs/2103.00020) -- **代码**: [OpenAI CLIP](https://github.com/openai/CLIP) +- ****: [Learning Transferable Visual Representations](https://arxiv.org/abs/2103.00020) +- ****: [OpenAI CLIP](https://github.com/openai/CLIP) -### LLaVA复现项目 +### LLaVAĿ -计划复现LLaVA模型,深入理解多模态模型的训练流程和技术细节。 +ƻLLaVAģͣģ̬ģ͵ѵ̺ͼϸڡ -## 技术深度解析 +## Ƚ -### 视觉指令调优 +### Ӿָ -**核心思想**: 让模型学会理解和执行基于图像的指令。 +**˼**: ģѧִлͼָ -**数据构建**: +**ݹ**: -- 图像描述任务 -- 视觉问答任务 -- 复杂推理任务 -- 指令遵循任务 +- ͼ +- Ӿʴ +- +- ָѭ -### 跨模态对齐 +### ģ̬ -**对齐挑战**: 视觉和语言模态的语义空间差异 +**ս**: Ӿģ̬ռ -**解决方案**: +****: -- 线性投影层映射 -- 对比学习预训练 -- 多任务联合训练 -- 渐进式对齐策略 +- ͶӰӳ +- ԱѧϰԤѵ +- ѵ +- ʽ -## 应用场景 +## Ӧó -### 图像理解 +### ͼ -- **图像描述**: 自动生成图像的详细描述 -- **视觉问答**: 基于图像内容回答问题 -- **场景分析**: 理解复杂场景和行为 -- **细节检测**: 识别图像中的关键细节 +- **ͼ**: Զͼϸ +- **Ӿʴ**: ͼݻش +- ****: ⸴ӳΪ +- **ϸڼ**: ʶͼеĹؼϸ -### 教育辅助 +### -- **视觉教学**: 基于图像的知识讲解 -- **作业辅导**: 帮助理解图表和示例 -- **创意启发**: 基于视觉内容的创意引导 -- **学习评估**: 视觉化学习效果评估 +- **Ӿѧ**: ͼ֪ʶ +- **ҵ**: ͼʾ +- ****: ӾݵĴ +- **ѧϰ**: ӾѧϰЧ -### 内容创作 +### ݴ -- **故事创作**: 基于图像创作故事 -- **营销文案**: 商品图像的描述生成 -- **社交媒体**: 图片配文和hashtag生成 -- **创意设计**: 设计思路和概念阐释 +- **´**: ͼ +- **Ӫİ**: Ʒͼ +- **罻ý**: ͼƬĺhashtag +- ****: ˼·͸ -## 学习建议 +## ѧϰ -1. **CLIP基础**: 理解跨模态预训练 -2. **论文精读**: 深入研究LLaVA技术细节 -3. **代码分析**: 阅读官方实现代码 -4. **复现实践**: 尝试简化版本实现 -5. **应用开发**: 构建实际应用场景 +1. **CLIP**: ģ̬Ԥѵ +2. **ľ**: оLLaVAϸ +3. ****: Ķٷʵִ +4. **ʵ**: Լ򻯰汾ʵ +5. **Ӧÿ**: ʵӦó -LLaVA作为多模态大模型的里程碑工作,为理解视觉语言交互和构建智能多模态系统提供了重要基础。 +LLaVAΪģ̬ģ͵̱ΪӾԽ͹ܶģ̬ϵͳṩҪ diff --git a/package.json b/package.json index 5d7f298..1247058 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,9 @@ "build": "next build", "start": "next start", "postinstall": "fumadocs-mdx", - "prepare": "husky" + "prepare": "husky", + "lint:images": "node scripts/check-images.mjs", + "migrate:images": "node scripts/move-doc-images.mjs && node scripts/check-images.mjs" }, "dependencies": { "@types/mdx": "^2.0.13", diff --git a/scripts/check-images.mjs b/scripts/check-images.mjs new file mode 100644 index 0000000..a5192a9 --- /dev/null +++ b/scripts/check-images.mjs @@ -0,0 +1,198 @@ +#!/usr/bin/env node +/** + * MDX 图片路径校验脚本(中文注释) + * + * 功能 + * - 扫描 `app/docs/??/?.mdx`(含 .md) + * - 识别 Markdown `![]()` 与内联 `` 的图片引用 + * - 强制使用“就近图片”:推荐相对路径(如 `./images/…`) + * - 仅对站点级共享资源允许绝对路径:`/images/site/*`、`/images/components/*` + * - 校验图片文件是否存在、文件名是否符合 kebab-case + * + * 目的 + * - 图片与文章同目录,便于维护与迁移 + * - 避免全局命名冲突,降低重构成本 + * + * 使用 + * - 包脚本:`pnpm lint:images` + * - 直接运行:`node scripts/check-images.mjs` + * + * 退出码 + * - 0:通过;1:存在问题(便于接入 CI) + */ + +import fs from "fs"; +import path from "path"; + +const ROOT = process.cwd(); +const DOCS_DIR = path.join(ROOT, "app", "docs"); +// 允许保留的绝对路径前缀(站点通用/组件演示) +const ALLOWED_ABSOLUTE_PREFIXES = ["/images/site/", "/images/components/"]; + +// 认定为“图片”的后缀名(忽略大小写) +const IMAGE_FILE_EXTS = new Set([ + ".png", + ".jpg", + ".jpeg", + ".gif", + ".webp", + ".svg", +]); +const exts = new Set([".mdx", ".md"]); + +/** Recursively list files */ +function* walk(dir) { + const entries = fs.readdirSync(dir, { withFileTypes: true }); + for (const e of entries) { + const p = path.join(dir, e.name); + if (e.isDirectory()) { + yield* walk(p); + } else { + yield p; + } + } +} + +/** + * 推断文档的“路由路径”(仅用于参考/信息) + */ +function toRoutePath(file) { + const rel = path.relative(DOCS_DIR, file).split(path.sep).join("/"); + const base = path.basename(rel); + const dir = path.dirname(rel); + if (base.toLowerCase() === "index.mdx") return dir === "." ? "" : dir; + const name = base.replace(/\.[^.]+$/, ""); + return dir === "." ? name : `${dir}/${name}`; +} + +/** + * 判断文件名(含扩展名)是否为 kebab-case(仅校验主名,不含后缀) + * 示例:training-loop.png、fig-01-architecture.webp 为合格 + */ +function isKebabCase(name) { + // allow dot for extension only + const base = name.replace(/\.[^.]+$/, ""); + return /^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(base); +} + +/** + * 校验单个 MD(X) 文件中的图片使用: + * - 非站点级的绝对路径 `/images/*` 会被提示应改为相对路径 + * - 相对路径需位于同一文档目录(避免越级引用) + * - 检查文件是否存在与命名是否符合 kebab-case + */ +/** + * 构建全局引用表:统计所有文档对非站点级 `/images/...` 的引用次数 + */ +function buildRefs() { + const reMdx = /!\[[^\]]*\]\(([^)]+)\)/g; + const reHtml = /]*src=["']([^"']+)["'][^>]*>/gi; + /** @type {Map>} */ + const refs = new Map(); + for (const f of walk(DOCS_DIR)) { + if (!exts.has(path.extname(f))) continue; + const s = fs.readFileSync(f, "utf8"); + const urls = new Set(); + for (const m of s.matchAll(reMdx)) urls.add(m[1]); + for (const m of s.matchAll(reHtml)) urls.add(m[1]); + for (const url of urls) { + const u = url.replace(/\\/g, "/"); + if (!u.startsWith("/images/")) continue; + if (ALLOWED_ABSOLUTE_PREFIXES.some((p) => u.startsWith(p))) continue; + if (!refs.has(u)) refs.set(u, new Set()); + refs.get(u).add(f); + } + } + return refs; +} + +function checkFile(file, refs) { + const content = fs.readFileSync(file, "utf8"); + const routePath = toRoutePath(file); + const baseDir = path.dirname(file); + // Markdown 图片语法:![alt](src) + const re = /!\[[^\]]*\]\(([^)]+)\)/g; + // HTML 图片语法: + const inlineRe = /]*src=["']([^"']+)["'][^>]*>/gi; + const problems = []; + + function checkUrl(url, loc) { + const urlNorm = url.replace(/\\/g, "/"); + if (/^https?:\/\//i.test(urlNorm)) return; // 外链忽略 + // 绝对路径:仅允许站点级前缀;或“确认为被多个文档共享”的图片 + if (urlNorm.startsWith("/images/")) { + const okAbs = + ALLOWED_ABSOLUTE_PREFIXES.some((p) => urlNorm.startsWith(p)) || + (refs.get(urlNorm)?.size || 0) > 1; + if (!okAbs) { + problems.push( + `${loc}: prefer co-located images; use relative path like ./images/ (avoid ${urlNorm})`, + ); + } + const fname = urlNorm.split("/").pop() || ""; + if (!isKebabCase(fname)) + problems.push(`${loc}: filename not kebab-case -> ${fname}`); + return; + } + // 相对路径 + if (urlNorm.startsWith("./") || urlNorm.startsWith("../")) { + const abs = path.resolve(baseDir, urlNorm); + const relToDocs = path.relative(DOCS_DIR, abs); + if (relToDocs.startsWith("..")) { + problems.push( + `${loc}: image must live within the same doc folder (got ${urlNorm})`, + ); + } + const ext = path.extname(abs).toLowerCase(); + if (!IMAGE_FILE_EXTS.has(ext)) return; // 非图片链接跳过 + if (!fs.existsSync(abs)) { + problems.push(`${loc}: image file not found -> ${urlNorm}`); + } + const fname = path.basename(abs); + if (!isKebabCase(fname)) + problems.push(`${loc}: filename not kebab-case -> ${fname}`); + return; + } + // 其它形式(如 data: 或 import)忽略 + } + + // scan markdown image syntax + for (const m of content.matchAll(re)) { + checkUrl(m[1], "mdx"); + } + // scan inline + for (const m of content.matchAll(inlineRe)) { + checkUrl(m[1], "html"); + } + + return problems; +} + +function main() { + if (!fs.existsSync(DOCS_DIR)) { + console.error(`Docs dir not found: ${DOCS_DIR}`); + process.exit(1); + } + let total = 0; + let bad = 0; + const refs = buildRefs(); + for (const f of walk(DOCS_DIR)) { + if (!exts.has(path.extname(f))) continue; + total++; + const probs = checkFile(f, refs); + if (probs.length) { + bad++; + const rel = path.relative(ROOT, f).split(path.sep).join("/"); + console.log(`\n${rel}`); + for (const p of probs) console.log(` - ${p}`); + } + } + if (bad) { + console.log(`\nFound ${bad}/${total} files with image issues.`); + process.exit(1); + } else { + console.log(`Checked ${total} MDX files: no issues.`); + } +} + +main(); diff --git a/scripts/move-doc-images.mjs b/scripts/move-doc-images.mjs new file mode 100644 index 0000000..e9bb95f --- /dev/null +++ b/scripts/move-doc-images.mjs @@ -0,0 +1,199 @@ +#!/usr/bin/env node +/** + * MDX 图片就近迁移脚本(中文注释) + * + * 目标 + * - 扫描 `app/docs/??/?.mdx`(含 .md)里的图片引用 + * - 对于以 `/images/...` 绝对路径引用且仅被“单一文档”使用的图片:移动到对应 MDX 同目录下的 `images/` 子目录 + * - 同时将文中的绝对路径替换为相对路径 `./images/<文件名>`(站点级资源除外) + * + * 为什么需要 + * - 图片与文章就近存放,便于迁移、重命名、归档与协作 + * - 避免公共目录下命名冲突与难以追踪的引用关系 + * + * 使用方式 + * - 包管理脚本:`pnpm migrate:images` + * - 直接运行:`node scripts/move-doc-images.mjs` + * + * 规则 + * - 保留并忽略站点级绝对路径:`/images/site/*`、`/images/components/*` + * - 共享图片(被多个文档引用)将保留在 public 中,并保持绝对路径;防止重复与不必要的拷贝 + * - 单一文档引用的图片采用“移动”(rename 或 copy+unlink),避免产生重复副本 + **/ + +import fs from "fs"; +import path from "path"; +import crypto from "crypto"; + +// 仓库根目录、文档目录与 public 目录 +const ROOT = process.cwd(); +const DOCS_DIR = path.join(ROOT, "app", "docs"); +const PUBLIC_DIR = path.join(ROOT, "public"); + +// 排除不迁移的绝对路径前缀(站点级 & 组件演示级别) +const EXCLUDE_PREFIXES = ["/images/site/", "/images/components/"]; + +// 需要处理的文档扩展名 +const exts = new Set([".mdx", ".md"]); + +/** 递归遍历目录,产出文件路径 */ +function* walk(dir) { + const entries = fs.readdirSync(dir, { withFileTypes: true }); + for (const e of entries) { + const p = path.join(dir, e.name); + if (e.isDirectory()) yield* walk(p); + else yield p; + } +} + +/** 确保目录存在(多层级) */ +function ensureDir(p) { + fs.mkdirSync(p, { recursive: true }); +} + +/** 读取文件内容为字符串 */ +function read(file) { + return fs.readFileSync(file, "utf8"); +} + +/** 简单计算文件 SHA1,用于判等(可避免重复移动时产生副本) */ +function sha1(p) { + const h = crypto.createHash("sha1"); + h.update(fs.readFileSync(p)); + return h.digest("hex"); +} + +/** + * 构建引用表:统计所有文档对 `/images/...`(排除站点级前缀)的引用次数 + */ +function buildRefs() { + const mdxImg = /!\[[^\]]*\]\(([^)]+)\)/g; + const htmlImg = /]*src=["']([^"']+)["'][^>]*>/gi; + /** @type {Map>} url -> set of files */ + const refs = new Map(); + for (const f of walk(DOCS_DIR)) { + if (!exts.has(path.extname(f))) continue; + const content = read(f); + const urls = new Set(); + for (const m of content.matchAll(mdxImg)) urls.add(m[1]); + for (const m of content.matchAll(htmlImg)) urls.add(m[1]); + for (const url of urls) { + if (!url.startsWith("/images/")) continue; + if (EXCLUDE_PREFIXES.some((p) => url.startsWith(p))) continue; + if (!refs.has(url)) refs.set(url, new Set()); + refs.get(url).add(f); + } + } + return refs; +} + +/** + * 处理单个 MD(X) 文件: + * - 抓取 Markdown 与 HTML 中的图片地址 + * - 对“仅被本文件引用”的 `/images/...` 图片执行移动并替换为相对路径 + */ +function moveForFile(file, refs) { + const raw = fs.readFileSync(file, "utf8"); + let content = raw; + // Markdown 图片语法:![alt](src) + const mdxImg = /!\[[^\]]*\]\(([^)]+)\)/g; + // HTML 图片语法: + const htmlImg = /]*src=["']([^"']+)["'][^>]*>/gi; + const urls = new Set(); + for (const m of content.matchAll(mdxImg)) urls.add(m[1]); + for (const m of content.matchAll(htmlImg)) urls.add(m[1]); + + if (urls.size === 0) return { moved: 0, updated: false }; + let moved = 0; + const dir = path.dirname(file); + const destDir = path.join(dir, "images"); + + for (const url of urls) { + // 仅处理以 /images/ 开头的绝对路径 + if (!url.startsWith("/images/")) continue; + // 站点级与组件级图片跳过(保持绝对路径) + if (EXCLUDE_PREFIXES.some((p) => url.startsWith(p))) continue; + + // 若该图片被多个文档引用,则视为“共享图片”,保留绝对路径 + const consumers = refs.get(url); + if (consumers && consumers.size > 1) { + continue; + } + + // 计算 public 下的源文件路径 + const relFromPublic = url.replace(/^\//, ""); + const src = path.join(PUBLIC_DIR, relFromPublic); + if (!fs.existsSync(src)) { + console.warn( + `Skip (not found): ${src} (from ${url}) in ${path.relative(ROOT, file)}`, + ); + continue; + } + + // 移动到文章同目录的 images 子目录 + const base = path.basename(src); + ensureDir(destDir); + const dest = path.join(destDir, base); + if (fs.existsSync(dest)) { + // 若已存在同名文件,尝试比较内容,若相同则删除源文件以避免重复 + try { + if (sha1(src) === sha1(dest)) { + fs.unlinkSync(src); + moved++; + } else { + // 内容不同则保留源文件并提示人工处理 + console.warn( + `Conflict: ${path.relative(ROOT, dest)} already exists with different content.`, + ); + continue; + } + } catch (e) { + console.warn(`Compare failed for ${src} vs ${dest}: ${e.message}`); + continue; + } + } else { + // 优先使用 rename,提高效率;跨卷失败则回退为 copy+unlink + try { + fs.renameSync(src, dest); + } catch (e) { + try { + fs.copyFileSync(src, dest); + fs.unlinkSync(src); + } catch (e2) { + console.warn(`Move failed for ${src} -> ${dest}: ${e2.message}`); + continue; + } + } + moved++; + } + + // 将文中的绝对路径替换为相对路径 ./images/<文件名> + const rel = `./images/${base}`; + // 转义正则中的特殊字符,确保全量替换 + const escaped = url.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const re = new RegExp(escaped, "g"); + content = content.replace(re, rel); + } + + if (content !== raw) fs.writeFileSync(file, content); + return { moved, updated: content !== raw }; +} + +/** 主流程:遍历所有 MDX,累计迁移数量并输出统计 */ +function main() { + let totalFiles = 0; + let totalMoved = 0; + // 第一步:构建全局引用表,识别共享图片 + const refs = buildRefs(); + for (const f of walk(DOCS_DIR)) { + if (!exts.has(path.extname(f))) continue; + totalFiles++; + const { moved } = moveForFile(f, refs); + totalMoved += moved; + } + console.log( + `已处理 ${totalFiles} 个文档,复制 ${totalMoved} 张图片到就近目录。`, + ); +} + +main(); From 55033490a8dfc3a865c8c80f24d58c93d0b2d720 Mon Sep 17 00:00:00 2001 From: Loong Loong Date: Sun, 14 Sep 2025 01:07:37 +0800 Subject: [PATCH 2/5] =?UTF-8?q?feat:=20=E7=A7=BB=E5=8A=A8=E5=88=B0?= =?UTF-8?q?=E5=90=8C=E4=B8=80=E7=BA=A7=E7=9A=84assets=E5=90=8C=E5=90=8D?= =?UTF-8?q?=E7=9B=AE=E5=BD=95=E4=B8=8B,=20=E5=85=B7=E4=BD=93=E8=A7=84?= =?UTF-8?q?=E5=88=99=E7=9C=8B=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .husky/pre-commit | 3 - CONTRIBUTING.md | 29 ++-- README.md | 24 ++-- .../{images => index.assets}/word-img-03.png | Bin .../{images => index.assets}/word-img-04.png | Bin .../{images => index.assets}/word-img-05.png | Bin app/docs/ai/multimodal/llava/index.mdx | 134 +++++++++--------- public/readme_docs_structure.png | Bin 0 -> 10538 bytes scripts/check-images.mjs | 20 +-- scripts/move-doc-images.mjs | 47 +++++- 10 files changed, 148 insertions(+), 109 deletions(-) rename app/docs/ai/multimodal/llava/{images => index.assets}/word-img-03.png (100%) rename app/docs/ai/multimodal/llava/{images => index.assets}/word-img-04.png (100%) rename app/docs/ai/multimodal/llava/{images => index.assets}/word-img-05.png (100%) create mode 100644 public/readme_docs_structure.png diff --git a/.husky/pre-commit b/.husky/pre-commit index abcd1e0..a2aca1f 100644 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,6 +1,3 @@ -#!/bin/sh -. "$(dirname -- "$0")/_/husky.sh" - # 1) 将 /images/* 文章图片就近复制并更新引用 pnpm migrate:images || exit 1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9f54689..ab82a78 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -30,7 +30,7 @@ touch docs/computer-science/data-structures/new-topic/index.mdx 使用 Markdown/MDX 编写文章: -````mdx +```mdx --- title: "文章标题" description: "文章简短描述" @@ -51,20 +51,7 @@ tags: 更多内容... ## 代码示例 - -```javascript -// 你的代码 -function example() { - return "Hello World!"; -} ``` -```` - -## 总结 - -文章总结... - -```` ### 步骤4:测试修改 @@ -72,7 +59,8 @@ function example() { ```bash pnpm build -```` +pnpm migrate:images # 迁移图片脚本 +``` 此命令将: @@ -141,6 +129,17 @@ npm dev ## 📚 文档规范 所有文档放在 `docs/` 目录。 +图片需要放在 被引用的文档的同名`assets`目录下(正常情况下您不应该关心这个, 该项目有自动脚本来移动图片), 例如: +docxA 引用了 imgA 图片, 那么他们的文档结构应该是 `docxA.assets/imgA`: + +```md +docsA.mdx +docsA.assets/ +imgA +``` + +![img](public\readme_docs_structure.png) + 每个文档都需要一个 Frontmatter,例如: ```md diff --git a/README.md b/README.md index 815b3ca..57e56be 100644 --- a/README.md +++ b/README.md @@ -90,19 +90,17 @@ pnpm dev pnpm dev # 启动开发服务器 pnpm build # 构建生产版本 pnpm start # 启动生产服务器 - -# 内容 - - -# 导出 -pnpm export # 导出静态站点到 /out 目录 +pnpm postinstall +pnpm lint:images # 检查图片符合规则 +pnpm migrate:images # 迁移图片 ``` -## 🖼️ 图片管理规范 +## 图片管理规范(简要) + +自动化脚本会移动您引用的图片到 MDX 同目录下, 遵循以下规则: -- 存放位置:与文档同目录(推荐子目录 `images/`)。 - - 示例:`app/docs/ai/multimodal/llava/index.mdx` 的图片放 `app/docs/ai/multimodal/llava/images/…`。 -- 引用方式:在 MDX 中使用相对路径,如 `![](./images/training-loop.png)` 或 `![](./diagram.png)`。 -- 命名规范:`kebab-case`,语义清晰,可选顺序前缀(如 `fig-01-architecture.png`)。 -- 站点级资源:全站共享资产仍放 `public/images/site/…`;组件演示放 `public/images/components//…`。 -- 校验脚本:`pnpm lint:images` 检查图片路径是否就近引用、命名是否合规、文件是否存在。 +- 存放:与 MDX 同目录的 `./.assets/` 中。 + - 例:`foo.mdx` → `./foo.assets/.png`;`index.mdx` → `./index.assets/.png`。 +- 引用:相对路径 `![](./.assets/.png)`。 +- 自动化:提交时自动迁移并改引用;图片 Lint 只提示不拦截提交。 +- 共享:站点级用 `/images/site/*`、组件演示用 `/images/components//*`;多文档共用的图片可保留 `/images/...`。 diff --git a/app/docs/ai/multimodal/llava/images/word-img-03.png b/app/docs/ai/multimodal/llava/index.assets/word-img-03.png similarity index 100% rename from app/docs/ai/multimodal/llava/images/word-img-03.png rename to app/docs/ai/multimodal/llava/index.assets/word-img-03.png diff --git a/app/docs/ai/multimodal/llava/images/word-img-04.png b/app/docs/ai/multimodal/llava/index.assets/word-img-04.png similarity index 100% rename from app/docs/ai/multimodal/llava/images/word-img-04.png rename to app/docs/ai/multimodal/llava/index.assets/word-img-04.png diff --git a/app/docs/ai/multimodal/llava/images/word-img-05.png b/app/docs/ai/multimodal/llava/index.assets/word-img-05.png similarity index 100% rename from app/docs/ai/multimodal/llava/images/word-img-05.png rename to app/docs/ai/multimodal/llava/index.assets/word-img-05.png diff --git a/app/docs/ai/multimodal/llava/index.mdx b/app/docs/ai/multimodal/llava/index.mdx index b25cce4..7bd78eb 100644 --- a/app/docs/ai/multimodal/llava/index.mdx +++ b/app/docs/ai/multimodal/llava/index.mdx @@ -1,6 +1,6 @@ --- title: "LLaVA" -description: "LLaVAģ̬ģͿܣܹCLIPľʵ" +description: "LLaVA��ģ̬��ģ�Ϳ�ܣ��ܹ�������CLIP���������ľ���������ʵ��" date: "2025-01-27" tags: - llava @@ -10,109 +10,109 @@ tags: - visual-instruction-tuning --- -LLaVA (Large Language and Vision Assistant) Ƕģ̬ģ͵ĿԿܣӾָŵ·ʽ +LLaVA (Large Language and Vision Assistant) �Ƕ�ģ̬��ģ�͵Ŀ����Կ�ܣ��������Ӿ�ָ����ŵ��·�ʽ�� -![](./images/word-img-03.png) +![](./index.assets/word-img-03.png) -## ļܹ +## ���ļܹ� -### ṹ +### �����ṹ ``` -ViTӾ ͶӰģ̬ LLM +ViT�Ӿ������� �� ͶӰ���ģ̬���� �� LLM�������� ``` -![](./images/word-img-04.png) +![](./index.assets/word-img-04.png) -![](./images/word-img-05.png) +![](./index.assets/word-img-05.png) -### ص +### �����ص� -- **Ӿ**: ʹԤѵVision Transformerͼ -- **ģ̬**: ͨͶӰ㽫Ӿӳ䵽Կռ -- ****: LLMжģ̬ -- **ָ**: Ӿָŵ·ʽ +- **�Ӿ�����**: ʹ��Ԥѵ����Vision Transformer����ͼ�� +- **��ģ̬����**: ͨ��ͶӰ�㽫�Ӿ�����ӳ�䵽���Կռ� +- **��������**: ����LLM���ж�ģ̬��������� +- **ָ�����**: �������Ӿ�ָ����ŵ��·�ʽ -## ѧϰԴ +## ѧϰ��Դ -### +### �������� -- ****: [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485) -- ****: [LLaVA GitHub](https://github.com/haotian-liu/LLaVA) -- **ɫ**: ״ӾָŸ +- **����**: [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485) +- **����**: [LLaVA GitHub](https://github.com/haotian-liu/LLaVA) +- **��ɫ**: �״�����Ӿ�ָ����Ÿ��� -### CLIP +### CLIP���� -**CLIP (Contrastive Language-Image Pre-training)** Ƕģ̬ѧϰҪ +**CLIP (Contrastive Language-Image Pre-training)** �Ƕ�ģ̬ѧϰ����Ҫ���������� -**ܹ**: +**�ܹ����**: -- **˫ṹ**: Text Encoder + Image Encoder -- **Աѧϰ**: ͨ(image, text)ݶԽԤѵ -- ****: ǿͼƥͷ +- **˫���ṹ**: Text Encoder + Image Encoder +- **�Ա�ѧϰ**: ͨ��(image, text)���ݶԽ���Ԥѵ�� +- **����������**: ǿ���ͼ��ƥ��ͷ������� -**ѧϰԴ**: +**ѧϰ��Դ**: -- ****: [Learning Transferable Visual Representations](https://arxiv.org/abs/2103.00020) -- ****: [OpenAI CLIP](https://github.com/openai/CLIP) +- **����**: [Learning Transferable Visual Representations](https://arxiv.org/abs/2103.00020) +- **����**: [OpenAI CLIP](https://github.com/openai/CLIP) -### LLaVAĿ +### LLaVA������Ŀ -ƻLLaVAģͣģ̬ģ͵ѵ̺ͼϸڡ +�ƻ�����LLaVAģ�ͣ����������ģ̬ģ�͵�ѵ�����̺ͼ���ϸ�ڡ� -## Ƚ +## ������Ƚ��� -### Ӿָ +### �Ӿ�ָ����� -**˼**: ģѧִлͼָ +**����˼��**: ��ģ��ѧ�������ִ�л���ͼ���ָ� -**ݹ**: +**���ݹ���**: -- ͼ -- Ӿʴ -- -- ָѭ +- ͼ���������� +- �Ӿ��ʴ����� +- ������������ +- ָ����ѭ���� -### ģ̬ +### ��ģ̬���� -**ս**: Ӿģ̬ռ +**������ս**: �Ӿ�������ģ̬������ռ���� -****: +**�������**: -- ͶӰӳ -- ԱѧϰԤѵ -- ѵ -- ʽ +- ����ͶӰ��ӳ�� +- �Ա�ѧϰԤѵ�� +- ����������ѵ�� +- ����ʽ������� -## Ӧó +## Ӧ�ó��� -### ͼ +### ͼ������ -- **ͼ**: Զͼϸ -- **Ӿʴ**: ͼݻش -- ****: ⸴ӳΪ -- **ϸڼ**: ʶͼеĹؼϸ +- **ͼ������**: �Զ�����ͼ�����ϸ���� +- **�Ӿ��ʴ�**: ����ͼ�����ݻش����� +- **��������**: ���⸴�ӳ�������Ϊ +- **ϸ�ڼ��**: ʶ��ͼ���еĹؼ�ϸ�� -### +### �������� -- **Ӿѧ**: ͼ֪ʶ -- **ҵ**: ͼʾ -- ****: ӾݵĴ -- **ѧϰ**: ӾѧϰЧ +- **�Ӿ���ѧ**: ����ͼ���֪ʶ���� +- **��ҵ����**: ��������ͼ����ʾ�� +- **��������**: �����Ӿ����ݵĴ������� +- **ѧϰ����**: �Ӿ���ѧϰЧ������ -### ݴ +### ���ݴ��� -- **´**: ͼ -- **Ӫİ**: Ʒͼ -- **罻ý**: ͼƬĺhashtag -- ****: ˼·͸ +- **���´���**: ����ͼ�������� +- **Ӫ���İ�**: ��Ʒͼ����������� +- **�罻ý��**: ͼƬ���ĺ�hashtag���� +- **�������**: ���˼·�͸������ -## ѧϰ +## ѧϰ���� -1. **CLIP**: ģ̬Ԥѵ -2. **ľ**: оLLaVAϸ -3. ****: Ķٷʵִ -4. **ʵ**: Լ򻯰汾ʵ -5. **Ӧÿ**: ʵӦó +1. **CLIP����**: �����ģ̬Ԥѵ�� +2. **���ľ���**: �����о�LLaVA����ϸ�� +3. **�������**: �Ķ��ٷ�ʵ�ִ��� +4. **����ʵ��**: ���Լ򻯰汾ʵ�� +5. **Ӧ�ÿ���**: ����ʵ��Ӧ�ó��� -LLaVAΪģ̬ģ͵̱ΪӾԽ͹ܶģ̬ϵͳṩҪ +LLaVA��Ϊ��ģ̬��ģ�͵���̱�������Ϊ�����Ӿ����Խ����͹������ܶ�ģ̬ϵͳ�ṩ����Ҫ������ diff --git a/public/readme_docs_structure.png b/public/readme_docs_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..9a21b4f7912c07fc5c6f31002350b2bd802c4661 GIT binary patch literal 10538 zcmaia1ymeC&?X@`1Pc(HpuydOv%x*MED|KRF3tkMgDlSC7TkkdfCP7UcY+7E05|#H z{qO9*yZ5$sc6)ldr@N|4zN&CF71>u9q!KK)~+)M=z3Cu*nb*-Xh3LNosm#9ItpmU+-m#|B=06Q-sqW)jqoeW<>VH~dn_pnEFEX9j2zZ4nF%Z$OU5(s(shs{;j5?NzYUORisIsLcN{7* zmxsHprKPS>xtN=oZ+$@Ng|1aS;VI@#Y%x)Zk3~h}(9?^Oy+Du)^e#e0KuAHBV?{7W z3`hQ=^tKuy7M+d=fjgiZQGy{>7oi^s9}^)}!t}-4*Z(?!I;LJWy+opM{xvxn-Y7_s z_vqa=H4;TWkN`4A)WHV-cD>Z2 zUl^!?)>k*Yq3+(S-RaRk{W>(w`4tzV#dmJ4dEDz8bq!VHFYU{18!hX_pF5RzQK+$) z=)3b@N242pP>tHW3bjgk*SuWD$Tv$>lwXhfHDq+rm&x1UK>3Jtf^k)JTa`3XVDIo~ zg6}yRHLQ6p!uef_=wwF|Fh<&cML8TEB`b?ct zCR&wV1+C)gs5it9SfZ}@UDaSAc0F*oMz1Ir^e0rW^Zx`y*V|#N7@tsk_je zT2DITs~FqmYQ%C$K$3s&QmGl6m`8rhWgrw4m$7zJTVZPi)lDly^9p+0TM`zx#kvDs zkYK%~2w;S$(WD)j@xOkL0{Q6G)Od)C-A~qKyq4Hl6%~{?z5IKxi4TjcS>RZmu)L?$y$~VKX+Re@5FtU z-pzTB)kC`f9#!=0lVM`j-~T=r9Gn)!y+k_0<@W;yR7}s^2pYZPwn7@iVM)p)wp%O>yR=;}hCsj#{Uva3A7tao+ z>lesej&IO{Lw1HRhm!k#cxZb$_;gerKepVxf^f+##L^oV+W0=C*)obnCQIYs3m1g4 z)MhDK8kM2aAPpeGzXUNliCPpAshZX^r8yCkaT6D=DHRP#pt-rZt+|9b>pddk8b{Jq zlh)sdP(8njHtSe_Z?s3ind6z?39DkfNOip|rg%;trBi=rb9i+0+kbg64I-XnF9I!W55D{>s# z8@MG9LdG%KJ^WB{Izt_t0#X?@Q2qPA$?gn~8vM5LVtDqG!^3y3 z-}6%w6cD}%7Jse{rfl25WX{76mh!{uIoJ>;f`;sWkN+pd>`tyt?CeUXrd+ z1le9HYi{IBKNXtB+h#&2%roC&9=h@+j7z=}9M8gID6szI)c^V$mo*mllvDNG;^&%r zgXON#PRpSJ#l!~o+GdA-$1@iT5uI!{m&Bi&sepw`E~|VSoaJ>8zISsBSQ}XH)yPZK z!^*iaXyd>^Gt|!$n*(uTzV5rQ?(-rcE3ph(b692zMaVgR{h5MzlDy)x411FNsUX<# zhmbZVgF)%=qx;#dU~*}`Vp8E^0NkTWx^QRXK9{~Vxopl{H%+PVo?5;gtevl9R9fQ! z%U@33=WA|Nty>Ynnv1P{bn2SVSC9_z8GH>f-(=l)8ZlANjR)l&JWmRNd1g_%*SYaP z;i>XqrnO0{sNqI5YSM)0En*Pk<1a8gWzPAxx9dEKfeYUu51~a`0AwXg<5+|#n`MU# zIHMeD=$fDa89*NhwChpau`#+X9^pKjHWZKEa_AV|bp-^0vG?wLeQ5W}@Q{(c#*VyQ z*qyaXkPtWQ!nnc|wb7_)qR7~9q&FjQYdj3NRKE-s(lT$d1Vcl{CxmT&@UL!KI&e0< z8y2ZiC&cT0rS&+WRhQRyQ0qA#E&`7Yamp88J@B~ay*Zq0+~!77dOU-Ff9H9@xrj3- zJNFY!Hrj|RM8zV!87=qAlxprIJ|uzZvUy`;!WI^|lmk8_ir$Cks zbRPQEPst^Ie>VdQyofPFLTo+`2jO0aaQUaOY$&)+R^edENazh9MOsF~n)D&Bs4tIB z3@1_>a^5=JE$=mJucB}G0=HfXbBP4kq}8MoBLE{MNv25K+&G1u@3 z=g`nM&o|pcPalRR%ZnW}nqSM8;nv~pIa%wJznn65LUu*UexM<+LvUiS!zGL|dWmIx z_^wlG7wR@4kd~(ZxwY&)Yj~@^!PZrKv3#c_ z^!^nnYyD5s)1SFoS0ub{DF5j^5w~nn{RWN> zO^nOclB|B{JJF9O-NG}D6CN!?o^F7cmgm!0j5a3QFi(*gw{DVUx;*%d&?p_ja(&&q zZG0X_UP8hTGvM7TkTjE`Fyt<@zB?uXtkA)HWs$xvMB*d`3Tv(pdv@N^c3yIGbN6 zp0f1UfxJ}WO_;PBrSQZ|n(gyV`kJ5j-|16hUwdrl%%%8wUO_8T(zGs!^4N?cj!+=F zbv#lgop)X-ct|VCa(?l1VU6`*MB@B&viiqGIJofZl1EM3gD2qh+^A8)-rt?_@q`D3 zam9GqIjfjW(jyN{_*mO{yY7t|rPt2M6lkO%GK7ddHPRlT>sOND%5dWAQq~TM9&HXH z4@eJM`w5c<^~#eP&}sZm{9XK&^HVv=&%d7vWBJna>igM7-}8T~XsfMggP@T0@`uqo zdpvA&&o`k7zU-QLEHQA2XgBy&$#a`&e&6=>7}LIEs0jXw;@ngx&eTJeIut;Eu+ z(Q|s1=PI^^_zbPc6yL*O$La~&EU;165{-PeuiO>~?7a_R~wYY98gr&qWNRSJB|=|5jWMPW^{OLw8( zxQ%4!_gK=%P6u-4{i<7as5{Z_X3r_8+PCQj+c6` z9-NmmI%S3TB0n{k9p*aeiN2tvn3*}XW*Aod`H1f%pkJqxg`k`G|BBD= zzrKGu4Qi_;aA|h-JP(sb&?PsC{;y+tvDVk9Q4jC@sq{6QD#O2b^?i|8mN4CmJX`6r zDoHCjDM;K#78eV@e5qlN@U#49sizSm&4#9Pk3Dv|#}{T5lau-2$-*dJgj&S#%r~G6 z&K*N+%iDhR$q;^v0*Ag|r)+C)^27v~B@=G{IA(jV>%>61e=G^|elgMd@g1#|2fB^4 z*qjP4{d6*4X9e$bWe5Tl44ZU?&~PX)K}`$PlD;wqJ9e+Q)X6;W%->weaqonvmJ<}o zTPu-v##weR_?1^a3=su$#8)web+eaCljz$v?vIHJdKP7gS}1GppFDutO>PEIf@4cT zjh7{Sy2OX#YXP|N<}84od~${SBwLU0EIp6fTscfZbU%C-xlI;BU5)ARG4xvH)A_=C z9JU_lFU*eH=~aP1T+-qby}QPANFyA3?h19~DO&YVNUS2#L0oO7ep5DmW@t)StgH`l z!}>-U+li7)plO6_#xQPNX4Km8G>3@HD>tnQ7iZ>x7O-BhIue;h*fL!^ouc2x^(eK$ z&;7jGRHYM|yEK;+v#pn9_SnAAKpOl>4~J;5Xkmek6-OKA9!2HjzP5bfQD>ESZC$i) zlg(ZGRoYDIt5fiS$OLY4!R8$-!qncJj3L|b)-$A3BDaVTbqb2&d7{rQ&g_<#O;xKbsWsY3sb#W5q?>+1ubIe95yZ zTXSR^p>)dg5Fj|KK_=g+xlEf=xiLOSJ1CLXXG&72-gmTOCOPIV8=S=F#Ro>tKh{I! z&+GzF2VefN5g&tg$F^^DSDjTcMU?}Sn&#Y|!{5d297vn4cXj+)QK=JaD|j7c^+w2y zF|_bkLjU%oJib}7T@z9b?z4$CrR)mqV?ilvDZR;V-}#lnCT9l?78AouRs2xIF?HR& zQS-f|+PFa~Gm$NZ4&GxKIhn5szz9_l|NdZV&^aWS)%A5JVQ6>Uss(aSEQ;FMaF1IN zSXkTWTV$y*i9JVlNPXx`2%kJ1Yr9?RlWkQ(qn0XQUGnL0*J>EDIuxA?#Z`wF?uu(u zvZZGv1yASalkcdx34r8kxp|(`hnmy_3Prk{D&{ONN(a?7<}9`X)Q%1I@Ic@alND8} z-2wdJkB7LA)kV_b&px$X~q6(h&^az zn!Di()o~3{hS*wh>*-%zyf|N{X z&&%+NnvP>qMJRMbqWDnhVmW$4=_ffERb4!Ekw6cYwWCri85M@q2`372fPDI{g2CFIx)AXA z{>5Tnq;9ji-}te|^%8?vMBwY!-uWMQ#u^oC2qvg1D0=&w4@bZ@hQ$1Y&j*&Gv4dY-f;yzf>3z#xTVagr*I~Xd zqEq?^oZ9jz+rgegX2iy^^T+{L*f}SXi}!?a&(Rq>qws>2lrxC{I>il2cNhG-Hsg9> z)N;x)gZPSd1!R%taW0dq1rgRyN9`Bsct~RVFeJgyt&M%7i>C8d4q|fm%f8q#@sZxz zK-CBRNV9(L>$Mp5;w{Stu`*0*0+$ECn(0v2xySUy0C3tq?~PNG?Uhkv*{P|{>S#?q zSbDe{A1NTjOw&5dmnW0X(F ztQ;k>jYk>x9J5k~(;Q(g`p6bSL4j{U)nq5uN-b(hCZ}ZLdUDh9BzB>;^F)vbc_(s9 zOb2IR38Q9PcBbGiH)Tk)t@(@3a|VL)h|+qc5T|MDI@#F*O$NhSkZkbxIDT4CzrA~u zdn&Sm^KN118L9?T%ZMVeU1+u6IVR;tcmp+QSe#L6Wc$-z&vq@(?4E{c#rv&(3l~o% z>;UGEwIa0MdC(GVxX)qd>PF~`(_-0L&^xuLwwlbuJf%Tx0Mulo=uL0jQ$SwSYRTa- zu?~NH70k<^TuZ}q&oZSKbvY=)8kI_FdC8s}$3JB+7RuSzJN2rT1$6Oje&pnv-2!?y zsgNdK9?fbc+saC9%*mzUFl9hPB7-*5;qft%W_0Nz9GoBB-a@PP^?|*?Bg-zsX0L5; zv97F^w~YJlJb0s*njyVXKMQba-<*`nBN3r_kXLam8R(s_JZ$&q7yV+;{0f-Um>K%m z$U^_}xy?!ch4^QyyB7ate$ADjVuF!DrQp{iREE-mzwj=!AGd#f5$0U9r*=|ed_(`H zfO9?yo;P>&s2v>pIi&K)Cuuf%Bl6i-guBg5QYoHk?RIiBchQd&_DwH$i|!17!-)^> zKcBa{X4`0TD7I$y;o_4CBy5ttiHodsur3Q4=5$yJE}go zpJmcFLFss=m>(~n?jMqcV&+sYusyR!AlC(E1wONb7`)*v+p+Hi)n=@Z$=MftjiLUL z>94>i7+yhuQ>U4H$cW}8lL4dBl!qiX0swOHRf6q{Msdf!ny3*)T*>MiAa3&%bBQLs zL@77iO-xE1P@E>f2tnJLb$}dBq;1?km0X7+ck%h(a@!6~F5A<81tDcV&Bj#n0miXQ z6?^%~Oio^Y%qYijjj8eKA!3U6NHk`Iv+^j}NCNF{?Jw@C4!K6p1(}w`{>(!Manq!) zPZrmCRuP1QBHa@Jo&is5=tle6_^9cAG1+<99%;Dx0uE9`M~J|1WV;UUGBD&Ghuh(@ji z6F(oelX^HEL`5iwokcBN!@{Vn?Cg=*#rG1NNTc`E1zW{SR(7O!QYudx#F!>vE&xpZuujn~ z0R0*VuaIQz2;&CW&P$pEw}55MCeZ@XyaIjSETv;>sa~M0w`%;{xruIy=O#S=25fL3 zGFSR4_hWsfS0IZjkQDq8g!q5W3jPml{6A8M9!$M2c*TO2P=IO2Z?PQS=@O1%{%gPj z_G0`NN*0EKTORRd*7P{{M><)yqeMaZaKQAxBgAPBooAqnY zoo@>0-CHu*g^d9|OLfnWfFTX-iFktYh^?2T*{7wwdx?B{i+l@)@5nw(H9SacU9@2W z?ODB+K-T72uB6Q||4OyKWh6x7$Qp}RY%Mb9?C!)GT3AcG^DxWfd2}#07TwsX@(OrH z8dy>zub2iCz0YjKq{ZA?o0Wac#|$?fD`>l{&xAkMx~I~-cYNrWS|FHK6e(AMzS^^?X zV7y<)NM)N|0YF1KFaa`Yw&T_~LZH@Jr+(s2wTJ)KMZTl7ELt-xy7u`jd~7$oo+m!w zKZ3o**5SSJV)8aB)^>ivZs&E$SI)2#V`O>>*pqJ8X>2oSKY`OLIjzFkK{F5^m9w5wzz>?ua#b*nX zcw2-NfWQNG^scj)>E0yw>@EuhHpWw(0~^ybh?r!7D8CAN;rNARvt*Em7_5ER#|P(s z37lmzKrn{cz+1IzC%KS?7Uhty5nnk^IOo%DviYY~n#Y0Jg>itq4F+kpCF1Tpz~?K?|WBo5B3IBzp#m)8?jjIM*T3ChY5LBbjGo58@qD9}x19!ZRmh+||QaZ#N(bj#6lCF6Ms? zwpm<0N{bOvYtBUJ)UO6O2l`;pxhsgThSLUXs@w2pcdW|#x>5je?oR*hXTrPbemDoM zjh5}v;0J_FWPBe?pr?2XQN_Kj>u=*?LN8~`?TZ+#qxsm5=Ps784l5Aa;~Wl^Zd%^$ z&M!t=>b92NWqVO4d94>*(c&?`914#H&tZ(9pe(j32Eu()7>$slfM_ zU;|eiYlX8azaI&w|9%H&-Yt8wu)0Ujksuh9yrhE>mg!6Z1+Fb z7HV9V9-HHlC_z^J+ai9sqAg>VjT}CkxdrtVAN+65E#uZK`tE;FiRe$PTHYNRLnGo2 z3^GqpV&@GshSb(j(Q-pF#Y;2~Rw*n5L&aL>f0=P<6BQjT>za7nt%-h&!PVDEyWgC$ z)x)}<*?keTP?31nsIxm^J-t4+B1Kz`FIIl zisONad)95nU3ppBcyngV`}sQ*^6%Th{7k`l-Jt86cD>m}o=q!KUA>f=N>-R;5#jIn zs}Ff^4(`w01{7V--*{9+?;eN6RC(reuWg)XmL6Q9b|RD16g4J|@0-!h{k9*}!Io9@Y+MVlAHib}Q=Ub7(dYrrwIi61CTc^Ts7q6VorVJFd32lLu zacoz2T&QtHN@r9|t+$tTahL>)B-$s5AX1*9o&e!C-d_HO{qLFW^Vm)KbVC(w1bdH}|OC7BJ#`bw5^#QnW z7@a;At@#FQ1cR78??Tv^Ts{V{H@BO4x>dWINf$00+!w9+PfQ5l-z}YS_^_NB!zmZi zr?&i0r5+8m$tYtt(R;e>0d+edX|zZ4=T`zU46E%p zy(%8GhkN~Ogv}5yxEk@pw^Css%pAYM&Rcr3{hgSXxj^EbqP)s)((jaaraz?i<`-tE zr#zoG0U_StbgwC85pvF~I_A?`sgW{DQ;*u{riz(a$b1T$+c{a)X4tM>6V`I9>weno z{1|rHc~w1|8j77TvVVmN$~N8)KU%PIz9KueWB9l-Lr-xU<7f zzq>fCdcM1vQ(dm;dn2c?eekLwx;@TpPv66yZGstpRP&}Aj3X;3@^n(wk+QabhT27} zg3EsDIYuA-nwKku$=c+QsbmeG(P!CZXR5Pa7M{# ztd2i0Y*(jVo?$@z-wx+!lp2JNYW7v84o)(NK1t;E?7De9k`21m+lSWj-QUHGY~cVp zDpTleGWc|w$Z0`y@Rd%Dha=0tG(%?9O!AKHwH*sA%Pwdk@M4~+4H`^(x5nFul{SG{ z6qS1kHh9&+jTrNsc_1o7`fU6`OvPw5cQlpanUE%fGJnszK#OuWu7v!qOmLSLPnHBpB&DDP_Tjwf_;amrQ zENtkRQ^!xPopq!7`NPBs$ zq<^Q9D~MGR_sXci3AH}91J8duB86x7j+gW%M6GDVp zx`}=EhUmAs|CL1U)Q&$ewmIAV#-8%$GRTK!hs}IZ0hO5NQ-8>TlA8x&_9O5hPI*6X zbkmxM_p83XrxssFpHvmv_c~Lr^@0W5Qag%I;=g+NG$i$cvA^uIb*@q%5OJlL{r57# e|FFqV;;9{WmnLl>U7+|8L0(!#suE-p_}>88j@V%U literal 0 HcmV?d00001 diff --git a/scripts/check-images.mjs b/scripts/check-images.mjs index a5192a9..39fcf1d 100644 --- a/scripts/check-images.mjs +++ b/scripts/check-images.mjs @@ -1,6 +1,6 @@ -#!/usr/bin/env node +#!/usr/bin/env node /** - * MDX 图片路径校验脚本(中文注释) + * MDX 图片路径校验脚本 * * 功能 * - 扫描 `app/docs/??/?.mdx`(含 .md) @@ -26,10 +26,10 @@ import path from "path"; const ROOT = process.cwd(); const DOCS_DIR = path.join(ROOT, "app", "docs"); -// 允许保留的绝对路径前缀(站点通用/组件演示) +// 允许的绝对路径前缀(站点级 & 组件演示级别) const ALLOWED_ABSOLUTE_PREFIXES = ["/images/site/", "/images/components/"]; -// 认定为“图片”的后缀名(忽略大小写) +// 图片文件扩展名 const IMAGE_FILE_EXTS = new Set([ ".png", ".jpg", @@ -54,7 +54,7 @@ function* walk(dir) { } /** - * 推断文档的“路由路径”(仅用于参考/信息) + * 将文件路径转换为路由路径 */ function toRoutePath(file) { const rel = path.relative(DOCS_DIR, file).split(path.sep).join("/"); @@ -110,23 +110,25 @@ function checkFile(file, refs) { const content = fs.readFileSync(file, "utf8"); const routePath = toRoutePath(file); const baseDir = path.dirname(file); + const baseName = path.basename(file, path.extname(file)); + const expectedRelPrefix = `./${baseName}.assets/`; // Markdown 图片语法:![alt](src) const re = /!\[[^\]]*\]\(([^)]+)\)/g; - // HTML 图片语法: + // HTML 图片语法: const inlineRe = /]*src=["']([^"']+)["'][^>]*>/gi; const problems = []; function checkUrl(url, loc) { const urlNorm = url.replace(/\\/g, "/"); if (/^https?:\/\//i.test(urlNorm)) return; // 外链忽略 - // 绝对路径:仅允许站点级前缀;或“确认为被多个文档共享”的图片 + // 绝对路径 if (urlNorm.startsWith("/images/")) { const okAbs = ALLOWED_ABSOLUTE_PREFIXES.some((p) => urlNorm.startsWith(p)) || (refs.get(urlNorm)?.size || 0) > 1; if (!okAbs) { problems.push( - `${loc}: prefer co-located images; use relative path like ./images/ (avoid ${urlNorm})`, + `${loc}: prefer co-located images; use ${expectedRelPrefix} (avoid ${urlNorm})`, ); } const fname = urlNorm.split("/").pop() || ""; @@ -144,7 +146,7 @@ function checkFile(file, refs) { ); } const ext = path.extname(abs).toLowerCase(); - if (!IMAGE_FILE_EXTS.has(ext)) return; // 非图片链接跳过 + if (!IMAGE_FILE_EXTS.has(ext)) return; // 非图片文件忽略 if (!fs.existsSync(abs)) { problems.push(`${loc}: image file not found -> ${urlNorm}`); } diff --git a/scripts/move-doc-images.mjs b/scripts/move-doc-images.mjs index e9bb95f..1c9fa0d 100644 --- a/scripts/move-doc-images.mjs +++ b/scripts/move-doc-images.mjs @@ -106,7 +106,8 @@ function moveForFile(file, refs) { if (urls.size === 0) return { moved: 0, updated: false }; let moved = 0; const dir = path.dirname(file); - const destDir = path.join(dir, "images"); + const baseName = path.basename(file, path.extname(file)); + const destDir = path.join(dir, `${baseName}.assets`); for (const url of urls) { // 仅处理以 /images/ 开头的绝对路径 @@ -168,13 +169,55 @@ function moveForFile(file, refs) { } // 将文中的绝对路径替换为相对路径 ./images/<文件名> - const rel = `./images/${base}`; + const rel = `./${baseName}.assets/${base}`; // 转义正则中的特殊字符,确保全量替换 const escaped = url.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); const re = new RegExp(escaped, "g"); content = content.replace(re, rel); } + // 额外处理:将历史相对路径 ./images/* 迁移至 ./.assets/* 并更新引用 + for (const url of urls) { + if (!url.startsWith("./images/")) continue; + const absSrc = path.resolve(dir, url); + if (!fs.existsSync(absSrc)) continue; + const base = path.basename(absSrc); + ensureDir(destDir); + const dest = path.join(destDir, base); + if (fs.existsSync(dest)) { + try { + if (sha1(absSrc) === sha1(dest)) { + fs.unlinkSync(absSrc); + } else { + console.warn( + `Conflict: ${path.relative(ROOT, dest)} already exists with different content.`, + ); + continue; + } + } catch (e) { + console.warn(`Compare failed for ${absSrc} vs ${dest}: ${e.message}`); + continue; + } + } else { + try { + fs.renameSync(absSrc, dest); + } catch (e) { + try { + fs.copyFileSync(absSrc, dest); + fs.unlinkSync(absSrc); + } catch (e2) { + console.warn(`Move failed for ${absSrc} -> ${dest}: ${e2.message}`); + continue; + } + } + } + moved++; + const newRel = `./${baseName}.assets/${base}`; + const escapedRel = url.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const reRel = new RegExp(escapedRel, "g"); + content = content.replace(reRel, newRel); + } + if (content !== raw) fs.writeFileSync(file, content); return { moved, updated: content !== raw }; } From 0ecf7c516e31d5feb925f87ef44293c452cc5971 Mon Sep 17 00:00:00 2001 From: Siz Long Date: Sun, 14 Sep 2025 01:23:20 +0800 Subject: [PATCH 3/5] Update CONTRIBUTING.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ab82a78..2182e9c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -138,7 +138,7 @@ docsA.assets/ imgA ``` -![img](public\readme_docs_structure.png) +![img](public/readme_docs_structure.png) 每个文档都需要一个 Frontmatter,例如: From bb20913dc93b5df6a3fac89d8f0874f6f08cfa56 Mon Sep 17 00:00:00 2001 From: Siz Long Date: Sun, 14 Sep 2025 01:23:33 +0800 Subject: [PATCH 4/5] Update scripts/check-images.mjs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/check-images.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check-images.mjs b/scripts/check-images.mjs index 39fcf1d..e3b74e1 100644 --- a/scripts/check-images.mjs +++ b/scripts/check-images.mjs @@ -1,4 +1,4 @@ -#!/usr/bin/env node +#!/usr/bin/env node /** * MDX 图片路径校验脚本 * From 484cf8b93d63da4225b7008a810de72ecfe125f3 Mon Sep 17 00:00:00 2001 From: Loong Loong Date: Sun, 14 Sep 2025 01:34:51 +0800 Subject: [PATCH 5/5] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=B8=AD=E6=96=87?= =?UTF-8?q?=E7=BC=96=E7=A0=81=E7=9A=84=E9=97=AE=E9=A2=98,=20=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0CICD=E6=A3=80=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/content-check.yml | 7 ++ app/docs/ai/multimodal/llava/index.mdx | 134 ++++++++++++------------- package.json | 2 +- 3 files changed, 75 insertions(+), 68 deletions(-) diff --git a/.github/workflows/content-check.yml b/.github/workflows/content-check.yml index 1e85829..2cad0a7 100644 --- a/.github/workflows/content-check.yml +++ b/.github/workflows/content-check.yml @@ -35,5 +35,12 @@ jobs: cache: "pnpm" - run: pnpm install --frozen-lockfile + # Non-blocking image migration + lint (visibility only) + - name: Migrate images next to MDX (check only) + run: pnpm migrate:images || echo "[warn] migrate:images failed (non-blocking)" + + - name: Lint image references (non-blocking) + run: pnpm lint:images || echo "[warn] image lint found issues (non-blocking)" + # Build the site to validate MDX and docs using Fumadocs - run: pnpm build diff --git a/app/docs/ai/multimodal/llava/index.mdx b/app/docs/ai/multimodal/llava/index.mdx index 7bd78eb..a3e76ff 100644 --- a/app/docs/ai/multimodal/llava/index.mdx +++ b/app/docs/ai/multimodal/llava/index.mdx @@ -1,6 +1,6 @@ --- title: "LLaVA" -description: "LLaVA��ģ̬��ģ�Ϳ�ܣ��ܹ�������CLIP���������ľ���������ʵ��" +description: "LLaVA多模态大模型框架:架构解析、CLIP基础、论文精读、复现实践" date: "2025-01-27" tags: - llava @@ -10,109 +10,109 @@ tags: - visual-instruction-tuning --- -LLaVA (Large Language and Vision Assistant) �Ƕ�ģ̬��ģ�͵Ŀ����Կ�ܣ��������Ӿ�ָ����ŵ��·�ʽ�� +LLaVA (Large Language and Vision Assistant) 是多模态大模型的开创性框架,开启了视觉指令调优的新范式。 -![](./index.assets/word-img-03.png) +![](index.assets/word-img-03.png) -## ���ļܹ� +## 核心架构 -### �����ṹ +### 基本结构 ``` -ViT�Ӿ������� �� ͶӰ���ģ̬���� �� LLM�������� +ViT视觉编码器 → 投影层跨模态对齐 → LLM语言生成 ``` -![](./index.assets/word-img-04.png) +![](index.assets/word-img-04.png) -![](./index.assets/word-img-05.png) +![](index.assets/word-img-05.png) -### �����ص� +### 技术特点 -- **�Ӿ�����**: ʹ��Ԥѵ����Vision Transformer����ͼ�� -- **��ģ̬����**: ͨ��ͶӰ�㽫�Ӿ�����ӳ�䵽���Կռ� -- **��������**: ����LLM���ж�ģ̬��������� -- **ָ�����**: �������Ӿ�ָ����ŵ��·�ʽ +- **视觉编码**: 使用预训练的Vision Transformer处理图像 +- **跨模态对齐**: 通过投影层将视觉特征映射到语言空间 +- **语言生成**: 基于LLM进行多模态理解和生成 +- **指令调优**: 开创了视觉指令调优的新范式 -## ѧϰ��Դ +## 学习资源 -### �������� +### 核心论文 -- **����**: [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485) -- **����**: [LLaVA GitHub](https://github.com/haotian-liu/LLaVA) -- **��ɫ**: �״�����Ӿ�ָ����Ÿ��� +- **论文**: [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485) +- **代码**: [LLaVA GitHub](https://github.com/haotian-liu/LLaVA) +- **特色**: 首次提出视觉指令调优概念 -### CLIP���� +### CLIP基础 -**CLIP (Contrastive Language-Image Pre-training)** �Ƕ�ģ̬ѧϰ����Ҫ���������� +**CLIP (Contrastive Language-Image Pre-training)** 是多模态学习的重要基础技术。 -**�ܹ����**: +**架构设计**: -- **˫���ṹ**: Text Encoder + Image Encoder -- **�Ա�ѧϰ**: ͨ��(image, text)���ݶԽ���Ԥѵ�� -- **����������**: ǿ���ͼ��ƥ��ͷ������� +- **双塔结构**: Text Encoder + Image Encoder +- **对比学习**: 通过(image, text)数据对进行预训练 +- **零样本能力**: 强大的图文匹配和分类能力 -**ѧϰ��Դ**: +**学习资源**: -- **����**: [Learning Transferable Visual Representations](https://arxiv.org/abs/2103.00020) -- **����**: [OpenAI CLIP](https://github.com/openai/CLIP) +- **论文**: [Learning Transferable Visual Representations](https://arxiv.org/abs/2103.00020) +- **代码**: [OpenAI CLIP](https://github.com/openai/CLIP) -### LLaVA������Ŀ +### LLaVA复现项目 -�ƻ�����LLaVAģ�ͣ����������ģ̬ģ�͵�ѵ�����̺ͼ���ϸ�ڡ� +计划复现LLaVA模型,深入理解多模态模型的训练流程和技术细节。 -## ������Ƚ��� +## 技术深度解析 -### �Ӿ�ָ����� +### 视觉指令调优 -**����˼��**: ��ģ��ѧ�������ִ�л���ͼ���ָ� +**核心思想**: 让模型学会理解和执行基于图像的指令。 -**���ݹ���**: +**数据构建**: -- ͼ���������� -- �Ӿ��ʴ����� -- ������������ -- ָ����ѭ���� +- 图像描述任务 +- 视觉问答任务 +- 复杂推理任务 +- 指令遵循任务 -### ��ģ̬���� +### 跨模态对齐 -**������ս**: �Ӿ�������ģ̬������ռ���� +**对齐挑战**: 视觉和语言模态的语义空间差异 -**�������**: +**解决方案**: -- ����ͶӰ��ӳ�� -- �Ա�ѧϰԤѵ�� -- ����������ѵ�� -- ����ʽ������� +- 线性投影层映射 +- 对比学习预训练 +- 多任务联合训练 +- 渐进式对齐策略 -## Ӧ�ó��� +## 应用场景 -### ͼ������ +### 图像理解 -- **ͼ������**: �Զ�����ͼ�����ϸ���� -- **�Ӿ��ʴ�**: ����ͼ�����ݻش����� -- **��������**: ���⸴�ӳ�������Ϊ -- **ϸ�ڼ��**: ʶ��ͼ���еĹؼ�ϸ�� +- **图像描述**: 自动生成图像的详细描述 +- **视觉问答**: 基于图像内容回答问题 +- **场景分析**: 理解复杂场景和行为 +- **细节检测**: 识别图像中的关键细节 -### �������� +### 教育辅助 -- **�Ӿ���ѧ**: ����ͼ���֪ʶ���� -- **��ҵ����**: ��������ͼ����ʾ�� -- **��������**: �����Ӿ����ݵĴ������� -- **ѧϰ����**: �Ӿ���ѧϰЧ������ +- **视觉教学**: 基于图像的知识讲解 +- **作业辅导**: 帮助理解图表和示例 +- **创意启发**: 基于视觉内容的创意引导 +- **学习评估**: 视觉化学习效果评估 -### ���ݴ��� +### 内容创作 -- **���´���**: ����ͼ�������� -- **Ӫ���İ�**: ��Ʒͼ����������� -- **�罻ý��**: ͼƬ���ĺ�hashtag���� -- **�������**: ���˼·�͸������ +- **故事创作**: 基于图像创作故事 +- **营销文案**: 商品图像的描述生成 +- **社交媒体**: 图片配文和hashtag生成 +- **创意设计**: 设计思路和概念阐释 -## ѧϰ���� +## 学习建议 -1. **CLIP����**: �����ģ̬Ԥѵ�� -2. **���ľ���**: �����о�LLaVA����ϸ�� -3. **�������**: �Ķ��ٷ�ʵ�ִ��� -4. **����ʵ��**: ���Լ򻯰汾ʵ�� -5. **Ӧ�ÿ���**: ����ʵ��Ӧ�ó��� +1. **CLIP基础**: 理解跨模态预训练 +2. **论文精读**: 深入研究LLaVA技术细节 +3. **代码分析**: 阅读官方实现代码 +4. **复现实践**: 尝试简化版本实现 +5. **应用开发**: 构建实际应用场景 -LLaVA��Ϊ��ģ̬��ģ�͵���̱�������Ϊ�����Ӿ����Խ����͹������ܶ�ģ̬ϵͳ�ṩ����Ҫ������ +LLaVA作为多模态大模型的里程碑工作,为理解视觉语言交互和构建智能多模态系统提供了重要基础。 diff --git a/package.json b/package.json index 1247058..51de677 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ "postinstall": "fumadocs-mdx", "prepare": "husky", "lint:images": "node scripts/check-images.mjs", - "migrate:images": "node scripts/move-doc-images.mjs && node scripts/check-images.mjs" + "migrate:images": "node scripts/move-doc-images.mjs" }, "dependencies": { "@types/mdx": "^2.0.13",