Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
23c0bed
feat: v0.3.0 IR S1 — PictureBlock + Furniture 채움
DanMeon Apr 26, 2026
95c900c
chore: sync rhwp upstream
DanMeon Apr 27, 2026
1aaab95
feat: v0.3.0 IR S2 — FormulaBlock + Footnote/Endnote
DanMeon Apr 27, 2026
f2dc290
docs: 업스트림 이슈 문서 추가
DanMeon Apr 27, 2026
130f560
feat: v0.3.0 IR S3 — ListItem/Caption/Toc/Field 일괄 도입
DanMeon Apr 28, 2026
7f0a319
feat: v0.3.0 IR S4 — rhwp-py CLI + LangChain include_furniture + Sche…
DanMeon Apr 28, 2026
c24c51d
docs: 업스트림 컨텍스트 잔재 제거 + 컨벤션 정합성 정리
DanMeon Apr 28, 2026
4253200
chore: docs 컨벤션 자동 검증 hook 도입
DanMeon Apr 28, 2026
29f546a
docs: 컨벤션 정책 보강 (Archive + Branch naming)
DanMeon Apr 28, 2026
a86c4b2
refactor: aparse 가 stdlib asyncio.to_thread 사용 (aiofiles 의존성 제거)
DanMeon Apr 28, 2026
6805c3c
docs: CONVENTIONS § Implementation log 구조 절 추가
DanMeon Apr 28, 2026
054aee2
chore: Cargo.toml 0.3.0 bump
DanMeon Apr 28, 2026
307022c
fix: docs-lint 정규식 ReDoS 회피 + codeql 분석 범위 정리
DanMeon Apr 28, 2026
4edb02f
ci: branch protection 단일 게이트 잡 추가
DanMeon Apr 28, 2026
d4f253d
docs: CLAUDE.md 개선
DanMeon Apr 28, 2026
04f4f08
docs: 알려진 제약 섹션을 KNOWN_ISSUES.md 로 분리
DanMeon Apr 28, 2026
a96c2e7
perf: build_raw_document Vec pre-sizing + 0.3.0 릴리즈 노트 정정
DanMeon Apr 28, 2026
1796932
ci: Linux abi3 wheel 1회 빌드 + 캐시 저장 main 한정 최적화
DanMeon Apr 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions .claude/hooks/docs-lint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""Validate ``docs/*.md`` after an edit — enforces the CONVENTIONS.md policy.

Runs as a PostToolUse hook after Edit / Write / MultiEdit. The hook event is
read from stdin; when its ``tool_input.file_path`` is a markdown file under
``docs/`` the file is checked, otherwise the script exits immediately.

Checks (the four hard rules of CONVENTIONS.md):

1. **Status header** — every spec except Living docs carries a
   ``**Status**: ...`` meta line.
2. **Upstream monorepo residue keywords** — they violate the spinoff-repo
   convention (``사용자 Fork`` / ``rhwp 본체`` / ``pyo3-sandbox`` ...). The
   v0.1.0 historical Frozen documents are exempt.
3. **Direct spec ↔ spec links inside the same vX.Y.Z directory** — only the
   pair (``<topic>.md`` ↔ ``<topic>-research.md``) is exempt.
4. **Broken .md links** — every relative path must point at an existing file.

On violation the script exits with status 2 and writes to stderr — Claude Code
injects stderr into the LLM context so the model notices the violation and
decides on a follow-up. Exit status 1 is non-blocking and is not shown to the
LLM, so it must not be used (hooks specification).
"""

import json
import re
import sys
from pathlib import Path

# * Documents that are exempt from the Status header rule
LIVING_FILES = {"docs/CONVENTIONS.md", "docs/roadmap/README.md"}

# * Path prefixes exempt from the residue-keyword rule (v0.1.0 historical Frozen)
HISTORICAL_FROZEN = ("docs/implementation/v0.1.0/",)

# * Keywords that only make sense in the upstream monorepo
FORBIDDEN_KEYWORDS = (
    "사용자 Fork",
    "rhwp 본체",
    "pyo3-sandbox",
    "/Cargo.toml (루트)",
    "pyo3-bindings.md",
)

_STATUS_RE = re.compile(r"^\*\*Status\*\*:", re.MULTILINE)

# ^ captures the target of every markdown link that mentions a ``.md`` file;
#   shared by rule 3 and rule 4 so both see exactly the same set of links
_MD_LINK_RE = re.compile(r"\]\(([^)]+\.md)[^)]*\)")

# ^ exact SemVer match (vMAJOR.MINOR.PATCH) — the earlier [\d.]+ based pattern
#   risked catastrophic backtracking (CodeQL py/redos). Covers v0.3.0 / v0.3.1 ...
_VERSIONED_SPEC_RE = re.compile(
    r"docs/(roadmap|design)/(v\d+\.\d+\.\d+)/(.+)\.md$"
)

# ^ RFC 3986 scheme prefix (``https:``, ``mailto:`` ...) — such links are not
#   files on disk and must not be resolved against the document directory
_URL_SCHEME_RE = re.compile(r"[A-Za-z][A-Za-z0-9+.-]*:")


def _link_path(link: str) -> str:
    """Return *link* without its ``#fragment`` / ``?query`` part."""
    return link.split("#", 1)[0].split("?", 1)[0]


def check_status_header(rel_str: str, text: str) -> list[str]:
    """Rule 1: require a ``**Status**:`` line outside the Living documents.

    Args:
        rel_str: Repo-relative POSIX path of the document.
        text: Full document content.

    Returns:
        A list with one message when the header is missing, else an empty list.
    """
    if rel_str in LIVING_FILES or _STATUS_RE.search(text):
        return []
    return [
        "missing Status header — add '**Status**: "
        "<Active|Draft|Frozen|Superseded by [link]> · "
        "**GA|Target**: vX.Y.Z · **Last updated**: YYYY-MM-DD' "
        "(CONVENTIONS § Status header format)"
    ]


def check_residue_keywords(rel_str: str, text: str) -> list[str]:
    """Rule 2: report every upstream-monorepo keyword found in *text*.

    Documents under a ``HISTORICAL_FROZEN`` prefix are exempt.
    """
    if rel_str.startswith(HISTORICAL_FROZEN):
        return []
    return [
        f"upstream monorepo residue keyword {kw!r} — "
        "this is a spinoff binding repo, not the source-of-truth repo"
        for kw in FORBIDDEN_KEYWORDS
        if kw in text
    ]


def check_same_version_links(rel_str: str, text: str) -> list[str]:
    """Rule 3: forbid direct links between specs of the same version directory.

    Only applies to ``docs/(roadmap|design)/vX.Y.Z/*.md``. A link is a
    same-directory candidate when its path has no ``/`` once the fragment,
    the query and any leading ``./`` are removed. The document itself and its
    pair file (``<topic>.md`` ↔ ``<topic>-research.md``) are allowed.
    """
    m = _VERSIONED_SPEC_RE.match(rel_str)
    if not m:
        return []

    base = m.group(3)
    # ^ pair: <topic>.md ↔ <topic>-research.md (the only allowed direct link)
    if base.endswith("-research"):
        allowed_link = f"{base.removesuffix('-research')}.md"
    else:
        allowed_link = f"{base}-research.md"
    permitted = {allowed_link, f"{base}.md"}

    errors: list[str] = []
    for link in _MD_LINK_RE.findall(text):
        link_target = _link_path(link)
        # ^ "./x.md" is the same file as "x.md"; without this normalisation the
        #   "/" test below would let such links bypass the rule
        while link_target.startswith("./"):
            link_target = link_target[2:]
        # only same-directory .md candidates qualify
        if not link_target or "/" in link_target:
            continue
        if link_target in permitted:
            continue
        errors.append(
            f"same-version spec direct link {link!r} — "
            "route through phase-N.md or roadmap/README.md "
            "(CONVENTIONS § Cross-link direction rule)"
        )
    return errors


def check_broken_links(target: Path, text: str) -> list[str]:
    """Rule 4: report relative ``.md`` links that do not resolve to a file.

    Args:
        target: Path of the document being checked; links are resolved
            relative to its parent directory.
        text: Full document content.

    Returns:
        One message per link whose resolved path does not exist. Links with a
        URL scheme (``https:``, ``mailto:`` ...) are skipped.
    """
    doc_dir = target.parent
    errors: list[str] = []
    for link in _MD_LINK_RE.findall(text):
        link_target = _link_path(link)
        if not link_target or _URL_SCHEME_RE.match(link_target):
            continue
        resolved = (doc_dir / link_target).resolve()
        if not resolved.exists():
            errors.append(f"broken .md link {link!r} (resolved: {resolved})")
    return errors


def main() -> int:
    """Read the hook event from stdin, lint the edited doc, return the exit status.

    Returns:
        0 when the event does not concern a ``docs/*.md`` file or the file is
        clean, 2 when at least one violation was written to stderr.
    """
    # * parse the hook event from stdin
    try:
        event = json.loads(sys.stdin.read() or "{}")
    except json.JSONDecodeError:
        return 0
    # ^ valid JSON that is not an object (e.g. "[]") must not crash the hook
    if not isinstance(event, dict):
        return 0

    tool_input = event.get("tool_input") or {}
    if not isinstance(tool_input, dict):
        return 0
    file_path = tool_input.get("file_path") or ""
    if not isinstance(file_path, str) or not file_path:
        return 0

    # ^ this script lives in <repo>/.claude/hooks/, so the repo is two levels up
    repo = Path(__file__).resolve().parents[2]
    try:
        rel = Path(file_path).resolve().relative_to(repo)
    except ValueError:
        return 0

    rel_str = str(rel).replace("\\", "/")
    if not (rel_str.startswith("docs/") and rel.suffix == ".md"):
        return 0

    target = repo / rel
    if not target.is_file():
        return 0

    text = target.read_text(encoding="utf-8")
    errors = [
        *check_status_header(rel_str, text),
        *check_residue_keywords(rel_str, text),
        *check_same_version_links(rel_str, text),
        *check_broken_links(target, text),
    ]
    if not errors:
        return 0

    sys.stderr.write(f"\ndocs-lint: {rel_str} — {len(errors)} violation(s)\n")
    for i, e in enumerate(errors, 1):
        sys.stderr.write(f" {i}. {e}\n")
    sys.stderr.write("policy: docs/CONVENTIONS.md\n")
    # ^ exit 2 is the only status whose stderr reaches the LLM
    return 2


if __name__ == "__main__":
    sys.exit(main())
15 changes: 15 additions & 0 deletions .claude/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
  "hooks": {
    "PostToolUse": [
      {
        "matcher": "Edit|Write|MultiEdit",
        "hooks": [
          {
            "type": "command",
            "command": "python3 \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/docs-lint.py"
          }
        ]
      }
    ]
  }
}
3 changes: 3 additions & 0 deletions .github/codeql-config.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
# * Paths excluded from CodeQL analysis
paths-ignore:
- tests
- external
- examples
- benches
121 changes: 96 additions & 25 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,36 +27,60 @@ concurrency:
cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
# * 메인 테스트 + 린트 + 타입체크
# abi3-py310 wheel 로 빌드는 한 번이지만 런타임 동작은 버전별 검증 필요 → Linux × 전 버전.
# macOS / Windows 는 OS 레이어 스모크이므로 py3.12 하나만.
# * Build the Linux abi3 wheel once → shared by every Linux job (test×4 / slow / core-only)
# Built as abi3-py310, so py3.10/3.11/3.12/3.13 can all reuse the same wheel.
# macOS/Windows are single jobs, so splitting build from test brings no gain — they keep building on every run.
build-linux-wheel:
name: Build Linux abi3 wheel
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
submodules: recursive
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
with:
# ^ the cache-save condition is true only when the ref is refs/heads/main
save-if: ${{ github.ref == 'refs/heads/main' }}
- uses: astral-sh/setup-uv@v8.1.0
with:
python-version: "3.12"
- run: uv sync --no-install-project --group all
# ^ writes the release wheel into dist/ so the next step can upload it
- run: uv run maturin build --release --out dist
- uses: actions/upload-artifact@v7
with:
# ^ artifact name that consumers must pass to download-artifact
name: rhwp-python-linux-wheel
path: dist/*.whl
retention-days: 1

# * 메인 테스트 + 린트 + 타입체크 (Linux × 전 Python 버전 — wheel 공유)
test:
name: Test (${{ matrix.os }} / py${{ matrix.python }})
runs-on: ${{ matrix.os }}
name: Test (Linux / py${{ matrix.python }})
needs: build-linux-wheel
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- { os: ubuntu-latest, python: "3.10", lint: true }
- { os: ubuntu-latest, python: "3.11" }
- { os: ubuntu-latest, python: "3.12" }
- { os: ubuntu-latest, python: "3.13" }
- { os: macos-latest, python: "3.12" }
- { os: windows-latest, python: "3.12" }
- { python: "3.10", lint: true }
- { python: "3.11" }
- { python: "3.12" }
- { python: "3.13" }
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v6
with:
submodules: recursive
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- uses: astral-sh/setup-uv@v8.1.0
with:
python-version: ${{ matrix.python }}
- run: uv sync --no-install-project --group all
- run: uv run maturin develop --release
- uses: actions/download-artifact@v8
with:
name: rhwp-python-linux-wheel
path: dist/
- run: uv pip install --reinstall dist/*.whl
- name: Run pytest (not slow) with coverage
run: uv run pytest tests/ -m "not slow" --cov=rhwp --cov-report=term-missing -v
- name: Run pyright (normal)
Expand All @@ -68,6 +92,11 @@ jobs:
tests/test_langchain_loader.py tests/test_langchain_loader_ir.py \
tests/test_ir_schema.py tests/test_ir_roundtrip.py tests/test_ir_tables.py \
tests/test_ir_iter_blocks.py tests/test_ir_schema_export.py \
tests/test_ir_picture.py tests/test_ir_furniture.py \
tests/test_ir_formula.py tests/test_ir_footnote.py \
tests/test_ir_list.py tests/test_ir_caption.py \
tests/test_ir_toc.py tests/test_ir_field.py \
tests/test_cli.py \
tests/conftest.py tests/type_check_samples.py
- name: Run pyright (intentional errors — expect 4)
if: matrix.lint
Expand All @@ -81,9 +110,36 @@ jobs:
exit 1
fi

# * PDF 렌더링 — 느려서 별도 잡
# * macOS / Windows smoke test — one job per OS, so a separate wheel build brings no gain → run maturin develop directly
test-other-os:
name: Test (${{ matrix.os }} / py3.12)
runs-on: ${{ matrix.os }}
strategy:
# ^ keep the other OS running when one of them fails
fail-fast: false
matrix:
os: [macos-latest, windows-latest]
defaults:
run:
# ^ bash on every runner, including windows-latest
shell: bash
steps:
- uses: actions/checkout@v6
with:
submodules: recursive
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
with:
# ^ the cache-save condition is true only when the ref is refs/heads/main
save-if: ${{ github.ref == 'refs/heads/main' }}
- uses: astral-sh/setup-uv@v8.1.0
with:
python-version: "3.12"
- run: uv sync --no-install-project --group all
- run: uv run maturin develop --release
# ^ tests marked "slow" are deselected here
- run: uv run pytest tests/ -m "not slow" -v

# * PDF 렌더링 — 느려서 별도 잡, Linux wheel 재사용
test-slow:
name: Test slow (Linux / py3.12 — PDF)
needs: build-linux-wheel
runs-on: ubuntu-latest
defaults:
run:
Expand All @@ -92,18 +148,21 @@ jobs:
- uses: actions/checkout@v6
with:
submodules: recursive
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- uses: astral-sh/setup-uv@v8.1.0
with:
python-version: "3.12"
- run: uv sync --no-install-project --group testing
- run: uv run maturin develop --release
- uses: actions/download-artifact@v8
with:
name: rhwp-python-linux-wheel
path: dist/
- run: uv pip install --reinstall dist/*.whl
- run: uv run pytest tests/ -m slow -v

# * extras 미설치 시 langchain 테스트가 importorskip 로 auto-skip 되는지 검증
test-core-only:
name: Test without extras (importorskip auto-skip)
needs: build-linux-wheel
runs-on: ubuntu-latest
defaults:
run:
Expand All @@ -112,24 +171,36 @@ jobs:
- uses: actions/checkout@v6
with:
submodules: recursive
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- uses: astral-sh/setup-uv@v8.1.0
with:
python-version: "3.12"
- name: Install pytest only (no langchain extras — intentional)
run: |
uv venv
uv pip install pytest
- run: uv run maturin develop --release
- uses: actions/download-artifact@v8
with:
name: rhwp-python-linux-wheel
path: dist/
- run: uv pip install dist/*.whl
- name: Run pytest — extras-gated tests must auto-skip via importorskip
# ^ 파일-레벨 importorskip 은 해당 파일 전체를 skip 1개로 카운트.
# 현재 gated 파일: test_langchain_loader.py + test_langchain_loader_ir.py
# (langchain-core), test_ir_schema_export.py (jsonschema),
# test_async.py (aiofiles) → 총 4 파일
# v0.3.0 기준 gated 파일: test_langchain_loader.py + test_langchain_loader_ir.py
# (langchain-core), test_ir_schema_export.py (jsonschema), test_cli.py (typer)
# → 총 4 파일. test_async.py 는 v0.3.0 부터 stdlib 만 사용 (aiofiles 의존성 제거)
run: |
uv run pytest tests/ -m "not slow" -v | tee pytest-output.txt
if ! grep -qE '(^|[^0-9])4 skipped([^0-9]|$)' pytest-output.txt; then
echo "::error::expected 4 extras-gated files to auto-skip via importorskip (langchain×2, jsonschema, aiofiles)"
echo "::error::expected 4 extras-gated files to auto-skip via importorskip (langchain×2, jsonschema, typer)"
exit 1
fi

# * Single gate job that aggregates the results of every job listed in `needs`
all-tests-passed:
name: All tests passed
# ^ always() makes this job run even when a needed job failed or was skipped
if: always()
runs-on: ubuntu-latest
needs: [build-linux-wheel, test, test-other-os, test-slow, test-core-only]
steps:
# ^ NOTE(review): alls-green is expected to fail this job unless every needed job succeeded — confirm against the action's docs
- uses: re-actors/alls-green@release/v1
with:
jobs: ${{ toJSON(needs) }}
15 changes: 12 additions & 3 deletions .github/workflows/publish-schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ on:
push:
branches: [main]
paths:
- 'python/rhwp/ir/schema/hwp_ir_v1.json'
# ^ glob — v2 도입 시 hwp_ir_v2.json 만 변경되어도 자동 트리거
- 'python/rhwp/ir/schema/hwp_ir_v*.json'
- 'python/rhwp/ir/nodes.py'
- 'python/rhwp/ir/schema.py'
- '.github/workflows/publish-schema.yml'
Expand Down Expand Up @@ -54,11 +55,16 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- name: Prepare pages directory — copy every versioned schema
- name: Prepare pages directory — copy versioned schema + content-addressed alias
# ^ 불변 경로 정책: repo 의 hwp_ir_v*.json 을 모두 각 버전 URL 로 배포.
# v2 도입 시 python/rhwp/ir/schema/hwp_ir_v2.json 을 추가하기만 하면
# 이 루프가 자동으로 v1/v2 양쪽 모두를 pages 아티팩트에 포함한다.
# `actions/deploy-pages@v4` 의 replace-all 동작으로 v1 이 누락되는 것을 원천 차단.
#
# Content-addressed alias (v0.3.0 S4 추가, ir-expansion.md § 스키마 버저닝):
# 같은 v1 URL 안에서 minor bump (1.0 → 1.1) 가 발생할 때마다 hash-tagged
# immutable alias 를 alongside 발행. 구 hash 는 영구 보존되어 SchemaStore /
# 외부 도구가 정확한 스냅샷을 reproducible 하게 참조 가능.
run: |
set -euo pipefail
mkdir -p pages/schema/hwp_ir
Expand All @@ -69,7 +75,10 @@ jobs:
ver="${name#hwp_ir_}" # v1, v2, ...
mkdir -p "pages/schema/hwp_ir/$ver"
cp "$f" "pages/schema/hwp_ir/$ver/schema.json"
echo "Published $f -> pages/schema/hwp_ir/$ver/schema.json"
sha=$(shasum -a 256 "$f" | awk '{print $1}')
alias="pages/schema/hwp_ir/${name}-sha256-${sha}.json"
cp "$f" "$alias"
echo "Published $f -> $ver/schema.json + alias ${name}-sha256-${sha}.json"
copied=$((copied + 1))
done
if [ "$copied" -eq 0 ]; then
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,8 @@ coverage.xml
# * MCP
.serena/

# * Claude Code — settings.json 은 팀 공유 (hooks 등록), local override 만 ignore
.claude/settings.local.json

# * Examples 산출물
render_output/
Loading
Loading