diff --git a/.github/workflows/bindings-node.yml b/.github/workflows/bindings-node.yml new file mode 100644 index 0000000..68d2b74 --- /dev/null +++ b/.github/workflows/bindings-node.yml @@ -0,0 +1,62 @@ +name: bindings-node +on: + push: + branches: [main, feature/bindings] + paths: + - 'crates/engine/**' + - 'crates/js/**' + - 'bindings/node/**' + - 'bindings/CHROME_VERSION' + - '.github/workflows/bindings-node.yml' + pull_request: + paths: + - 'crates/engine/**' + - 'crates/js/**' + - 'bindings/node/**' + - 'bindings/CHROME_VERSION' + - '.github/workflows/bindings-node.yml' + workflow_dispatch: + +jobs: + smoke: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: { node-version: "20" } + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + with: + workspaces: ". -> target" + - name: Install npm deps + working-directory: bindings/node + run: npm install + - name: Build native module + working-directory: bindings/node + run: npm run build:debug + - name: Run smoke tests + working-directory: bindings/node + run: npm test + + e2e: + if: github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: { node-version: "20" } + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + with: + workspaces: ". 
-> target" + - name: Build + E2E + working-directory: bindings/node + env: { FOLIO_E2E: "1" } + run: | + npm install + npm run build:debug + npm test diff --git a/.github/workflows/bindings-python.yml b/.github/workflows/bindings-python.yml new file mode 100644 index 0000000..c0d24c5 --- /dev/null +++ b/.github/workflows/bindings-python.yml @@ -0,0 +1,62 @@ +name: bindings-python +on: + push: + branches: [main, feature/bindings] + paths: + - 'crates/engine/**' + - 'crates/py/**' + - 'bindings/python/**' + - 'bindings/CHROME_VERSION' + - '.github/workflows/bindings-python.yml' + pull_request: + paths: + - 'crates/engine/**' + - 'crates/py/**' + - 'bindings/python/**' + - 'bindings/CHROME_VERSION' + - '.github/workflows/bindings-python.yml' + workflow_dispatch: + +jobs: + smoke: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: { python-version: "3.10" } + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + with: + workspaces: ". -> target" + - name: Install build deps + run: pip install --upgrade pip maturin pytest + - name: Build wheel + run: maturin build --release --features chromium,libreoffice,chrome-fetch -m crates/py/Cargo.toml --out dist + - name: Install built wheel + shell: bash + run: pip install --find-links dist folio + - name: Run smoke tests + run: pytest -v bindings/python/tests/test_smoke.py + + e2e: + if: github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: { python-version: "3.10" } + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + with: + workspaces: ". 
-> target" + - name: Install build deps + run: pip install --upgrade pip maturin pytest + - name: Install + run E2E + env: { FOLIO_E2E: "1" } + run: | + maturin develop --release --features chromium,chrome-fetch -m crates/py/Cargo.toml + pytest -v -k e2e bindings/python/tests/ diff --git a/Cargo.lock b/Cargo.lock index b2f942f..0b66912 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -747,6 +747,15 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -840,6 +849,16 @@ dependencies = [ "typenum", ] +[[package]] +name = "ctor" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "cucumber" version = "0.21.1" @@ -1082,15 +1101,20 @@ version = "0.1.0" dependencies = [ "axum 0.8.9", "chromiumoxide", + "dirs", + "flate2", "futures-util", "humantime-serde", "image", "lopdf", "proptest", "pulldown-cmark", + "reqwest 0.12.28", "serde", "serde_json", + "sha2", "static_assertions", + "tar", "tempfile", "thiserror 2.0.18", "tokio", @@ -1098,7 +1122,9 @@ dependencies = [ "tracing", "tracing-subscriber", "urlencoding", + "walkdir", "which 7.0.3", + "zip", ] [[package]] @@ -1138,6 +1164,17 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "filetime" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + [[package]] name = 
"find-msvc-tools" version = "0.1.9" @@ -1904,6 +1941,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + [[package]] name = "inflections" version = "1.1.1" @@ -1978,6 +2024,15 @@ dependencies = [ [[package]] name = "js" version = "0.1.0" +dependencies = [ + "engine", + "napi", + "napi-build", + "napi-derive", + "serde", + "serde_json", + "tokio", +] [[package]] name = "js-sys" @@ -2032,13 +2087,26 @@ version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libredox" version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" dependencies = [ + "bitflags", "libc", + "plain", + "redox_syscall 0.7.4", ] [[package]] @@ -2158,6 +2226,15 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -2228,6 +2305,66 @@ dependencies = [ "version_check", ] +[[package]] +name = "napi" +version = "2.16.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"55740c4ae1d8696773c78fdafd5d0e5fe9bc9f1b071c7ba493ba5c413a9184f3" +dependencies = [ + "bitflags", + "ctor", + "napi-derive", + "napi-sys", + "once_cell", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "napi-build" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d376940fd5b723c6893cd1ee3f33abbfd86acb1cd1ec079f3ab04a2a3bc4d3b1" + +[[package]] +name = "napi-derive" +version = "2.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cbe2585d8ac223f7d34f13701434b9d5f4eb9c332cccce8dee57ea18ab8ab0c" +dependencies = [ + "cfg-if", + "convert_case", + "napi-derive-backend", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "napi-derive-backend" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1639aaa9eeb76e91c6ae66da8ce3e89e921cd3885e99ec85f4abacae72fc91bf" +dependencies = [ + "convert_case", + "once_cell", + "proc-macro2", + "quote", + "regex", + "semver", + "syn", +] + +[[package]] +name = "napi-sys" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "427802e8ec3a734331fec1035594a210ce1ff4dc5bc1950530920ab717964ea3" +dependencies = [ + "libloading", +] + [[package]] name = "nom" version = "7.1.3" @@ -2411,7 +2548,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec", "windows-link", ] @@ -2516,6 +2653,12 @@ version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "png" version = "0.18.1" @@ -2529,6 +2672,12 @@ dependencies = [ 
"miniz_oxide", ] +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "potential_utf" version = "0.1.5" @@ -2692,6 +2841,92 @@ checksum = "e0c5ccf5294c6ccd63a74f1565028353830a9c2f5eb0c682c355c471726a6e3f" [[package]] name = "py" version = "0.1.0" +dependencies = [ + "engine", + "parking_lot", + "pyo3", + "pyo3-async-runtimes", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", +] + +[[package]] +name = "pyo3" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-async-runtimes" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2529f0be73ffd2be0cc43c013a640796558aa12d7ca0aab5cc14f375b4733031" +dependencies = [ + "futures", + "once_cell", + "pin-project-lite", + "pyo3", + "tokio", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +dependencies = [ + "proc-macro2", + 
"pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] [[package]] name = "quick-error" @@ -2878,6 +3113,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_syscall" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_users" version = "0.4.6" @@ -3655,6 +3899,23 @@ dependencies = [ "windows", ] +[[package]] +name = "tar" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22692a6476a21fa75fdfc11d452fda482af402c008cdbaf3476414e122040973" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + [[package]] name = "tempfile" version = "3.27.0" @@ -4156,6 +4417,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" +[[package]] +name = "unicode-segmentation" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" + [[package]] name = "unicode-width" version = "0.2.2" @@ -4168,6 +4435,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unindent" +version = "0.2.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + [[package]] name = "untrusted" version = "0.9.0" @@ -4922,6 +5195,16 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + [[package]] name = "xz2" version = "0.1.7" diff --git a/Cargo.toml b/Cargo.toml index dae59e0..8e02be2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,10 +83,17 @@ walkdir = "2" lopdf = "0.34" zip = "2" +# Chrome auto-download +sha2 = "0.10" +flate2 = "1" +tar = "0.4" +dirs = "5" + # Bindings pyo3 = { version = "0.22", features = ["extension-module"] } -napi = { version = "2", features = ["napi8"] } +napi = { version = "2", features = ["napi8", "tokio_rt", "serde-json"] } napi-derive = "2" +napi-build = "2" [profile.release] opt-level = 3 diff --git a/README.md b/README.md index db61948..2d42938 100644 --- a/README.md +++ b/README.md @@ -1,154 +1,162 @@ -# Folio -

- Folio Logo + Folio

+

Folio

+

- - CI Status - - - Crates.io - - Rust Version - License - - Release - + A Rust-native, Gotenberg-compatible PDF service — with a live operator console.

- A modern, Rust-native PDF generation engine
- True browser-grade fidelity • Gotenberg-compatible API • Memory safe + Rust 1.75+ + Gotenberg parity ~85% + MIT

--- -## πŸ“– Table of Contents - -- [What is Folio?](#what-is-folio) -- [Why Folio?](#why-folio) -- [Quick Start](#quick-start) -- [Usage Modes](#usage-modes) -- [Features](#features) -- [Documentation](#documentation) -- [Project Structure](#project-structure) -- [Development](#development) -- [Testing](#testing) -- [Roadmap](#roadmap) -- [Contributing](#contributing) -- [License](#license) +Folio converts **HTML, URLs, Markdown, and Office documents** into PDFs using +real Chrome under the hood. It speaks the same HTTP API as +[Gotenberg](https://github.com/gotenberg/gotenberg), so most existing +clients can point at Folio with only a base-URL change. + +Unlike Gotenberg, Folio also runs as a **Rust library, a CLI, and a single +binary** β€” and ships with a live operator console at `/_/` so you can see +what your PDF service is actually doing without wiring up Grafana first. + +> **Status:** active. Core conversions and PDF ops are production-ready. +> Webhook callback delivery, batch ZIP output, and a few advanced Chromium +> options are still in progress β€” see the [feature comparison](./comparison.md). --- -## What is Folio? +## Why Folio + +- **Gotenberg-compatible.** Same routes (`/forms/chromium/*`, + `/forms/libreoffice/convert`, `/forms/pdfengines/*`), same multipart + contract. Drop-in for ~85% of workloads. +- **Memory-safe.** Rust core; no GC pauses, no parser-level CVEs from + malformed inputs. +- **Four ways to run it.** HTTP server, CLI, Rust library, Docker β€” pick + whichever fits your shape. The library is the source of truth; the + server and CLI are thin wrappers. +- **Observability-first.** Prometheus metrics, OpenTelemetry traces, and + a built-in Svelte SPA at `/_/` showing live RPS, p95 latency, + per-engine health, concurrency, and active batches over SSE. +- **Slim deployment targets.** Multi-stage Dockerfile produces full, + Chromium-only, LibreOffice-only, Cloud Run, and Lambda images. 
+ +For the honest comparison against Gotenberg (what's parity, what's behind, +what's ahead) read [`comparison.md`](./comparison.md). -**Folio** (from Latin *folium*, meaning "leaf" or "sheet of paper") is a high-performance PDF generation engine built in Rust. It converts HTML, URLs, Markdown, and Office documents to PDF with **true browser-grade fidelity** by leveraging Chrome's rendering engine via the Chrome DevTools Protocol (CDP). +--- -> Like a printer's folio marks the beginning of a new page, Folio marks a new chapter in document conversion technology. +## 60-second quickstart -### Key Highlights +```bash +# Run the server (Docker, full image) +docker run --rm -p 3000:3000 ghcr.io/__deesh_reddy__/folio:latest -- **True Browser Fidelity**: Renders using real Chrome/Chromium β€” full CSS3, JavaScript, Web Fonts support -- **Gotenberg-Compatible**: Drop-in replacement for existing Gotenberg deployments -- **Memory Safe**: Rust's compile-time guarantees prevent entire classes of bugs -- **Multiple Interfaces**: HTTP API, CLI, Rust library, and language bindings (Python/Node.js) -- **Self-Contained**: Library mode requires no external HTTP services +# Convert a URL to PDF +curl -X POST http://localhost:3000/forms/chromium/convert/url \ + -F "url=https://example.com" \ + -F "landscape=true" \ + -o out.pdf ---- +# Open the operator console +open http://localhost:3000/_/ +``` -## Why Folio? +That's it. Same multipart contract for HTML, Markdown, Office, merge, +split, watermark, etc. 
-### Comparison Table +--- -| Feature | **Folio** | Gotenberg | WeasyPrint | wkhtmltopdf | -|---------|------------|-----------|-------------|-------------| -| **Language** | Rust πŸ¦€ | Go | Python | C++ | -| **Rendering** | Chrome (CDP) | Chrome | Custom engine | QtWebKit (2012) | -| **Modern CSS** | βœ… Full | βœ… Full | ⚠️ Limited | ❌ Legacy | -| **JavaScript** | βœ… Full V8 | βœ… Full | ❌ None | ⚠️ ES3 | -| **Usage Modes** | 4 (Server/CLI/Lib/Bindings) | Server only | Library only | CLI only | -| **Memory Safety** | βœ… Compile-time | GC | Runtime | Manual | -| **Gotenberg API** | βœ… Compatible | βœ… Native | ❌ | ❌ | -| **Screenshots** | βœ… Done | βœ… | ❌ | ❌ | -| **Structured Logging** | βœ… Full (tracing) | βœ… (slog) | ❌ | ❌ | -| **Prometheus Metrics** | βœ… `/prometheus/metrics` | βœ… | ❌ | ❌ | -| **OpenTelemetry** | βœ… OTLP HTTP | βœ… | ❌ | ❌ | -| **Process Supervision** | 🚧 In Progress | βœ… | ❌ | ❌ | +## Install -### Architecture Pattern +| Surface | Command | +|------------------|---------------------------------------------------------------| +| Docker (full) | `docker pull ghcr.io/__deesh_reddy__/folio:latest` | +| Docker (slim) | `docker pull ghcr.io/__deesh_reddy__/folio:latest-chromium` | +| CLI (cargo) | `cargo install --path crates/cli` β†’ `folio --help` | +| Server (cargo) | `cargo run -p server -- serve --port 3000` | +| Library | `folio-engine = { path = "crates/engine" }` in `Cargo.toml` | -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ USAGE MODES β”‚ -β”‚ Server CLI Rust Lib Python Node.js β”‚ -β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ engine β”‚ ← Single source β”‚ 
-β”‚ β”‚ β€’ ChromiumEngine β”‚ of truth β”‚ -β”‚ β”‚ β€’ LibreOfficeEngine β”‚ β”‚ -β”‚ β”‚ β€’ PdfOperations β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ Chrome (CDP) β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` +**Prerequisites for non-Docker installs:** Rust 1.75+, Chrome/Chromium +(auto-detected, or set `CHROME_PATH`), and optionally LibreOffice for +Office conversion. ---- +### Embeddable bindings (v1: conversion) -## Quick Start +| Surface | Install | +|---|---| +| Python | `pip install folio` β€” see `bindings/python/README.md` | +| Node.js | `npm install @folio/folio` β€” see `bindings/node/README.md` | -### Prerequisites +Both bindings auto-download a pinned Chrome on first use if no system +Chrome is found. v1 supports HTML / URL / Markdown / Office β†’ PDF; +PDF ops and screenshots ship in v2 (spec: +`docs/superpowers/specs/2026-05-01-bindings-design.md`). -- **Rust** 1.75+ ([install](https://rustup.rs/)) -- **Chrome/Chromium** (auto-detected) or set `CHROME_PATH` -- **LibreOffice** (optional, for Office document conversion) +--- -### Option 1: HTTP Server (Gotenberg-Compatible) +## HTTP API at a glance -```bash -# Build and run -cargo run -p server -- serve --port 3000 +All routes are `POST` and accept multipart/form-data unless noted. -# Or with Docker (full image β€” Chromium + LibreOffice) -docker build --target folio -t folio:latest . 
-docker run -p 3000:3000 folio:latest +### Chromium (HTML / URL / Markdown β†’ PDF or screenshot) +``` +/forms/chromium/convert/{html,url,markdown} +/forms/chromium/screenshot/{html,url,markdown} +``` -# Convert URL to PDF -curl -X POST http://localhost:3000/forms/chromium/convert/url \ - -F "url=https://example.com" \ - -F "landscape=true" \ - -o output.pdf +### LibreOffice (100+ Office formats β†’ PDF) +``` +/forms/libreoffice/convert ``` -### Option 2: CLI +### PDF operations +``` +/forms/pdfengines/{merge,split,flatten,rotate,watermark,convert,encrypt} +/forms/pdfengines/metadata/{read,write} +/forms/pdfengines/bookmarks/{read,write} +``` -```bash -# Install -cargo install --path crates/cli +### Operational +``` +GET /health β†’ JSON health + per-engine status +GET /version β†’ plain text +GET /prometheus/metrics β†’ Prometheus text format +GET /_/ β†’ operator console (SPA) +GET /_/sse β†’ Server-Sent Events stream +``` + +For the gap analysis vs Gotenberg, see [`comparison.md`](./comparison.md). -# Convert HTML to PDF -folio convert --html index.html --output out.pdf +--- -# Convert URL to PDF -folio convert --url https://example.com --output out.pdf +## CLI -# Batch conversion -folio batch --input-dir ./docs/ --output-dir ./pdfs/ +```bash +folio convert --html index.html --output out.pdf +folio convert --url https://example.com --output out.pdf +folio convert --markdown README.md --output out.pdf +folio convert --office report.docx --output out.pdf + +folio merge a.pdf b.pdf c.pdf --output combined.pdf +folio split input.pdf --mode uniform --span 1 --output-dir ./pages/ +folio flatten input.pdf --output flat.pdf +folio rotate input.pdf --angle 90 --output rotated.pdf +folio metadata read input.pdf +folio metadata write input.pdf '{"Title":"Q2 Review"}' ``` -### Option 3: Rust Library +Shell completions: `folio completion zsh > ~/.zfunc/_folio`. 
-```toml -# Cargo.toml -[dependencies] -folio-engine = { path = "crates/engine" } -``` +--- + +## Library ```rust use engine::ChromiumEngine; @@ -156,459 +164,167 @@ use engine::ChromiumEngine; #[tokio::main] async fn main() -> anyhow::Result<()> { let engine = ChromiumEngine::launch().await?; - let pdf = engine.html_to_pdf("

Hello World

", None, &Default::default(), &Default::default()).await?; - std::fs::write("output.pdf", pdf)?; + let pdf = engine + .html_to_pdf("

Hello

", None, &Default::default(), &Default::default()) + .await?; + std::fs::write("out.pdf", pdf)?; Ok(()) } ``` -### Option 4: Docker Compose (Development) - -```bash -# Copy example environment file -cp .env.example .env - -# Start Folio with all dependencies -make run - -# Run tests -make test-integration - -# Stop -make stop -``` +The engine crate has zero dependency on `axum` or `tower` β€” it's the same +code path the server uses, just without an HTTP layer in front. --- -## Usage Modes +## Operator console -### 1. Server Mode (HTTP API) +`GET /_/` serves a Svelte SPA driven by Server-Sent Events. In one screen: -Gotenberg-compatible REST API for document conversion: +- **Ticker:** RPS, p95 latency, error %, in-flight count +- **Routes table:** per-endpoint p50 / p95 / p99, error %, load % +- **Engines:** Chromium / LibreOffice up-down + restart count +- **Concurrency grid:** active vs cap, with warn/crit thresholds +- **Throughput strip:** 30-min RPS + p95 trend with SLA overlay +- **Resources:** CPU % and memory MB +- **Batches:** progress + per-item state for active batches +- **Logs:** last 20 requests, last 10 errors -| Endpoint | Method | Input | Output | -|----------|--------|-------|--------| -| `/forms/chromium/convert/html` | POST | HTML file | PDF | -| `/forms/chromium/convert/url` | POST | URL | PDF | -| `/forms/chromium/convert/markdown` | POST | Markdown | PDF | -| `/forms/chromium/screenshot/html` | POST | HTML | PNG/JPEG/WebP | -| `/forms/libreoffice/convert` | POST | Office docs | PDF | -| `/forms/pdfengines/merge` | POST | PDFs | Merged PDF | -| `/forms/pdfengines/split` | POST | PDF | Split PDFs | -| `/health` | GET | - | Health status | +This is the cleanest lead Folio has over Gotenberg today; it's where the +last 30 commits have lived. If you've ever bolted Grafana onto Gotenberg +just to see whether it's healthy β€” this replaces that step. -See [API Documentation](./docs/gotenberg-spec.md) for full details. +--- -### 2. 
CLI Mode +## Configuration -Command-line interface for batch operations and scripting: +Common flags (every flag is also `FOLIO_*` env-overridable): ```bash -# Convert various formats -folio convert --html file.html --output out.pdf -folio convert --url https://example.com --output out.pdf -folio convert --markdown README.md --output readme.pdf - -# PDF operations -folio merge --output combined.pdf file1.pdf file2.pdf -folio split input.pdf --output-dir ./split/ -folio flatten input.pdf --output flat.pdf -folio metadata read input.pdf -``` - -### 3. Library Mode (Rust) - -Use Folio as a Rust library in your applications: - -```rust -// HTML to PDF -let engine = ChromiumEngine::launch().await?; -let pdf = engine.html_to_pdf(html, None, &opts, &ctx).await?; - -// URL to PDF -let pdf = engine.url_to_pdf("https://example.com", &opts, &ctx).await?; - -// Markdown to PDF -let pdf = engine.markdown_to_pdf(markdown, &opts, &ctx).await?; -``` - -### 4. Language Bindings - -**Python** ([Planned]): -```python -import folio - -engine = folio.ChromiumEngine() -pdf = engine.html_to_pdf("

Hello

") -``` - -**Node.js** ([Planned]): -```javascript -const folio = require('folio'); -const engine = new folio.ChromiumEngine(); -const pdf = await engine.htmlToPdf('

Hello

'); +folio-server serve \ + --host 0.0.0.0 --port 3000 \ + --concurrency 8 \ + --max-body-bytes 52428800 \ # 50 MiB + --request-timeout 120s \ + --chrome /usr/bin/google-chrome --no-sandbox \ + --soffice /usr/bin/soffice \ + --log-level info --log-format json \ + --api-basic-auth-username admin --api-basic-auth-password secret \ + --otel-enabled --otel-endpoint http://localhost:4318/v1/traces ``` ---- +Run `folio-server serve --help` for the full flag reference. -## Features - -### βœ… Implemented - -- **HTML/URL to PDF**: Full Chrome rendering with print CSS support -- **Markdown to PDF**: GitHub Flavored Markdown with syntax highlighting -- **Office Documents**: Convert 100+ formats via LibreOffice (DOC, DOCX, PPT, XLS, ODT, etc.) -- **PDF Operations**: Merge, split, flatten, rotate, watermark -- **PDF Metadata**: Read/write PDF metadata -- **Gotenberg Compatibility**: Drop-in API replacement -- **Health Checks**: `/health` endpoint with engine status -- **Concurrent Rendering**: Thread-safe browser instance sharing -- **Screenshots**: URL/HTML/Markdown to PNG/JPEG/WebP -- **BDD Testing**: Port Gotenberg's Gherkin scenarios to Rust -- **Webhook System**: Async job dispatch with retry, full engine integration (spec 15) -- **Structured Logging**: Context-aware logs with request_id, engine type, duration (text/JSON formats) -- **Prometheus Metrics**: `/prometheus/metrics` endpoint with conversion, queue, and engine metrics - -### 🚧 In Progress / Partially Done - -- **Advanced Wait Conditions**: `skipNetworkIdleEvent`, `failOnResourceLoadingFailed`, etc. 
(spec 36) -- **Advanced LibreOffice Fields**: 30+ missing export options (spec 37) -- **Full CLI Flag Parity**: Many Gotenberg flags still missing (spec 39) -- **Actionable Errors**: Structured error responses, room for enhancement (spec 44) -- **BDD Test Suite**: Framework exists, scenario coverage incomplete (spec 50) -- **Batch API**: CLI batch works; server-side bulk endpoint pending (spec 50-batch) -- **Health Dashboard**: JSON `/health` works; visual HTML dashboard pending (spec 51) - -### ❌ Not Started (Spec-Only) - -- **Python / Node.js Bindings**: Empty placeholders only (specs 40, 41) -- **Multi-Backend PDF Engines**: qpdf, pdfcpu, pdftk backends (spec 38) -- **Special Features**: TLS, auth, cloud-run, remote URL download (spec 40-special) -- **Smart PDF Optimiser**: Automatic bloat detection & compression (spec 42) -- **Font Doctor**: Font rendering diagnostics (spec 43) -- **Live Preview**: HTMLβ†’image debug preview (spec 45) -- **PDF Size Estimator**: Pre-flight size prediction (spec 46) -- **One-Command Install**: `curl | bash` installer (spec 47) -- **Interactive Docs**: Built-in `/docs` API explorer (spec 48) -- **Template Library**: Pre-built document templates (spec 49) - -> **Note:** This README is a high-level overview. For a ground-truth audit of what is actually built vs. spec claims, see [`docs/implementation-status.md`](./docs/implementation-status.md). The `20-missing-features-roadmap.md` spec is currently stale and should not be relied upon for current status. +**TLS is intentionally not handled in-process.** Put nginx, Caddy, or +envoy in front. Cert rotation, OCSP stapling, and ALPN are not things +Folio is positioned to do better than they do. 
--- -## Documentation - -### Core Documentation - -| Document | Description | -|----------|-------------| -| [Technical Specification](./docs/proposal.md) | Full architecture and design | -| [Gotenberg API Spec](./docs/gotenberg-spec.md) | API compatibility details | -| [Gap Analysis](./docs/gap-analysis.md) | Research findings | - -### Specs (Implementation Guides) - -| Spec | Description | Status | -|------|-------------|--------| -| [00-overview](./docs/specs/00-overview.md) | Spec system overview & conventions | πŸ“‹ Reference | -| [10-engine-types](./docs/specs/10-engine-types.md) | Core types, errors, options | βœ… Done | -| [11-engine-chromium](./docs/specs/11-engine-chromium.md) | Chromium engine (HTML/URL/Markdownβ†’PDF + screenshots) | βœ… Done | -| [12-engine-libreoffice](./docs/specs/12-engine-libreoffice.md) | LibreOffice engine (Officeβ†’PDF) | βœ… Done | -| [13-engine-pdfops](./docs/specs/13-engine-pdfops.md) | PDF operations (merge, split, flatten, metadata, watermark, rotate) | βœ… Done | -| [14-engine-pdfa](./docs/specs/14-engine-pdfa.md) | PDF/A & PDF/UA conformance conversion | βœ… Done | -| [15-webhook](./docs/specs/15-webhook.md) | Async webhook callback system | 🚧 Partially Done | -| [16-bookmarks](./docs/specs/16-bookmarks.md) | PDF bookmarks/outline read & write | βœ… Done | -| [17-watermark](./docs/specs/17-watermark.md) | PDF watermark & stamp overlay | βœ… Done *(via spec 13)* | -| [18-screenshot](./docs/specs/18-screenshot.md) | Chromium screenshot API (PNG/JPEG/WebP) | βœ… Done *(via spec 11)* | -| [19-encrypt](./docs/specs/19-encrypt.md) | PDF encryption & password protection | βœ… Done | -| [20-cli](./docs/specs/20-cli.md) | Command-line interface (`folio` binary) | βœ… Done | -| [20-bdd-testing](./docs/specs/20-bdd-testing.md) | BDD test strategy | 🚧 Partially Done | -| [20-missing-features-roadmap](./docs/specs/20-missing-features-roadmap.md) | Feature parity roadmap vs Gotenberg | πŸ“‹ Reference | -| 
[30-server](./docs/specs/30-server.md) | HTTP server (Gotenberg-compatible API) | βœ… Done | -| [36-chromium-wait-conditions](./docs/specs/36-chromium-wait-conditions.md) | Advanced wait conditions & options | 🚧 Partially Done | -| [37-libreoffice-advanced](./docs/specs/37-libreoffice-advanced.md) | Advanced LibreOffice form fields | 🚧 Partially Done | -| [38-pdfengines-backends](./docs/specs/38-pdfengines-backends.md) | Multi-backend support (qpdf, pdfcpu, pdftk) | ❌ Not Done | -| [39-config-flags](./docs/specs/39-config-flags.md) | Full Gotenberg CLI flag parity | 🚧 Partially Done | -| [40-bindings-py](./docs/specs/40-bindings-py.md) | Python bindings (`py` crate) | ❌ Not Done *(placeholder)* | -| [40-special-features](./docs/specs/40-special-features.md) | TLS, auth, cloud-run, remote URL download | ❌ Not Done | -| [41-bindings-js](./docs/specs/41-bindings-js.md) | Node.js bindings (`js` crate) | ❌ Not Done *(placeholder)* | -| [41-github-issues-analysis](./docs/specs/41-github-issues-analysis.md) | User pain-point research from GitHub issues | πŸ“‹ Research | -| [42-smart-pdf-optimiser](./docs/specs/42-smart-pdf-optimiser.md) | Automatic PDF size optimisation | ❌ Not Done | -| [43-font-doctor](./docs/specs/43-font-doctor.md) | Font rendering diagnostics & fixes | ❌ Not Done | -| [44-crystal-clear-errors](./docs/specs/44-crystal-clear-errors.md) | Actionable error messages (replace generic 500s) | 🚧 Partially Done | -| [45-live-preview-mode](./docs/specs/45-live-preview-mode.md) | Live HTMLβ†’image preview for debugging | ❌ Not Done | -| [46-pdf-size-estimator](./docs/specs/46-pdf-size-estimator.md) | Pre-flight PDF size prediction | ❌ Not Done | -| [47-one-command-install](./docs/specs/47-one-command-install.md) | Frictionless install (`curl | bash`) | ❌ Not Done | -| [48-interactive-docs](./docs/specs/48-interactive-docs.md) | Built-in API explorer at `/docs` | ❌ Not Done | -| [49-template-library](./docs/specs/49-template-library.md) | Pre-built document 
templates | ❌ Not Done | -| [50-batch-api](./docs/specs/50-batch-api.md) | Bulk conversion API (100+ docs in one request) | 🚧 Partially Done *(CLI batch only)* | -| [50-testing-bdd](./docs/specs/50-testing-bdd.md) | BDD integration test suite (Gherkinβ†’Rust) | 🚧 Partially Done | -| [51-health-dashboard](./docs/specs/51-health-dashboard.md) | Visual health dashboard beyond JSON `/health` | 🚧 Partially Done | - -**Legend:** `βœ… Done` = fully implemented & tested. `🚧 Partially Done` = core working, gaps remain. `❌ Not Done` = spec only, no code. `πŸ“‹ Reference` = meta-doc or research, no code expected. - -### API Reference - -- **Chromium Routes**: `/forms/chromium/*` (convert HTML/URL/Markdown, screenshots) -- **LibreOffice Routes**: `/forms/libreoffice/*` (convert Office docs) -- **PDF Engine Routes**: `/forms/pdfengines/*` (merge, split, flatten, etc.) +## Docker variants ---- +Single `Dockerfile`, multiple `--target` stages β€” pick the smallest one +that does what you need. -## Project Structure +| Target | Contains | Use case | +|------------------------------|----------------------|----------------------| +| `folio` | Chromium + LO | Default | +| `folio-chromium` | Chromium | HTML/URL/Markdown only (~30% smaller) | +| `folio-libreoffice` | LO | Office docs only (~40% smaller) | +| `folio-cloudrun` | Full + Cloud Run env | Google Cloud Run | +| `folio-lambda` | Full + Lambda Web Adapter | AWS Lambda | +| `folio-{cloudrun,lambda}-{chromium,libreoffice}` | Slim + platform | Mix-and-match | -``` -folio/ -β”œβ”€β”€ Cargo.toml # Workspace definition -β”œβ”€β”€ README.md # This file -β”œβ”€β”€ Dockerfile # Single file, 9 named --target variants (see Docker section) -β”œβ”€β”€ Dockerfile.test # Test environment (poppler, JRE, verapdf) -β”œβ”€β”€ docker-compose.yml # Development environment -β”œβ”€β”€ Makefile # Build/test/docker automation -β”œβ”€β”€ .env.example # Configuration template -β”‚ -β”œβ”€β”€ crates/ -β”‚ β”œβ”€β”€ engine/ # Core PDF generation engine 
-β”‚ β”‚ β”œβ”€β”€ src/ -β”‚ β”‚ β”‚ β”œβ”€β”€ chromium/ # Chrome/Chromium integration -β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ launch.rs # Browser discovery & launch -β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ render.rs # HTML/URL β†’ PDF -β”‚ β”‚ β”‚ β”‚ └── screenshot.rs # Screenshots (βœ…) -β”‚ β”‚ β”‚ β”œβ”€β”€ libreoffice/ # LibreOffice integration -β”‚ β”‚ β”‚ └── pdfops/ # PDF manipulation -β”‚ β”‚ └── Cargo.toml -β”‚ β”‚ -β”‚ β”œβ”€β”€ server/ # HTTP server (Gotenberg-compatible) -β”‚ β”‚ β”œβ”€β”€ src/ -β”‚ β”‚ β”‚ β”œβ”€β”€ routes/ # API route handlers -β”‚ β”‚ β”‚ └── app.rs # Router configuration -β”‚ β”‚ └── tests/ # Integration tests -β”‚ β”‚ -β”‚ β”œβ”€β”€ cli/ # Command-line interface -β”‚ β”‚ └── src/commands/ # CLI subcommands -β”‚ β”‚ -β”‚ -β”œβ”€β”€ docs/ -β”‚ β”œβ”€β”€ proposal.md # Technical specification -β”‚ β”œβ”€β”€ gotenberg-spec.md # Gotenberg API analysis -β”‚ β”œβ”€β”€ gap-analysis.md # Research findings -β”‚ β”œβ”€β”€ assets/ # Images, logos -β”‚ └── specs/ # Implementation specs (32 files, see table above) -β”‚ -└── crates/*/tests/ # Crate-local tests (unit + integration) - └── server/tests/bdd/ # BDD integration tests +```bash +docker build --target folio-chromium -t folio:chromium . +make docker-push-all DOCKER_REGISTRY=ghcr.io/me VERSION=1.0.0 ``` --- -## Development - -### Building from Source - -```bash -# Clone the repository -git clone https://github.com/yourusername/folio.git -cd folio - -# Build all crates -cargo build --release +## Where things stand -# Run tests -cargo test +A short, honest scorecard. The full version is [`comparison.md`](./comparison.md). -# Run with specific features -cargo run -p server -- serve --help -``` - -### Docker Image Variants +**Ready to use:** +HTML/URL/Markdownβ†’PDF Β· Officeβ†’PDF Β· screenshots Β· merge Β· split Β· flatten Β· +rotate Β· watermark Β· metadata Β· bookmarks Β· encrypt Β· PDF/A & PDF/UA Β· +Basic Auth Β· Prometheus Β· OpenTelemetry Β· operator console Β· CLI Β· Rust +library Β· multi-target Docker. 
-All variants are built from a single `Dockerfile` using named `--target` stages, following Gotenberg's pattern. Each platform-specific variant (Cloud Run, Lambda) is a thin layer on top of the base variant β€” just environment variables. +**In progress:** +Webhook callback delivery (scaffold ready, delivery TODO) Β· +batch API ZIP/merge output (endpoints + worker exist) Β· +advanced Chromium wait/fail conditions (`waitForSelector`, `failOn*`) Β· +long tail of LibreOffice export filters Β· `embed` and full `stamp` routes. -| Target | Tag | Description | -|--------|-----|-------------| -| `folio` | `latest`, `vX.Y.Z` | Full: Chromium + LibreOffice | -| `folio-chromium` | `latest-chromium` | Chromium only (~30% smaller) | -| `folio-libreoffice` | `latest-libreoffice` | LibreOffice only (~40% smaller) | -| `folio-cloudrun` | `latest-cloudrun` | Full + Google Cloud Run env vars | -| `folio-cloudrun-chromium` | `latest-chromium-cloudrun` | Chromium + Cloud Run | -| `folio-cloudrun-libreoffice` | `latest-libreoffice-cloudrun` | LibreOffice + Cloud Run | -| `folio-lambda` | `latest-lambda` | Full + [Lambda Web Adapter](https://github.com/awslabs/aws-lambda-web-adapter) | -| `folio-lambda-chromium` | `latest-chromium-lambda` | Chromium + Lambda | -| `folio-lambda-libreoffice` | `latest-libreoffice-lambda` | LibreOffice + Lambda | +**Deliberate gaps:** +TLS in-process (use a reverse proxy) Β· OAuth/JWT/RBAC (use a reverse +proxy) Β· workflow/DAG engine on top of batch (out of scope). -```bash -# Build a specific variant -docker build --target folio-chromium -t myrepo/folio:chromium . +**Empty placeholders (will be removed if not built):** +Python bindings (`crates/py/`), Node bindings (`crates/js/`). 
-# Build + push all 9 variants -make docker-push-all DOCKER_REGISTRY=myrepo/folio VERSION=1.0.0 +--- -# Run with Docker Compose (default: full image) -docker compose up folio +## Documentation -# Run Chromium-only profile -docker compose --profile chromium up folio-chromium -``` +- [`comparison.md`](./comparison.md) β€” in-depth audit vs Gotenberg +- [`docs/markdown-plus.md`](./docs/markdown-plus.md) β€” proposed + enhanced Markdown route (front-matter, math, mermaid, themes) -### Development Commands - -| Command | Description | -|---------|-------------| -| `make docker-build` | Build full Docker image | -| `make docker-build-all` | Build all 9 variants | -| `make docker-push-all` | Build and push all variants | -| `make run` | Start Folio via Docker Compose | -| `make test-unit` | Run unit tests | -| `make test-integration` | Run integration tests (requires Chrome) | -| `make fmt` | Format code | -| `make lint` | Lint with Clippy | -| `make check` | Run format + lint + unit tests | -| `make clean` | Clean build artifacts | - -### Environment Variables - -| Variable | Description | Default | -|----------|-------------|---------| -| `CHROME_PATH` | Path to Chrome/Chromium executable | Auto-detected | -| `LIBREOFFICE_PATH` | Path to LibreOffice (soffice) | Auto-detected | -| `RUST_LOG` | Log level (trace, debug, info, warn, error) | `info` | -| `FOLIO_PORT` | Server port | `3000` | -| `FOLIO_CONCURRENCY` | Max concurrent renders | CPU count | -| `FOLIO_OTEL_ENABLED` | Enable OpenTelemetry trace export | `false` | -| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP HTTP trace endpoint | `http://localhost:4318/v1/traces` | +> **Note on specs.** The previous 32-file `docs/specs/` tree has been +> archived to [`docs/specs-archive-2026-05-01.zip`](./docs/specs-archive-2026-05-01.zip). +> Fresh, better-organised contributor-facing specs are being written and +> will reappear under `docs/` shortly. 
--- -## Testing - -### Test Structure - -``` -tests/ -β”œβ”€β”€ unit/ # Unit tests (cargo test --lib) -β”œβ”€β”€ integration/ # BDD integration tests (🚧) -β”‚ β”œβ”€β”€ scenarios/ # Test scenarios (ported from Gotenberg) -β”‚ β”œβ”€β”€ common/ # Test helpers -β”‚ └── testdata/ # Test fixtures -└── e2e/ # End-to-end tests -``` - -### Running Tests +## Development ```bash -# Unit tests (no Chrome required) -cargo test --lib +git clone https://github.com/__deesh_reddy__/folio.git && cd folio -# Integration tests (skip gracefully if deps missing) -cargo test -p server --test bdd - -# E2E tests (skip gracefully if deps missing) -cargo test -p server --test e2e - -# All tests (skip gracefully if deps missing) -cargo test -- --test-threads=1 - -# All tests with Docker -make docker-test +cargo build --release # build everything +cargo test # unit + integration (skips gracefully if Chrome missing) +make check # fmt + clippy + unit tests (run before PRs) +make run # docker-compose up, full image +make test-integration # BDD scenarios in Docker ``` -### Test Coverage +| Command | What it does | +|-------------------------|---------------------------------------| +| `make docker-build` | Build full image | +| `make docker-build-all` | Build all 9 image variants | +| `make test-unit` | `cargo test --lib` | +| `make test-integration` | BDD + e2e in container | +| `make fmt` / `make lint`| `cargo fmt` / `cargo clippy` | -We're porting Gotenberg's comprehensive BDD test suite: - -- βœ… Unit tests: 50+ test cases -- 🚧 Integration tests: BDD framework with 25+ feature files (scenario pass rate unverified) -- βœ… E2E tests: Server + CLI smoke tests - -See [BDD Testing Spec](./docs/specs/50-testing-bdd.md) for details. 
- ---- - -## Roadmap - -### Phase 1: Core Features βœ… -- [x] HTML/URL/Markdown β†’ PDF (Chromium) β€” spec 11 -- [x] Office documents β†’ PDF (LibreOffice) β€” spec 12 -- [x] PDF operations (merge, split, flatten, rotate, watermark) β€” spec 13 -- [x] PDF metadata read/write β€” spec 13 -- [x] Gotenberg-compatible API β€” spec 30 -- [x] Screenshots (HTML/URL/Markdown β†’ PNG/JPEG/WebP) β€” spec 11 / 18 -- [x] Structured Logging (tracing with text/JSON formats) -- [x] Prometheus Metrics (`/prometheus/metrics` endpoint) -- [x] OpenTelemetry Traces (OTLP HTTP exporter) -- [x] CLI (`folio` binary) β€” spec 20 - -### Phase 2: Advanced Engine Features 🚧 -- [x] PDF/A & PDF/UA conformance conversion β€” spec 14 -- [x] PDF bookmarks read/write β€” spec 16 -- [x] PDF encryption & password protection β€” spec 19 -- [ ] Advanced Chromium wait conditions β€” spec 36 -- [ ] Advanced LibreOffice form fields β€” spec 37 -- [ ] Multi-backend PDF engines (qpdf, pdfcpu, pdftk) β€” spec 38 - -### Phase 3: Server & Infrastructure 🚧 -- [ ] Webhook system with retry β€” spec 15 -- [ ] Full CLI flag parity with Gotenberg β€” spec 39 -- [ ] Batch API (server-side bulk conversion) β€” spec 50-batch -- [ ] Actionable error messages β€” spec 44 -- [ ] Visual health dashboard β€” spec 51 - -### Phase 4: Bindings & Ecosystem ❌ -- [ ] Python bindings (`py` crate) β€” spec 40 -- [ ] Node.js bindings (`js` crate) β€” spec 41 -- [ ] TLS, auth, cloud-run, remote URL download β€” spec 40-special - -### Phase 5: Unique Folio Features ❌ -- [ ] Smart PDF optimiser β€” spec 42 -- [ ] Font doctor / diagnostics β€” spec 43 -- [ ] Live preview mode β€” spec 45 -- [ ] PDF size estimator β€” spec 46 -- [ ] One-command install (`curl | bash`) β€” spec 47 -- [ ] Interactive API docs (`/docs`) β€” spec 48 -- [ ] Template library β€” spec 49 - -See [Full Roadmap](./docs/specs/20-missing-features-roadmap.md) and detailed specs in [docs/specs/](./docs/specs/) for planning. 
+**Useful env vars:** `CHROME_PATH`, `LIBREOFFICE_PATH`, `RUST_LOG`, +`FOLIO_PORT`, `FOLIO_CONCURRENCY`, `OTEL_EXPORTER_OTLP_ENDPOINT`. --- ## Contributing -Contributions are welcome! Please read our [contributing guidelines](./CONTRIBUTING.md) before submitting a PR. - -### Quick Contribution Guide +PRs welcome. Three things that make a PR easy to land: -1. Fork the repository -2. Create a feature branch (`git checkout -b feature/amazing-feature`) -3. Commit your changes (`git commit -m 'feat: add amazing feature'`) -4. Push to the branch (`git push origin feature/amazing-feature`) -5. Open a Pull Request +1. `make check` passes locally. +2. Conventional Commits style (`feat:`, `fix:`, `docs:`, `chore:`). +3. One feature or fix per PR β€” split mixed work. -### Development Workflow - -- Use [Conventional Commits](https://www.conventionalcommits.org/) for commit messages -- Ensure `make check` passes before submitting PR -- Add tests for new functionality -- Update documentation as needed -- Keep PRs focused on a single feature/fix +For larger changes, open an issue first so we can agree on the shape +before code. --- -## Acknowledgments - -- **[Gotenberg](https://github.com/gotenberg/gotenberg)** - The original PDF generation API that inspired this project -- **[chromiumoxide](https://github.com/mattsse/chromiumoxide)** - Chrome DevTools Protocol client for Rust -- **[lopdf](https://github.com/Hopding/lopdf)** - Pure Rust PDF manipulation library -- **[Axum](https://github.com/tokio-rs/axum)** - Ergonomic HTTP server framework +## Acknowledgements ---- +- [Gotenberg](https://github.com/gotenberg/gotenberg) β€” the API contract Folio implements +- [chromiumoxide](https://github.com/mattsse/chromiumoxide) β€” Chrome DevTools Protocol client +- [lopdf](https://github.com/J-F-Liu/lopdf) β€” pure-Rust PDF manipulation +- [axum](https://github.com/tokio-rs/axum) β€” HTTP server ## License -Folio is licensed under the MIT License - see [LICENSE](LICENSE) for details. 
- ---- - -

- Built with ❤️ in Rust 🦀
- Folio: A new page in PDF generation. -

+MIT. See [LICENSE](./LICENSE). diff --git a/bindings/CHROME_VERSION b/bindings/CHROME_VERSION new file mode 100644 index 0000000..ba102db --- /dev/null +++ b/bindings/CHROME_VERSION @@ -0,0 +1 @@ +131.0.6778.204 \ No newline at end of file diff --git a/bindings/README.md b/bindings/README.md new file mode 100644 index 0000000..b8d5e0d --- /dev/null +++ b/bindings/README.md @@ -0,0 +1,15 @@ +# Folio bindings + +This directory ships Folio as embeddable libraries. + +- `bindings/python/` — maturin project producing the `folio` PyPI package. +- `bindings/node/` — napi-rs project producing the `@folio/folio` npm package. +- `bindings/fixtures/` — shared HTML/Office fixtures used by tests. +- `CHROME_VERSION` — pinned Chrome-for-Testing version. Bumped per release. + +The Rust glue lives in `crates/py` and `crates/js`. The Folio engine +itself is unchanged; bindings reuse `crates/engine` plus the new +`engine::chrome_fetch` module. + +See `docs/superpowers/specs/2026-05-01-bindings-design.md` for the full +design (v1 + v2). diff --git a/bindings/fixtures/hello.html b/bindings/fixtures/hello.html new file mode 100644 index 0000000..66c352f --- /dev/null +++ b/bindings/fixtures/hello.html @@ -0,0 +1 @@ +

folio e2e

diff --git a/bindings/node/.gitignore b/bindings/node/.gitignore new file mode 100644 index 0000000..4505758 --- /dev/null +++ b/bindings/node/.gitignore @@ -0,0 +1,6 @@ +node_modules/ +*.node +dist/ +*.log +_native.js +_native.d.ts diff --git a/bindings/node/README.md b/bindings/node/README.md new file mode 100644 index 0000000..7a10d64 --- /dev/null +++ b/bindings/node/README.md @@ -0,0 +1,14 @@ +# @folio/folio + +Rust-native PDF conversion, embeddable in Node. See spec at +`docs/superpowers/specs/2026-05-01-bindings-design.md`. + + npm install @folio/folio + + import { Folio } from '@folio/folio'; + const f = await Folio.create(); + try { + const pdf = await f.htmlToPdf('

hi

'); + } finally { + await f.close(); + } diff --git a/bindings/node/index.d.ts b/bindings/node/index.d.ts new file mode 100644 index 0000000..c978e07 --- /dev/null +++ b/bindings/node/index.d.ts @@ -0,0 +1,31 @@ +/* tslint:disable */ +/* eslint-disable */ + +/* auto-generated by NAPI-RS */ + +/** Options passed to [`Folio::create`]. */ +export interface CreateOptions { + /** Which engines to enable. Defaults to `["chromium", "office"]`. */ + engines?: Array + /** Explicit path to a Chrome/Chromium executable. */ + chromePath?: string + /** Automatically download Chrome if no system Chrome is found. */ + autoDownloadChrome?: boolean + /** Directory used to cache downloaded Chrome binaries. */ + chromeCacheDir?: string +} +/** Async Folio client that wraps the PDF/document engines. */ +export declare class Folio { + /** Create a new Folio instance, launching the requested engines. */ + static create(opts?: CreateOptions | undefined | null): Promise + /** Convert an HTML string to a PDF buffer. */ + htmlToPdf(html: string, options?: Json | undefined | null): Promise + /** Convert a URL to a PDF buffer. */ + urlToPdf(url: string, options?: Json | undefined | null): Promise + /** Convert a Markdown string to a PDF buffer. */ + markdownToPdf(md: string, options?: Json | undefined | null): Promise + /** Convert an office document at `path` to a PDF buffer. */ + officeToPdf(path: string, options?: Json | undefined | null): Promise + /** Shut down the Folio instance and release resources. 
*/ + close(): Promise +} diff --git a/bindings/node/index.js b/bindings/node/index.js new file mode 100644 index 0000000..ac7a4b5 --- /dev/null +++ b/bindings/node/index.js @@ -0,0 +1,60 @@ +'use strict'; + +const { Folio: NativeFolio } = require('./_native.js'); + +class FolioError extends Error { constructor(m){ super(m); this.name='FolioError'; } } +class ChromeNotFoundError extends FolioError { constructor(m){ super(m); this.name='ChromeNotFoundError'; } } +class ChromeFetchError extends FolioError { constructor(m){ super(m); this.name='ChromeFetchError'; } } +class ChromiumError extends FolioError { constructor(m){ super(m); this.name='ChromiumError'; } } +class OfficeError extends FolioError { constructor(m){ super(m); this.name='OfficeError'; } } +class EngineDisabledError extends FolioError { constructor(m){ super(m); this.name='EngineDisabledError'; } } +class TimeoutError extends FolioError { constructor(m){ super(m); this.name='TimeoutError'; } } +class ValidationError extends FolioError { constructor(m){ super(m); this.name='ValidationError'; } } + +const tagMap = { + ChromeNotFound: ChromeNotFoundError, + ChromeFetch: ChromeFetchError, + Chromium: ChromiumError, + Office: OfficeError, + EngineDisabled: EngineDisabledError, + Timeout: TimeoutError, + Validation: ValidationError, +}; + +function decorate(err) { + if (!(err instanceof Error)) return err; + const m = err.message || ''; + const match = m.match(/^\[(\w+)\]\s*(.*)$/); + if (!match) return err; + const Cls = tagMap[match[1]]; + if (!Cls) return err; + const decorated = new Cls(match[2]); + decorated.cause = err; + return decorated; +} + +function wrapMethod(fn) { + return async function(...args) { + try { return await fn.apply(this, args); } + catch (e) { throw decorate(e); } + }; +} + +class Folio { + constructor(inner) { this._inner = inner; } + static async create(opts) { + try { + const inner = await NativeFolio.create(opts); + return new Folio(inner); + } catch (e) { throw decorate(e); } 
+ } +} +for (const m of ['htmlToPdf', 'urlToPdf', 'markdownToPdf', 'officeToPdf', 'close']) { + Folio.prototype[m] = wrapMethod(function(...args) { return this._inner[m](...args); }); +} + +module.exports = { + Folio, + FolioError, ChromeNotFoundError, ChromeFetchError, ChromiumError, + OfficeError, EngineDisabledError, TimeoutError, ValidationError, +}; diff --git a/bindings/node/package-lock.json b/bindings/node/package-lock.json new file mode 100644 index 0000000..3e25ac0 --- /dev/null +++ b/bindings/node/package-lock.json @@ -0,0 +1,1890 @@ +{ + "name": "@folio/folio", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@folio/folio", + "version": "0.1.0", + "license": "MIT", + "devDependencies": { + "@napi-rs/cli": "^2.18.0", + "@types/node": "^20.0.0", + "vitest": "^1.6.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", + "integrity": "sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.21.5.tgz", + "integrity": "sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.21.5.tgz", + "integrity": "sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A==", + "cpu": [ + 
"arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.21.5.tgz", + "integrity": "sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.21.5.tgz", + "integrity": "sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.21.5.tgz", + "integrity": "sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.5.tgz", + "integrity": "sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.21.5.tgz", + "integrity": 
"sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.21.5.tgz", + "integrity": "sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.21.5.tgz", + "integrity": "sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.21.5.tgz", + "integrity": "sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.21.5.tgz", + "integrity": "sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.21.5", + "resolved": 
"https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.21.5.tgz", + "integrity": "sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.21.5.tgz", + "integrity": "sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.21.5.tgz", + "integrity": "sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.21.5.tgz", + "integrity": "sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.21.5.tgz", + "integrity": "sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + 
} + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", + "integrity": "sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", + "integrity": "sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", + "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz", + "integrity": "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz", + "integrity": "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, 
+ "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz", + "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@napi-rs/cli": { + "version": "2.18.4", + "resolved": "https://registry.npmjs.org/@napi-rs/cli/-/cli-2.18.4.tgz", + "integrity": "sha512-SgJeA4df9DE2iAEpr3M2H0OKl/yjtg1BnRI5/JyowS71tUWhrfSu2LT0V3vlHET+g1hBVlrO60PmEXwUEKp8Mg==", + "dev": true, + "license": "MIT", + "bin": { + "napi": "scripts/index.js" + }, + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.2.tgz", + "integrity": "sha512-dnlp69efPPg6Uaw2dVqzWRfAWRnYVb1XJ8CyyhIbZeaq4CA5/mLeZ1IEt9QqQxmbdvagjLIm2ZL8BxXv5lH4Yw==", + "cpu": [ + "arm" + ], + 
"dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.2.tgz", + "integrity": "sha512-OqZTwDRDchGRHHm/hwLOL7uVPB9aUvI0am/eQuWMNyFHf5PSEQmyEeYYheA0EPPKUO/l0uigCp+iaTjoLjVoHg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.2.tgz", + "integrity": "sha512-UwRE7CGpvSVEQS8gUMBe1uADWjNnVgP3Iusyda1nSRwNDCsRjnGc7w6El6WLQsXmZTbLZx9cecegumcitNfpmA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-darwin-x64": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.2.tgz", + "integrity": "sha512-gjEtURKLCC5VXm1I+2i1u9OhxFsKAQJKTVB8WvDAHF+oZlq0GTVFOlTlO1q3AlCTE/DF32c16ESvfgqR7343/g==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.2.tgz", + "integrity": "sha512-Bcl6CYDeAgE70cqZaMojOi/eK63h5Me97ZqAQoh77VPjMysA/4ORQBRGo3rRy45x4MzVlU9uZxs8Uwy7ZaKnBw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.2.tgz", + "integrity": "sha512-LU+TPda3mAE2QB0/Hp5VyeKJivpC6+tlOXd1VMoXV/YFMvk/MNk5iXeBfB4MQGRWyOYVJ01625vjkr0Az98OJQ==", + "cpu": [ + "x64" + ], 
+ "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.2.tgz", + "integrity": "sha512-2QxQrM+KQ7DAW4o22j+XZ6RKdxjLD7BOWTP0Bv0tmjdyhXSsr2Ul1oJDQqh9Zf5qOwTuTc7Ek83mOFaKnodPjg==", + "cpu": [ + "arm" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.2.tgz", + "integrity": "sha512-TbziEu2DVsTEOPif2mKWkMeDMLoYjx95oESa9fkQQK7r/Orta0gnkcDpzwufEcAO2BLBsD7mZkXGFqEdMRRwfw==", + "cpu": [ + "arm" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.2.tgz", + "integrity": "sha512-bO/rVDiDUuM2YfuCUwZ1t1cP+/yqjqz+Xf2VtkdppefuOFS2OSeAfgafaHNkFn0t02hEyXngZkxtGqXcXwO8Rg==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.2.tgz", + "integrity": "sha512-hr26p7e93Rl0Za+JwW7EAnwAvKkehh12BU1Llm9Ykiibg4uIr2rbpxG9WCf56GuvidlTG9KiiQT/TXT1yAWxTA==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.60.2", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.2.tgz", + "integrity": "sha512-pOjB/uSIyDt+ow3k/RcLvUAOGpysT2phDn7TTUB3n75SlIgZzM6NKAqlErPhoFU+npgY3/n+2HYIQVbF70P9/A==", + "cpu": [ + "loong64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-musl": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.2.tgz", + "integrity": "sha512-2/w+q8jszv9Ww1c+6uJT3OwqhdmGP2/4T17cu8WuwyUuuaCDDJ2ojdyYwZzCxx0GcsZBhzi3HmH+J5pZNXnd+Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.2.tgz", + "integrity": "sha512-11+aL5vKheYgczxtPVVRhdptAM2H7fcDR5Gw4/bTcteuZBlH4oP9f5s9zYO9aGZvoGeBpqXI/9TZZihZ609wKw==", + "cpu": [ + "ppc64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.2.tgz", + "integrity": "sha512-i16fokAGK46IVZuV8LIIwMdtqhin9hfYkCh8pf8iC3QU3LpwL+1FSFGej+O7l3E/AoknL6Dclh2oTdnRMpTzFQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.2.tgz", + "integrity": "sha512-49FkKS6RGQoriDSK/6E2GkAsAuU5kETFCh7pG4yD/ylj9rKhTmO3elsnmBvRD4PgJPds5W2PkhC82aVwmUcJ7A==", + "cpu": [ + 
"riscv64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.2.tgz", + "integrity": "sha512-mjYNkHPfGpUR00DuM1ZZIgs64Hpf4bWcz9Z41+4Q+pgDx73UwWdAYyf6EG/lRFldmdHHzgrYyge5akFUW0D3mQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.2.tgz", + "integrity": "sha512-ALyvJz965BQk8E9Al/JDKKDLH2kfKFLTGMlgkAbbYtZuJt9LU8DW3ZoDMCtQpXAltZxwBHevXz5u+gf0yA0YoA==", + "cpu": [ + "s390x" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.2.tgz", + "integrity": "sha512-UQjrkIdWrKI626Du8lCQ6MJp/6V1LAo2bOK9OTu4mSn8GGXIkPXk/Vsp4bLHCd9Z9Iz2OTEaokUE90VweJgIYQ==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.2.tgz", + "integrity": "sha512-bTsRGj6VlSdn/XD4CGyzMnzaBs9bsRxy79eTqTCBsA8TMIEky7qg48aPkvJvFe1HyzQ5oMZdg7AnVlWQSKLTnw==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.60.2", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.2.tgz", + "integrity": "sha512-6d4Z3534xitaA1FcMWP7mQPq5zGwBmGbhphh2DwaA1aNIXUu3KTOfwrWpbwI4/Gr0uANo7NTtaykFyO2hPuFLg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ] + }, + "node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.2.tgz", + "integrity": "sha512-NetAg5iO2uN7eB8zE5qrZ3CSil+7IJt4WDFLcC75Ymywq1VZVD6qJ6EvNLjZ3rEm6gB7XW5JdT60c6MN35Z85Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.2.tgz", + "integrity": "sha512-NCYhOotpgWZ5kdxCZsv6Iudx0wX8980Q/oW4pNFNihpBKsDbEA1zpkfxJGC0yugsUuyDZ7gL37dbzwhR0VI7pQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.2.tgz", + "integrity": "sha512-RXsaOqXxfoUBQoOgvmmijVxJnW2IGB0eoMO7F8FAjaj0UTywUO/luSqimWBJn04WNgUkeNhh7fs7pESXajWmkg==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.2.tgz", + "integrity": "sha512-qdAzEULD+/hzObedtmV6iBpdL5TIbKVztGiK7O3/KYSf+HIzU257+MX1EXJcyIiDbMAqmbwaufcYPvyRryeZtA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + 
"node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.2.tgz", + "integrity": "sha512-Nd/SgG27WoA9e+/TdK74KnHz852TLa94ovOYySo/yMPuTmpckK/jIF2jSwS3g7ELSKXK13/cVdmg1Z/DaCWKxA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@sinclair/typebox": { + "version": "0.27.10", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.10.tgz", + "integrity": "sha512-MTBk/3jGLNB2tVxv6uLlFh1iu64iYOQ2PbdOSK3NW8JZsmlaOh2q6sdtKowBhfw8QFLmYNzTW4/oK4uATIi6ZA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/estree": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", + "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "20.19.39", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.39.tgz", + "integrity": "sha512-orrrD74MBUyK8jOAD/r0+lfa1I2MO6I+vAkmAWzMYbCcgrN4lCrmK52gRFQq/JRxfYPfonkr4b0jcY7Olqdqbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/@vitest/expect": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-1.6.1.tgz", + "integrity": "sha512-jXL+9+ZNIJKruofqXuuTClf44eSpcHlgj3CiuNihUF3Ioujtmc0zIa3UJOW5RjDK1YLBJZnWBlPuqhYycLioog==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/spy": "1.6.1", + "@vitest/utils": "1.6.1", + "chai": "^4.3.10" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/runner": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-1.6.1.tgz", + "integrity": 
"sha512-3nSnYXkVkf3mXFfE7vVyPmi3Sazhb/2cfZGGs0JRzFsPFvAMBEcrweV1V1GsrstdXeKCTXlJbvnQwGWgEIHmOA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/utils": "1.6.1", + "p-limit": "^5.0.0", + "pathe": "^1.1.1" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/snapshot": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-1.6.1.tgz", + "integrity": "sha512-WvidQuWAzU2p95u8GAKlRMqMyN1yOJkGHnx3M1PL9Raf7AQ1kwLKg04ADlCa3+OXUZE7BceOhVZiuWAbzCKcUQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "magic-string": "^0.30.5", + "pathe": "^1.1.1", + "pretty-format": "^29.7.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/spy": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-1.6.1.tgz", + "integrity": "sha512-MGcMmpGkZebsMZhbQKkAf9CX5zGvjkBTqf8Zx3ApYWXr3wG+QvEu2eXWfnIIWYSJExIp4V9FCKDEeygzkYrXMw==", + "dev": true, + "license": "MIT", + "dependencies": { + "tinyspy": "^2.2.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/utils": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-1.6.1.tgz", + "integrity": "sha512-jOrrUvXM4Av9ZWiG1EajNto0u96kWAhJ1LmPmJhXXQx/32MecEKd10pOLYgS2BQx1TgkGhloPU1ArDW2vvaY6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "diff-sequences": "^29.6.3", + "estree-walker": "^3.0.3", + "loupe": "^2.3.7", + "pretty-format": "^29.7.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/acorn": { + "version": "8.16.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", + "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", + "dev": true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + 
"node_modules/acorn-walk": { + "version": "8.3.5", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.5.tgz", + "integrity": "sha512-HEHNfbars9v4pgpW6SO1KSPkfoS0xVOM/9UzkJltjlsHZmJasxg8aXkuZa7SMf8vKGIBhpUsPluQSqhJFCqebw==", + "dev": true, + "license": "MIT", + "dependencies": { + "acorn": "^8.11.0" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/assertion-error": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz", + "integrity": "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/cac": { + "version": "6.7.14", + "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", + "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/chai": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/chai/-/chai-4.5.0.tgz", + "integrity": "sha512-RITGBfijLkBddZvnn8jdqoTypxvqbOLYQkGGxXzeFjVHvudaPw0HNFD9x928/eUwYWd2dPCugVqspGALTZZQKw==", + "dev": true, + "license": "MIT", + "dependencies": { + "assertion-error": "^1.1.0", + "check-error": "^1.0.3", + "deep-eql": "^4.1.3", + "get-func-name": "^2.0.2", + "loupe": "^2.3.6", + "pathval": "^1.1.1", + "type-detect": "^4.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/check-error": { + "version": "1.0.3", + "resolved": 
"https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz", + "integrity": "sha512-iKEoDYaRmd1mxM90a2OEfWhjsjPpYPuQ+lMYsoxB126+t8fw7ySEO48nmDg5COTjxDI65/Y2OWpeEHk3ZOe8zg==", + "dev": true, + "license": "MIT", + "dependencies": { + "get-func-name": "^2.0.2" + }, + "engines": { + "node": "*" + } + }, + "node_modules/confbox": { + "version": "0.1.8", + "resolved": "https://registry.npmjs.org/confbox/-/confbox-0.1.8.tgz", + "integrity": "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==", + "dev": true, + "license": "MIT" + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/deep-eql": { + "version": "4.1.4", + "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-4.1.4.tgz", + "integrity": "sha512-SUwdGfqdKOwxCPeVYjwSyRpJ7Z+fhpwIAtmCUdZIWZ/YP5R9WAsyuSgpLVDi9bjWoN2LXHNss/dk3urXtdQxGg==", + "dev": true, + "license": "MIT", + "dependencies": { + "type-detect": "^4.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/diff-sequences": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz", + "integrity": 
"sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/esbuild": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", + "integrity": "sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.21.5", + "@esbuild/android-arm": "0.21.5", + "@esbuild/android-arm64": "0.21.5", + "@esbuild/android-x64": "0.21.5", + "@esbuild/darwin-arm64": "0.21.5", + "@esbuild/darwin-x64": "0.21.5", + "@esbuild/freebsd-arm64": "0.21.5", + "@esbuild/freebsd-x64": "0.21.5", + "@esbuild/linux-arm": "0.21.5", + "@esbuild/linux-arm64": "0.21.5", + "@esbuild/linux-ia32": "0.21.5", + "@esbuild/linux-loong64": "0.21.5", + "@esbuild/linux-mips64el": "0.21.5", + "@esbuild/linux-ppc64": "0.21.5", + "@esbuild/linux-riscv64": "0.21.5", + "@esbuild/linux-s390x": "0.21.5", + "@esbuild/linux-x64": "0.21.5", + "@esbuild/netbsd-x64": "0.21.5", + "@esbuild/openbsd-x64": "0.21.5", + "@esbuild/sunos-x64": "0.21.5", + "@esbuild/win32-arm64": "0.21.5", + "@esbuild/win32-ia32": "0.21.5", + "@esbuild/win32-x64": "0.21.5" + } + }, + "node_modules/estree-walker": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", + "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, + "node_modules/execa": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/execa/-/execa-8.0.1.tgz", + "integrity": 
"sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg==", + "dev": true, + "license": "MIT", + "dependencies": { + "cross-spawn": "^7.0.3", + "get-stream": "^8.0.1", + "human-signals": "^5.0.0", + "is-stream": "^3.0.0", + "merge-stream": "^2.0.0", + "npm-run-path": "^5.1.0", + "onetime": "^6.0.0", + "signal-exit": "^4.1.0", + "strip-final-newline": "^3.0.0" + }, + "engines": { + "node": ">=16.17" + }, + "funding": { + "url": "https://github.com/sindresorhus/execa?sponsor=1" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/get-func-name": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.2.tgz", + "integrity": "sha512-8vXOvuE167CtIc3OyItco7N/dpRtBbYOsPsXCz7X/PMnlGjYjSGuZJgM1Y7mmew7BKf9BqvLX2tnOVy1BBUsxQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/get-stream": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-8.0.1.tgz", + "integrity": "sha512-VaUJspBffn/LMCJVoMvSAdmscJyS1auj5Zulnn5UoYcY531UWmdwhRWkcGKnGU93m5HSXP9LP2usOryrBtQowA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/human-signals": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-5.0.0.tgz", + "integrity": "sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=16.17.0" + 
} + }, + "node_modules/is-stream": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-3.0.0.tgz", + "integrity": "sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, + "node_modules/js-tokens": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz", + "integrity": "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/local-pkg": { + "version": "0.5.1", + "resolved": "https://registry.npmjs.org/local-pkg/-/local-pkg-0.5.1.tgz", + "integrity": "sha512-9rrA30MRRP3gBD3HTGnC6cDFpaE1kVDWxWgqWJUN0RvDNAo+Nz/9GxB+nHOH0ifbVFy0hSA1V6vFDvnx54lTEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "mlly": "^1.7.3", + "pkg-types": "^1.2.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/loupe": { + "version": "2.3.7", + "resolved": "https://registry.npmjs.org/loupe/-/loupe-2.3.7.tgz", + "integrity": "sha512-zSMINGVYkdpYSOBmLi0D1Uo7JU9nVdQKrHxC8eYlV+9YKK9WePqAlL7lSlorG/U2Fw1w0hTBmaa/jrQ3UbPHtA==", + "dev": true, + "license": "MIT", + "dependencies": { + "get-func-name": "^2.0.1" + } + }, + "node_modules/magic-string": { + "version": "0.30.21", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", + "integrity": 
"sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/merge-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "dev": true, + "license": "MIT" + }, + "node_modules/mimic-fn": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-4.0.0.tgz", + "integrity": "sha512-vqiC06CuhBTUdZH+RYl8sFrL096vA45Ok5ISO6sE/Mr1jRbGH4Csnhi8f3wKVl7x8mO4Au7Ir9D3Oyv1VYMFJw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mlly": { + "version": "1.8.2", + "resolved": "https://registry.npmjs.org/mlly/-/mlly-1.8.2.tgz", + "integrity": "sha512-d+ObxMQFmbt10sretNDytwt85VrbkhhUA/JBGm1MPaWJ65Cl4wOgLaB1NYvJSZ0Ef03MMEU/0xpPMXUIQ29UfA==", + "dev": true, + "license": "MIT", + "dependencies": { + "acorn": "^8.16.0", + "pathe": "^2.0.3", + "pkg-types": "^1.3.1", + "ufo": "^1.6.3" + } + }, + "node_modules/mlly/node_modules/pathe": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", + "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", + "dev": true, + "license": "MIT" + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/nanoid": { + "version": "3.3.12", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.12.tgz", + "integrity": 
"sha512-ZB9RH/39qpq5Vu6Y+NmUaFhQR6pp+M2Xt76XBnEwDaGcVAqhlvxrl3B2bKS5D3NH3QR76v3aSrKaF/Kiy7lEtQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/npm-run-path": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-5.3.0.tgz", + "integrity": "sha512-ppwTtiJZq0O/ai0z7yfudtBpWIoxM8yE6nHi1X47eFR2EWORqfbu6CnPlNsjeN683eT0qG6H/Pyf9fCcvjnnnQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^4.0.0" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/npm-run-path/node_modules/path-key": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-4.0.0.tgz", + "integrity": "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/onetime": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-6.0.0.tgz", + "integrity": "sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "mimic-fn": "^4.0.0" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-limit": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-5.0.0.tgz", + "integrity": "sha512-/Eaoq+QyLSiXQ4lyYV23f14mZRQcXnxfHrN0vCai+ak9G0pp9iEQukIIZq5NccEvwRB8PUnZT0KsOoDCINS1qQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "yocto-queue": "^1.0.0" + }, + "engines": 
{ + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/pathe": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/pathe/-/pathe-1.1.2.tgz", + "integrity": "sha512-whLdWMYL2TwI08hn8/ZqAbrVemu0LNaNNJZX73O6qaIdCTfXutsLhMkjdENX0qhsQ9uIimo4/aQOmXkoon2nDQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/pathval": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/pathval/-/pathval-1.1.1.tgz", + "integrity": "sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" + }, + "node_modules/pkg-types": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/pkg-types/-/pkg-types-1.3.1.tgz", + "integrity": "sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "confbox": "^0.1.8", + "mlly": "^1.7.4", + "pathe": "^2.0.1" + } + }, + "node_modules/pkg-types/node_modules/pathe": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", + "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", + "dev": true, + "license": "MIT" + }, + "node_modules/postcss": { + "version": "8.5.13", + 
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.13.tgz", + "integrity": "sha512-qif0+jGGZoLWdHey3UFHHWP0H7Gbmsk8T5VEqyYFbWqPr1XqvLGBbk/sl8V5exGmcYJklJOhOQq1pV9IcsiFag==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "nanoid": "^3.3.11", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/pretty-format": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.7.0.tgz", + "integrity": "sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/react-is": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", + "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", + "dev": true, + "license": "MIT" + }, + "node_modules/rollup": { + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.2.tgz", + "integrity": "sha512-J9qZyW++QK/09NyN/zeO0dG/1GdGfyp9lV8ajHnRVLfo/uFsbji5mHnDgn/qYdUHyCkM2N+8VyspgZclfAh0eQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "1.0.8" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.60.2", + "@rollup/rollup-android-arm64": "4.60.2", + "@rollup/rollup-darwin-arm64": "4.60.2", + 
"@rollup/rollup-darwin-x64": "4.60.2", + "@rollup/rollup-freebsd-arm64": "4.60.2", + "@rollup/rollup-freebsd-x64": "4.60.2", + "@rollup/rollup-linux-arm-gnueabihf": "4.60.2", + "@rollup/rollup-linux-arm-musleabihf": "4.60.2", + "@rollup/rollup-linux-arm64-gnu": "4.60.2", + "@rollup/rollup-linux-arm64-musl": "4.60.2", + "@rollup/rollup-linux-loong64-gnu": "4.60.2", + "@rollup/rollup-linux-loong64-musl": "4.60.2", + "@rollup/rollup-linux-ppc64-gnu": "4.60.2", + "@rollup/rollup-linux-ppc64-musl": "4.60.2", + "@rollup/rollup-linux-riscv64-gnu": "4.60.2", + "@rollup/rollup-linux-riscv64-musl": "4.60.2", + "@rollup/rollup-linux-s390x-gnu": "4.60.2", + "@rollup/rollup-linux-x64-gnu": "4.60.2", + "@rollup/rollup-linux-x64-musl": "4.60.2", + "@rollup/rollup-openbsd-x64": "4.60.2", + "@rollup/rollup-openharmony-arm64": "4.60.2", + "@rollup/rollup-win32-arm64-msvc": "4.60.2", + "@rollup/rollup-win32-ia32-msvc": "4.60.2", + "@rollup/rollup-win32-x64-gnu": "4.60.2", + "@rollup/rollup-win32-x64-msvc": "4.60.2", + "fsevents": "~2.3.2" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/siginfo": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", + "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==", + 
"dev": true, + "license": "ISC" + }, + "node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/stackback": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", + "integrity": "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==", + "dev": true, + "license": "MIT" + }, + "node_modules/std-env": { + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz", + "integrity": "sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==", + "dev": true, + "license": "MIT" + }, + "node_modules/strip-final-newline": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-3.0.0.tgz", + "integrity": "sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/strip-literal": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-2.1.1.tgz", + "integrity": 
"sha512-631UJ6O00eNGfMiWG78ck80dfBab8X6IVFB51jZK5Icd7XAs60Z5y7QdSd/wGIklnWvRbUNloVzhOKKmutxQ6Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "js-tokens": "^9.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/tinybench": { + "version": "2.9.0", + "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", + "integrity": "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==", + "dev": true, + "license": "MIT" + }, + "node_modules/tinypool": { + "version": "0.8.4", + "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-0.8.4.tgz", + "integrity": "sha512-i11VH5gS6IFeLY3gMBQ00/MmLncVP7JLXOw1vlgkytLmJK7QnEr7NXf0LBdxfmNPAeyetukOk0bOYrJrFGjYJQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/tinyspy": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-2.2.1.tgz", + "integrity": "sha512-KYad6Vy5VDWV4GH3fjpseMQ/XU2BhIYP7Vzd0LG44qRWm/Yt2WCOTicFdvmgo6gWaqooMQCawTtILVQJupKu7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/type-detect": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.1.0.tgz", + "integrity": "sha512-Acylog8/luQ8L7il+geoSxhEkazvkslg7PSNKOX59mbB9cOveP5aq9h74Y7YU8yDpJwetzQQrfIwtf4Wp4LKcw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/ufo": { + "version": "1.6.4", + "resolved": "https://registry.npmjs.org/ufo/-/ufo-1.6.4.tgz", + "integrity": "sha512-JFNbkD1Svwe0KvGi8GOeLcP4kAWQ609twvCdcHxq1oSL8svv39ZuSvajcD8B+5D0eL4+s1Is2D/O6KN3qcTeRA==", + "dev": true, + "license": "MIT" + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": 
"sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/vite": { + "version": "5.4.21", + "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.21.tgz", + "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "^0.21.3", + "postcss": "^8.4.43", + "rollup": "^4.20.0" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^18.0.0 || >=20.0.0", + "less": "*", + "lightningcss": "^1.21.0", + "sass": "*", + "sass-embedded": "*", + "stylus": "*", + "sugarss": "*", + "terser": "^5.4.0" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + } + } + }, + "node_modules/vite-node": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-1.6.1.tgz", + "integrity": "sha512-YAXkfvGtuTzwWbDSACdJSg4A4DZiAqckWe90Zapc/sEX3XvHcw1NdurM/6od8J207tSDqNbSsgdCacBgvJKFuA==", + "dev": true, + "license": "MIT", + "dependencies": { + "cac": "^6.7.14", + "debug": "^4.3.4", + "pathe": "^1.1.1", + "picocolors": "^1.0.0", + "vite": "^5.0.0" + }, + "bin": { + "vite-node": "vite-node.mjs" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/vitest": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/vitest/-/vitest-1.6.1.tgz", + "integrity": 
"sha512-Ljb1cnSJSivGN0LqXd/zmDbWEM0RNNg2t1QW/XUhYl/qPqyu7CsqeWtqQXHVaJsecLPuDoak2oJcZN2QoRIOag==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/expect": "1.6.1", + "@vitest/runner": "1.6.1", + "@vitest/snapshot": "1.6.1", + "@vitest/spy": "1.6.1", + "@vitest/utils": "1.6.1", + "acorn-walk": "^8.3.2", + "chai": "^4.3.10", + "debug": "^4.3.4", + "execa": "^8.0.1", + "local-pkg": "^0.5.0", + "magic-string": "^0.30.5", + "pathe": "^1.1.1", + "picocolors": "^1.0.0", + "std-env": "^3.5.0", + "strip-literal": "^2.0.0", + "tinybench": "^2.5.1", + "tinypool": "^0.8.3", + "vite": "^5.0.0", + "vite-node": "1.6.1", + "why-is-node-running": "^2.2.2" + }, + "bin": { + "vitest": "vitest.mjs" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "@edge-runtime/vm": "*", + "@types/node": "^18.0.0 || >=20.0.0", + "@vitest/browser": "1.6.1", + "@vitest/ui": "1.6.1", + "happy-dom": "*", + "jsdom": "*" + }, + "peerDependenciesMeta": { + "@edge-runtime/vm": { + "optional": true + }, + "@types/node": { + "optional": true + }, + "@vitest/browser": { + "optional": true + }, + "@vitest/ui": { + "optional": true + }, + "happy-dom": { + "optional": true + }, + "jsdom": { + "optional": true + } + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/why-is-node-running": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/why-is-node-running/-/why-is-node-running-2.3.0.tgz", + "integrity": "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==", + "dev": true, + 
"license": "MIT", + "dependencies": { + "siginfo": "^2.0.0", + "stackback": "0.0.2" + }, + "bin": { + "why-is-node-running": "cli.js" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/yocto-queue": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz", + "integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + } + } +} diff --git a/bindings/node/package.json b/bindings/node/package.json new file mode 100644 index 0000000..9df96b4 --- /dev/null +++ b/bindings/node/package.json @@ -0,0 +1,26 @@ +{ + "name": "@folio/folio", + "version": "0.1.0", + "description": "Folio: Rust-native PDF conversion, embeddable in Node.", + "main": "index.js", + "types": "index.d.ts", + "license": "MIT", + "engines": { "node": ">= 18" }, + "napi": { + "name": "folio-node", + "triples": { + "defaults": true, + "additional": ["aarch64-apple-darwin", "x86_64-unknown-linux-gnu", "aarch64-unknown-linux-gnu"] + } + }, + "scripts": { + "build": "napi build --platform --release --cargo-cwd ../../crates/js --cargo-name folio_js --js _native.js --dts _native.d.ts", + "build:debug": "napi build --platform --cargo-cwd ../../crates/js --cargo-name folio_js --js _native.js --dts _native.d.ts", + "test": "vitest run" + }, + "devDependencies": { + "@napi-rs/cli": "^2.18.0", + "@types/node": "^20.0.0", + "vitest": "^1.6.0" + } +} diff --git a/bindings/node/tests/e2e.test.mjs b/bindings/node/tests/e2e.test.mjs new file mode 100644 index 0000000..2bcd466 --- /dev/null +++ b/bindings/node/tests/e2e.test.mjs @@ -0,0 +1,36 @@ +import { describe, it, expect } from 'vitest'; +import { readFile } from 'node:fs/promises'; +import { fileURLToPath } from 'node:url'; +import { dirname, resolve } from 'node:path'; +import { Folio } from '../index.js'; + 
+const E2E = process.env.FOLIO_E2E === '1'; +const here = dirname(fileURLToPath(import.meta.url)); +const fixture = resolve(here, '../../fixtures/hello.html'); + +describe.skipIf(!E2E)('e2e', () => { + it('htmlToPdf', async () => { + const f = await Folio.create({ engines: ['chromium'] }); + try { + const html = await readFile(fixture, 'utf8'); + const pdf = await f.htmlToPdf(html); + expect(pdf.subarray(0, 4).toString()).toBe('%PDF'); + } finally { await f.close(); } + }, 120_000); + + it('urlToPdf', async () => { + const f = await Folio.create({ engines: ['chromium'] }); + try { + const pdf = await f.urlToPdf('about:blank'); + expect(pdf.subarray(0, 4).toString()).toBe('%PDF'); + } finally { await f.close(); } + }, 120_000); + + it('markdownToPdf', async () => { + const f = await Folio.create({ engines: ['chromium'] }); + try { + const pdf = await f.markdownToPdf('# hello'); + expect(pdf.subarray(0, 4).toString()).toBe('%PDF'); + } finally { await f.close(); } + }, 120_000); +}); diff --git a/bindings/node/tests/smoke.test.mjs b/bindings/node/tests/smoke.test.mjs new file mode 100644 index 0000000..677426a --- /dev/null +++ b/bindings/node/tests/smoke.test.mjs @@ -0,0 +1,38 @@ +import { describe, it, expect } from 'vitest'; +import { + Folio, + FolioError, + ChromeNotFoundError, + ChromeFetchError, + ChromiumError, + OfficeError, + EngineDisabledError, + TimeoutError, + ValidationError, +} from '../index.js'; + +describe('module exports', () => { + it('exposes Folio class with create static method', () => { + expect(typeof Folio.create).toBe('function'); + }); + + it('exposes Folio instance methods on prototype', () => { + for (const m of ['htmlToPdf', 'urlToPdf', 'markdownToPdf', 'officeToPdf', 'close']) { + expect(typeof Folio.prototype[m]).toBe('function'); + } + }); + + it('error subclasses extend FolioError', () => { + expect(new ChromeNotFoundError('x')).toBeInstanceOf(FolioError); + expect(new ChromeFetchError('x')).toBeInstanceOf(FolioError); + expect(new 
ChromiumError('x')).toBeInstanceOf(FolioError); + expect(new OfficeError('x')).toBeInstanceOf(FolioError); + expect(new EngineDisabledError('x')).toBeInstanceOf(FolioError); + expect(new TimeoutError('x')).toBeInstanceOf(FolioError); + expect(new ValidationError('x')).toBeInstanceOf(FolioError); + }); + + it('FolioError extends Error', () => { + expect(new FolioError('x')).toBeInstanceOf(Error); + }); +}); diff --git a/bindings/python/.gitignore b/bindings/python/.gitignore new file mode 100644 index 0000000..f1e48b8 --- /dev/null +++ b/bindings/python/.gitignore @@ -0,0 +1,6 @@ +.venv/ +__pycache__/ +*.so +*.pyd +dist/ +*.egg-info/ diff --git a/bindings/python/README.md b/bindings/python/README.md new file mode 100644 index 0000000..9add61f --- /dev/null +++ b/bindings/python/README.md @@ -0,0 +1,30 @@ +# folio (Python) + +Rust-native PDF conversion, embeddable. See spec at +`docs/superpowers/specs/2026-05-01-bindings-design.md`. + +## Install + + pip install folio + +## Quick start + + from folio import Folio + with Folio() as f: + pdf = f.html_to_pdf("

<h1>hi</h1>

") + open("out.pdf", "wb").write(pdf) + +## Async + + import asyncio + from folio import AsyncFolio + + async def main(): + f = await AsyncFolio.create() + try: + pdf = await f.html_to_pdf("

<h1>hi</h1>

") + finally: + await f.close() + return pdf + + asyncio.run(main()) diff --git a/bindings/python/folio/__init__.py b/bindings/python/folio/__init__.py new file mode 100644 index 0000000..f0e02a7 --- /dev/null +++ b/bindings/python/folio/__init__.py @@ -0,0 +1,26 @@ +"""Folio β€” Rust-native PDF conversion.""" +from ._native import ( + Folio, + AsyncFolio, + FolioError, + ChromeNotFoundError, + ChromeFetchError, + ChromiumError, + OfficeError, + EngineDisabledError, + TimeoutError, + ValidationError, +) + +__all__ = [ + "Folio", + "AsyncFolio", + "FolioError", + "ChromeNotFoundError", + "ChromeFetchError", + "ChromiumError", + "OfficeError", + "EngineDisabledError", + "TimeoutError", + "ValidationError", +] diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml new file mode 100644 index 0000000..3209eff --- /dev/null +++ b/bindings/python/pyproject.toml @@ -0,0 +1,27 @@ +[build-system] +requires = ["maturin>=1.7,<2.0"] +build-backend = "maturin" + +[project] +name = "folio" +version = "0.1.0" +description = "Folio: Rust-native PDF conversion, embeddable in Python." +readme = "README.md" +license = { text = "MIT" } +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Rust", + "License :: OSI Approved :: MIT License", +] + +[project.urls] +Homepage = "https://github.com/__deesh_reddy__/folio" +Repository = "https://github.com/__deesh_reddy__/folio" + +[tool.maturin] +manifest-path = "../../crates/py/Cargo.toml" +module-name = "folio._native" +features = ["pyo3/extension-module"] +python-source = "." 
+strip = true diff --git a/bindings/python/tests/test_e2e.py b/bindings/python/tests/test_e2e.py new file mode 100644 index 0000000..730700f --- /dev/null +++ b/bindings/python/tests/test_e2e.py @@ -0,0 +1,35 @@ +import os, pathlib, pytest +import folio + +E2E = os.environ.get("FOLIO_E2E") == "1" +pytestmark = pytest.mark.skipif(not E2E, reason="FOLIO_E2E not set") + +FIXTURE = pathlib.Path(__file__).resolve().parents[2] / "fixtures" / "hello.html" + +def test_html_to_pdf_sync(): + with folio.Folio(engines=["chromium"]) as f: + pdf = f.html_to_pdf(FIXTURE.read_text()) + assert pdf[:4] == b"%PDF" + +def test_url_to_pdf_sync(): + with folio.Folio(engines=["chromium"]) as f: + pdf = f.url_to_pdf("about:blank") + assert pdf[:4] == b"%PDF" + +def test_markdown_to_pdf_sync(): + with folio.Folio(engines=["chromium"]) as f: + pdf = f.markdown_to_pdf("# hello\n\nfolio e2e") + assert pdf[:4] == b"%PDF" + +import asyncio + +def test_html_to_pdf_async(): + async def run(): + f = await folio.AsyncFolio.create(engines=["chromium"]) + try: + pdf = await f.html_to_pdf(FIXTURE.read_text()) + finally: + await f.close() + return pdf + pdf = asyncio.run(run()) + assert pdf[:4] == b"%PDF" diff --git a/bindings/python/tests/test_smoke.py b/bindings/python/tests/test_smoke.py new file mode 100644 index 0000000..1882fe3 --- /dev/null +++ b/bindings/python/tests/test_smoke.py @@ -0,0 +1,38 @@ +import folio + +def test_module_exports(): + assert hasattr(folio, "Folio") + assert hasattr(folio, "AsyncFolio") + assert issubclass(folio.ChromeNotFoundError, folio.FolioError) + assert issubclass(folio.ChromiumError, folio.FolioError) + assert issubclass(folio.OfficeError, folio.FolioError) + assert issubclass(folio.ValidationError, folio.FolioError) + +def test_validation_error_class_exists(): + assert folio.ValidationError is not None + assert issubclass(folio.ValidationError, folio.FolioError) + +def test_folio_class_methods(): + # Don't instantiate (would launch Chrome). 
Just check the class exists. + assert hasattr(folio.Folio, "html_to_pdf") + assert hasattr(folio.Folio, "url_to_pdf") + assert hasattr(folio.Folio, "markdown_to_pdf") + assert hasattr(folio.Folio, "office_to_pdf") + assert hasattr(folio.Folio, "close") + assert hasattr(folio.Folio, "__enter__") + assert hasattr(folio.Folio, "__exit__") + +def test_async_folio_class_exists(): + assert hasattr(folio.AsyncFolio, "create") + assert hasattr(folio.AsyncFolio, "html_to_pdf") + assert hasattr(folio.AsyncFolio, "url_to_pdf") + assert hasattr(folio.AsyncFolio, "markdown_to_pdf") + assert hasattr(folio.AsyncFolio, "office_to_pdf") + assert hasattr(folio.AsyncFolio, "close") + +def test_async_folio_create_returns_coroutine(): + """AsyncFolio.create() must return an awaitable, not eagerly launch.""" + import folio, inspect + # Don't call it (would launch chrome). Just confirm it's a static method + # and the signature accepts our args. + assert callable(folio.AsyncFolio.create) diff --git a/comparison.md b/comparison.md new file mode 100644 index 0000000..d73a1c4 --- /dev/null +++ b/comparison.md @@ -0,0 +1,559 @@ +# Folio vs Gotenberg β€” In-Depth Feature Comparison + +> **Snapshot date:** 2026-05-01 +> **Folio commit:** `spec/operator-console` (HEAD: `209a444`) +> **Gotenberg snapshot:** vendored at `tmp/gotenberg/` +> **Companion:** `docs/markdown-plus.md` (the new Markdown variation +> referenced in this comparison's recommendations). + +This document is an audit, not a sales sheet. It records what each project +does *today*, what Folio has chosen not to do (deliberately or not), and +what is missing relative to Gotenberg parity. It is structured so that any +single section can be read in isolation by someone deciding whether Folio +is ready for their workload. + +--- + +## 0. 
TL;DR + +| Axis | Folio | Gotenberg | Verdict | +|-----------------------------------------|------------------------------------|------------------------------------|------------------------| +| Core conversions (HTML/URL/MD/Office) | βœ… Implemented | βœ… Implemented | **Parity** | +| Screenshot routes (PNG/JPEG/WebP) | βœ… Implemented | βœ… Implemented | **Parity** | +| PDF ops (merge/split/flatten/rotate/…) | βœ… Implemented (single backend) | βœ… Implemented (multi backend) | Folio behind on choice | +| PDF/A & PDF/UA | βœ… via Ghostscript | βœ… via LibreOffice + engines | Different paths, OK | +| Metadata read/write | βœ… | βœ… | **Parity** | +| Bookmarks read/write | βœ… | βœ… | **Parity** | +| Encrypt | βœ… | βœ… | **Parity** | +| Watermark / stamp | βœ… (watermark) / partial (stamp) | βœ… both | Folio behind on stamp | +| Webhook async delivery | 🚧 Scaffolded, callback TODO | βœ… Production-grade | **Folio missing** | +| Batch API | 🚧 Endpoints + worker, ZIP TODO | ❌ Not offered | Folio ahead (in spec) | +| Prometheus metrics | βœ… Rich set | βœ… Standard set | **Parity** | +| Structured logs | βœ… JSON/text + request IDs | βœ… slog | **Parity** | +| OpenTelemetry traces | βœ… OTLP HTTP | βœ… OTel SDK | **Parity** | +| Operator console (live UI) | βœ… Svelte SPA, SSE, charts | ❌ JSON only | **Folio ahead** | +| Auth (Basic) | βœ… | βœ… | **Parity** | +| TLS | ❌ (rely on reverse proxy) | βœ… (cert/key flags) | **Folio missing** | +| SSRF / download allow-deny | partial | βœ… rich | **Folio behind** | +| Multi-engine fallback per op | ❌ (lopdf only) | βœ… qpdf/pdfcpu/pdftk/exiftool | **Folio missing** | +| Python / Node bindings | ❌ Empty crates | ❌ Not offered | Both miss | +| CLI (convert/merge/split/…) | βœ… | ❌ Not offered | **Folio ahead** | +| Library (Rust crate) usage | βœ… | ❌ Server-only | **Folio ahead** | + +**Bottom line.** Folio reaches roughly **85% of Gotenberg's HTTP-surface +capability** while exceeding it on observability, in-process usage, 
and +CLI ergonomics. The remaining 15% — webhook callback delivery, multi-engine +fallback chains, TLS, fine-grained SSRF controls, advanced Chromium wait +conditions, the long tail of LibreOffice export filters — is what blocks a +clean drop-in replacement claim today. + +--- + +## 1. Architecture comparison + +### 1.1 Gotenberg +- **Language:** Go +- **Framework:** Echo HTTP, modular plugin system +- **Concurrency model:** Process pools per engine (Chromium / LibreOffice + supervised externally), goroutines per request +- **Rendering:** Each Chromium conversion launches/uses a managed Chrome + subprocess; LibreOffice spawns `soffice` per conversion +- **Deployment shape:** Container-only — the project is explicitly a + Docker product +- **Distribution:** Single binary inside a Debian image with all engines + preinstalled + +### 1.2 Folio +- **Language:** Rust +- **Framework:** axum / tower +- **Concurrency model:** Tokio tasks, semaphore-bounded; engines wrapped in + `SupervisedEngine` with lazy-start / idle-shutdown +- **Rendering:** Chromium via `chromiumoxide` (CDP) — Folio holds the + client; LibreOffice via `soffice` subprocess +- **Deployment shape:** Container *or* binary *or* Rust library *or* CLI +- **Distribution:** Multi-target Dockerfile (`folio`, `folio-chromium`, + `folio-libreoffice`, `folio-cloudrun`, `folio-lambda`) + +### 1.3 What this means in practice +Folio's choice to live as a *library* is the real architectural divergence +— it is a strict superset of "PDF microservice", whereas Gotenberg only +exists as the microservice form. That choice shapes a lot of what +follows: the supervised-engine wrapper, the operator console, the CLI all +flow from "we are not married to the HTTP surface." + +--- + +## 2. 
HTTP API comparison + +### 2.1 Endpoint matrix + +| Route | Folio | Gotenberg | Notes | +|---------------------------------------------------|-------|-----------|-------| +| `POST /forms/chromium/convert/url` | βœ… | βœ… | parity | +| `POST /forms/chromium/convert/html` | βœ… | βœ… | parity | +| `POST /forms/chromium/convert/markdown` | βœ… | βœ… | parity, see Β§3.4 | +| `POST /forms/chromium/screenshot/url` | βœ… | βœ… | parity | +| `POST /forms/chromium/screenshot/html` | βœ… | βœ… | parity | +| `POST /forms/chromium/screenshot/markdown` | βœ… | βœ… | parity | +| `POST /forms/libreoffice/convert` | βœ… | βœ… | parity, filter coverage differs (see Β§3.5) | +| `POST /forms/pdfengines/merge` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/split` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/flatten` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/convert` (PDF/A, PDF/UA) | βœ… | βœ… | different backend | +| `POST /forms/pdfengines/rotate` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/metadata/read` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/metadata/write` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/bookmarks/read` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/bookmarks/write` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/encrypt` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/embed` | ❌ | βœ… | **Folio missing** β€” attach files inside PDF | +| `POST /forms/pdfengines/watermark` | βœ… | βœ… | parity | +| `POST /forms/pdfengines/stamp` | 🚧 | βœ… | **Folio partial** β€” overlay-on-pages variant | +| `POST /forms/batch/submit` | 🚧 | ❌ | **Folio ahead in spec** | +| `GET /forms/batch/{id}/status` | 🚧 | ❌ | **Folio ahead in spec** | +| `GET /forms/batch/{id}/download` | 🚧 | ❌ | **Folio ahead in spec** | +| `GET /health` | βœ… | βœ… | parity | +| `GET /version` | βœ… | ❌ | **Folio ahead** (Gotenberg ships version on root) | +| `GET /prometheus/metrics` | βœ… | βœ… | parity | +| `GET /_/`, `/_/sse`, `/_/metrics.json` | βœ… | ❌ | **Folio 
ahead** β€” operator console | +| Webhook headers (`Webhook-Url`, etc.) | 🚧 | βœ… | callback delivery TODO in Folio | + +**Visible gaps in HTTP surface:** `embed`, full `stamp`, complete webhook +callback delivery, batch ZIP/merge output. Everything else exists. + +### 2.2 Request/response shape + +Gotenberg insists on multipart/form-data for *every* conversion. Folio +follows the same convention for all core routes β€” operators using +Gotenberg client SDKs (`gotenberg-php`, `gotenberg-js-client`, +`gotenberg-go-client`) can point at Folio with only a base-URL change for +the parity routes. This is a deliberate compatibility choice, not an +accident. + +--- + +## 3. Conversion engines, feature by feature + +### 3.1 Chromium β€” PDF generation + +| Feature | Folio | Gotenberg | Notes | +|-----------------------------------------|-------|-----------|-------| +| Paper size (named + custom WxH) | βœ… | βœ… | parity | +| Margins (per side, inches) | βœ… | βœ… | parity | +| Landscape | βœ… | βœ… | parity | +| Print background | βœ… | βœ… | parity | +| Omit background (transparency) | βœ… | βœ… | parity | +| Single-page mode | βœ… | βœ… | parity | +| Scale (0.1–2.0) | βœ… | βœ… | parity | +| Page ranges | βœ… | βœ… | parity | +| Custom header/footer HTML w/ tokens | βœ… | βœ… | parity | +| Prefer CSS page size | βœ… | βœ… | parity | +| Tagged PDF / outline | partial | βœ… | Folio passes flags but limited testing | +| Cookies (with sameSite) | βœ… | βœ… | parity | +| Extra HTTP headers (scoped) | partial | βœ… | Folio: flat headers; Gotenberg: regex scope | +| User-Agent override | βœ… | βœ… | parity | +| Emulated media type | βœ… | βœ… | parity | +| Emulated media features (color-scheme…) | ❌ | βœ… | **Folio missing** | + +### 3.2 Chromium β€” wait / failure conditions + +| Feature | Folio | Gotenberg | +|--------------------------------------------------|-------|-----------| +| `waitDelay` (fixed) | βœ… | βœ… | +| `waitForExpression` / custom JS predicate | partial | 
βœ… | +| `waitWindowStatus` | ❌ | βœ… | +| `waitForSelector` | ❌ | βœ… | +| `skipNetworkIdleEvent` | ❌ | βœ… | +| `skipNetworkAlmostIdleEvent` | ❌ | βœ… | +| `failOnHttpStatusCodes` | ❌ | βœ… | +| `failOnResourceHttpStatusCodes` | ❌ | βœ… | +| `ignoreResourceHttpStatusDomains` | ❌ | βœ… | +| `failOnResourceLoadingFailed` | ❌ | βœ… | +| `failOnConsoleExceptions` | ❌ | βœ… | + +This is the most concrete Chromium feature gap. Spec +(archived spec) already exists; it just hasn't +been implemented past the stub. **Recommendation:** prioritise. + +### 3.3 Chromium β€” Screenshots + +Both projects support PNG/JPEG/WebP, dimensions, JPEG quality, viewport +clipping, optimize-for-speed. **Parity.** The only gap is that Folio's +"capture beyond viewport" code path has fewer integration tests covered +than Gotenberg's. + +### 3.4 Markdown route + +Both implementations are minimal. Both produce a wrapped HTML document and +hand it to Chromium. Differences: + +- **Folio:** `pulldown_cmark` with `Options::all()` + a single embedded + `markdown.css`. No template injection point. +- **Gotenberg:** `gomarkdown` + `bluemonday` (sanitised HTML). Requires + the user to supply a wrapper HTML file (named `index.html` in the + multipart) that pulls the rendered Markdown in via a documented + mechanism, so the user can inject CSS/fonts/JS. + +Each has a different opinion: Folio is "we own the template, give us +markdown"; Gotenberg is "you own the template, give us markdown + a +template." + +This comparison's companion document `docs/markdown-plus.md` proposes a +**third route** that combines both philosophies plus front-matter, math, +mermaid, syntax highlighting, includes, and named themes. That work is +designed to ship alongside the existing route, not replace it. + +### 3.5 LibreOffice β€” input formats + +Both projects exercise LibreOffice's full ~100-format input matrix (DOC, +DOCX, ODT, ODS, ODP, XLS, XLSX, PPT, PPTX, RTF, CSV, EPUB, etc.). 
The +difference is in **export options**: + +| Export option | Folio | Gotenberg | +|---------------------------------------------|-------|-----------| +| Landscape | βœ… | βœ… | +| Native page ranges | partial | βœ… | +| Single-page mode (Calc/Sheet) | βœ… | βœ… | +| Password-protected input documents | ❌ | βœ… | +| Update indexes on conversion | ❌ | βœ… | +| Export form fields | ❌ | βœ… | +| Export bookmarks | partial | βœ… | +| Export notes / placeholders | ❌ | βœ… | +| Bookmarks β†’ PDF destinations | ❌ | βœ… | +| Image compression (lossless / JPEG quality) | ❌ | βœ… | +| Image resolution reduction | ❌ | βœ… | +| Viewer preferences (initial view, zoom…) | ❌ | βœ… | +| Native LibreOffice watermark | ❌ | βœ… | +| PDF/A-1b / 2b / 3b output | βœ… | βœ… | +| PDF/UA output | βœ… | βœ… | + +Spec (archived spec) lists most of these as +explicit TODOs. + +### 3.6 PDF engine ops + +Gotenberg's killer feature here is **per-operation engine selection with +fallback chains**: `qpdf β†’ pdfcpu β†’ pdftk` for merge, etc. If qpdf +chokes on a malformed PDF, pdfcpu retries transparently. Folio uses a +single backend (`lopdf`, pure Rust) for *every* op, which is operationally +simpler but means a malformed input has no recovery path other than +"return an error and let the caller deal with it." + +This is the largest pure-feature gap. Three options for closing it: + +- **(A) Re-implement engine fallback in Rust** by shelling out to qpdf / + pdfcpu / pdftk binaries. Cheapest. Loses some of the "no external tools" + posture but Folio already shells out to `soffice` and `gs`, so the + posture is already mixed. +- **(B) Stay single-backend and harden lopdf** β€” file upstream patches for + the malformed-input cases that arise. Highest engineering cost, slowest + return. +- **(C) Punt** β€” say in the README that Folio is "well-formed PDF only" + and let users pre-validate. Honest, but caps the addressable workload. + +Spec (archived spec) exists and points at (A). + +--- + +## 4. 
Async delivery — webhooks + +Gotenberg's webhook module is mature: middleware POSTs the produced file +to a user-supplied URL with retry logic, allow/deny lists (literal and +regex), private/public IP filtering for SSRF, configurable retry windows, +sync vs async modes. + +Folio has the **shape** of this — `Webhook-Url` and friends parse, +`crates/server/src/webhook/` exists, the worker runs — but the actual +callback delivery path is marked TODO. Until that lands, an operator +sending `Webhook-Url` headers will see a 202 and then... nothing. + +**Status:** spec (archived spec) is the source of truth; the +gap is implementation, not design. + +--- + +## 5. Batch API (Folio-only) + +Folio has a server-side batch surface that Gotenberg has no equivalent +for: submit a JSON manifest of N jobs, get back a `batch_id`, poll for +progress, download a ZIP when done. The endpoints exist; the worker runs; +ZIP packaging and per-item-failure semantics are TODO. + +This is a real differentiator, not just parity-plus. Worth finishing. + +--- + +## 6. Operator console (Folio-only) + +This is where Folio is unambiguously ahead. + +Gotenberg gives you `/health` (JSON) and `/prometheus/metrics` +(Prometheus text). That is the entire operability surface. To get any +actual visibility you wire it into Grafana yourself. 
+ +Folio ships a Svelte SPA at `/_/` driven by Server-Sent Events that +shows, live, in one screen: + +- RPS, p95 latency, error %, in-flight count +- Per-route table (RPS, p50/p95/p99, error %, load %) +- Engine status (Chromium / LibreOffice up/down + restart count) +- Concurrency grid (active vs cap, with warn/crit thresholds) +- Throughput strip (30-min windowed RPS + p95 with SLA overlay) +- Activity strip (error % + queue depth) +- Resources (CPU %, memory MB) +- Active batches (progress + per-item state) +- Last-20 request log + last-10 error log + +The recent commit history (last 30 commits, all dashboard-focused) shows +this is the team's current focus and it is in active polish. + +This shifts the value proposition: Folio is not "Gotenberg in Rust", it +is "Gotenberg-compatible PDF service that you can run without immediately +needing a dashboards engineer." + +--- + +## 7. Configuration / CLI flags + +Gotenberg has a wide and stable flag surface (api, webhook, pdfengines, +prometheus, basic auth). 
Folio's flags cover the same axes but are +narrower: + +| Knob | Folio | Gotenberg | +|---------------------------------------------|-------|-----------| +| API port / bind / TLS | port + bind βœ…, TLS ❌ | βœ… | +| Body limit (multipart) | βœ… | βœ… | +| Per-request timeout | βœ… | βœ… | +| Root path (reverse-proxy mount) | ❌ | βœ… | +| Correlation ID header | βœ… | βœ… | +| Basic-auth user/pass (env) | βœ… | βœ… | +| Download allow/deny lists | partial | βœ… | +| Download deny private/public IPs | partial | βœ… | +| Download max retries | βœ… | βœ… | +| Disable downloads entirely | ❌ | βœ… | +| Enable debug route | ❌ | βœ… | +| Webhook allow/deny + SSRF filters | partial | βœ… | +| Webhook retry waits / counts / timeouts | partial | βœ… | +| Per-op engine selection (merge/split/…) | ❌ | βœ… | +| Disable specific PDF engine routes | ❌ | βœ… | +| Prometheus namespace / collect interval | partial | βœ… | +| Disable route telemetry | βœ… | βœ… | + +**Recommendation:** the gaps here are individually small; add them one +by one as `--root-path`, `--api-disable-debug`, `--api-disable-download`, +and SSRF flags. Spec (archived spec) already exists. + +--- + +## 8. Auth & security posture + +| Concern | Folio | Gotenberg | +|--------------------------------------------|-------|-----------| +| HTTP Basic Auth | βœ… | βœ… | +| Token / JWT auth | ❌ | ❌ | +| Per-route authorisation | ❌ | ❌ | +| TLS in-process | ❌ | βœ… | +| `file://` rejected on URL routes | βœ… | βœ… | +| SSRF: private IP block | partial | βœ… | +| SSRF: public IP block | ❌ | βœ… | +| Download URL allow/deny regex | ❌ | βœ… | +| Webhook URL allow/deny regex | partial | βœ… | +| Multipart body limit enforcement | βœ… | βœ… | +| Memory-safe core | βœ… (Rust) | ❌ (Go GC) | + +Folio's Rust core is a real security advantage at the parser level; +Gotenberg's mature SSRF/download/webhook filter stack is a real security +advantage at the network edge. 
They are not the same thing and Folio +should not pretend memory-safety substitutes for the network filters — +both matter. + +--- + +## 9. Observability + +| Surface | Folio | Gotenberg | +|--------------------------------------|-------|-----------| +| Structured logs (JSON / text) | ✅ | ✅ | +| Request ID propagation | ✅ | ✅ | +| Prometheus counters/histograms | ✅ | ✅ | +| OpenTelemetry traces | ✅ (OTLP HTTP) | ✅ | +| OpenTelemetry metrics | ✅ | ✅ | +| Live operator UI | ✅ | ❌ | +| SSE event stream | ✅ | ❌ | +| Per-engine health endpoint detail | ✅ (per-engine) | ✅ | + +**Parity, with Folio ahead on the live UI.** No gaps to call out here. + +--- + +## 10. Distribution surfaces + +| Surface | Folio | Gotenberg | +|----------------------------------|-------|-----------| +| HTTP server (Docker) | ✅ | ✅ | +| HTTP server (raw binary) | ✅ | ❌ (officially Docker-only) | +| CLI binary (`folio convert …`) | ✅ | ❌ | +| Rust library (in-process) | ✅ | ❌ | +| Python bindings | ❌ (placeholder) | ❌ | +| Node.js bindings | ❌ (placeholder) | ❌ | +| Cloud Run image | ✅ (`folio-cloudrun`) | ❌ | +| AWS Lambda image | ✅ (`folio-lambda`) | ❌ | +| Slim images (Chromium-only / LO-only) | ✅ | ❌ | + +Folio has done real work here that Gotenberg has explicitly said no to +(Gotenberg's stance is that it is a Docker product; everything else is +the user's problem). The *empty* Python/Node bindings undercut that +narrative — the placeholder crates (`crates/py/`, `crates/js/`) imply a +roadmap commitment that has no actual code. Either ship them or remove +the placeholders; the worst state is "empty crate that suggests a +feature." + +--- + +## 11. Test coverage + +- **Folio:** ~43 unit tests passing across types, engine, pdfops, routes; + ~25 BDD scenarios ported from Gotenberg (runner partially complete); + 5 e2e smoke tests; Docker-based PDF/A validation via verapdf. 
+ `TEST_STATUS.md` and `TEST_ISSUES.md` are surprisingly honest about + what is and isn't passing. +- **Gotenberg:** mature integration test suite that has been running for + years; thousands of cumulative production deployments' worth of + battle-testing. + +The maturity gap is real. Folio's BDD harness is the right move (re-using +Gotenberg's scenarios is the cheapest path to credibility); it just needs +to be finished. + +--- + +## 12. What Folio did well, with credit + +- **Library-first architecture.** Being usable as a Rust crate, a CLI, + and a server is a substantial superset of Gotenberg's positioning, and + was clearly an early decision rather than a retrofit (the engine crates + have no axum imports). +- **Operator console.** The SSE-driven Svelte dashboard is a genuinely + better operator experience than Gotenberg's bare metrics endpoint. + This was the right thing to invest in last. +- **Supervised engines with lazy-start / idle-shutdown.** Memory profile + on idle should be substantially better than Gotenberg's eager + process-pool model — relevant for serverless deploys (Cloud Run / + Lambda images exist for a reason). +- **Atomic concurrency tracking** (commit `209a444`) over sampled + semaphore reads. Small fix, but it's the kind of correctness work that + shows the team has actually been driving the dashboard against real + load. +- **Honest test status docs.** `TEST_STATUS.md` and `TEST_ISSUES.md` + exist and are not propaganda. Easy to underestimate how rare this is. + +## 13. What Folio did not do, deliberately + +- **No multi-engine fallback** for PDF ops. Single backend (`lopdf`) + keeps the dependency surface small. Defensible until you hit the first + malformed-input bug report, at which point the answer becomes "punt or + shell out." Decide before users force the decision. +- **No batch-of-batches / DAG job system.** The batch API is a flat list + of jobs, not a workflow. 
This is the right call for a PDF service — + workflow tools belong elsewhere. +- **No template engine for Markdown.** The basic Markdown route does not + let users inject Liquid/Handlebars/etc. The companion proposal + (`docs/markdown-plus.md`) preserves this stance: front-matter + substitution only, no full templating. +- **No cross-request server-side state.** Includes resolve from the + upload only. This is a security posture, not laziness. + +## 14. What Folio did not do, but should + +In rough priority order (highest impact per LOC first): + +1. **Finish webhook callback delivery** (archived spec). The + Async-202 path is half-built; finishing it unblocks Gotenberg client + compatibility. +2. **Wire advanced Chromium wait conditions** (spec 36): `waitForSelector`, + `waitWindowStatus`, `failOn*` family. Each is a single CDP call. +3. **Finish batch ZIP packaging + per-item failure semantics** + (spec 50-batch). The endpoints already exist; finishing them turns a + stub into a differentiator. +4. **Add `embed` + finish `stamp`** routes. Last gaps in the + `/forms/pdfengines/*` matrix. +5. **Implement `--root-path` and SSRF/download filter flags** + (spec 39). Small individual changes; collectively close the + security/operations gap. +6. **Decide on multi-engine PDF ops** (spec 38). Either ship qpdf/pdfcpu + shellout or commit to "well-formed PDFs only" in the README. Current + middle ground is the worst of both. +7. **Either ship the Python/Node bindings or remove the placeholder + crates.** Empty crates are a roadmap lie. +8. **Fill in LibreOffice export filters** (spec 37). Long tail; do as + user demand surfaces, not preemptively. +9. **Build Markdown+** (`docs/markdown-plus.md`). Net-new feature, not + Gotenberg parity, but uses the operator-console + observability + investment as a foundation. + +## 15. What Folio did not do, and arguably should not + +- **TLS in-process.** Use a reverse proxy. 
Adding TLS to the binary adds + cert rotation, OCSP stapling, ALPN — none of which Folio is positioned + to do better than nginx/Caddy/envoy. The current "not implemented" + status is correct; it should be made *explicit* in the README. +- **OAuth / JWT / RBAC.** PDF services are not where you want to be doing + identity. Stay with Basic Auth + reverse-proxy auth headers; document + the pattern. +- **A workflow / DAG engine on top of batch.** Out of scope. Forever. +- **A web-UI document editor.** Folio's UI is an operator console, not an + end-user product. The line should stay there. + +--- + +## 16. What we did vs what we did not — concise scorecard + +### Done +- Six Chromium routes (HTML/URL/Markdown × convert+screenshot) +- LibreOffice convert route + 100+ input formats +- All standard PDF ops bar `embed` and full `stamp` +- PDF/A and PDF/UA via Ghostscript +- Bookmarks, metadata, encrypt +- HTTP Basic Auth +- Prometheus metrics + OpenTelemetry traces + structured logs +- Operator console (Svelte + SSE) — distinct lead over Gotenberg +- CLI with convert/merge/split/flatten/rotate/metadata +- Multi-target Docker images (full / chromium-only / lo-only / cloudrun / + lambda) +- Library usage as a Rust crate +- BDD test harness (in progress) + +### Not done +- Webhook callback delivery (scaffold only) +- Batch ZIP output / per-item failure semantics (scaffold only) +- `embed` route, full `stamp` route +- Advanced Chromium wait/fail conditions (spec 36) +- Long tail of LibreOffice export options (spec 37) +- Multi-engine PDF op fallback (spec 38) +- Several CLI flags (`--root-path`, full SSRF filters) (spec 39) +- Python and Node.js bindings (empty placeholder crates) +- Cookie/header-scope regex filtering on Chromium routes +- Emulated media features (color-scheme, prefers-reduced-motion) +- TLS in-process *(deliberately not done; document the choice)* + +### Should be added (new) +- **Markdown+** — see `docs/markdown-plus.md`. 
Builds on existing + Chromium pipeline; uses existing observability stack; ships standalone + without blocking on webhook/batch/bindings. +- **Stage-level histograms** for any multi-stage route (Markdown+ is the + obvious first user). Genuine new information, not just parity. +- **Operator console "Markdown+" panel**, conditionally rendered when + traffic exists. Avoids polluting empty deployments. + +### Should *not* be added +- TLS in-process +- Identity/RBAC inside Folio +- Workflow/DAG engine on top of batch +- A document editor +- A second Markdown route that is "just like the first but with an + option" β€” extension, not duplication + +--- + +*End of comparison. The companion proposal in `docs/markdown-plus.md` +implements the "should be added (new)" section's first item.* diff --git a/crates/engine/Cargo.toml b/crates/engine/Cargo.toml index 399103d..dc04673 100644 --- a/crates/engine/Cargo.toml +++ b/crates/engine/Cargo.toml @@ -9,6 +9,7 @@ description = "Folio core engine: ChromiumEngine, LibreOfficeEngine, PdfOperatio default = ["chromium", "libreoffice"] chromium = ["dep:chromiumoxide", "dep:futures-util", "dep:pulldown-cmark", "dep:urlencoding"] libreoffice = [] +chrome-fetch = ["chromium", "dep:reqwest", "dep:sha2", "dep:zip", "dep:flate2", "dep:tar", "dep:dirs", "dep:walkdir"] [dependencies] chromiumoxide = { workspace = true, optional = true } @@ -30,6 +31,15 @@ image = { workspace = true } # URL encoding for screenshot data URLs urlencoding = { version = "2", optional = true } +# Chrome auto-download (chrome-fetch feature) +reqwest = { workspace = true, optional = true } +sha2 = { workspace = true, optional = true } +zip = { workspace = true, optional = true } +flate2 = { workspace = true, optional = true } +tar = { workspace = true, optional = true } +dirs = { workspace = true, optional = true } +walkdir = { workspace = true, optional = true } + [dev-dependencies] axum = { workspace = true } proptest = { workspace = true } diff --git 
a/crates/engine/src/chrome_fetch/cache.rs b/crates/engine/src/chrome_fetch/cache.rs new file mode 100644 index 0000000..6107d6d --- /dev/null +++ b/crates/engine/src/chrome_fetch/cache.rs @@ -0,0 +1,71 @@ +//! Platform cache directory for downloaded Chrome builds. + +use std::path::{Path, PathBuf}; + +/// Default cache root for Folio's downloaded Chrome. +/// +/// - macOS: `~/Library/Caches/folio/chromium` +/// - Linux: `$XDG_CACHE_HOME/folio/chromium` (falls back to `~/.cache`) +/// - Windows: `%LOCALAPPDATA%\folio\chromium` +/// +/// Override via `FOLIO_CHROME_CACHE` env var; constructor argument wins +/// over both. +pub fn cache_dir() -> PathBuf { + if let Ok(env) = std::env::var("FOLIO_CHROME_CACHE") { + if !env.is_empty() { + return PathBuf::from(env); + } + } + let base = dirs::cache_dir().unwrap_or_else(|| PathBuf::from(".")); + base.join("folio").join("chromium") +} + +/// Returns `Some(path)` if a cached Chrome for `version` exists and the +/// executable is present. +pub fn cached_chrome(cache: &Path, version: &str) -> Option { + let exe = chrome_exe_path(&cache.join(version)); + if exe.exists() { Some(exe) } else { None } +} + +/// Path to the Chrome executable inside an extracted Chrome-for-Testing +/// distribution rooted at `dist`. 
+pub(crate) fn chrome_exe_path(dist: &Path) -> PathBuf { + #[cfg(all(target_os = "macos", target_arch = "aarch64"))] + { + dist.join("chrome-mac-arm64").join("Google Chrome for Testing.app") + .join("Contents/MacOS/Google Chrome for Testing") + } + #[cfg(all(target_os = "macos", target_arch = "x86_64"))] + { + dist.join("chrome-mac-x64").join("Google Chrome for Testing.app") + .join("Contents/MacOS/Google Chrome for Testing") + } + #[cfg(target_os = "linux")] + { + dist.join("chrome-linux64").join("chrome") + } + #[cfg(target_os = "windows")] + { + dist.join("chrome-win64").join("chrome.exe") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn cache_dir_respects_env_override() { + // SAFETY: test mutates env in a non-overlapping way. + unsafe { std::env::set_var("FOLIO_CHROME_CACHE", "/tmp/folio-test-cache"); } + assert_eq!(cache_dir(), PathBuf::from("/tmp/folio-test-cache")); + // SAFETY: justified above. + unsafe { std::env::remove_var("FOLIO_CHROME_CACHE"); } + } + + #[test] + fn cached_chrome_none_when_dir_missing() { + let tmp = tempfile::tempdir().unwrap(); + assert!(cached_chrome(tmp.path(), "999.0.0.0").is_none()); + } +} diff --git a/crates/engine/src/chrome_fetch/detect.rs b/crates/engine/src/chrome_fetch/detect.rs new file mode 100644 index 0000000..125af26 --- /dev/null +++ b/crates/engine/src/chrome_fetch/detect.rs @@ -0,0 +1,109 @@ +//! Detect a usable system Chrome / Chromium executable. + +use std::path::PathBuf; + +/// Returns the first existing Chrome executable found via env vars, +/// `$PATH`, and platform-default install paths. Returns `None` if none +/// of the candidates resolve. 
+pub fn detect_system_chrome() -> Option { + detect_with( + std::env::var("BROWSER_PATH").ok().as_deref(), + std::env::var("CHROME_PATH").ok().as_deref(), + &|name| which::which(name).ok(), + &|p: &std::path::Path| p.exists(), + ) +} + +pub(crate) fn detect_with( + browser_path_env: Option<&str>, + chrome_path_env: Option<&str>, + path_lookup: &dyn Fn(&str) -> Option, + exists: &dyn Fn(&std::path::Path) -> bool, +) -> Option { + for env in [browser_path_env, chrome_path_env].into_iter().flatten() { + if !env.is_empty() { + let p = PathBuf::from(env); + if exists(&p) { + return Some(p); + } + } + } + for name in ["chromium-browser", "chromium", "google-chrome", "chrome"] { + if let Some(p) = path_lookup(name) { + return Some(p); + } + } + for candidate in platform_defaults() { + let p = PathBuf::from(candidate); + if exists(&p) { + return Some(p); + } + } + None +} + +#[cfg(target_os = "macos")] +fn platform_defaults() -> &'static [&'static str] { + &[ + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium", + ] +} + +#[cfg(target_os = "linux")] +fn platform_defaults() -> &'static [&'static str] { + &[ + "/usr/bin/google-chrome", + "/usr/bin/chromium", + "/usr/bin/chromium-browser", + "/snap/bin/chromium", + ] +} + +#[cfg(target_os = "windows")] +fn platform_defaults() -> &'static [&'static str] { + &[ + r"C:\Program Files\Google\Chrome\Application\chrome.exe", + r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe", + ] +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::Path; + + #[test] + fn explicit_browser_path_wins_when_exists() { + let result = detect_with( + Some("/fake/chrome"), + None, + &|_| None, + &|p: &Path| p == Path::new("/fake/chrome"), + ); + assert_eq!(result, Some(PathBuf::from("/fake/chrome"))); + } + + #[test] + fn falls_back_to_path_lookup() { + let result = detect_with( + None, + None, + &|name| if name == "chromium" { 
Some(PathBuf::from("/usr/bin/chromium")) } else { None }, + &|_| false, + ); + assert_eq!(result, Some(PathBuf::from("/usr/bin/chromium"))); + } + + #[test] + fn returns_none_when_nothing_found() { + let result = detect_with(None, None, &|_| None, &|_| false); + assert_eq!(result, None); + } + + #[test] + fn empty_env_var_is_skipped() { + let result = detect_with(Some(""), None, &|_| None, &|_| false); + assert_eq!(result, None); + } +} diff --git a/crates/engine/src/chrome_fetch/download.rs b/crates/engine/src/chrome_fetch/download.rs new file mode 100644 index 0000000..06bd28b --- /dev/null +++ b/crates/engine/src/chrome_fetch/download.rs @@ -0,0 +1,200 @@ +//! Download a pinned Chrome-for-Testing build and extract it into the +//! cache. Checksum verification (`verify_sha256`) is not wired in yet. +//! +//! Manifest format: +//! https://github.com/GoogleChromeLabs/chrome-for-testing +//! +//! Per-version endpoint: +//! `https://googlechromelabs.github.io/chrome-for-testing/<version>.json` + +use std::path::{Path, PathBuf}; + +use serde::Deserialize; +use sha2::{Digest, Sha256}; +use thiserror::Error; +use tokio::io::AsyncWriteExt; + +use super::cache::chrome_exe_path; + +/// Errors that can occur when fetching or preparing a Chrome-for-Testing binary. +#[derive(Debug, Error)] +pub enum ChromeFetchError { + /// System Chrome was not found and `auto_download` was disabled. + #[error("system Chrome not found and auto_download disabled")] + NotFoundAndDownloadDisabled, + /// The current platform is not supported by the Chrome-for-Testing manifest. + #[error("unsupported platform: {0}")] + UnsupportedPlatform(&'static str), + /// Fetching or parsing the per-version manifest failed. + #[error("manifest fetch failed: {0}")] + Manifest(String), + /// The manifest did not contain a download entry for this platform. + #[error("no download for platform '{0}' in manifest")] + NoPlatformInManifest(&'static str), + /// The HTTP download of the Chrome archive failed. 
+ #[error("download failed: {0}")] + Download(String), + /// SHA-256 digest of the downloaded archive did not match the expected value. + #[error("checksum mismatch: expected {expected}, got {actual}")] + Checksum { + /// The expected hex digest from the manifest. + expected: String, + /// The actual hex digest computed from the downloaded file. + actual: String, + }, + /// Extracting the zip archive failed. + #[error("extract failed: {0}")] + Extract(String), + /// An underlying I/O error. + #[error("io: {0}")] + Io(#[from] std::io::Error), +} + +#[derive(Debug, Deserialize)] +struct VersionManifest { + downloads: Downloads, +} +#[derive(Debug, Deserialize)] +struct Downloads { + chrome: Vec, +} +#[derive(Debug, Deserialize)] +struct DownloadEntry { + platform: String, + url: String, +} + +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +const PLATFORM: &str = "mac-arm64"; +#[cfg(all(target_os = "macos", target_arch = "x86_64"))] +const PLATFORM: &str = "mac-x64"; +#[cfg(target_os = "linux")] +const PLATFORM: &str = "linux64"; +#[cfg(target_os = "windows")] +const PLATFORM: &str = "win64"; + +/// Download Chrome `version` into `cache_root/<version>/`. Returns the +/// path to the executable. +/// +/// Atomicity: download to `cache_root/<version>.partial/`, extract there, +/// rename to `cache_root/<version>/` on success. 
+pub async fn download_chrome(cache_root: &Path, version: &str) -> Result { + let manifest = fetch_manifest(version).await?; + let entry = manifest.downloads.chrome.into_iter() + .find(|e| e.platform == PLATFORM) + .ok_or(ChromeFetchError::NoPlatformInManifest(PLATFORM))?; + + let dest = cache_root.join(version); + if dest.exists() { + return Ok(chrome_exe_path(&dest)); + } + tokio::fs::create_dir_all(cache_root).await?; + let staging = cache_root.join(format!("{version}.partial")); + if staging.exists() { + tokio::fs::remove_dir_all(&staging).await?; + } + tokio::fs::create_dir_all(&staging).await?; + + let archive = staging.join(archive_filename()); + download_to_file(&entry.url, &archive).await?; + extract_archive(&archive, &staging)?; + tokio::fs::rename(&staging, &dest).await?; + Ok(chrome_exe_path(&dest)) +} + +async fn fetch_manifest(version: &str) -> Result { + let url = format!( + "https://googlechromelabs.github.io/chrome-for-testing/{version}.json" + ); + let resp = reqwest::get(&url).await + .map_err(|e| ChromeFetchError::Manifest(e.to_string()))?; + if !resp.status().is_success() { + return Err(ChromeFetchError::Manifest(format!("HTTP {}", resp.status()))); + } + let text = resp.text().await.map_err(|e| ChromeFetchError::Manifest(e.to_string()))?; + serde_json::from_str(&text).map_err(|e| ChromeFetchError::Manifest(e.to_string())) +} + +async fn download_to_file(url: &str, dest: &Path) -> Result<(), ChromeFetchError> { + let resp = reqwest::get(url).await + .map_err(|e| ChromeFetchError::Download(e.to_string()))?; + if !resp.status().is_success() { + return Err(ChromeFetchError::Download(format!("HTTP {}", resp.status()))); + } + let bytes = resp.bytes().await + .map_err(|e| ChromeFetchError::Download(e.to_string()))?; + let mut file = tokio::fs::File::create(dest).await?; + file.write_all(&bytes).await?; + file.flush().await?; + Ok(()) +} + +#[allow(dead_code)] +fn verify_sha256(path: &Path, expected_hex: &str) -> Result<(), ChromeFetchError> { + let 
mut hasher = Sha256::new(); + let mut file = std::fs::File::open(path)?; + std::io::copy(&mut file, &mut hasher)?; + let actual = hex_lower(&hasher.finalize()); + if actual != expected_hex.to_ascii_lowercase() { + return Err(ChromeFetchError::Checksum { expected: expected_hex.into(), actual }); + } + Ok(()) +} + +fn hex_lower(bytes: &[u8]) -> String { + let mut s = String::with_capacity(bytes.len() * 2); + for b in bytes { + s.push_str(&format!("{b:02x}")); + } + s +} + +fn archive_filename() -> &'static str { "chrome.zip" } + +fn extract_archive(archive: &Path, into: &Path) -> Result<(), ChromeFetchError> { + let file = std::fs::File::open(archive)?; + let mut zip = zip::ZipArchive::new(file) + .map_err(|e| ChromeFetchError::Extract(e.to_string()))?; + zip.extract(into).map_err(|e| ChromeFetchError::Extract(e.to_string()))?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + for entry in walkdir::WalkDir::new(into) { + let entry = entry.map_err(|e| ChromeFetchError::Extract(e.to_string()))?; + if entry.file_type().is_file() + && entry.file_name().to_string_lossy().contains("chrome") + { + let mut perm = entry.metadata() + .map_err(|e| ChromeFetchError::Extract(e.to_string()))? 
+ .permissions(); + perm.set_mode(0o755); + let _ = std::fs::set_permissions(entry.path(), perm); + } + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hex_lower_pads_zeros() { + assert_eq!(hex_lower(&[0x0a, 0xff, 0x00]), "0aff00"); + } + + #[test] + fn manifest_deserializes() { + let json = r#"{ + "downloads": { + "chrome": [ + {"platform": "linux64", "url": "https://example.com/chrome.zip"}, + {"platform": "mac-arm64", "url": "https://example.com/mac.zip"} + ] + } + }"#; + let m: VersionManifest = serde_json::from_str(json).unwrap(); + assert_eq!(m.downloads.chrome.len(), 2); + } +} diff --git a/crates/engine/src/chrome_fetch/mod.rs b/crates/engine/src/chrome_fetch/mod.rs new file mode 100644 index 0000000..1b86da9 --- /dev/null +++ b/crates/engine/src/chrome_fetch/mod.rs @@ -0,0 +1,60 @@ +//! Detect and download Chrome / Chromium for embedded use. +//! +//! Bindings (`crates/py`, `crates/js`) call [`ensure_chrome`] which +//! returns a path to a usable Chrome executable, downloading a pinned +//! Chrome-for-Testing build into a platform cache directory if no system +//! Chrome is available. +//! +//! See `docs/superpowers/specs/2026-05-01-bindings-design.md` Β§ +//! "Chrome auto-download" for the contract. + +#![cfg(feature = "chrome-fetch")] + +mod detect; +mod download; +mod cache; + +pub use detect::detect_system_chrome; +pub use download::{download_chrome, ChromeFetchError}; +pub use cache::{cache_dir, cached_chrome}; + +use std::path::PathBuf; + +/// Pinned Chrome-for-Testing version. Bumped per Folio release. +/// Single source of truth: `bindings/CHROME_VERSION` mirrors this string. +pub const CHROME_VERSION: &str = include_str!("../../../../bindings/CHROME_VERSION"); + +/// Options controlling [`ensure_chrome`]. +#[derive(Debug, Clone)] +pub struct EnsureOptions { + /// Explicit path to a Chrome executable; skips all detection if set. 
+ pub explicit: Option, + /// Override the platform cache directory used for downloaded binaries. + pub cache_dir: Option, + /// When `true`, download Chrome automatically if no system Chrome is found. + pub auto_download: bool, +} + +impl Default for EnsureOptions { + fn default() -> Self { + Self { explicit: None, cache_dir: None, auto_download: true } + } +} + +/// Returns a path to a usable Chrome. +pub async fn ensure_chrome(opts: &EnsureOptions) -> Result { + if let Some(p) = &opts.explicit { + return Ok(p.clone()); + } + if let Some(p) = detect_system_chrome() { + return Ok(p); + } + let cache = opts.cache_dir.clone().unwrap_or_else(cache_dir); + if let Some(p) = cached_chrome(&cache, CHROME_VERSION.trim()) { + return Ok(p); + } + if !opts.auto_download { + return Err(ChromeFetchError::NotFoundAndDownloadDisabled); + } + download_chrome(&cache, CHROME_VERSION.trim()).await +} diff --git a/crates/engine/src/lib.rs b/crates/engine/src/lib.rs index 04364db..33cd8b1 100644 --- a/crates/engine/src/lib.rs +++ b/crates/engine/src/lib.rs @@ -16,6 +16,9 @@ pub mod chromium; #[cfg(feature = "libreoffice")] pub mod libreoffice; +#[cfg(feature = "chrome-fetch")] +pub mod chrome_fetch; + pub use bookmarks::{Bookmark, read_bookmarks, write_bookmarks, flatten_bookmarks}; pub use encrypt::{EncryptionAlgorithm, Permissions, encrypt_pdf, decrypt_pdf, is_encrypted, qpdf_available as encrypt_qpdf_available}; pub use pdfa::{PdfAProfile, convert_to_pdfa, ghostscript_available, qpdf_available}; diff --git a/crates/js/Cargo.toml b/crates/js/Cargo.toml index e8608d2..e4a0ad3 100644 --- a/crates/js/Cargo.toml +++ b/crates/js/Cargo.toml @@ -9,4 +9,19 @@ description = "Folio Node.js bindings (napi-rs)" name = "folio_js" crate-type = ["cdylib"] +[features] +default = ["chromium", "libreoffice", "chrome-fetch"] +chromium = ["engine/chromium"] +libreoffice = ["engine/libreoffice"] +chrome-fetch = ["engine/chrome-fetch"] + [dependencies] +engine = { workspace = true } +napi = { workspace = 
true, features = ["napi8", "tokio_rt", "serde-json"] } +napi-derive = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +tokio = { workspace = true } + +[build-dependencies] +napi-build = { workspace = true } diff --git a/crates/js/build.rs b/crates/js/build.rs new file mode 100644 index 0000000..0f1b010 --- /dev/null +++ b/crates/js/build.rs @@ -0,0 +1,3 @@ +fn main() { + napi_build::setup(); +} diff --git a/crates/js/src/errors.rs b/crates/js/src/errors.rs new file mode 100644 index 0000000..97e49ca --- /dev/null +++ b/crates/js/src/errors.rs @@ -0,0 +1,34 @@ +//! Map `engine::EngineError` and `chrome_fetch::ChromeFetchError` to +//! tagged napi `Error` instances. The JS-side loader (Task 9) inspects +//! the `[Tag]` prefix on the message and raises a typed JS error subclass. + +use engine::EngineError; +use napi::{Error, Status}; + +/// Convert an [`EngineError`] to a napi [`Error`] with a tagged message. +pub fn engine_to_napi(err: EngineError) -> Error { + let (status, msg) = match err { + EngineError::ChromeNotFound { .. } => + (Status::GenericFailure, format!("[ChromeNotFound] {err}")), + EngineError::Timeout(_) => + (Status::GenericFailure, format!("[Timeout] {err}")), + EngineError::InvalidOption(_) | EngineError::InvalidPageRange(_) => + (Status::InvalidArg, format!("[Validation] {err}")), + EngineError::ChromeLaunch(_) | EngineError::Cdp(_) | EngineError::Navigation { .. } => + (Status::GenericFailure, format!("[Chromium] {err}")), + // No specific Office variant; route everything else to FolioError on the JS side. + _ => (Status::GenericFailure, err.to_string()), + }; + Error::new(status, msg) +} + +/// Convert a [`ChromeFetchError`] to a napi [`Error`] with a tagged message. 
+#[cfg(feature = "chrome-fetch")] +pub fn fetch_to_napi(err: engine::chrome_fetch::ChromeFetchError) -> Error { + use engine::chrome_fetch::ChromeFetchError as E; + let prefix = match err { + E::NotFoundAndDownloadDisabled => "[ChromeNotFound]", + _ => "[ChromeFetch]", + }; + Error::new(Status::GenericFailure, format!("{prefix} {err}")) +} diff --git a/crates/js/src/folio.rs b/crates/js/src/folio.rs new file mode 100644 index 0000000..9d77662 --- /dev/null +++ b/crates/js/src/folio.rs @@ -0,0 +1,256 @@ +//! `class Folio` β€” async Node.js facade over the engine. + +use std::sync::Arc; + +use napi::bindgen_prelude::*; +use napi_derive::napi; +use serde_json::Value as Json; + +#[cfg(feature = "chromium")] +use engine::ChromiumEngine; +#[cfg(feature = "libreoffice")] +use engine::LibreOfficeEngine; + +use crate::errors::engine_to_napi; +#[cfg(feature = "chrome-fetch")] +use crate::errors::fetch_to_napi; + +/// Options passed to [`Folio::create`]. +#[napi(object)] +pub struct CreateOptions { + /// Which engines to enable. Defaults to `["chromium", "office"]`. + pub engines: Option>, + /// Explicit path to a Chrome/Chromium executable. + pub chrome_path: Option, + /// Automatically download Chrome if no system Chrome is found. + pub auto_download_chrome: Option, + /// Directory used to cache downloaded Chrome binaries. + pub chrome_cache_dir: Option, +} + +/// Async Folio client that wraps the PDF/document engines. +#[napi] +pub struct Folio { + #[cfg(feature = "chromium")] + chromium: Option>, + #[cfg(feature = "libreoffice")] + libreoffice: Option>, +} + +#[napi] +impl Folio { + /// Create a new Folio instance, launching the requested engines. 
+ #[napi(factory)] + pub async fn create(opts: Option) -> Result { + let opts = opts.unwrap_or(CreateOptions { + engines: None, + chrome_path: None, + auto_download_chrome: None, + chrome_cache_dir: None, + }); + let want = opts.engines.unwrap_or_else(|| vec!["chromium".into(), "office".into()]); + let want_chromium = want.iter().any(|s| s == "chromium"); + let want_office = want.iter().any(|s| s == "office" || s == "libreoffice"); + + #[cfg(feature = "chromium")] + let chromium = if want_chromium { + Some(Arc::new( + launch_chromium( + opts.chrome_path.as_deref(), + opts.auto_download_chrome.unwrap_or(true), + opts.chrome_cache_dir.as_deref(), + ) + .await?, + )) + } else { + None + }; + + #[cfg(feature = "libreoffice")] + let libreoffice = if want_office { + Some(Arc::new( + LibreOfficeEngine::discover().await.map_err(engine_to_napi)?, + )) + } else { + None + }; + + #[cfg(not(feature = "chromium"))] + let _ = ( + want_chromium, + opts.chrome_path, + opts.auto_download_chrome, + opts.chrome_cache_dir, + ); + #[cfg(not(feature = "libreoffice"))] + let _ = want_office; + + Ok(Folio { + #[cfg(feature = "chromium")] + chromium, + #[cfg(feature = "libreoffice")] + libreoffice, + }) + } + + /// Convert an HTML string to a PDF buffer. + #[napi] + pub async fn html_to_pdf(&self, html: String, options: Option) -> Result { + let opts: engine::PdfOptions = parse_opts(options)?; + #[cfg(feature = "chromium")] + { + let engine = self.chromium.clone().ok_or_else(|| { + Error::new( + Status::GenericFailure, + "[EngineDisabled] chromium engine not enabled", + ) + })?; + let ctx = engine::RequestContext::default(); + let bytes = engine + .html_to_pdf(&html, None, &opts, &ctx) + .await + .map_err(engine_to_napi)?; + Ok(bytes.into()) + } + #[cfg(not(feature = "chromium"))] + { + let _ = (html, opts); + Err(Error::new( + Status::GenericFailure, + "[EngineDisabled] chromium feature not compiled in", + )) + } + } + + /// Convert a URL to a PDF buffer. 
+ #[napi] + pub async fn url_to_pdf(&self, url: String, options: Option) -> Result { + let opts: engine::PdfOptions = parse_opts(options)?; + #[cfg(feature = "chromium")] + { + let engine = self.chromium.clone().ok_or_else(|| { + Error::new( + Status::GenericFailure, + "[EngineDisabled] chromium engine not enabled", + ) + })?; + let ctx = engine::RequestContext::default(); + let bytes = engine + .url_to_pdf(&url, &opts, &ctx) + .await + .map_err(engine_to_napi)?; + Ok(bytes.into()) + } + #[cfg(not(feature = "chromium"))] + { + let _ = (url, opts); + Err(Error::new( + Status::GenericFailure, + "[EngineDisabled] chromium feature not compiled in", + )) + } + } + + /// Convert a Markdown string to a PDF buffer. + #[napi] + pub async fn markdown_to_pdf(&self, md: String, options: Option) -> Result { + let opts: engine::PdfOptions = parse_opts(options)?; + #[cfg(feature = "chromium")] + { + let engine = self.chromium.clone().ok_or_else(|| { + Error::new( + Status::GenericFailure, + "[EngineDisabled] chromium engine not enabled", + ) + })?; + let ctx = engine::RequestContext::default(); + let bytes = engine + .markdown_to_pdf(&md, &opts, &ctx) + .await + .map_err(engine_to_napi)?; + Ok(bytes.into()) + } + #[cfg(not(feature = "chromium"))] + { + let _ = (md, opts); + Err(Error::new( + Status::GenericFailure, + "[EngineDisabled] chromium feature not compiled in", + )) + } + } + + /// Convert an office document at `path` to a PDF buffer. 
+ #[napi] + pub async fn office_to_pdf(&self, path: String, options: Option) -> Result { + let opts: engine::OfficeOptions = parse_opts(options)?; + #[cfg(feature = "libreoffice")] + { + let engine = self.libreoffice.clone().ok_or_else(|| { + Error::new( + Status::GenericFailure, + "[EngineDisabled] libreoffice engine not enabled", + ) + })?; + let p = std::path::PathBuf::from(path); + let bytes = engine.convert(&p, &opts).await.map_err(engine_to_napi)?; + Ok(bytes.into()) + } + #[cfg(not(feature = "libreoffice"))] + { + let _ = (path, opts); + Err(Error::new( + Status::GenericFailure, + "[EngineDisabled] libreoffice feature not compiled in", + )) + } + } + + /// Shut down the Folio instance and release resources. + #[napi] + pub async fn close(&self) -> Result<()> { + Ok(()) + } +} + +fn parse_opts(v: Option) -> Result { + match v { + None => Ok(T::default()), + Some(j) => serde_json::from_value(j).map_err(|e| { + Error::new( + Status::InvalidArg, + format!("[Validation] invalid options: {e}"), + ) + }), + } +} + +#[cfg(feature = "chromium")] +async fn launch_chromium( + chrome_path: Option<&str>, + auto_download: bool, + cache_dir: Option<&str>, +) -> Result { + let executable: Option = match chrome_path { + Some(p) => Some(p.into()), + None => { + #[cfg(feature = "chrome-fetch")] + { + let opts = engine::chrome_fetch::EnsureOptions { + explicit: None, + cache_dir: cache_dir.map(Into::into), + auto_download, + }; + Some(engine::chrome_fetch::ensure_chrome(&opts).await.map_err(fetch_to_napi)?) 
+ } + #[cfg(not(feature = "chrome-fetch"))] + { + let _ = (auto_download, cache_dir); + None + } + } + }; + let mut cfg = engine::BrowserConfig::default(); + cfg.executable = executable; + engine::ChromiumEngine::launch_with(cfg).await.map_err(engine_to_napi) +} diff --git a/crates/js/src/lib.rs b/crates/js/src/lib.rs index b93cf3f..6c0e600 100644 --- a/crates/js/src/lib.rs +++ b/crates/js/src/lib.rs @@ -1,14 +1,6 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right -} +//! Folio Node.js bindings β€” see `bindings/node/README.md`. -#[cfg(test)] -mod tests { - use super::*; +mod errors; +mod folio; - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} +pub use folio::{CreateOptions, Folio}; diff --git a/crates/py/Cargo.toml b/crates/py/Cargo.toml index 5c6909e..cb3f68d 100644 --- a/crates/py/Cargo.toml +++ b/crates/py/Cargo.toml @@ -6,7 +6,21 @@ license.workspace = true description = "Folio Python bindings (PyO3)" [lib] -name = "folio_py" +name = "_native" crate-type = ["cdylib"] +[features] +default = ["chromium", "libreoffice", "chrome-fetch"] +chromium = ["engine/chromium"] +libreoffice = ["engine/libreoffice"] +chrome-fetch = ["engine/chrome-fetch"] + [dependencies] +engine = { workspace = true } +pyo3 = { workspace = true, features = ["extension-module", "abi3-py38"] } +pyo3-async-runtimes = { version = "0.22", features = ["tokio-runtime"] } +tokio = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +parking_lot = "0.12" diff --git a/crates/py/src/errors.rs b/crates/py/src/errors.rs new file mode 100644 index 0000000..b5204e3 --- /dev/null +++ b/crates/py/src/errors.rs @@ -0,0 +1,54 @@ +//! Map `engine::EngineError` into a Python exception hierarchy. 
+ +use engine::EngineError; +use pyo3::create_exception; +use pyo3::exceptions::PyException; +use pyo3::prelude::*; + +create_exception!(_native, FolioError, PyException); +create_exception!(_native, ChromeNotFoundError, FolioError); +create_exception!(_native, ChromeFetchError, FolioError); +create_exception!(_native, ChromiumError, FolioError); +create_exception!(_native, OfficeError, FolioError); +create_exception!(_native, EngineDisabledError, FolioError); +create_exception!(_native, TimeoutError, FolioError); +create_exception!(_native, ValidationError, FolioError); + +pub fn engine_to_py(err: EngineError) -> PyErr { + match err { + EngineError::ChromeNotFound { .. } => ChromeNotFoundError::new_err(err.to_string()), + EngineError::Timeout(_) => TimeoutError::new_err(err.to_string()), + EngineError::InvalidOption(_) | EngineError::InvalidPageRange(_) => { + ValidationError::new_err(err.to_string()) + } + EngineError::ChromeLaunch(_) | EngineError::Cdp(_) | EngineError::Navigation { .. } => { + ChromiumError::new_err(err.to_string()) + } + // No specific Office variant in EngineError; office failures surface as + // Internal / Io / Pdf depending on what failed. Route everything else + // to the generic FolioError. If finer routing matters later, the engine + // can grow an Office variant. 
+ _ => FolioError::new_err(err.to_string()), + } +} + +#[cfg(feature = "chrome-fetch")] +pub fn fetch_to_py(err: engine::chrome_fetch::ChromeFetchError) -> PyErr { + use engine::chrome_fetch::ChromeFetchError as E; + match err { + E::NotFoundAndDownloadDisabled => ChromeNotFoundError::new_err(err.to_string()), + _ => ChromeFetchError::new_err(err.to_string()), + } +} + +pub fn register(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("FolioError", py.get_type_bound::())?; + m.add("ChromeNotFoundError", py.get_type_bound::())?; + m.add("ChromeFetchError", py.get_type_bound::())?; + m.add("ChromiumError", py.get_type_bound::())?; + m.add("OfficeError", py.get_type_bound::())?; + m.add("EngineDisabledError", py.get_type_bound::())?; + m.add("TimeoutError", py.get_type_bound::())?; + m.add("ValidationError", py.get_type_bound::())?; + Ok(()) +} diff --git a/crates/py/src/folio_async.rs b/crates/py/src/folio_async.rs new file mode 100644 index 0000000..bcdf6c2 --- /dev/null +++ b/crates/py/src/folio_async.rs @@ -0,0 +1,218 @@ +//! `class AsyncFolio` β€” async facade returning Python awaitables. +//! Engine futures are bridged to the caller's running event loop via +//! `pyo3_async_runtimes::tokio::future_into_py`. 
+ +use std::sync::Arc; + +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict}; +use pyo3_async_runtimes::tokio::future_into_py; + +#[cfg(feature = "chromium")] +use engine::ChromiumEngine; +#[cfg(feature = "libreoffice")] +use engine::LibreOfficeEngine; + +use crate::errors::{engine_to_py, EngineDisabledError}; +use crate::types::from_py; + +#[pyclass(name = "AsyncFolio", module = "folio")] +pub struct AsyncFolio { + #[cfg(feature = "chromium")] + chromium: Option>, + #[cfg(feature = "libreoffice")] + libreoffice: Option>, +} + +#[pymethods] +impl AsyncFolio { + #[staticmethod] + #[pyo3(signature = (engines = None, chrome_path = None, auto_download_chrome = true, chrome_cache_dir = None))] + fn create<'py>( + py: Python<'py>, + engines: Option>, + chrome_path: Option, + auto_download_chrome: bool, + chrome_cache_dir: Option, + ) -> PyResult> { + let want = engines.unwrap_or_else(|| vec!["chromium".into(), "office".into()]); + let want_chromium = want.iter().any(|s| s == "chromium"); + let want_office = want.iter().any(|s| s == "office" || s == "libreoffice"); + future_into_py(py, async move { + #[cfg(feature = "chromium")] + let chromium = if want_chromium { + Some(Arc::new( + crate::launch::launch_chromium( + chrome_path.as_deref(), + auto_download_chrome, + chrome_cache_dir.as_deref(), + ) + .await?, + )) + } else { + None + }; + #[cfg(feature = "libreoffice")] + let libreoffice = if want_office { + Some(Arc::new(crate::launch::launch_libreoffice().await?)) + } else { + None + }; + #[cfg(not(feature = "chromium"))] + let _ = (want_chromium, chrome_path, auto_download_chrome, chrome_cache_dir); + #[cfg(not(feature = "libreoffice"))] + let _ = want_office; + Python::with_gil(|py| { + Ok::( + Py::new( + py, + AsyncFolio { + #[cfg(feature = "chromium")] + chromium, + #[cfg(feature = "libreoffice")] + libreoffice, + }, + )? 
+ .into_any() + .into(), + ) + }) + }) + } + + fn html_to_pdf<'py>( + &self, + py: Python<'py>, + html: String, + options: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let opts: engine::PdfOptions = from_py(options)?; + #[cfg(feature = "chromium")] + { + let engine = self + .chromium + .clone() + .ok_or_else(|| EngineDisabledError::new_err("chromium engine not enabled"))?; + future_into_py(py, async move { + let ctx = engine::RequestContext::default(); + let bytes = engine + .html_to_pdf(&html, None, &opts, &ctx) + .await + .map_err(engine_to_py)?; + Python::with_gil(|py| Ok::(PyBytes::new_bound(py, &bytes).into())) + }) + } + #[cfg(not(feature = "chromium"))] + { + let _ = (html, opts); + Err(EngineDisabledError::new_err("chromium feature not compiled in")) + } + } + + fn url_to_pdf<'py>( + &self, + py: Python<'py>, + url: String, + options: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let opts: engine::PdfOptions = from_py(options)?; + #[cfg(feature = "chromium")] + { + let engine = self + .chromium + .clone() + .ok_or_else(|| EngineDisabledError::new_err("chromium engine not enabled"))?; + future_into_py(py, async move { + let ctx = engine::RequestContext::default(); + let bytes = engine + .url_to_pdf(&url, &opts, &ctx) + .await + .map_err(engine_to_py)?; + Python::with_gil(|py| Ok::(PyBytes::new_bound(py, &bytes).into())) + }) + } + #[cfg(not(feature = "chromium"))] + { + let _ = (url, opts); + Err(EngineDisabledError::new_err("chromium feature not compiled in")) + } + } + + fn markdown_to_pdf<'py>( + &self, + py: Python<'py>, + md: String, + options: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let opts: engine::PdfOptions = from_py(options)?; + #[cfg(feature = "chromium")] + { + let engine = self + .chromium + .clone() + .ok_or_else(|| EngineDisabledError::new_err("chromium engine not enabled"))?; + future_into_py(py, async move { + let ctx = engine::RequestContext::default(); + let bytes = engine + .markdown_to_pdf(&md, &opts, &ctx) + .await + 
.map_err(engine_to_py)?; + Python::with_gil(|py| Ok::(PyBytes::new_bound(py, &bytes).into())) + }) + } + #[cfg(not(feature = "chromium"))] + { + let _ = (md, opts); + Err(EngineDisabledError::new_err("chromium feature not compiled in")) + } + } + + fn office_to_pdf<'py>( + &self, + py: Python<'py>, + path: String, + options: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let opts: engine::OfficeOptions = from_py(options)?; + #[cfg(feature = "libreoffice")] + { + let engine = self + .libreoffice + .clone() + .ok_or_else(|| EngineDisabledError::new_err("libreoffice engine not enabled"))?; + future_into_py(py, async move { + let p = std::path::PathBuf::from(path); + let bytes = engine.convert(&p, &opts).await.map_err(engine_to_py)?; + Python::with_gil(|py| Ok::(PyBytes::new_bound(py, &bytes).into())) + }) + } + #[cfg(not(feature = "libreoffice"))] + { + let _ = (path, opts); + Err(EngineDisabledError::new_err("libreoffice feature not compiled in")) + } + } + + fn close<'py>(&self, py: Python<'py>) -> PyResult> { + future_into_py(py, async move { + Python::with_gil(|py| Ok::(py.None())) + }) + } + + fn __aenter__<'py>(slf: Py, py: Python<'py>) -> PyResult> { + // Return an awaitable that resolves to self. + future_into_py(py, async move { + Python::with_gil(|py| Ok::(slf.into_any().into_py(py))) + }) + } + + fn __aexit__<'py>( + &self, + py: Python<'py>, + _t: PyObject, + _v: PyObject, + _tb: PyObject, + ) -> PyResult> { + self.close(py) + } +} diff --git a/crates/py/src/folio_sync.rs b/crates/py/src/folio_sync.rs new file mode 100644 index 0000000..b5ba9be --- /dev/null +++ b/crates/py/src/folio_sync.rs @@ -0,0 +1,214 @@ +//! `class Folio` β€” sync facade over the engine using a shared tokio runtime. 
+ +use std::sync::Arc; + +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict}; + +#[cfg(feature = "chromium")] +use engine::ChromiumEngine; +#[cfg(feature = "libreoffice")] +use engine::LibreOfficeEngine; + +use crate::errors::{engine_to_py, EngineDisabledError}; +use crate::runtime::runtime; +use crate::types::from_py; + +pub(crate) struct State { + #[cfg(feature = "chromium")] + pub chromium: Option>, + #[cfg(feature = "libreoffice")] + pub libreoffice: Option>, + pub closed: bool, +} + +#[pyclass(name = "Folio", module = "folio")] +pub struct Folio { + pub(crate) inner: parking_lot::Mutex, +} + +#[pymethods] +impl Folio { + #[new] + #[pyo3(signature = (engines = None, chrome_path = None, auto_download_chrome = true, chrome_cache_dir = None))] + fn new( + py: Python<'_>, + engines: Option>, + chrome_path: Option, + auto_download_chrome: bool, + chrome_cache_dir: Option, + ) -> PyResult { + let want = engines.unwrap_or_else(|| vec!["chromium".into(), "office".into()]); + let want_chromium = want.iter().any(|s| s == "chromium"); + let want_office = want.iter().any(|s| s == "office" || s == "libreoffice"); + + py.allow_threads(|| -> PyResult { + runtime().block_on(async move { + #[cfg(feature = "chromium")] + let chromium = if want_chromium { + Some(Arc::new( + crate::launch::launch_chromium( + chrome_path.as_deref(), + auto_download_chrome, + chrome_cache_dir.as_deref(), + ) + .await?, + )) + } else { + None + }; + + #[cfg(feature = "libreoffice")] + let libreoffice = if want_office { + Some(Arc::new(crate::launch::launch_libreoffice().await?)) + } else { + None + }; + + #[cfg(not(feature = "chromium"))] + let _ = (want_chromium, chrome_path, auto_download_chrome, chrome_cache_dir); + #[cfg(not(feature = "libreoffice"))] + let _ = want_office; + + Ok(Folio { + inner: parking_lot::Mutex::new(State { + #[cfg(feature = "chromium")] + chromium, + #[cfg(feature = "libreoffice")] + libreoffice, + closed: false, + }), + }) + }) + }) + } + + fn html_to_pdf<'py>( + 
&self, + py: Python<'py>, + html: &str, + options: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let opts: engine::PdfOptions = from_py(options)?; + let engine = self.chromium_or_err()?; + let html = html.to_string(); + let ctx = engine::RequestContext::default(); + let bytes = py + .allow_threads(|| { + runtime().block_on(async move { + engine.html_to_pdf(&html, None, &opts, &ctx).await + }) + }) + .map_err(engine_to_py)?; + Ok(PyBytes::new_bound(py, &bytes)) + } + + fn url_to_pdf<'py>( + &self, + py: Python<'py>, + url: &str, + options: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let opts: engine::PdfOptions = from_py(options)?; + let engine = self.chromium_or_err()?; + let url = url.to_string(); + let ctx = engine::RequestContext::default(); + let bytes = py + .allow_threads(|| { + runtime().block_on(async move { engine.url_to_pdf(&url, &opts, &ctx).await }) + }) + .map_err(engine_to_py)?; + Ok(PyBytes::new_bound(py, &bytes)) + } + + fn markdown_to_pdf<'py>( + &self, + py: Python<'py>, + md: &str, + options: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let opts: engine::PdfOptions = from_py(options)?; + let engine = self.chromium_or_err()?; + let md = md.to_string(); + let ctx = engine::RequestContext::default(); + let bytes = py + .allow_threads(|| { + runtime() + .block_on(async move { engine.markdown_to_pdf(&md, &opts, &ctx).await }) + }) + .map_err(engine_to_py)?; + Ok(PyBytes::new_bound(py, &bytes)) + } + + fn office_to_pdf<'py>( + &self, + py: Python<'py>, + path: &str, + options: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let opts: engine::OfficeOptions = from_py(options)?; + let engine = self.office_or_err()?; + let p = std::path::PathBuf::from(path); + let bytes = py + .allow_threads(|| { + runtime().block_on(async move { engine.convert(&p, &opts).await }) + }) + .map_err(engine_to_py)?; + Ok(PyBytes::new_bound(py, &bytes)) + } + + fn close(&self, py: Python<'_>) -> PyResult<()> { + py.allow_threads(|| { + let mut state = 
self.inner.lock(); + if state.closed { + return Ok(()); + } + state.closed = true; + Ok::<(), PyErr>(()) + }) + } + + fn __enter__(slf: Py) -> Py { + slf + } + + fn __exit__( + &self, + py: Python<'_>, + _t: PyObject, + _v: PyObject, + _tb: PyObject, + ) -> PyResult<()> { + self.close(py) + } +} + +impl Folio { + #[cfg(feature = "chromium")] + fn chromium_or_err(&self) -> PyResult> { + self.inner.lock().chromium.clone().ok_or_else(|| { + EngineDisabledError::new_err("chromium engine not enabled for this Folio instance") + }) + } + + #[cfg(not(feature = "chromium"))] + fn chromium_or_err(&self) -> PyResult<()> { + Err(EngineDisabledError::new_err( + "chromium feature not compiled in", + )) + } + + #[cfg(feature = "libreoffice")] + fn office_or_err(&self) -> PyResult> { + self.inner.lock().libreoffice.clone().ok_or_else(|| { + EngineDisabledError::new_err("libreoffice engine not enabled for this Folio instance") + }) + } + + #[cfg(not(feature = "libreoffice"))] + fn office_or_err(&self) -> PyResult<()> { + Err(EngineDisabledError::new_err( + "libreoffice feature not compiled in", + )) + } +} diff --git a/crates/py/src/launch.rs b/crates/py/src/launch.rs new file mode 100644 index 0000000..1264f9f --- /dev/null +++ b/crates/py/src/launch.rs @@ -0,0 +1,53 @@ +//! Centralised engine-launch wiring for both Folio and AsyncFolio. 
+ +#[cfg(feature = "chromium")] +use engine::BrowserConfig; + +#[cfg(feature = "chromium")] +pub async fn launch_chromium( + chrome_path: Option<&str>, + auto_download: bool, + cache_dir: Option<&str>, +) -> Result { + use crate::errors::engine_to_py; + #[cfg(feature = "chrome-fetch")] + use crate::errors::fetch_to_py; + + let executable: Option = match chrome_path { + Some(p) => Some(p.into()), + None => { + #[cfg(feature = "chrome-fetch")] + { + let opts = engine::chrome_fetch::EnsureOptions { + explicit: None, + cache_dir: cache_dir.map(Into::into), + auto_download, + }; + Some( + engine::chrome_fetch::ensure_chrome(&opts) + .await + .map_err(fetch_to_py)?, + ) + } + #[cfg(not(feature = "chrome-fetch"))] + { + let _ = (auto_download, cache_dir); + None + } + } + }; + + let mut cfg = BrowserConfig::default(); + cfg.executable = executable; + engine::ChromiumEngine::launch_with(cfg) + .await + .map_err(engine_to_py) +} + +#[cfg(feature = "libreoffice")] +pub async fn launch_libreoffice() -> Result { + use crate::errors::engine_to_py; + engine::LibreOfficeEngine::discover() + .await + .map_err(engine_to_py) +} diff --git a/crates/py/src/lib.rs b/crates/py/src/lib.rs index b93cf3f..0ffa02d 100644 --- a/crates/py/src/lib.rs +++ b/crates/py/src/lib.rs @@ -1,14 +1,24 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right -} +//! Folio Python bindings β€” see `bindings/python/README.md`. + +mod errors; +mod folio_async; +mod folio_sync; +mod launch; +mod runtime; +mod types; -#[cfg(test)] -mod tests { - use super::*; +use pyo3::prelude::*; - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } +#[pymodule] +fn _native(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { + // Initialize the tokio runtime builder that pyo3-async-runtimes will use + // to drive futures returned by AsyncFolio methods. + // pyo3_async_runtimes::tokio::init takes a Builder (not a built Runtime). 
+ let mut builder = tokio::runtime::Builder::new_multi_thread(); + builder.enable_all().thread_name("folio-py-async"); + pyo3_async_runtimes::tokio::init(builder); + errors::register(py, m)?; + m.add_class::()?; + m.add_class::()?; + Ok(()) } diff --git a/crates/py/src/runtime.rs b/crates/py/src/runtime.rs new file mode 100644 index 0000000..29d7c7d --- /dev/null +++ b/crates/py/src/runtime.rs @@ -0,0 +1,17 @@ +//! Shared single tokio runtime used by sync Folio. +//! AsyncFolio uses pyo3-async-runtimes' bridged runtime instead. + +use std::sync::OnceLock; +use tokio::runtime::Runtime; + +static RUNTIME: OnceLock = OnceLock::new(); + +pub fn runtime() -> &'static Runtime { + RUNTIME.get_or_init(|| { + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .thread_name("folio-py") + .build() + .expect("init folio-py tokio runtime") + }) +} diff --git a/crates/py/src/types.rs b/crates/py/src/types.rs new file mode 100644 index 0000000..774473a --- /dev/null +++ b/crates/py/src/types.rs @@ -0,0 +1,55 @@ +//! Convert between Python dicts and engine option structs by routing +//! through `serde_json`. This avoids hand-writing a `FromPyObject` impl +//! for every engine option, and matches the engine's existing `serde` +//! contract used by the HTTP server. 
+ +use pyo3::prelude::*; +use pyo3::types::{PyDict, PyString, PyStringMethods as _}; +use serde::de::DeserializeOwned; + +pub fn from_py(opts: Option<&Bound<'_, PyDict>>) -> PyResult { + let Some(d) = opts else { + return Ok(T::default()); + }; + let json = pyobject_to_json(d.as_any())?; + serde_json::from_value(json).map_err(|e| { + super::errors::ValidationError::new_err(format!("invalid options: {e}")) + }) +} + +fn pyobject_to_json(obj: &Bound<'_, PyAny>) -> PyResult { + if obj.is_none() { + return Ok(serde_json::Value::Null); + } + if let Ok(b) = obj.extract::() { + return Ok(b.into()); + } + if let Ok(i) = obj.extract::() { + return Ok(i.into()); + } + if let Ok(f) = obj.extract::() { + return Ok(f.into()); + } + if let Ok(s) = obj.downcast::() { + return Ok(s.to_string_lossy().to_string().into()); + } + if let Ok(seq) = obj.downcast::() { + let mut out = Vec::with_capacity(seq.len()); + for item in seq.iter() { + out.push(pyobject_to_json(&item)?); + } + return Ok(serde_json::Value::Array(out)); + } + if let Ok(d) = obj.downcast::() { + let mut map = serde_json::Map::new(); + for (k, v) in d.iter() { + let k: String = k.extract()?; + map.insert(k, pyobject_to_json(&v)?); + } + return Ok(serde_json::Value::Object(map)); + } + let type_name = obj.get_type().name()?.to_string_lossy().to_string(); + Err(super::errors::ValidationError::new_err(format!( + "unsupported python type: {type_name}" + ))) +} diff --git a/docs/markdown-plus.md b/docs/markdown-plus.md new file mode 100644 index 0000000..9456cbf --- /dev/null +++ b/docs/markdown-plus.md @@ -0,0 +1,307 @@ +# Folio Markdown+ β€” A New Markdownβ†’PDF Variation + +> **Status:** Design proposal. Companion to `comparison.md` at the repo root. +> **Scope:** Defines a third Markdown rendering route for Folio that sits +> alongside the existing `/forms/chromium/convert/markdown` (basic) and the +> Gotenberg-compatible template-based path. 
Targets document-quality output +> (reports, dossiers, technical writing) rather than the raw GFM-in-a-box +> baseline. + +--- + +## 1. Why a new variation? + +Today Folio offers a single Markdown pipeline (`crates/engine/src/chromium/markdown.rs`): + +- `pulldown_cmark` with `Options::all()` (tables, strikethrough, task lists, + footnotes, smart punctuation). +- Wrapped in a fixed `` shell with a single bundled stylesheet + (`markdown.css`). +- Rendered through Chromium → PDF. + +That covers the Gotenberg-equivalent baseline, but it falls short for the +target users implied by the operator console + observability investment: +people producing **report-grade PDFs at scale** — incident write-ups, +generated dossiers, customer-facing one-pagers, weekly digests. + +Gaps observed against both the current code and Gotenberg's +`/forms/chromium/convert/markdown`: + +| Need | Current Folio | Gotenberg | Gap | +|---------------------------------------|---------------|-----------|----------------| +| YAML / TOML front-matter for metadata | ❌ | ❌ | both miss | +| Math (KaTeX / MathJax) rendering | ❌ | ❌ | both miss | +| Mermaid / PlantUML diagrams | ❌ | ❌ | both miss | +| Syntax-highlighted code | ❌ (CSS only) | ❌ | both miss | +| Admonitions / callouts | ❌ | ❌ | both miss | +| Auto table-of-contents | ❌ | ❌ | both miss | +| Themed templates (named styles) | ❌ | ❌ | both miss | +| Header/footer driven by front-matter | ❌ | partial | folio behind | +| Cover page generation | ❌ | ❌ | both miss | +| Cross-document includes (`@include`) | ❌ | ❌ | both miss | +| Asset upload + relative paths | partial | ✅ | folio behind | + +The "new variation" — **Markdown+** — targets the bottom half of that table +in a single coherent route. It is *not* a replacement for the basic route; +the basic route stays as the cheapest, fastest, GFM-baseline path. + +--- + +## 2.
Route design + +``` +POST /forms/chromium/convert/markdown-plus +``` + +Multipart form-data, same auth/observability stack as every other Chromium +route. Discovery via `/_/`, Prometheus, OTel traces wired identically. + +### 2.1 Form fields + +| Field | Type | Required | Purpose | +|----------------------|----------------|----------|------------------------------------------------------------| +| `index.md` | file | ✅ | Entry-point document | +| `*.md` | file (repeat) | ❌ | Additional documents resolvable via `@include` | +| `assets/**` | files | ❌ | Images, fonts, custom CSS resolvable by relative path | +| `theme` | text | ❌ | Named theme: `default`, `report`, `book`, `slide`, `memo` | +| `stylesheet` | file | ❌ | Override CSS — applied **after** the theme | +| `math` | text | ❌ | `none` \| `katex` \| `mathjax` (default: `katex` if `$`s) | +| `diagrams` | text | ❌ | `none` \| `mermaid` \| `auto` (default: `auto`) | +| `highlight` | text | ❌ | `none` \| `prism` \| `treesitter` (default: `prism`) | +| `toc` | text | ❌ | `none` \| `auto` \| `front` \| `back` (default: `auto`) | +| `cover` | text | ❌ | `none` \| `auto` (renders cover from front-matter) | +| `frontMatterFormat` | text | ❌ | `yaml` \| `toml` (default: detect by fence) | +| ... (all PDF options from basic route inherited unchanged) | + +Anything in the basic route's PDF options block (paper size, margins, +landscape, header/footer HTML, scale, page ranges, cookies, headers) flows +through unchanged so Markdown+ does not become a parallel options surface. + +### 2.2 Front-matter contract + +A document opens with a fenced front-matter block: + +```markdown +--- +title: Q2 Reliability Review +author: Folio SRE +date: 2026-04-30 +classification: internal +toc: true +theme: report +header: "{title} — {classification}" +footer: "Page {pageNumber} of {totalPages}" +--- + +# Executive summary +... +``` + +The renderer: + +1. Strips and parses the block (`serde_yaml` / `toml`). +2.
Promotes selected keys onto the PDF: `title` → ``, `author` → + `dc:creator` metadata, `date` → header substitution, etc. +3. Substitutes `{title}`, `{author}`, `{date}`, `{pageNumber}`, + `{totalPages}`, `{url}`, `{classification}` inside header/footer HTML + *before* it reaches Chromium. +4. Anything in front-matter beats the matching form field — front-matter is + the document's voice; form fields are the operator's voice. (Inverse + precedence is wrong: it would let an operator silently relabel a + classified document.) + +### 2.3 Pipeline + +``` +markdown bytes + │ + ├── front-matter split (yaml|toml) + │ + ├── @include resolution (recursive, cycle-detected, depth-capped) + │ + ├── pulldown-cmark (Options::all + custom event stream) + │ │ + │ ├── inline math $...$ → <span class="math math-inline"> + │ ├── block math $$...$$ → <div class="math math-display"> + │ ├── ```mermaid → <pre class="mermaid">...</pre> + │ ├── ```lang → highlighted <pre><code class="lang-..."> + │ └── > [!NOTE]… admonition → <aside class="callout note"> + │ + ├── auto-toc injection (heading walk, slugged anchors, configurable depth) + │ + ├── theme.css + user stylesheet inlined + │ + ├── KaTeX/Mermaid/Prism JS bundles inlined (or skipped if extension off) + │ + └── Chromium render with extended waitFunction: + () => window.__folioReady === true + set after KaTeX + Mermaid finish.
+``` + +Each stage owns one file under +`crates/engine/src/chromium/markdown_plus/`: + +``` +markdown_plus/ +├── mod.rs // public render() and option types +├── frontmatter.rs // parse + extract +├── include.rs // @include resolution +├── extensions.rs // pulldown-cmark event-stream rewrites +├── toc.rs // heading walk + injection +├── theme.rs // named themes (embedded CSS) +├── assets.rs // KaTeX / Mermaid / Prism inlining +└── ready.rs // window.__folioReady wait protocol +``` + +This mirrors the existing module layout (`launch.rs`, `render.rs`, +`screenshot.rs`, `wait.rs`, `pdf_params.rs`) — no new architectural +patterns introduced. + +--- + +## 3. Concrete syntax additions + +All additions are **optional** — a plain GFM document still renders +identically to the basic route (modulo theme). + +### 3.1 Math + +```markdown +The continuous form is $\hat{f}(\xi) = \int f(x)\,e^{-2\pi i x\xi}\,dx$, +and the discrete equivalent: + +$$ +X_k = \sum_{n=0}^{N-1} x_n \cdot e^{-2\pi i k n / N} +$$ +``` + +### 3.2 Diagrams + +````markdown +```mermaid +sequenceDiagram + Client->>Folio: POST /markdown-plus + Folio->>Chromium: rendered HTML + Chromium-->>Folio: PDF bytes + Folio-->>Client: 200 OK +``` +```` + +### 3.3 Admonitions (GitHub-style) + +```markdown +> [!NOTE] +> Folio does not require LibreOffice for this route. + +> [!WARNING] +> Mermaid renders client-side; render times scale with diagram count. +``` + +Recognised tags: `NOTE`, `TIP`, `IMPORTANT`, `WARNING`, `CAUTION`. Each maps +to `<aside class="callout {tag}">` and is themed in CSS. + +### 3.4 Includes + +```markdown +@include shared/header.md +@include sections/methodology.md +``` + +Resolved relative to the multipart upload's logical root. Cycle-detected +(error returned as `400 invalid_include`); max depth 8. + +### 3.5 Auto-anchors and TOC + +Every heading gets a slugged `id`.
`toc=auto` injects a `<nav class="toc">` +where the first explicit `<!-- toc -->` marker appears (or after the cover +page if absent and `cover=auto`). + +--- + +## 4. Themes + +Five embedded themes, each a single CSS file under +`markdown_plus/themes/`: + +| Theme | Use case | Notes | +|----------|-------------------------------------------|--------------------------------| +| `default`| GFM-on-paper, neutral serif headings, sans body | Matches the existing `markdown.css` look so basic-route docs render identically when re-routed | +| `report` | Quarterly reviews, post-mortems | Letter-spaced caps headings, classification banner, page-numbered footer | +| `book` | Long-form, multi-chapter | Drop-caps, running headers from front-matter `chapter:` | +| `slide` | One-section-per-page | `page-break-after: always` on `<h1>`; large body text | +| `memo` | One-pagers, exec summaries | Tight margins, no cover, single-column | + +A user-supplied `stylesheet` is appended *after* the theme, so themes are +override-friendly without forcing the user to start from zero. + +--- + +## 5. Operability + +Markdown+ is louder than the basic route, so it earns its own observability +labels. No new metric *types* — just additional label values on the +existing histograms and counters: + +- `folio_conversions_total{engine="chromium",endpoint="markdown_plus", ...}` +- `folio_conversion_duration_seconds{...,endpoint="markdown_plus"}` +- New histogram `folio_markdown_plus_stage_duration_seconds{stage}` with + stages: `frontmatter`, `include`, `parse`, `toc`, `theme`, `assets`, + `chromium`. This is genuinely new information — KaTeX or Mermaid blow-ups + are otherwise invisible inside the chromium total. +- The operator console grows a Markdown+ panel only if any `markdown_plus` + conversion has been observed in the last hour (avoid empty UI noise). + +OTel: a single span per request named `markdown_plus.render`, with one +child span per stage.
Wires through the existing tracing layer — no new +crate. + +--- + +## 6. What this variation deliberately does *not* do + +- **No HTML sanitisation regression.** Raw `<script>` is dropped at the + parser level (Folio's basic route inlines it but Chrome refuses to run + it; Markdown+ tightens this — `<script>` becomes a comment, no + exceptions). +- **No template engine.** Front-matter substitution is `{key}` only; no + Mustache/Handlebars/Liquid. People who want full templating compose two + passes: render a Liquid template themselves, then POST to Folio. +- **No multi-file output.** One Markdown+ request → one PDF. Bulk + rendering belongs in the (separate) batch API. +- **No cross-request state.** Includes resolve from the upload only — never + from a server-side library. Templating-by-stealth is an exfiltration + vector and Folio is opinionated against it. + +--- + +## 7. Migration & compatibility + +- Basic `/forms/chromium/convert/markdown` is **untouched**. Existing + Gotenberg-compatible callers see no change. +- Markdown+ ships behind a config flag `--enable-markdown-plus` (default + on) so locked-down deployments can disable it without touching the + binary. +- The existing `markdown.rs` is renamed `markdown_basic.rs` only if no + external code imports it; otherwise it stays put and Markdown+ lives + beside it. (Non-breaking is the priority.) + +--- + +## 8. Implementation checklist + +1. New module skeleton under `crates/engine/src/chromium/markdown_plus/`. +2. Front-matter parser + tests (YAML, TOML, missing block, malformed). +3. `@include` resolver with cycle + depth limits + tests. +4. pulldown-cmark event-stream extensions (math, mermaid, admonitions). +5. TOC walker + injector. +6. Theme bundle + asset inliner (KaTeX, Prism, Mermaid as opt-in features). +7. `window.__folioReady` ready-protocol; extend `wait.rs`. +8. New route in `crates/server/src/routes/chromium.rs`. +9. Stage-duration histogram in `metrics.rs`. +10.
Operator console panel (Svelte component, gated on observed traffic). +11. BDD scenarios mirroring the basic route's coverage plus math, mermaid, + admonitions, includes, themes. +12. Docs page under (archived spec). + +This is a tractable, ~2-week single-engineer slice. It does not depend on +the webhook or batch work-in-progress, so it can ship in parallel. diff --git a/docs/specs-archive-2026-05-01.zip b/docs/specs-archive-2026-05-01.zip new file mode 100644 index 0000000..5b7ddb8 Binary files /dev/null and b/docs/specs-archive-2026-05-01.zip differ diff --git a/docs/specs/00-overview.md b/docs/specs/00-overview.md deleted file mode 100644 index 12f8554..0000000 --- a/docs/specs/00-overview.md +++ /dev/null @@ -1,114 +0,0 @@ -# Folio Specs — Overview - -> Spec-driven, sub-agent-friendly development plan for the Folio workspace. - -## Why this exists - -Each spec under `docs/specs/` is a **self-contained work order** for a single -crate or module. An implementing agent must be able to: - -1. Read **only** the spec (plus the cited docs/links inside it), -2. Produce code that satisfies every item in the spec's *Acceptance* section, -3. Run the *Test plan* and have it pass. - -This decouples authoring from implementation, lets multiple agents work in -parallel on independent specs, and gives reviewers a single source of truth -to compare a PR against. - -## Source-of-truth hierarchy - -When specs and other docs disagree: - -``` -docs/specs/* (this directory) <- highest priority, authoritative -docs/proposal.md <- design intent, may be stale -docs/gotenberg-spec.md <- Gotenberg API contract we mirror -README.md <- user-facing summary -docs/gap-analysis.md <- background / context only -docs/obscura-spec.md <- background / context only -``` - -If a spec needs to override `proposal.md`, do it explicitly in the spec body -and call it out in the PR. - -## Spec template - -Every spec MUST contain these sections in this order: - -1.
**Goal** — one sentence, present tense. -2. **Scope** — what's in / out. -3. **Public API** — exact Rust signatures (or HTTP routes / CLI surface). -4. **Behavior** — stepwise pseudocode for each public entrypoint. -5. **Errors** — every error variant the code can produce + when. -6. **Edge cases** — concrete adversarial inputs and the required response. -7. **Test plan** — list of unit + integration tests with input → expected. -8. **Acceptance** — bullet checklist; every box must be tickable to merge. -9. **Out of scope / follow-ups** — explicitly deferred work. - -## Dispatch ledger - -| ID | Spec | Crate | Depends on | Phase | -|-----|----------------------------|------------------|---------------|-------| -| 10 | engine-types | `engine` | — | 1 | -| 11 | engine-chromium | `engine` | 10 | 1 | -| 12 | engine-libreoffice | `engine` | 10 | 3 | -| 13 | engine-pdfops | `engine` | 10 | 4 | -| 20 | cli | `cli` | 10, 11 | 1/5 | -| 30 | server | `server` | 10, 11(+12,13)| 2 | -| 40 | bindings-py | `py` | 10, 11 | 6 | -| 41 | bindings-js | `js` | 10, 11 | 6 | - -Phases mirror `@docs/proposal.md` *Implementation Phases*. Anything in the -same phase with no shared dependency can be worked in parallel by separate -sub-agents. - -## Conventions - -### Rust - -- Edition: `2024` (set at workspace level). -- Errors: each crate exports a `thiserror` enum; binaries/bindings convert - to `anyhow::Error` only at the top of `main` / FFI boundary. -- All public async fns take `&self`, never `&mut self`. Internal mutability - goes through `tokio::sync` primitives. -- Public types implement `Debug` + `Clone` where it doesn't break invariants. -- No `unsafe` outside FFI shims (`py`, `js`). -- `#![deny(rust_2018_idioms, missing_docs)]` on every published crate's lib. -- Public functions documented with `///`; doc examples compile (`cargo test --doc`). - -### Imports / lib names - -The `engine` crate's package is `engine`; importable path is `engine::…`. 
-The `py` and `js` crates produce a `cdylib` with `[lib] name = "folio"` so -their respective host languages see a module called `folio`. - -### Tests - -- **Unit tests** colocated in `src/` via `#[cfg(test)] mod tests`. -- **Integration tests** under each crate's `tests/`. -- **End-to-end** Chrome-bound tests gated behind `#[ignore]` and run by CI - with `cargo test -- --ignored` after Chrome is provisioned. They never - block local `cargo test`. -- Test PDFs are validated by: - - Byte-stream contains `%PDF-1.` header and `%%EOF` trailer. - - `lopdf::Document::load_mem(&bytes)` round-trips successfully. - - Page count matches expectation. - -### Commits - -Conventional commits, scoped by spec ID where applicable, e.g.: - -- `feat(engine/11): implement ChromiumEngine::html_to_pdf` -- `test(engine/11): add networkidle wait condition tests` -- `docs(specs): expand 13-engine-pdfops` - -### Definition of Done (per spec) - -A spec is **done** when: - -1. Every box in *Acceptance* is checked, -2. `cargo fmt --check` and `cargo clippy --workspace -- -D warnings` pass, -3. `cargo test --workspace` passes (excluding `--ignored` E2E), -4. Public API matches *Public API* section verbatim, -5. The spec file itself is updated if any deviation was necessary, with - rationale in the commit message. diff --git a/docs/specs/10-engine-types.md b/docs/specs/10-engine-types.md deleted file mode 100644 index 542ae3b..0000000 --- a/docs/specs/10-engine-types.md +++ /dev/null @@ -1,291 +0,0 @@ -# Spec 10 — `engine::types` - -> Shared types and error model for the Folio engine. All other specs build on -> this; nothing else should redeclare these types. - -## Goal - -Provide the canonical, serde-aware Rust types that describe a PDF generation -request and the engine's error surface, without taking any dependency on -`chromiumoxide`, `lopdf`, or HTTP frameworks. 
- -## Scope - -**In:** `PdfOptions`, `PaperSize`, `Margins`, `WaitCondition`, `MediaType`, -`PageRanges`, `BrowserConfig`, `EngineError`, `EngineResult<T>`. - -**Out:** Anything Chromium-, LibreOffice-, or HTTP-specific. Those live in -their own specs and may *use* these types. - -## Public API - -Module path: `engine::types` (re-exported from `engine`'s crate root). - -```rust -use std::path::PathBuf; -use std::time::Duration; -use serde::{Deserialize, Serialize}; - -/// All knobs that influence a single PDF render. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(default, rename_all = "camelCase")] -pub struct PdfOptions { - pub paper: PaperSize, - pub margin: Margins, - pub landscape: bool, - /// Multiplier applied to page rendering. 0.1..=2.0. - pub scale: f32, - pub print_background: bool, - pub prefer_css_page_size: bool, - pub emulate_media: MediaType, - pub page_ranges: Option<PageRanges>, - pub header_template: Option<String>, - pub footer_template: Option<String>, - pub wait: WaitCondition, -} - -impl Default for PdfOptions { /* see Behavior */ } - -/// Paper dimensions in inches. Constructors enforce > 0. -#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] -pub struct PaperSize { - pub width_in: f32, - pub height_in: f32, -} - -impl PaperSize { - pub const A4: Self = Self { width_in: 8.27, height_in: 11.69 }; - pub const LETTER: Self = Self { width_in: 8.5, height_in: 11.0 }; - pub const LEGAL: Self = Self { width_in: 8.5, height_in: 14.0 }; - pub const A3: Self = Self { width_in: 11.69, height_in: 16.54 }; - pub const A5: Self = Self { width_in: 5.83, height_in: 8.27 }; - - pub fn new(width_in: f32, height_in: f32) -> Result<Self, EngineError>; -} - -/// Margins in inches. 
-#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] -pub struct Margins { - pub top: f32, pub right: f32, pub bottom: f32, pub left: f32, -} - -impl Margins { - pub const ZERO: Self = Self { top: 0.0, right: 0.0, bottom: 0.0, left: 0.0 }; - pub const DEFAULT: Self = Self { top: 0.39, right: 0.39, bottom: 0.39, left: 0.39 }; // ~1cm - - pub fn uniform(inches: f32) -> Self; -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum MediaType { #[default] Print, Screen } - -/// Page ranges parsed from the Gotenberg-compatible string form, e.g. "1-3,5,7-". -/// `to_string` round-trips canonical form. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(try_from = "String", into = "String")] -pub struct PageRanges(Vec<PageRange>); - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum PageRange { Single(u32), Closed(u32, u32), OpenEnd(u32) /* "7-" */ } - -impl PageRanges { - pub fn parse(s: &str) -> Result<Self, EngineError>; - pub fn contains(&self, page: u32, total: u32) -> bool; -} - -/// What to wait for after navigation/setContent before rendering. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "camelCase")] -pub enum WaitCondition { - #[default] - Load, - DomContentLoaded, - NetworkIdle, - Selector { selector: String }, - Expression { expression: String }, - Delay { #[serde(with = "humantime_serde")] duration: Duration }, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(default, rename_all = "camelCase")] -pub struct BrowserConfig { - /// Path to chrome/chromium. If `None`, autodiscover via $PATH then - /// platform-typical locations; finally fall through to `EngineError::ChromeNotFound`. - pub executable: Option<PathBuf>, - /// Run with --headless=new. Default true. - pub headless: bool, - /// Extra command line flags appended verbatim. 
- pub extra_args: Vec<String>, - /// Disable Chrome's sandbox. Required inside most Docker images. - /// Default: true on Linux, false elsewhere. - pub no_sandbox: bool, - /// Per-page navigation/render timeout. - #[serde(with = "humantime_serde")] - pub timeout: Duration, -} - -impl Default for BrowserConfig { /* see Behavior */ } - -#[derive(Debug, thiserror::Error)] -pub enum EngineError { - #[error("invalid option: {0}")] - InvalidOption(String), - - #[error("invalid page range: {0}")] - InvalidPageRange(String), - - #[error("chrome executable not found (searched: {searched:?})")] - ChromeNotFound { searched: Vec<PathBuf> }, - - #[error("chrome failed to launch: {0}")] - ChromeLaunch(String), - - #[error("CDP error: {0}")] - Cdp(String), - - #[error("navigation failed for {url}: {reason}")] - Navigation { url: String, reason: String }, - - #[error("operation timed out after {0:?}")] - Timeout(Duration), - - #[error("io error: {0}")] - Io(#[from] std::io::Error), - - #[error("internal error: {0}")] - Internal(String), -} - -pub type EngineResult<T> = Result<T, EngineError>; -``` - -## Behavior - -### `PdfOptions::default()` - -``` -PdfOptions { - paper: PaperSize::A4, - margin: Margins::DEFAULT, - landscape: false, - scale: 1.0, - print_background: true, - prefer_css_page_size: false, - emulate_media: MediaType::Print, - page_ranges: None, - header_template: None, - footer_template: None, - wait: WaitCondition::Load, -} -``` - -### `BrowserConfig::default()` - -``` -BrowserConfig { - executable: None, - headless: true, - extra_args: vec![], - no_sandbox: cfg!(target_os = "linux"), - timeout: Duration::from_secs(60), -} -``` - -### `PaperSize::new(w, h)` - -- If `w <= 0.0` or `h <= 0.0` β†’ `EngineError::InvalidOption("paper dimensions must be > 0")`. -- If `w > 200.0` or `h > 200.0` β†’ `EngineError::InvalidOption("paper dimensions must be <= 200in")`. -- Else `Ok(Self { width_in: w, height_in: h })`. 
- -### `PageRanges::parse(s)` - -Grammar (whitespace ignored): - -``` -ranges := range ("," range)* -range := number | number "-" number | number "-" -number := [1-9][0-9]* -``` - -- Empty input or only commas → `EngineError::InvalidPageRange`. -- A range `a-b` requires `a <= b`, else error. -- Result preserves input order. Caller is responsible for de-duplication. - -### `PageRanges::contains(page, total)` - -- `Single(n)` → `page == n && n <= total`. -- `Closed(a, b)` → `a <= page && page <= b.min(total)`. -- `OpenEnd(a)` → `a <= page && page <= total`. - -### Validation (used by `ChromiumEngine` before invoking CDP) - -`PdfOptions::validate(&self) -> EngineResult<()>` checks: - -- `0.1 <= scale <= 2.0`, -- `paper.width_in > 0 && paper.height_in > 0` (already by constructor), -- All margins are finite and `>= 0` and each `< paper.width_in / 2` (left/right) or `< paper.height_in / 2` (top/bottom), -- Header/footer templates, if `Some`, are non-empty after trimming. - -This function MUST be exposed publicly; binaries call it before queueing a render. - -## Errors - -The full `EngineError` enum is the *only* error type returned from any spec -in the `engine::*` family. Each downstream spec adds variants by editing -this spec rather than introducing parallel error enums. - -## Edge cases - -| Input | Required behavior | -|-----------------------------------|-----------------------------------------------------------| -| `PageRanges::parse("")` | `Err(InvalidPageRange("empty"))` | -| `PageRanges::parse(",,")` | `Err(InvalidPageRange)` | -| `PageRanges::parse("0-3")` | `Err(InvalidPageRange("page numbers are 1-indexed"))` | -| `PageRanges::parse("5-3")` | `Err(InvalidPageRange("end < start"))` | -| `PageRanges::parse(" 1 - 3 , 7-")`| `Ok([Closed(1,3), OpenEnd(7)])` | -| `PaperSize::new(0.0, 11.0)` | `Err(InvalidOption(..))` | -| `PaperSize::new(8.5, f32::NAN)` | `Err(InvalidOption(..))` | -| `PdfOptions { scale: 3.0, .. 
}` | `validate()` β†’ `Err(InvalidOption("scale out of range"))` | -| Missing fields in JSON deserialise| Treated as defaults via `#[serde(default)]` | - -## Test plan - -All in `crates/engine/src/types.rs` under `#[cfg(test)] mod tests`. - -- `paper_size_constants_match_spec` β€” all five preset constants. -- `paper_size_new_rejects_nonpositive`. -- `paper_size_new_rejects_nan_inf`. -- `margins_uniform_sets_all_four`. -- `page_ranges_parse_single_number`. -- `page_ranges_parse_closed_range`. -- `page_ranges_parse_open_end`. -- `page_ranges_parse_mixed_with_whitespace`. -- `page_ranges_parse_rejects_zero`. -- `page_ranges_parse_rejects_inverted`. -- `page_ranges_parse_rejects_empty`. -- `page_ranges_contains_handles_total_clamp`. -- `page_ranges_round_trips_via_serde`. -- `pdf_options_default_matches_spec`. -- `pdf_options_validate_scale_range`. -- `pdf_options_validate_margin_too_large`. -- `pdf_options_serde_camel_case_roundtrip` β€” JSON `{"paper":{"widthIn":...},...}`. -- `wait_condition_default_is_load`. -- `wait_condition_serde_tag_kind`. -- `browser_config_default_no_sandbox_on_linux_only`. - -## Acceptance - -- [ ] `crates/engine/src/types.rs` exists and is `pub mod types` from `lib.rs`. -- [ ] All public items in *Public API* compile and match signatures verbatim. -- [ ] Workspace deps added: `serde`, `serde_json` (dev), `thiserror`, `humantime-serde`. -- [ ] `cargo test -p engine` passes with all tests in *Test plan*. -- [ ] `cargo doc -p engine --no-deps` produces no warnings. -- [ ] No `unwrap`/`expect` on user-supplied input paths. -- [ ] `lib.rs` carries `#![deny(rust_2018_idioms, missing_docs)]`. - -## Out of scope / follow-ups - -- ScreenshotOptions (separate spec when we tackle `/screenshot/*`). -- PDF/A and PDF/UA flags (added when spec 13 lands). -- Cookies / extra HTTP headers (added by spec 11; types live there). 
diff --git a/docs/specs/11-engine-chromium.md b/docs/specs/11-engine-chromium.md deleted file mode 100644 index 07b800b..0000000 --- a/docs/specs/11-engine-chromium.md +++ /dev/null @@ -1,442 +0,0 @@ -# Spec 11 β€” `engine::chromium::ChromiumEngine` - -> The Phase-1 MVP. Converts HTML / URL / Markdown to PDF via real Chrome -> through the Chrome DevTools Protocol. - -## Goal - -Provide a single `ChromiumEngine` type that reliably produces a PDF byte -stream from HTML strings, remote URLs, or Markdown β€” usable from binaries -(CLI, server) and bindings without any wrapper layer. - -## Scope - -**In:** - -- Browser lifecycle (launch, reuse, shutdown). -- `html_to_pdf`, `url_to_pdf`, `markdown_to_pdf`. -- Wait conditions (load / domcontentloaded / networkidle / selector / expression / delay). -- All `PdfOptions` knobs from spec 10 mapped onto CDP `Page.printToPDF`. -- Cookies, extra HTTP headers, custom user agent (per-call). - -**Out:** - -- Connection pooling for HTTP server (spec 30 wraps this engine in a pool). -- Auto-download of Chrome (deferred β€” first cut requires a chrome on `$PATH` - or in `BrowserConfig::executable`). -- PDF/A / PDF/UA conformance (spec 13). - -## Public API - -Module path: `engine::chromium`, re-exported as `engine::ChromiumEngine`. - -```rust -use crate::types::{BrowserConfig, EngineResult, PdfOptions}; -use std::collections::HashMap; -use std::sync::Arc; - -/// One Chromium browser instance shared across many concurrent renders. -/// Cheap to clone (`Arc` inside). -#[derive(Clone)] -pub struct ChromiumEngine { - inner: Arc<Inner>, // private -} - -impl ChromiumEngine { - /// Launch a new browser with default config. - pub async fn launch() -> EngineResult<Self>; - - /// Launch with explicit config (executable path, sandbox, timeout, ...). - pub async fn launch_with(config: BrowserConfig) -> EngineResult<Self>; - - /// Render an HTML string to PDF bytes. 
- /// `base_url`, when `Some`, is used as the document's base URL so that - /// relative `<img>`, `<link>` etc. resolve against it. - pub async fn html_to_pdf( - &self, - html: &str, - base_url: Option<&str>, - opts: &PdfOptions, - request: &RequestContext, - ) -> EngineResult<Vec<u8>>; - - /// Navigate to `url` and render to PDF bytes. - pub async fn url_to_pdf( - &self, - url: &str, - opts: &PdfOptions, - request: &RequestContext, - ) -> EngineResult<Vec<u8>>; - - /// Render Markdown to PDF. Implementation: render to HTML internally - /// (CommonMark + tables + strikethrough + task lists) wrapped in a small - /// stylesheet, then call `html_to_pdf`. - pub async fn markdown_to_pdf( - &self, - markdown: &str, - opts: &PdfOptions, - request: &RequestContext, - ) -> EngineResult<Vec<u8>>; - - /// Capture an HTML string as a screenshot. - /// Returns PNG, JPEG, or WebP bytes based on `format`. - /// `base_url`, when `Some`, is used as the document's base URL. - pub async fn screenshot_html( - &self, - html: &str, - base_url: Option<&str>, - opts: &ScreenshotOptions, - request: &RequestContext, - ) -> EngineResult<Vec<u8>>; - - /// Navigate to `url` and capture a screenshot. - pub async fn screenshot_url( - &self, - url: &str, - opts: &ScreenshotOptions, - request: &RequestContext, - ) -> EngineResult<Vec<u8>>; - - /// Render Markdown to HTML then capture a screenshot. - pub async fn screenshot_markdown( - &self, - markdown: &str, - opts: &ScreenshotOptions, - request: &RequestContext, - ) -> EngineResult<Vec<u8>>; - - /// Best-effort liveness probe β€” `true` iff the browser process responds - /// to `Browser.getVersion` within `BrowserConfig::timeout`. - pub async fn healthy(&self) -> bool; - - /// Close the browser. Idempotent. Future calls return - /// `EngineError::Internal("engine shut down")`. - pub async fn shutdown(self) -> EngineResult<()>; -} - -/// Per-render request context. Always passed even when empty. 
-#[derive(Debug, Clone, Default)] -pub struct RequestContext { - pub user_agent: Option<String>, - pub extra_headers: HashMap<String, String>, - pub cookies: Vec<Cookie>, - /// HTTP statuses that should fail the render. Empty means no statuses fail. - pub fail_on_status: Vec<u16>, -} - -#[derive(Debug, Clone)] -pub struct Cookie { - pub name: String, - pub value: String, - pub domain: Option<String>, - pub path: Option<String>, - pub secure: bool, - pub http_only: bool, -} - -/// Screenshot output format. -#[derive(Debug, Clone, Copy)] -pub enum ScreenshotFormat { - Png, - Jpeg, - Webp, -} - -/// Options for screenshot capture. -#[derive(Debug, Clone)] -pub struct ScreenshotOptions { - /// Output format (default: Png). - pub format: ScreenshotFormat, - /// JPEG/WebP quality (0-100, default: 80). - pub quality: Option<u8>, - /// Capture full scrollable page (default: false). - pub full_page: bool, - /// Viewport dimensions (default: 1920x1080). - pub viewport_width: u32, - pub viewport_height: u32, - /// Device scale factor (default: 1.0). - pub scale: f32, - /// Clip rectangle (optional). When set, only this region is captured. - pub clip_x: Option<f64>, - pub clip_y: Option<f64>, - pub clip_width: Option<f64>, - pub clip_height: Option<f64>, -} - -impl Default for ScreenshotOptions { - fn default() -> Self { - Self { - format: ScreenshotFormat::Png, - quality: None, - full_page: false, - viewport_width: 1920, - viewport_height: 1080, - scale: 1.0, - clip_x: None, - clip_y: None, - clip_width: None, - clip_height: None, - } - } -} -``` - -## Behavior - -### Launch flow - -1. Resolve `BrowserConfig::executable`: - 1. If `Some(p)`, use it. - 2. Else, in order, check `$BROWSER_PATH`, `which chromium`, `which chrome`, - and platform-typical defaults - (`/Applications/Google Chrome.app/Contents/MacOS/Google Chrome`, - `/usr/bin/google-chrome`, `/usr/bin/chromium`, etc.). - 3. If none β†’ `EngineError::ChromeNotFound { searched }`. -2. 
Spawn Chrome with: `--headless=new`, `--disable-gpu`, - `--hide-scrollbars`, `--mute-audio`, plus `--no-sandbox` iff - `config.no_sandbox`, plus `config.extra_args`. -3. Connect via WebSocket using `chromiumoxide::Browser::launch`. On error - β†’ `EngineError::ChromeLaunch(msg)`. -4. Spawn a background task to drive the chromiumoxide handler future. Store - its `JoinHandle` in `Inner` so `shutdown` can abort it. - -### Chrome Version Compatibility - -The engine uses `chromiumoxide` 0.9 which is generated from Chrome DevTools -Protocol (CDP) definitions matching Chrome up to version ~135. Newer Chrome -versions (136+) may emit CDP event types that chromiumoxide doesn't recognize, -causing deserialization warnings like: - -``` -WS Invalid message: data did not match any variant of untagged enum Message -``` - -**Impact:** These are non-fatal. PDF generation continues to work because core -CDP commands (`Page.printToPDF`, navigation, etc.) remain compatible. The -warnings only affect event notifications Chrome sends asynchronously. - -**Resolution options:** -1. Use Chrome 134-135 for clean logs (matching chromiumoxide 0.9 CDP version) -2. Accept warnings with Chrome 136+ (PDF generation still works) -3. Wait for chromiumoxide update with newer CDP definitions - -The engine logs the detected Chrome version at startup and warns if >135. - -### `html_to_pdf` - -1. `opts.validate()?` (from spec 10). -2. Open a new page (`browser.new_page("about:blank")`). -3. Apply `RequestContext`: - - If `user_agent.is_some()`, send `Network.setUserAgentOverride`. - - If `!extra_headers.is_empty()`, send `Network.setExtraHTTPHeaders`. - - For each cookie, send `Network.setCookie`. -4. If `base_url.is_some()`, navigate first to that URL with `wait = Load`, - then call `Page.setDocumentContent` on the main frame to inject `html`. - Otherwise, set the page content directly via `page.set_content(html)`. -5. 
Run `Emulation.setEmulatedMedia` with `"print"` or `"screen"` per - `opts.emulate_media`. -6. Wait per `opts.wait` (see Wait Conditions). -7. Build CDP `Page.printToPDF` params from `opts` and call. The engine MUST - handle paginated streaming responses (chromiumoxide returns a base64 - string by default; decode to `Vec<u8>`). -8. Close the page (best-effort; log errors but do not fail the render). -9. Return PDF bytes. - -If any CDP call returns an error, map to: - -- Network/connection close β†’ `EngineError::Cdp(msg)`. -- Navigation failures (`net::ERR_*`) β†’ `EngineError::Navigation`. -- A `tokio::time::timeout` of `BrowserConfig::timeout` wraps the entire - render; on elapse β†’ `EngineError::Timeout`. - -### `url_to_pdf` - -Same as `html_to_pdf` but step 4 becomes `page.goto(url)` and the -`base_url` parameter does not apply. - -If `RequestContext::fail_on_status` is non-empty, listen for -`Network.responseReceived`; if the main frame's response status is in the -list β†’ cancel and return `EngineError::Navigation`. - -### `markdown_to_pdf` - -1. Convert via `pulldown-cmark` with `Options::all()`. -2. Wrap in a built-in HTML template (`<html><head><meta charset>... - <style>{default-css}</style></head><body>{rendered}</body></html>`). -3. Delegate to `html_to_pdf` with `base_url = None`. - -The default stylesheet lives in `crates/engine/src/chromium/markdown.css` -and is `include_str!`'d. Minimum: readable typography, code-block -monospace, table borders. - -### Wait conditions - -| `WaitCondition` | Implementation | -|-----------------------|------------------------------------------------------------------------------------------------| -| `Load` | Already implicit after `set_content` / `goto`. No extra wait. | -| `DomContentLoaded` | Subscribe to `Page.domContentEventFired`. Resolve on first event. | -| `NetworkIdle` | Subscribe to `Page.lifecycleEvent` and resolve on `name == "networkIdle"`. 
| -| `Selector { s }` | Poll `Runtime.evaluate("!!document.querySelector(s)")` every 50ms until `true` or timeout. | -| `Expression { e }` | Same polling pattern but evaluating the user expression. Must coerce result to bool. | -| `Delay { duration }` | `tokio::time::sleep(duration)`. | - -All wait paths are bounded by `BrowserConfig::timeout`. - -### Screenshot behavior - -#### `screenshot_html` - -1. Apply `RequestContext` (user agent, headers, cookies) same as `html_to_pdf`. -2. Set page content via `page.set_content(html)` or navigate to `base_url` first. -3. Wait per `opts.wait` (see Wait Conditions). -4. Build screenshot params from `ScreenshotOptions`: - - `format` β†’ `ScreenshotFormat::Png/Jpeg/Webp` - - `quality` β†’ JPEG/WebP quality (0-100) - - `clip` β†’ Optional clip rectangle - - `full_page` β†’ Capture full scrollable page -5. Call `page.screenshot(params)`. -6. Return image bytes. - -#### `screenshot_url` - -Same as `screenshot_html` but step 2 becomes `page.goto(url)`. - -If `RequestContext::fail_on_status` is non-empty, listen for -`Network.responseReceived`; if the main frame's response status is in the -list β†’ cancel and return `EngineError::Navigation`. - -#### `screenshot_markdown` - -1. Convert Markdown to HTML via `pulldown-cmark` (same as `markdown_to_pdf`). -2. Delegate to `screenshot_html` with `base_url = None`. 
- -### `Page.printToPDF` parameter mapping - -``` -landscape <- opts.landscape -displayHeaderFooter <- opts.header_template.is_some() || opts.footer_template.is_some() -headerTemplate <- opts.header_template -footerTemplate <- opts.footer_template -printBackground <- opts.print_background -scale <- opts.scale -paperWidth <- opts.paper.width_in -paperHeight <- opts.paper.height_in -marginTop <- opts.margin.top -marginBottom <- opts.margin.bottom -marginLeft <- opts.margin.left -marginRight <- opts.margin.right -pageRanges <- opts.page_ranges.map(|r| r.to_string()) -preferCSSPageSize <- opts.prefer_css_page_size -transferMode <- "ReturnAsBase64" -``` - -### Concurrency - -`html_to_pdf` / `url_to_pdf` / `markdown_to_pdf` are safe to invoke from -many concurrent tasks against a single `ChromiumEngine`. Each call opens -its own page β€” there is no implicit serialization. Callers wanting -back-pressure should impose a `tokio::sync::Semaphore` upstream (the -server crate, spec 30, will). - -## Errors - -Reuses `EngineError` from spec 10. New error sources documented above: -`ChromeNotFound`, `ChromeLaunch`, `Cdp`, `Navigation`, `Timeout`. No new -variants needed. - -## Edge cases - -| Scenario | Required behavior | -|-----------------------------------------------------|--------------------------------------------------------------------------------| -| HTML body empty string | Produce a single blank page; not an error. | -| URL returns 5xx, `fail_on_status = [500..=599]` | `EngineError::Navigation { reason: "status 503" }`. | -| URL is not http/https (e.g. `file://`) | Allowed if Chrome accepts it; we do not pre-validate scheme. | -| `opts.scale = 3.0` | Caught by `opts.validate()` β†’ `EngineError::InvalidOption` before any CDP call.| -| `Selector` never matches before timeout | `EngineError::Timeout`. | -| Engine cloned then dropped | Browser stays alive while *any* clone exists. 
| -| `shutdown()` called while another render is running | Render returns `EngineError::Internal("engine shut down")`; shutdown succeeds. | -| Markdown contains raw `<script>` | Tag stripped by `pulldown-cmark` defaults; not executed. | -| Header template references `{date}` etc. | Pass through verbatim; Chrome substitutes. | - -## Test plan - -### Unit tests (`crates/engine/src/chromium/mod.rs`) - -These do not need Chrome. - -- `executable_resolution_prefers_explicit`. -- `executable_resolution_falls_back_to_path`. -- `executable_resolution_emits_searched_list_on_failure`. -- `printtopdf_params_built_from_pdfoptions` β€” assert exact CDP param map. -- `markdown_template_wraps_with_charset_meta`. - -### Integration tests (`crates/engine/tests/chromium_html.rs`) - -Marked `#[ignore]`; require `CHROME_PATH` env or system Chrome. Run via -`cargo test -p engine -- --ignored`. - -- `html_to_pdf_returns_valid_pdf_bytes` β€” bytes start with `%PDF-` and - load via `lopdf::Document::load_mem`. -- `html_to_pdf_respects_paper_size` β€” render 1inΓ—1in page; check - `MediaBox` in lopdf. -- `url_to_pdf_against_local_axum` β€” spin up a tiny axum server with - `/index.html`, render, assert page count == 1. -- `wait_selector_completes_when_element_appears` β€” page injects element - after 100ms via setTimeout; assert success. -- `wait_selector_times_out_when_missing` β€” assert `EngineError::Timeout`. -- `cookies_and_headers_round_trip` β€” local server echoes them back into - the rendered HTML; assert echoes appear in PDF text (via lopdf text - extraction). -- `concurrent_renders_do_not_deadlock` β€” spawn 8 tasks, all complete. -- `markdown_to_pdf_renders_table` β€” assert table cells appear in - extracted text. -- `shutdown_cancels_in_flight_render` β€” assert in-flight render returns - the documented internal error. - -### Screenshot integration tests (`crates/engine/tests/chromium_screenshot.rs`) - -Marked `#[ignore]`; require `CHROME_PATH` env or system Chrome. 
- -- `screenshot_html_returns_valid_png` β€” bytes start with PNG magic - (`\x89PNG`). -- `screenshot_html_jpeg_format` β€” set format to JPEG; bytes start with - `0xFF 0xD8` (JPEG magic). -- `screenshot_url_captures_page` β€” navigate to local server, capture, - verify non-empty image. -- `screenshot_full_page` β€” render tall page, set `full_page = true`, - verify image height > viewport height. -- `screenshot_clip_rect` β€” set clip rectangle, verify output dimensions. -- `screenshot_markdown_renders` β€” convert Markdown to screenshot, verify - output is valid image. -- `screenshot_quality_jpeg` β€” set JPEG quality to 50, verify output - smaller than quality 100. - -### Doc tests (`engine/src/chromium/mod.rs`) - -Compile-only example showing the canonical usage from `@README.md:85-97`, -behind `#[cfg(doctest)]` `no_run`. - -## Acceptance - -- [ ] `crates/engine/src/chromium/mod.rs` exists with the full Public API. -- [ ] `chromiumoxide` and `pulldown-cmark` added to `crates/engine/Cargo.toml` - via `workspace.dependencies`. -- [ ] All unit tests in *Test plan* pass with `cargo test -p engine`. -- [ ] All ignored integration tests pass locally with a system Chrome. -- [ ] No `unsafe`. No `panic!` outside test code. -- [ ] `cargo clippy -p engine -- -D warnings` clean. -- [ ] `ChromiumEngine` is `Send + Sync + Clone` (assert via `static_assertions`). -- [ ] `shutdown` is idempotent (test). -- [ ] Screenshot methods (`screenshot_html`, `screenshot_url`, - `screenshot_markdown`) implemented. -- [ ] `ScreenshotOptions` and `ScreenshotFormat` types exist. -- [ ] Screenshot integration tests pass with system Chrome. - -## Out of scope / follow-ups - -- Screenshot routes (`/screenshot/*`) β€” implemented in this spec, server - routes in spec 30. -- Auto-download of Chrome β€” feature flag `auto-download` once stable. -- PDF/A and PDF/UA β€” picked up in spec 13 + a Ghostscript-style post-pass. 
-- Browser pool (multiple Chrome processes) β€” picked up in spec 30 once - benchmarks indicate need. diff --git a/docs/specs/12-engine-libreoffice.md b/docs/specs/12-engine-libreoffice.md deleted file mode 100644 index e961e44..0000000 --- a/docs/specs/12-engine-libreoffice.md +++ /dev/null @@ -1,337 +0,0 @@ -# Spec 12 β€” `engine::libreoffice::LibreOfficeEngine` - -> Office document β†’ PDF via the `soffice --headless` subprocess. - -## Goal - -Convert files in any LibreOffice-supported format (Word, Excel, PowerPoint, -ODF, RTF, CSV, etc.) to PDF bytes by orchestrating short-lived `soffice` -subprocesses, with isolated user profiles for safe concurrency, so the -server's `/forms/libreoffice/convert` route mirrors Gotenberg. - -## Scope - -**In:** - -- Discovery / configuration of the `soffice` binary. -- Single-file and multi-file conversion (with optional merge to one PDF). -- Per-call isolated `UserInstallation` profile. -- PDF/A-1b / A-2b / A-3b export via LibreOffice's filter options. -- Hard timeouts, structured error mapping. - -**Out:** - -- PDF post-processing (delegated to spec 13 for `merge`). -- Long-running `soffice` daemon mode β€” every call is a fresh subprocess. - (A pool may come later as a follow-up if benchmarks justify it.) -- Per-format quirks beyond what LibreOffice's CLI flags expose - (e.g., specific Excel range selection β€” out of MVP). - -## Public API - -Module path: `engine::libreoffice`, re-exported as -`engine::LibreOfficeEngine`. - -```rust -use crate::types::{EngineError, EngineResult, PageRanges}; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::Duration; - -/// Wrapper around the `soffice` binary. Cheap to clone (`Arc` inside). -#[derive(Clone)] -pub struct LibreOfficeEngine { - inner: Arc<Inner>, // private: { exe, timeout, semaphore } -} - -#[derive(Debug, Clone)] -pub struct LibreOfficeConfig { - /// Path to `soffice` (or `libreoffice`). `None` = autodiscover. 
- pub executable: Option<PathBuf>, - /// Per-conversion timeout. Default 120s. - pub timeout: Duration, - /// Maximum concurrent subprocess invocations. Default: - /// `std::thread::available_parallelism()`, falling back to 4. - pub max_concurrency: usize, -} - -impl Default for LibreOfficeConfig { - /* see Behavior */ -} - -impl LibreOfficeEngine { - /// Discover `soffice` on PATH and platform defaults. - pub async fn discover() -> EngineResult<Self>; - - pub async fn launch(config: LibreOfficeConfig) -> EngineResult<Self>; - - /// Convert one input file to PDF bytes. - pub async fn convert( - &self, - input: &Path, - opts: &OfficeOptions, - ) -> EngineResult<Vec<u8>>; - - /// Convert many inputs, returning one PDF per input (no merging here; - /// callers merge via `engine::pdfops::merge`, spec 13). - /// Inputs are converted in parallel up to `max_concurrency`. Output - /// order follows input order. - pub async fn convert_many( - &self, - inputs: &[PathBuf], - opts: &OfficeOptions, - ) -> EngineResult<Vec<Vec<u8>>>; - - /// Returns true iff `soffice --headless --version` exits 0 with non-empty - /// stdout within a fixed 5s timeout (independent of `config.timeout`). - pub async fn healthy(&self) -> bool; -} - -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -#[serde(default, rename_all = "camelCase")] -pub struct OfficeOptions { - pub landscape: bool, - pub page_ranges: Option<PageRanges>, - /// PDF/A profile, if any. - pub pdf_a: Option<PdfAProfile>, - /// PDF/UA accessibility tagging. - pub pdf_ua: bool, - /// Quality knob for embedded raster images. 1..=100. None = LO default. - pub quality: Option<u8>, - /// Reduce image resolution (DPI). None = LO default. 
- pub max_image_resolution: Option<u32>, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub enum PdfAProfile { A1B, A2B, A3B } -``` - -## Behavior - -### `LibreOfficeConfig::default()` - -```rust -LibreOfficeConfig { - executable: None, - timeout: Duration::from_secs(120), - max_concurrency: std::thread::available_parallelism() - .map(|n| n.get()) - .unwrap_or(4), -} -``` - -### Executable discovery (`discover` / `launch` with `executable = None`) - -Search order, first hit wins; record the full searched list for -diagnostics: - -1. `$LIBREOFFICE_PATH` (env var). -2. `which soffice` then `which libreoffice`. -3. macOS: `/Applications/LibreOffice.app/Contents/MacOS/soffice`. -4. Linux: `/usr/bin/soffice`, `/usr/bin/libreoffice`, - `/usr/lib/libreoffice/program/soffice`, - `/snap/bin/libreoffice`, `/var/lib/flatpak/exports/bin/org.libreoffice.LibreOffice`. -5. Windows: `C:\Program Files\LibreOffice\program\soffice.exe`, - `C:\Program Files (x86)\LibreOffice\program\soffice.exe`. - -If none found → `EngineError::Internal("LibreOffice not found: searched [...]")`. -(Reuses `EngineError::Internal` since spec 10 owns the enum; the message -is the discriminator.) - -After discovery, the engine probes with `soffice --headless --version` -under `config.timeout`. Probe failure → `EngineError::Internal("LibreOffice probe failed: ...")`. - -### `convert(input, opts)` - -1. `input.exists()`; else `EngineError::Io(io::ErrorKind::NotFound)`. -2. Acquire one permit from the engine's `Semaphore(max_concurrency)`. -3. Create `tmp = tempfile::tempdir()` (auto-cleanup via Drop). -4. Create `user_dir = tmp.path().join("uipfx")` (LibreOffice - `UserInstallation`). Build `file://` URL. -5. Build `outdir = tmp.path().join("out")` and create it. -6. 
Build CLI args: - - ``` - --headless - --norestore --nologo --nodefault --nofirststartwizard - --convert-to <export-target> - --outdir <outdir> - "-env:UserInstallation=file:///<user_dir>" - <input absolute path> - ``` - - `<export-target>` is built per [filter rules](#export-filter): - - - Default: `pdf:writer_pdf_Export` (or the appropriate exporter β€” see - filter table) with options expressed as a JSON-ish blob: - `pdf:writer_pdf_Export:{"PageRange":{"type":"string","value":"1-3,5"},...}`. - -7. Spawn via `tokio::process::Command`, capture stdout/stderr, - wait under `tokio::time::timeout(config.timeout, child.wait_with_output())`. -8. On exit code 0: - - Locate the produced `<basename>.pdf` in `outdir`. - - Read and return the bytes; `tmp` drops, cleaning everything. -9. Non-zero exit: - - Try to extract the LibreOffice error message from stderr; map to - `EngineError::Internal(format!("soffice exit {code}: {stderr}"))`. -10. Timeout: kill child, return `EngineError::Timeout(config.timeout)`. - -### `convert_many(inputs, opts)` - -1. Empty input slice β†’ `Ok(vec![])`. -2. For each input, spawn a `tokio::task` calling `self.convert(input, opts)`. -3. `tokio::task::JoinSet::join_all` with the same global semaphore - gating concurrency. -4. Return `Vec<Vec<u8>>` in input order. - -`merge = true` is **not** part of `OfficeOptions`. Server / CLI layers -that want a single merged PDF must call `convert_many` and then -`engine::pdfops::merge` (spec 13). This keeps responsibilities clean and -avoids a circular dep between the libreoffice and pdfops modules. 
- -### Export filter - -| Input extension(s) | Exporter (CLI suffix) | -|-----------------------------------|----------------------------------| -| .doc .docx .odt .rtf .txt .html | `pdf:writer_pdf_Export` | -| .xls .xlsx .ods .csv | `pdf:calc_pdf_Export` | -| .ppt .pptx .odp | `pdf:impress_pdf_Export` | -| .odg .vsd .vsdx | `pdf:draw_pdf_Export` | -| (anything else) | `pdf` (let LO infer) | - -Detection is by lowercased extension only. The full table is kept inside -`engine::libreoffice::filter::for_extension(&str) -> &'static str`. - -### Filter parameters → CLI options blob - -For `pdf:writer_pdf_Export` (and equivalents), append a `:{...}` JSON-ish -blob containing only the fields set by `OfficeOptions`. The serializer -produces LibreOffice's expected `{"Key":{"type":"...","value":...}}` -shape. Mapping: - -| `OfficeOptions` field | LO key | LO type | -|------------------------------------------|-----------------------|----------------| -| `page_ranges` (formatted as range string)| `PageRange` | `string` | -| `pdf_a = A1B` → `1`, `A2B` → `2`, `A3B` → `3` | `SelectPdfVersion` | `long` | -| `pdf_ua = true` | `PDFUACompliance` | `boolean` | -| `quality` | `Quality` | `long` | -| `max_image_resolution` | `MaxImageResolution` | `long` | -| `landscape = true` | `IsLandscape` | `boolean` | - -If no fields are set, the blob is omitted entirely (`pdf:writer_pdf_Export` -without the `:` suffix). - -### Concurrency / safety - -Concurrent `soffice` invocations are safe **only** if each uses a -distinct `UserInstallation` directory. The implementation guarantees -this by always allocating a fresh `tempdir` per call. - -The `Semaphore` is a backstop against fork-bombing the host when many -calls land at once; it does not affect correctness. - -### `healthy()` - -Run `soffice --headless --version` with a small (5s) timeout regardless -of `config.timeout`. Returns `true` on exit code 0 with non-empty stdout. - -## Errors - -Reuses `EngineError` from spec 10. 
Operative variants: - -| Variant | Source | -|-----------------------------|-------------------------------------------------------------------------------------| -| `Io` | Input file missing, tempdir creation failed. | -| `Timeout(timeout)` | `soffice` exceeded `config.timeout`. Child is force-killed. | -| `Internal(msg)` | Discovery / probe failed, soffice exited non-zero, or output PDF missing. | -| `InvalidOption(msg)` | `quality` outside 1..=100, `max_image_resolution` 0, or `page_ranges` empty string. | - -## Edge cases - -| Scenario | Required behavior | -|-------------------------------------------------------|-------------------------------------------------------------------------| -| Input path with non-UTF-8 chars | Pass through as `OsStr` to `Command::arg`; do not re-encode. | -| Input file is itself a `.pdf` | Allowed β€” LO will rewrite it. Useful for PDF/A retrofitting. | -| Filename collides with an existing file in `outdir` | Cannot happen: `outdir` is a fresh tempdir per call. | -| LibreOffice produces an empty PDF | Treated as success; bytes returned as-is. Validation is the caller's job. | -| `OfficeOptions::quality = 0` | `EngineError::InvalidOption("quality must be 1..=100")`. | -| `pdf_a = A1B` + `landscape = true` | Allowed; LO honors both. | -| Concurrent calls on slow machines | `Semaphore` queues them; total wall time is bounded by oldest pending. | -| Killed by SIGINT | Tempdir Drop runs; child receives SIGKILL via `Command::kill_on_drop`. | - -## Test plan - -### Unit tests (`crates/engine/src/libreoffice/mod.rs`) - -No subprocess required. - -- `discover_returns_searched_list_when_missing` β€” point env to a bogus - path, assert `EngineError::Internal` message contains every searched path. -- `for_extension_maps_writer_calc_impress_draw`. -- `for_extension_is_case_insensitive`. -- `for_extension_unknown_returns_pdf_fallback`. -- `office_options_default_emits_no_filter_blob`. -- `office_options_with_page_ranges_emits_pagerange_key`. 
-- `office_options_with_pdf_a_maps_select_pdf_version_long`. -- `office_options_quality_zero_rejected`. -- `office_options_quality_above_100_rejected`. -- `office_options_max_image_resolution_zero_rejected`. - -### Integration tests (`crates/engine/tests/libreoffice.rs`) - -`#[ignore]`d; require `soffice` on PATH or `LIBREOFFICE_PATH`. - -- `convert_docx_produces_valid_pdf` β€” fixture `tests/fixtures/office/sample.docx`, - assert bytes start with `%PDF-` and `lopdf::Document::load_mem` succeeds. -- `convert_xlsx_landscape_orientation` β€” when `landscape = true`, - rendered MediaBox is wider than tall. -- `convert_pptx_page_ranges` β€” `page_ranges = "1-1"` produces 1 page, - full doc produces N pages. -- `convert_with_pdf_a_2b_writes_pdfa_metadata` β€” rendered file's metadata - contains `pdfaid` namespace. -- `convert_many_preserves_order` β€” three inputs, timestamps ensure - parallel execution, output order matches input order. -- `convert_timeout_kills_child` β€” set `timeout = 100ms`; convert a heavy - fixture; assert `EngineError::Timeout` and verify no zombie soffice - process left behind (best-effort assertion via `pgrep`). -- `convert_missing_input_io_error` β€” non-existent path β†’ `EngineError::Io`. -- `convert_unsupported_format_falls_back_to_generic_filter` β€” give it a - weird extension; assert success. -- `concurrent_calls_use_distinct_user_dirs` β€” instrument by setting - `UserInstallation` to a captured path via a wrapper script; assert - paths differ across two parallel invocations. - -### Doc tests - -Compile-only example mirroring the Server's expected usage: - -```ignore -let lo = LibreOfficeEngine::discover().await?; -let pdf = lo.convert(Path::new("doc.docx"), &OfficeOptions::default()).await?; -``` - -## Acceptance - -- [ ] `crates/engine/src/libreoffice/mod.rs` exists and is `pub mod libreoffice` - from `lib.rs`. -- [ ] All public items in *Public API* compile and match signatures verbatim. 
-- [ ] `tempfile`, `tokio` (with `process` feature) added via - `workspace.dependencies`. -- [ ] `OfficeOptions::validate()` exists with the constraints noted under - *Errors*; called at the top of `convert` and `convert_many`. -- [ ] Filter table covered by exhaustive unit test - `for_extension_covers_table`. -- [ ] All unit tests pass with `cargo test -p engine`. -- [ ] All `#[ignore]` integration tests pass locally with a system `soffice`. -- [ ] `cargo clippy -p engine -- -D warnings` clean. -- [ ] No global mutable state. No `unsafe`. No leaked tempdirs. -- [ ] `LibreOfficeEngine` is `Send + Sync + Clone` (asserted via - `static_assertions`). - -## Out of scope / follow-ups - -- A long-running `soffice --headless --accept` daemon mode with UNO - socket multiplexing β€” separate spec when warranted by benchmarks. -- Bulk format conversion routes (e.g. `.docx β†’ .odt`); this engine is - PDF-only. -- Encrypted document passwords (`--password`-style flags). -- Custom UNO macros executed pre/post export. -- Page count reporting without parsing the produced PDF. diff --git a/docs/specs/13-engine-pdfops.md b/docs/specs/13-engine-pdfops.md deleted file mode 100644 index 70ddd31..0000000 --- a/docs/specs/13-engine-pdfops.md +++ /dev/null @@ -1,353 +0,0 @@ -# Spec 13 β€” `engine::pdfops` - -> Pure-Rust PDF post-processing via `lopdf`. Stateless free functions on -> in-memory PDF byte streams. - -## Goal - -Provide merge / split / flatten / metadata / watermark operations against -PDF byte streams, with no shell-out to `qpdf`, `pdfcpu`, or `pdftk`, so -the server's `/forms/pdfengines/*` routes mirror Gotenberg using only -Rust dependencies. - -## Scope - -**In:** - -- `merge`, `split`, `flatten`, `read_metadata`, `write_metadata`, - `watermark`, `rotate`. -- All ops accept and return owned `Vec<u8>`, taking and returning byte - buffers so they compose with the server's pipeline without filesystem - round-trips. 
- -**Out:** - -- Encryption / decryption (follow-up spec; needs RC4/AES wiring). -- PDF/A or PDF/UA conformance — these require Ghostscript-style passes. - Requested PDF/A from the LibreOffice path (spec 12) is honored there. -- Bookmarks read/write — follow-up. -- Image / OCR extraction — out of scope. - -## Public API - -Module path: `engine::pdfops`. All functions are free functions; the -module is stateless. - -```rust -use crate::types::{EngineError, EngineResult, PageRanges}; -use std::collections::BTreeMap; - -/// Concatenate a sequence of PDFs into a single document, preserving order. -/// Empty input slice is an error. -pub fn merge(pdfs: &[&[u8]]) -> EngineResult<Vec<u8>>; - -#[derive(Debug, Clone)] -pub enum SplitMode { - /// One output PDF per `PageRanges` chunk, in order. - /// Pages absent from any chunk are dropped. - ByRanges(Vec<PageRanges>), - /// Split every N pages, in order. Last chunk may be shorter. - EveryN(u32), - /// One output PDF per single page. - OnePagePerFile, -} - -pub fn split(pdf: &[u8], mode: &SplitMode) -> EngineResult<Vec<Vec<u8>>>; - -/// Flatten interactive form fields and annotations into static page content. -/// Idempotent on already-flat PDFs. -pub fn flatten(pdf: &[u8]) -> EngineResult<Vec<u8>>; - -#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] -#[serde(default, rename_all = "PascalCase")] -pub struct Metadata { - pub title: Option<String>, - pub author: Option<String>, - pub subject: Option<String>, - pub keywords: Option<String>, - pub creator: Option<String>, - pub producer: Option<String>, - /// Wire format: "D:YYYYMMDDhhmmss±hh'mm'" (PDF date string). - pub creation_date: Option<String>, - pub mod_date: Option<String>, - /// Custom info-dict entries; keys are PDF Name strings, ASCII only. 
- #[serde(skip_serializing_if = "BTreeMap::is_empty")] - pub custom: BTreeMap<String, String>, -} - -pub fn read_metadata(pdf: &[u8]) -> EngineResult<Metadata>; -/// Merge `meta` into the document's info dict. Fields set to `None` are -/// left untouched; fields set to `Some("")` are removed. -pub fn write_metadata(pdf: &[u8], meta: &Metadata) -> EngineResult<Vec<u8>>; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub enum Position { - Center, - TopLeft, TopCenter, TopRight, - MiddleLeft, MiddleRight, - BottomLeft, BottomCenter, BottomRight, -} - -#[derive(Debug, Clone)] -pub struct WatermarkOptions { - pub kind: WatermarkKind, - /// 0.0..=1.0; values outside are clamped. - pub opacity: f32, - pub rotation_deg: f32, - pub position: Position, - /// Apply on every page (true) or only odd pages (false β†’ "stamp first"). - /// Most callers want true. - pub all_pages: bool, - /// Tile across the page surface. - pub tiled: bool, -} - -#[derive(Debug, Clone)] -pub enum WatermarkKind { - Text { - text: String, - /// PostScript font name. None = `Helvetica`. - font: Option<String>, - /// Point size. Default 48. - font_size: f32, - /// RGBA in 0..=1. - color: [f32; 4], - }, - ImagePng { bytes: Vec<u8> }, -} - -pub fn watermark(pdf: &[u8], opts: &WatermarkOptions) -> EngineResult<Vec<u8>>; - -/// Rotate pages by 0/90/180/270 degrees (clockwise). Other angles β†’ error. -pub fn rotate(pdf: &[u8], pages: &PageRanges, angle_deg: i32) -> EngineResult<Vec<u8>>; -``` - -## Behavior - -### `merge(pdfs)` - -1. Empty slice β†’ `EngineError::InvalidOption("merge requires at least one input")`. -2. Single input β†’ return a clone of the input bytes after a parse round-trip - (validates input). On parse failure β†’ `EngineError::Internal`. -3. Otherwise: - 1. Load each input via `lopdf::Document::load_mem(bytes)`. - 2. 
Use `lopdf` page-tree concatenation (the canonical pattern: assemble - a fresh `Document`, renumber object IDs to avoid collision via - `Document::renumber_objects()`, then build a unified `/Pages` tree). - 3. Copy `/Outlines` if present from the **first** input only (do not - attempt to merge bookmarks; out of scope). - 4. Drop `/AcroForm` and `/Names` to avoid name collisions. - 5. Set `/Producer` to `"folio/<version>"`. - 6. Save to `Vec<u8>` via `Document::save_to(&mut Vec<u8>)`. - -### `split(pdf, mode)` - -1. Parse via `lopdf::Document::load_mem`. -2. Determine `total = doc.get_pages().len() as u32`. -3. For each chunk, build the **inclusive** list of 1-indexed page numbers: - - `ByRanges(rs)`: `rs.iter().map(|r| pages_for(r, total))`. Empty - resolved chunk after clamping β†’ skipped (do not produce empty PDFs). - - `EveryN(n)`: `n == 0` β†’ `EngineError::InvalidOption("EveryN requires N >= 1")`. - Otherwise produce `ceil(total / n)` chunks of size at most `n`. - - `OnePagePerFile`: produce `total` chunks, one page each. -4. For each chunk: clone the source `Document`, call - `Document::delete_pages(&pages_to_remove)`, save to `Vec<u8>`. -5. Return the chunks in the order they were generated. - -### `flatten(pdf)` - -1. Parse via `lopdf`. -2. Walk the page tree; for each page: - 1. Iterate `/Annots` array. For each annotation: - - If it's a widget annotation referencing a form field with a - rendered appearance (`/AP /N`), append the appearance stream as - a Form XObject and `Do` it from the page's content stream. - - Other annotation types are dropped (the goal of flattening). - 2. Remove the page's `/Annots` entry. -3. Remove `/AcroForm` from the catalog. -4. Save. - -The implementation MUST handle the common case of unfilled forms by -simply removing widgets without crashing. PDFs without forms or -annotations are returned re-serialized but logically identical. - -### `read_metadata(pdf)` - -1. Parse via `lopdf`. -2. 
Read `/Info` reference from the trailer; if absent, return - `Metadata::default()`. -3. Decode each known key (`Title`, `Author`, ...) as PDF text string - (handles both `()`-literal and `<>`-hex encodings, and the - UTF-16BE BOM convention). -4. All other entries land in `custom`, with keys as ASCII Names. - -### `write_metadata(pdf, meta)` - -1. Parse. -2. Get-or-create the `/Info` dictionary. -3. For each `Some` field on `meta`: - - If the value is `""`, delete the key. - - Otherwise set it as a PDF text string. Strings with non-ASCII - characters use the UTF-16BE BOM encoding. -4. Custom keys: same rule. Reject keys not matching `^[A-Za-z][A-Za-z0-9_-]{0,127}$` - with `EngineError::InvalidOption`. -5. Always update `/ModDate` to "now" in PDF date format unless - `meta.mod_date` is already set. -6. Save. - -### `watermark(pdf, opts)` - -1. Validate: - - `opacity` clamped to `0.0..=1.0`. - - `rotation_deg` not constrained. - - `WatermarkKind::Text { font_size, .. }` requires `font_size > 0.0`, - else `EngineError::InvalidOption`. - - `WatermarkKind::ImagePng { bytes }`: bytes must start with the PNG - signature `\x89PNG\r\n\x1a\n`, else `EngineError::InvalidOption`. -2. Parse the input. -3. Build a Form XObject containing the watermark content: - - Text: a single `BT ... ET` block with `Tf`, `rg/RG`, `cm` (rotation + - translation), and `Tj` / `TJ`. Use the chosen font (default - `Helvetica`); embed via `BaseFont`. - - Image: embed the PNG as an `Image` XObject. Use a transparent - `Group { S /Transparency }` to support opacity. -4. For each page (or odd pages if `all_pages = false`): - 1. Resolve page MediaBox. - 2. Compute the placement matrix: - - If `tiled`, repeat the XObject in a grid. Spacing = 1.5 × bbox - of the watermark XObject. - - Else, single placement at `Position` with offset 0. - 3. Append a content stream that runs `q ... cm ... gs ... Do Q`. -5. Save. - -### `rotate(pdf, pages, angle_deg)` - -1. 
`angle_deg.rem_euclid(360)` must be in `{0, 90, 180, 270}`, else - `EngineError::InvalidOption("angle must be 0/90/180/270")`. -2. Parse. -3. For each page p in 1..=total: if `pages.contains(p, total)`, set - `/Rotate` to `(existing + angle_deg).rem_euclid(360)`. -4. Save. - -### General - -- All ops set `/Producer = "folio/<CARGO_PKG_VERSION>"` (overwrite). -- All ops preserve the input version unless an op fundamentally requires - bumping (none in MVP). -- All ops compress streams with `FlateDecode` on save. - -## Errors - -Reuses `EngineError` from spec 10: - -| Variant | Source | -|--------------------------|------------------------------------------------------------------------| -| `InvalidOption(msg)` | Bad PNG header, invalid angle, empty merge input, EveryN with N=0, etc.| -| `InvalidPageRange(msg)` | `split(ByRanges)` chunk yields empty page set after parse. | -| `Internal(msg)` | `lopdf` parse / save failures, encrypted documents in MVP. | - -Encrypted documents are detected at parse time (`lopdf::Document::is_encrypted`) -and rejected with `EngineError::Internal("encrypted PDFs are not supported in MVP")`. - -## Edge cases - -| Scenario | Required behavior | -|-------------------------------------------------------|--------------------------------------------------------------------| -| `merge(&[a])` with valid `a` | Returns a parse-resaved copy of `a`. | -| `merge` with one corrupted input | `EngineError::Internal("merge: input #2: ...")` — never panic. | -| `split(EveryN(7))` on 3-page doc | Returns one chunk with all 3 pages. | -| `split(ByRanges([1-1000]))` on 3-page doc | Returns one chunk with pages 1..=3 (clamped). | -| `split(ByRanges([5-10]))` on 3-page doc | Empty resolved chunk → skipped; result `vec![]`. | -| Repeated `flatten` calls | Idempotent. Second call returns identical bytes (modulo timestamps). | -| `read_metadata` on PDF without `/Info` | `Metadata::default()`. | -| `write_metadata` with unicode title | Stored as UTF-16BE with BOM. 
| -| `write_metadata { custom: { "bad name!": ... } }` | `EngineError::InvalidOption`. | -| Watermark on encrypted PDF | `EngineError::Internal("encrypted PDFs are not supported in MVP")`. | -| `rotate(pages = "")` | Caught by spec 10's `PageRanges::parse`. | -| `rotate(angle_deg = 360)` | Treated as 0 — no-op write that re-saves bytes. | - -## Test plan - -All in `crates/engine/src/pdfops/mod.rs` plus -`crates/engine/tests/pdfops.rs`. - -### Unit tests (no fixtures required) - -- `merge_empty_input_rejected`. -- `merge_invalid_option_message_includes_index`. -- `split_every_n_zero_rejected`. -- `split_every_n_clamps_when_total_smaller_than_n`. -- `split_by_ranges_skips_empty_chunks`. -- `rotate_invalid_angle_rejected`. -- `rotate_normalizes_360_to_0_noop`. -- `metadata_default_when_info_dict_missing`. -- `write_metadata_rejects_invalid_custom_key`. -- `write_metadata_empty_string_removes_key`. -- `watermark_png_header_validation`. -- `watermark_negative_font_size_rejected`. -- `producer_set_after_each_op`. - -### Integration tests (`crates/engine/tests/pdfops.rs`) - -These use small PDF fixtures committed under -`crates/engine/tests/fixtures/pdf/` (each <50 KB): - -- `single_page_a4.pdf`, `three_page_letter.pdf`, `with_form.pdf`, - `with_annotations.pdf`, `unicode_title.pdf`. - -Tests: - -- `merge_two_singles_yields_two_pages` — load result, page count == 2. -- `merge_preserves_order` — first page is from input A, second from B. -- `split_every_n_yields_expected_counts` — 3-page doc split N=2 yields - chunks of 2 + 1. -- `split_by_ranges_extracts_specific_pages`. -- `flatten_removes_form_fields` — input `with_form.pdf` produces output - whose AcroForm dict is absent. -- `flatten_idempotent` — flatten ∘ flatten = flatten (byte-stable - modulo `/ModDate`). -- `read_write_metadata_round_trip` — write Title="Hello", read back equal. -- `read_metadata_unicode_title` — `unicode_title.pdf` decodes to - the expected Rust `String`. 
-- `watermark_text_appears_on_every_page` — flatten then text-extract - via `lopdf`; assert the watermark string is present per page. -- `watermark_image_png_validates_signature` — corrupt header → error. -- `rotate_only_targeted_pages` — three-page doc, rotate 1,3 by 90°, - verify `/Rotate` on pages 1 and 3 only. -- `encrypted_input_rejected` — fixture `encrypted.pdf`, every public - function returns the documented error. - -### Property tests (`proptest`) - -- `merge_associative_for_two_groupings` — for any 3-element vector of - small valid PDFs, `merge(merge(a, b), c) == merge(a, merge(b, c))` in - page count and ordering. -- `split_then_merge_round_trips_page_count` — split EveryN, merge back, - page count equal. - -## Acceptance - -- [ ] `crates/engine/src/pdfops/mod.rs` exists and is `pub mod pdfops` - from `lib.rs`. -- [ ] Public API matches verbatim, including module-level free functions. -- [ ] `lopdf` and `proptest` (dev-only) added via `workspace.dependencies`. -- [ ] All ops are stateless; no `static`s, no `lazy_static`, no global - mutable state. -- [ ] All ops set `/Producer` to `folio/<crate version>`. -- [ ] Encrypted-input rejection covered by an explicit unit test. -- [ ] All unit tests pass with `cargo test -p engine`. -- [ ] All integration tests pass with `cargo test -p engine`. -- [ ] All property tests pass. -- [ ] `cargo clippy -p engine -- -D warnings` clean. -- [ ] No `unsafe`. No `.unwrap()` outside `#[cfg(test)]` and `#[test]`. - -## Out of scope / follow-ups - -- Encrypt / decrypt with user/owner passwords. -- Embed missing fonts (would require font subsetting). -- Bookmarks read/write. -- Stamp (similar to watermark but not opacity-blended) — likely a thin - variant of `watermark` once the latter is solid. -- PDF linearization ("Fast Web View"). -- Image extraction. 
diff --git a/docs/specs/14-engine-pdfa.md b/docs/specs/14-engine-pdfa.md deleted file mode 100644 index f9ea434..0000000 --- a/docs/specs/14-engine-pdfa.md +++ /dev/null @@ -1,204 +0,0 @@ -# Spec 14 β€” `engine::pdfa` - -> PDF/A and PDF/UA conformance conversion via Ghostscript or qpdf. -> Stateless free functions on in-memory PDF byte streams. - -## Goal - -Provide PDF/A-1b, PDF/A-2b, PDF/A-3b, and PDF/UA conformance conversion -for existing PDF documents. This enables enterprise archival compliance -and accessibility standards. - -## Scope - -**In:** - -- PDF/A-1b, PDF/A-2b, PDF/A-3b conversion (archival compliance). -- PDF/UA-1, PDF/UA-2 conversion (accessibility). -- Validation of output against veraPDF or similar. -- Shell-out to `gs` (Ghostscript) or `qpdf` for actual conversion. -- Server endpoint `/forms/pdfengines/convert` with `pdfa` form field. - -**Out:** - -- Creating PDF/A from scratch (convert from HTML/Office via Chromium/LibreOffice). -- PDF/A-1a, PDF/A-2a, PDF/A-3a (full conformance with logical structure). -- Repairing malformed PDFs that cannot be parsed. - -## Public API - -Module path: `engine::pdfa`. Stateless free functions. - -```rust -use crate::types::{EngineError, EngineResult}; - -/// PDF/A conformance levels for archival compliance. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub enum PdfAProfile { - /// PDF/A-1b: Basic conformance (Level B) for PDF 1.4. - PdfA1b, - /// PDF/A-2b: Basic conformance (Level B) for PDF 1.7. - PdfA2b, - /// PDF/A-3b: Basic conformance (Level B) with embedded files support. - PdfA3b, -} - -/// PDF/UA conformance levels for accessibility. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub enum PdfUaProfile { - /// PDF/UA-1: Universal Accessibility (ISO 14289-1). - PdfUa1, - /// PDF/UA-2: Updated accessibility standard (ISO 14289-2). 
- PdfUa2, -} - -/// Convert a PDF to PDF/A conformance. -/// -/// Uses Ghostscript's pdfwrite device with PDF/A settings. -/// Falls back to qpdf if Ghostscript is unavailable. -pub fn convert_to_pdfa(pdf: &[u8], profile: PdfAProfile) -> EngineResult<Vec<u8>>; - -/// Convert a PDF to PDF/UA accessibility conformance. -/// -/// Adds accessibility features and validates logical structure. -pub fn convert_to_pdfua(pdf: &[u8], profile: PdfUaProfile) -> EngineResult<Vec<u8>>; - -/// Validate a PDF against a PDF/A or PDF/UA profile. -/// -/// Returns validation report with passed/failed rules. -/// Requires external tool (veraPDF or qpdf validation). -pub fn validate(pdf: &[u8], profile: PdfAValidationProfile) -> EngineResult<ValidationReport>; - -#[derive(Debug, Clone)] -pub struct ValidationReport { - pub compliant: bool, - pub profile: String, - pub failed_rules: Vec<RuleViolation>, - pub warnings: Vec<String>, -} - -#[derive(Debug, Clone)] -pub struct RuleViolation { - pub rule_id: String, - pub description: String, - pub severity: Severity, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Severity { - Error, - Warning, -} -``` - -## Implementation Strategy - -### Option 1: Ghostscript (Primary) - -Ghostscript's `pdfwrite` device has built-in PDF/A conversion: - -```bash -gs -dPDFA=1 -dBATCH -dNOPAUSE -sProcessColorModel=DeviceRGB \ - -sDEVICE=pdfwrite -sPDFACompatibilityPolicy=1 \ - -sOutputFile=output.pdf input.pdf -``` - -Pros: -- Industry standard, widely tested -- Handles color model conversion -- Built-in font embedding checks - -Cons: -- Large dependency (~50MB) -- Slower than pure-Rust alternatives - -### Option 2: qpdf (Fallback) - -qpdf has limited PDF/A support via `--qpdf` and `--set-pdf-a`: - -```bash -qpdf --qpdf --set-pdf-a input.pdf output.pdf -``` - -Pros: -- Already in our Docker images -- Fast, pure transformation - -Cons: -- Limited profile support -- No color model conversion - -### Decision - -**Primary:** Ghostscript for full 
PDF/A-1b/2b/3b support -**Fallback:** qpdf for basic compliance marking - -## Server API - -New endpoint mirroring Gotenberg: - -``` -POST /forms/pdfengines/convert -``` - -Form fields: -- `files` - Input PDF file(s) -- `pdfa` - Profile: `PDF/A-1b`, `PDF/A-2b`, `PDF/A-3b` -- `pdfua` - Profile: `PDF/UA-1`, `PDF/UA-2` (mutually exclusive with `pdfa`) - -Response: -- Converted PDF with proper `Content-Type: application/pdf` -- `Content-Disposition` with `.pdf` suffix - -## Error Handling - -| Error | Condition | -|-------|-----------| -| `EngineError::InvalidInput` | Input not a valid PDF | -| `EngineError::ConversionFailed` | Ghostscript/qpdf error | -| `EngineError::ProfileUnsupported` | Profile not available | -| `EngineError::Timeout` | Conversion exceeded limit | - -## Testing - -Unit tests: -- Convert sample PDFs to each profile -- Verify output opens without error -- Check PDF version header changed appropriately - -Integration tests (BDD): -- Gotenberg feature parity: `pdfengines_convert.feature` -- veraPDF validation of output -- Binary size not exploded - -## Dependencies - -```toml -[dependencies] -# Shell execution -tokio = { version = "1", features = ["process"] } - -[dev-dependencies] -# PDF parsing for verification -pdf-extract = "0.8" -``` - -Runtime requirements: -- `gs` (Ghostscript 9.50+) OR `qpdf` (10.6+) -- veraPDF (optional, for validation testing) - -## Open Questions - -1. Should we embed Ghostscript in Docker or make it optional? -2. Do we need PDF/A-3b file embedding support? -3. Should validation be a separate endpoint? 
- -## References - -- ISO 19005-1 (PDF/A-1) -- ISO 19005-2 (PDF/A-2) -- ISO 19005-3 (PDF/A-3) -- ISO 14289 (PDF/UA) -- Ghostscript PDF/A docs: https://ghostscript.com/doc/VectorDevices.htm#PDFA diff --git a/docs/specs/15-webhook.md b/docs/specs/15-webhook.md deleted file mode 100644 index d47d524..0000000 --- a/docs/specs/15-webhook.md +++ /dev/null @@ -1,270 +0,0 @@ -# Spec 15 β€” Webhook System - -> Asynchronous processing with HTTP callbacks. -> Enables non-blocking PDF operations with webhook notifications. - -## Goal - -Provide async request processing where Folio calls a user-provided webhook -URL when processing completes (success or error). Mirrors Gotenberg's -webhook functionality for long-running operations. - -## Scope - -**In:** - -- Async mode via `Gotenberg-Async: true` header. -- Webhook callback via `Gotenberg-Webhook-Url` header. -- Error webhook via `Gotenberg-Webhook-Error-Url` header (optional). -- Extra HTTP headers for webhook requests. -- JSON event payload with result metadata. -- In-memory job queue (phase 1) β†’ persistent queue (phase 2). - -**Out:** - -- Webhook signature verification (HMAC) β€” follow-up security spec. -- Webhook retry with exponential backoff β€” basic retry only. -- Event sourcing / webhook events endpoint β€” basic callback only. - -## Public API (Internal) - -Module path: `server::webhook`. Internal to server crate. - -```rust -use axum::http::HeaderMap; - -/// Webhook configuration extracted from request headers. -#[derive(Debug, Clone)] -pub struct WebhookConfig { - /// Primary webhook URL for success notifications. - pub webhook_url: String, - /// Optional separate URL for error notifications. - pub error_url: Option<String>, - /// Extra headers to include in webhook requests. - pub extra_headers: HeaderMap, - /// Run synchronously even if webhooks configured (sync mode override). - pub sync_mode: bool, -} - -/// Extract webhook config from request headers. 
-pub fn extract_webhook_config(headers: &HeaderMap) -> Option<WebhookConfig>; - -/// Job handle for async processing. -pub struct WebhookJob { - pub id: String, - pub operation: Operation, - pub config: WebhookConfig, -} - -/// Operations that support async/webhooks. -#[derive(Debug, Clone)] -pub enum Operation { - ChromiumConvertHtml { html: Vec<u8>, opts: PdfOptions }, - ChromiumConvertUrl { url: String, opts: PdfOptions }, - LibreOfficeConvert { file: Vec<u8>, opts: OfficeOptions, filename: String }, - PdfMerge { files: Vec<Vec<u8>> }, - PdfSplit { file: Vec<u8>, mode: SplitMode }, - PdfConvert { file: Vec<u8>, profile: PdfAProfile }, -} - -/// Spawn async job and return job ID immediately. -pub async fn spawn_webhook_job( - job: WebhookJob, - state: AppState, -) -> Result<String, WebhookError>; - -/// Deliver webhook callback with result. -pub async fn deliver_webhook( - url: &str, - result: &WebhookResult, - extra_headers: &HeaderMap, -) -> Result<(), WebhookError>; - -/// Webhook result payload. 
-#[derive(Debug, Clone, Serialize)] -pub struct WebhookResult { - pub job_id: String, - pub status: JobStatus, - pub operation: String, - pub filename: Option<String>, - pub error: Option<String>, - #[serde(skip_serializing_if = "Option::is_none")] - pub duration_ms: Option<u64>, -} - -#[derive(Debug, Clone, Copy, Serialize)] -#[serde(rename_all = "lowercase")] -pub enum JobStatus { - Success, - Error, -} -``` - -## HTTP API - -### Headers (Request) - -| Header | Required | Description | -|--------|----------|-------------| -| `Gotenberg-Async` | No | `true` to enable async mode | -| `Gotenberg-Webhook-Url` | Yes* | Webhook URL for success | -| `Gotenberg-Webhook-Error-Url` | No | Separate URL for errors | -| `Gotenberg-Webhook-Extra-Http-Headers` | No | JSON object of extra headers | - -*Required if `Gotenberg-Async: true` - -### Headers (Webhook Request) - -Folio sends POST to webhook URL with: - -| Header | Value | -|--------|-------| -| `Content-Type` | `application/json` or `application/pdf` | -| `Gotenberg-Trace` | Correlation ID from original request | -| `X-Request-Id` | Folio's request ID | -| User's extra headers | As specified | - -### Response (Async Mode) - -When async mode enabled, immediate response: - -```http -HTTP/1.1 202 Accepted -Gotenberg-Trace: <correlation-id> - -{"job_id": "uuid", "status": "pending"} -``` - -### Webhook Payload (Success) - -```json -{ - "job_id": "uuid", - "status": "success", - "operation": "chromium_convert_html", - "filename": "result.pdf", - "duration_ms": 1234 -} -``` - -With PDF attached as binary body, or download URL if configured for storage. 
- -### Webhook Payload (Error) - -```json -{ - "job_id": "uuid", - "status": "error", - "operation": "pdf_merge", - "error": "Failed to parse PDF: invalid xref", - "duration_ms": 500 -} -``` - -## Implementation Strategy - -### Option 1: In-Memory Queue (Phase 1) - -Use `tokio::task::spawn` + `tokio::sync::mpsc` channel: - -```rust -pub struct WebhookQueue { - sender: mpsc::Sender<WebhookJob>, - receiver: Arc<Mutex<mpsc::Receiver<WebhookJob>>>, -} -``` - -Pros: -- Simple, no external dependencies -- Fast for moderate load - -Cons: -- Jobs lost on restart -- No horizontal scaling - -### Option 2: Persistent Queue (Phase 2) - -SQLite or Redis-backed queue: - -```rust -pub struct PersistentQueue { - db: SqlitePool, -} -``` - -Pros: -- Survives restarts -- Can scale horizontally - -Cons: -- Additional dependency - -### Decision - -**Phase 1:** In-memory queue with optional SQLite persistence. - -## Architecture - -``` -Request → Extract Webhook Config - → If async: Queue Job → Return 202 - → Worker processes job - → POST result to webhook URL -``` - -Worker pool: -- 4 concurrent webhook processors (configurable) -- Timeout: 30s for webhook delivery -- Retry: 3 attempts with 5s delay - -## Error Handling - -| Error | Action | -|-------|--------| -| Invalid webhook URL | 400 Bad Request | -| Webhook timeout | Retry 2x, then fail | -| Webhook 4xx/5xx | Retry 2x, then fail | -| Job processing error | Send to error webhook | - -## Security Considerations - -1. **URL validation** - Reject private IPs, localhost (configurable) -2. **SSRF protection** - DNS rebinding checks -3. **HMAC signatures** - Optional webhook signing (follow-up) -4. 
**Rate limiting** - Per-webhook rate limits - -## Testing - -Unit tests: -- Webhook config extraction from headers -- URL validation (allow/block lists) -- Job serialization/deserialization - -Integration tests: -- End-to-end async conversion with webhook -- Error webhook delivery -- Retry behavior - -## Dependencies - -```toml -[dependencies] -# HTTP client for webhook delivery -reqwest = { version = "0.12", features = ["json"] } -# Job queue (in-memory) -tokio = { version = "1", features = ["sync", "rt"] } -# URL validation -url = "2" -``` - -## Open Questions - -1. Should we support webhook body in sync mode too? -2. File storage for large outputs vs streaming? -3. Webhook signature verification (HMAC) priority? -4. Should we add webhook events API (list/deliveries)? - -## References - -- Gotenberg webhook docs: https://gotenberg.dev/docs/webhook -- CloudEvents spec for webhook payload structure diff --git a/docs/specs/16-bookmarks.md b/docs/specs/16-bookmarks.md deleted file mode 100644 index 5e94276..0000000 --- a/docs/specs/16-bookmarks.md +++ /dev/null @@ -1,197 +0,0 @@ -# Spec 16 — PDF Bookmarks (Outlines) - -> Read and write PDF document outlines (bookmarks/table of contents). -> Enables navigation structures in PDF documents. - -## Goal - -Provide read/write access to PDF bookmark hierarchies (Outlines in PDF -terminology). This allows generating tables of contents, extracting -document structure, and adding navigation to merged documents. - -## Scope - -**In:** - -- Read existing bookmark/outline structure from PDF. -- Write new bookmarks to PDF (replacing existing). -- Hierarchical bookmarks with nested children. -- Page number references (0-indexed or 1-indexed configurable). -- JSON serialization for API wire format. - -**Out:** - -- Partial bookmark updates (merge with existing). -- Text position anchors (only page-level). -- Named destinations (follow-up spec). - -## Public API - -Module path: `engine::bookmarks`. Stateless free functions. 
- -```rust -use crate::types::{EngineError, EngineResult}; - -/// A single bookmark entry. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Bookmark { - /// Display text for the bookmark. - pub title: String, - /// Target page number (1-indexed for user convenience). - pub page: u32, - /// Nesting level (1 = top level, 2 = child, etc.). - #[serde(skip_serializing_if = "Option::is_none")] - pub level: Option<u32>, - /// Child bookmarks (nested outline items). - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub children: Vec<Bookmark>, -} - -/// Read bookmarks from a PDF document. -/// -/// Returns empty vector if document has no outline. -pub fn read_bookmarks(pdf: &[u8]) -> EngineResult<Vec<Bookmark>>; - -/// Write bookmarks to a PDF document. -/// -/// Replaces any existing outline. Bookmarks reference pages by 1-based -/// page numbers. Returns modified PDF with new outline. -pub fn write_bookmarks(pdf: &[u8], bookmarks: &[Bookmark]) -> EngineResult<Vec<u8>>; - -/// Flatten nested bookmark structure to a list. -/// -/// Useful for linear processing. Level indicates nesting depth. 
-pub fn flatten_bookmarks(bookmarks: &[Bookmark]) -> Vec<(u32, String, u32)>; -// Returns: (level, title, page) -``` - -## Bookmark Structure - -### JSON Format (API) - -```json -[ - { - "title": "Chapter 1", - "page": 1, - "children": [ - {"title": "Section 1.1", "page": 3}, - {"title": "Section 1.2", "page": 5} - ] - }, - { - "title": "Chapter 2", - "page": 10 - } -] -``` - -### Flat Format Alternative - -For simple lists without nesting: - -```json -[ - {"title": "Chapter 1", "page": 1, "level": 1}, - {"title": "Section 1.1", "page": 3, "level": 2}, - {"title": "Chapter 2", "page": 10, "level": 1} -] -``` - -## Implementation Strategy - -### PDF Structure - -PDF bookmarks are stored in the `/Outlines` hierarchy: - -``` -/Outlines (dictionary) - /First → OutlineItem - /Last → OutlineItem - /Count → total count - -OutlineItem (dictionary) - /Title (string) - /Dest → [page_ref, /Fit] - /Parent → parent OutlineItem or Outlines - /First, /Last → child items (if has children) - /Next, /Prev → sibling items -``` - -### Using `lopdf` - -1. **Read**: Traverse `/Outlines` → `/First` chain, following `/Next` pointers, - recursively collecting `/Title` and `/Dest` page references. - -2. **Write**: Create new outline dictionary, build linked list of items, - set up parent/child/next/prev references, replace `/Outlines` in catalog. 
- -## Server API - -### Read Bookmarks - -``` -POST /forms/pdfengines/bookmarks/read -``` - -Form fields: -- `files` - Single PDF file - -Response (200 OK): -```json -{ - "filename.pdf": [ - {"title": "Chapter 1", "page": 1, "children": [...]} - ] -} -``` - -### Write Bookmarks - -``` -POST /forms/pdfengines/bookmarks/write -``` - -Form fields: -- `files` - Single PDF file -- `bookmarks` - JSON array of bookmarks - -Response (200 OK): -- PDF file with bookmarks applied -- `Content-Disposition: attachment; filename="result.pdf"` - -## Error Handling - -| Error | Condition | -|-------|-----------| -| `EngineError::InvalidInput` | PDF has no catalog or is malformed | -| `EngineError::InvalidBookmark` | Bookmark references non-existent page | -| `EngineError::EmptyInput` | Empty bookmark list (valid, clears outline) | - -## Testing - -Unit tests: -- Read bookmarks from sample PDFs -- Write bookmarks, read back, verify round-trip -- Nested hierarchy preservation -- Page number edge cases (first page, last page) - -Integration tests: -- Gotenberg feature parity: `pdfengines_bookmarks.feature` -- Compare with `pdfinfo -meta` output - -## Dependencies - -Uses existing `lopdf` dependency (already in pdfops). - -## Open Questions - -1. Should we support named destinations (/Dest as name vs array)? -2. Should we preserve existing bookmarks and merge vs replace? -3. Unicode bookmark titles - any encoding issues? - -## References - -- ISO 32000-2:2017, Section 12.3.3 (Document Outlines) -- PDF 1.7 spec, Section 8.2.2 (Outline Hierarchy) diff --git a/docs/specs/17-watermark.md b/docs/specs/17-watermark.md deleted file mode 100644 index 4a2e0b4..0000000 --- a/docs/specs/17-watermark.md +++ /dev/null @@ -1,280 +0,0 @@ -# Spec 17 — PDF Watermark & Stamp - -> Overlay images or text onto PDF pages. -> Watermark appears behind content, Stamp appears in front. 
- -## Goal - -Provide watermark and stamp functionality for PDF documents, allowing -users to overlay images (PNG, JPEG) or text on pages at configurable -positions with opacity control. - -## Scope - -**In:** - -- Image watermark/stamp (PNG, JPEG support via image crate). -- Text watermark/stamp (with font selection). -- Position control: center, corners, edges, custom coordinates. -- Opacity/transparency (0.0 to 1.0). -- Rotation (degrees). -- Page range selection (all pages, odd, even, specific pages). -- Watermark (behind content) vs Stamp (in front of content). - -**Out:** - -- SVG watermarks (rasterize first). -- Multi-page watermark documents. -- Animated watermarks. -- Pattern fills. - -## Public API - -Module path: `engine::watermark`. Stateless free functions. - -```rust -use crate::types::{EngineError, EngineResult}; - -/// Type of overlay. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum OverlayType { - /// Watermark appears behind page content. - Watermark, - /// Stamp appears in front of page content. - Stamp, -} - -/// Content to overlay. -#[derive(Debug, Clone)] -pub enum OverlayContent { - /// Image file bytes (PNG or JPEG). - Image { data: Vec<u8>, format: ImageFormat }, - /// Text with font specification. - Text { text: String, font: FontSpec }, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ImageFormat { - Png, - Jpeg, -} - -#[derive(Debug, Clone)] -pub struct FontSpec { - /// Font family name. - pub family: String, - /// Font size in points. - pub size: f32, - /// RGB color (0-255 each). - pub color: (u8, u8, u8), - /// Bold, italic, etc. - pub style: FontStyle, -} - -#[derive(Debug, Clone, Copy, Default)] -pub struct FontStyle { - pub bold: bool, - pub italic: bool, -} - -/// Position on page for overlay. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Position { - Center, - TopLeft, TopCenter, TopRight, - MiddleLeft, MiddleRight, - BottomLeft, BottomCenter, BottomRight, - /// Custom position in PDF points from bottom-left. - Custom { x: f32, y: f32 }, -} - -/// Scale mode for image overlays. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ScaleMode { - /// Original size in pixels. - Original, - /// Fit within page maintaining aspect ratio. - FitPage, - /// Fill page maintaining aspect ratio (may crop). - FillPage, - /// Custom width/height in points. - Custom { width: f32, height: f32 }, -} - -/// Watermark/stamp options. -#[derive(Debug, Clone)] -pub struct WatermarkOptions { - /// Watermark or stamp. - pub overlay_type: OverlayType, - /// Content to overlay. - pub content: OverlayContent, - /// Position on page. - pub position: Position, - /// Opacity 0.0-1.0. - pub opacity: f32, - /// Rotation in degrees (0 = no rotation). - pub rotation: f32, - /// Page range to apply. - pub pages: PageSelection, - /// Scale mode for images. - pub scale: ScaleMode, -} - -/// Page selection for watermark application. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum PageSelection { - All, - First, - Last, - Odd, - Even, - Range(u32, u32), // start, end (1-indexed, inclusive) -} - -/// Apply watermark or stamp to PDF. -/// -/// Returns new PDF with overlay applied. -pub fn apply_watermark( - pdf: &[u8], - opts: &WatermarkOptions, -) -> EngineResult<Vec<u8>>; - -/// Convenience: apply image watermark. -pub fn apply_image_watermark( - pdf: &[u8], - image: &[u8], - format: ImageFormat, - position: Position, - opacity: f32, -) -> EngineResult<Vec<u8>> { - let opts = WatermarkOptions { - overlay_type: OverlayType::Watermark, - content: OverlayContent::Image { data: image.to_vec(), format }, - position, - opacity, - rotation: 0.0, - pages: PageSelection::All, - scale: ScaleMode::FitPage, - }; - apply_watermark(pdf, &opts) -} - -/// Convenience: apply text stamp. 
-pub fn apply_text_stamp( - pdf: &[u8], - text: &str, - position: Position, - opacity: f32, -) -> EngineResult<Vec<u8>> { - let opts = WatermarkOptions { - overlay_type: OverlayType::Stamp, - content: OverlayContent::Text { - text: text.to_string(), - font: FontSpec { - family: "Helvetica".into(), - size: 48.0, - color: (128, 128, 128), - style: FontStyle::default(), - }, - }, - position, - opacity, - rotation: 0.0, - pages: PageSelection::All, - scale: ScaleMode::Original, - }; - apply_watermark(pdf, &opts) -} -``` - -## Implementation Strategy - -### Using `lopdf` + `image` - -1. **Load PDF** with `lopdf::Document::load_mem()`. -2. **Load image** with `image` crate, convert to PDF XObject. -3. **For each target page**: - - Get page content stream - - Create overlay XObject (Form XObject containing image or text) - - Insert into page resources - - Modify content stream to draw overlay: - - Watermark: Add before existing content (gsave/q/qx/q.../grestore) - - Stamp: Add after existing content -4. **Save modified PDF**. - -### Text Rendering - -For text watermarks: -- Use built-in PDF fonts (Helvetica, Times, Courier) for simplicity -- Or embed TrueType font subset -- Create text object with: - - BT (Begin Text) - - Tf (Set Font) - - Td (Move Text Position) - - Tj (Show Text) - - ET (End Text) - -## Server API - -### Watermark Endpoint - -``` -POST /forms/pdfengines/watermark -``` - -Form fields: -- `files` - Single PDF file -- `watermark` - Image file (PNG/JPEG) or text string -- `mode` - `"watermark"` (behind) or `"stamp"` (front) -- `position` - `"center"`, `"top-left"`, etc. -- `opacity` - 0.0 to 1.0 -- `rotation` - Degrees (optional) -- `pages` - Page range (optional, default "all") - -Response: -- PDF with watermark applied -- `Content-Disposition: attachment; filename="result.pdf"` - -### Stamp Endpoint - -``` -POST /forms/pdfengines/stamp -``` - -Same as watermark, defaults to mode="stamp". 
- -## Error Handling - -| Error | Condition | -|-------|-----------| -| `EngineError::InvalidInput` | Invalid image format | -| `EngineError::InvalidPage` | Page range out of bounds | -| `EngineError::FontNotFound` | Requested font unavailable | - -## Testing - -Unit tests: -- Image watermark on single page -- Text stamp on all pages -- Opacity verification (PDF structure) -- Position accuracy -- Page range selection - -Integration tests: -- Gotenberg feature parity -- Visual verification (manual or screenshot) -- File size not exploded - -## Dependencies - -```toml -[dependencies] -# Image processing -image = { version = "0.25", default-features = false, features = ["png", "jpeg"] } -# PDF manipulation (already have lopdf) -``` - -## References - -- PDF Spec ISO 32000-2: Section 8.10 (External Objects), 9 (Text) -- Gotenberg docs: https://gotenberg.dev/docs/routes#watermark diff --git a/docs/specs/18-screenshot.md b/docs/specs/18-screenshot.md deleted file mode 100644 index d0bfb76..0000000 --- a/docs/specs/18-screenshot.md +++ /dev/null @@ -1,234 +0,0 @@ -# Spec 18 — Chromium Screenshot API - -> Capture web page screenshots as PNG or JPEG images. -> Alternative to PDF generation for image output. - -## Goal - -Provide screenshot capabilities using Chromium to capture web pages as -PNG or JPEG images. Mirrors Gotenberg's screenshot endpoints while -integrating with our existing Chromium infrastructure. - -## Scope - -**In:** - -- Screenshot from HTML string or URL. -- PNG and JPEG output formats. -- Full page or viewport-only capture. -- Window/clipping size configuration. -- Wait conditions (load, networkidle). -- Custom headers, cookies, authentication. - -**Out:** - -- PDF screenshots (use convert endpoints). -- Video recording. -- Mobile device emulation (follow-up). -- Element-level screenshots (single element only). - -## Public API - -Module path: `engine::chromium::screenshot`. Extends existing ChromiumEngine. 
- -```rust -use crate::types::{EngineError, EngineResult, BrowserConfig}; - -/// Screenshot format. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ScreenshotFormat { - Png, - Jpeg { quality: u8 }, // 0-100 -} - -/// Screenshot capture mode. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum CaptureMode { - /// Capture visible viewport only. - Viewport, - /// Capture full page (scroll and stitch). - FullPage, -} - -/// Screenshot options. -#[derive(Debug, Clone)] -pub struct ScreenshotOptions { - /// Output format. - pub format: ScreenshotFormat, - /// Capture mode. - pub mode: CaptureMode, - /// Viewport width in pixels. - pub width: u32, - /// Viewport height in pixels. - pub height: u32, - /// Device scale factor (1.0 = standard, 2.0 = retina). - pub device_scale_factor: f32, - /// Wait condition before capture. - pub wait_condition: WaitCondition, - /// Custom HTTP headers. - pub extra_headers: HashMap<String, String>, - /// Cookies to set. - pub cookies: Vec<Cookie>, - /// Background CSS (e.g., "white" for opaque). - pub background_color: Option<String>, -} - -impl Default for ScreenshotOptions { - fn default() -> Self { - Self { - format: ScreenshotFormat::Png, - mode: CaptureMode::Viewport, - width: 1920, - height: 1080, - device_scale_factor: 1.0, - wait_condition: WaitCondition::Load, - extra_headers: HashMap::new(), - cookies: Vec::new(), - background_color: None, - } - } -} - -/// Screenshot from HTML string. -pub async fn screenshot_html( - engine: &ChromiumEngine, - html: &str, - opts: &ScreenshotOptions, -) -> EngineResult<Vec<u8>>; - -/// Screenshot from URL. -pub async fn screenshot_url( - engine: &ChromiumEngine, - url: &str, - opts: &ScreenshotOptions, -) -> EngineResult<Vec<u8>>; - -/// Screenshot from Markdown. 
-pub async fn screenshot_markdown( - engine: &ChromiumEngine, - markdown: &str, - opts: &ScreenshotOptions, -) -> EngineResult<Vec<u8>> { - let html = render_markdown_to_html(markdown); - screenshot_html(engine, &html, opts).await -} -``` - -## Implementation Strategy - -### Using `chromiumoxide` - -The `chromiumoxide` crate provides CDP (Chrome DevTools Protocol) access. -Screenshot capture uses the `Page.captureScreenshot` CDP command. - -For full page screenshots: -1. Get full page dimensions via `Page.getLayoutMetrics()` -2. Set viewport to full page size -3. Capture screenshot -4. Restore viewport - -For viewport screenshots: -1. Set requested viewport size -2. Navigate and wait -3. Capture screenshot - -### CDP Commands - -```rust -// Set viewport -Page::set_viewport( - width, height, device_scale_factor, mobile, fit_window -).await?; - -// Navigate and wait -Page::goto(url).await?; -Page::wait_for(selector_or_condition).await?; - -// Capture -let screenshot = Page::capture_screenshot( - format, // "png" or "jpeg" - quality, // for jpeg - clip, // optional viewport clipping - from_surface, // true -).await?; -``` - -## Server API - -### Endpoints - -``` -POST /forms/chromium/screenshot/html -POST /forms/chromium/screenshot/url -POST /forms/chromium/screenshot/markdown -``` - -### Form Fields - -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `files` | file | - | HTML/Markdown file (for file endpoints) | -| `url` | string | - | URL to capture | -| `format` | string | "png" | "png" or "jpeg" | -| `quality` | int | 80 | JPEG quality 0-100 | -| `width` | int | 1920 | Viewport width | -| `height` | int | 1080 | Viewport height | -| `fullPage` | bool | false | Capture full scrollable page | -| `scale` | float | 1.0 | Device scale factor | -| `waitFor` | string | "load" | "load", "networkidle", "domcontentloaded" | -| `backgroundColor` | string | - | CSS color for background | - -### Headers - -Same as convert endpoints: -- 
`Gotenberg-Trace` -- `Gotenberg-Output-Filename` -- Custom headers via `Gotenberg-*` forwarded to page - -### Response - -```http -HTTP/1.1 200 OK -Content-Type: image/png (or image/jpeg) -Content-Disposition: attachment; filename="screenshot.png" - -<binary image data> -``` - -## Error Handling - -| Error | Condition | -|-------|-----------| -| `EngineError::ChromeLaunch` | Browser connection failed | -| `EngineError::NavigationFailed` | URL unreachable | -| `EngineError::Timeout` | Wait condition not met | -| `EngineError::ScreenshotFailed` | CDP screenshot error | - -## Testing - -Unit tests: -- Screenshot HTML with various viewport sizes -- Full page vs viewport capture -- PNG and JPEG output -- Wait conditions - -Integration tests: -- Gotenberg feature parity: `chromium_screenshot_*.feature` -- Image dimensions verification -- File format validation - -## Dependencies - -Uses existing `chromiumoxide` dependency. - -## References - -- Chrome DevTools Protocol: https://chromedevtools.github.io/devtools-protocol/ -- Page.captureScreenshot: https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-captureScreenshot -- Gotenberg docs: https://gotenberg.dev/docs/routes#screenshots - -## Notes - -- Screenshots are handled separately from PDF conversion but share the same Chromium pool -- Consider rate limiting for screenshot endpoints (expensive operation) -- Full page screenshots can be memory-intensive for very long pages diff --git a/docs/specs/19-encrypt.md b/docs/specs/19-encrypt.md deleted file mode 100644 index 5f36511..0000000 --- a/docs/specs/19-encrypt.md +++ /dev/null @@ -1,223 +0,0 @@ -# Spec 19 — PDF Encryption - -> Password protection and permission control for PDF documents. -> Uses qpdf for reliable encryption without lopdf complexity. - -## Goal - -Provide PDF password protection with user/owner passwords and -granular permission controls. Uses shell-out to qpdf for -production-ready encryption. 
- -## Scope - -**In:** - -- User password (required to open document). -- Owner password (required to change permissions). -- Permission flags (print, modify, copy, annotate). -- 128-bit and 256-bit AES encryption. -- Remove encryption (with owner password). - -**Out:** - -- Certificate-based encryption (PKI). -- Digital signatures. -- Custom security handlers. - -## Public API - -Module path: `engine::encrypt`. Stateless free functions. - -```rust -use crate::types::{EngineError, EngineResult}; - -/// Encryption algorithm. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum EncryptionAlgorithm { - /// 128-bit AES (RC4 deprecated). - Aes128, - /// 256-bit AES (recommended). - Aes256, -} - -/// Permission flags for encrypted PDF. -#[derive(Debug, Clone, Copy, Default)] -pub struct Permissions { - /// Allow printing (low-res). - pub print: bool, - /// Allow high-quality printing. - pub print_high_quality: bool, - /// Allow content modification. - pub modify_content: bool, - /// Allow annotation and form filling. - pub annotate: bool, - /// Allow form filling (if false, only existing fields). - pub fill_forms: bool, - /// Allow content extraction (copy/paste). - pub extract_content: bool, - /// Allow document assembly (merge, insert pages). - pub assemble: bool, -} - -impl Permissions { - /// Default permissions: all allowed. - pub fn allow_all() -> Self { - Self { - print: true, - print_high_quality: true, - modify_content: true, - annotate: true, - fill_forms: true, - extract_content: true, - assemble: true, - } - } - - /// Restrictive permissions: view only. - pub fn view_only() -> Self { - Self { - print: false, - print_high_quality: false, - modify_content: false, - annotate: false, - fill_forms: false, - extract_content: false, - assemble: false, - } - } -} - -/// Encrypt PDF with password protection. -/// -/// At least one of `user_password` or `owner_password` must be provided. -/// If `owner_password` is None, it's set same as user_password. 
-pub async fn encrypt_pdf( - pdf: &[u8], - user_password: Option<&str>, - owner_password: Option<&str>, - algorithm: EncryptionAlgorithm, - permissions: Permissions, -) -> EngineResult<Vec<u8>>; - -/// Remove encryption from PDF. -/// -/// Requires owner password (or user password if no owner set). -pub async fn decrypt_pdf( - pdf: &[u8], - password: &str, -) -> EngineResult<Vec<u8>>; - -/// Check if PDF is encrypted. -pub fn is_encrypted(pdf: &[u8]) -> EngineResult<bool>; -``` - -## Implementation Strategy - -### Using `qpdf` - -qpdf has excellent encryption support: - -```bash -# Encrypt with user password -qpdf --encrypt userpass ownerpass 256 -- input.pdf output.pdf - -# Encrypt with permissions -qpdf --encrypt userpass ownerpass 256 \ - --print=none --modify=none --extract=n \ - input.pdf output.pdf - -# Decrypt -qpdf --password=ownerpass --decrypt input.pdf output.pdf -``` - -### Permission Mapping - -| Permission | qpdf flag | PDF spec | -|------------|-----------|----------| -| Print low-res | `--print=low` | bit 3 | -| Print high-res | `--print=full` | bit 3 + 12 | -| Modify content | `--modify=annotate` | bit 4 | -| Annotate | `--modify=annotate` | bit 6 | -| Fill forms | `--modify=form` | bit 9 | -| Extract | `--extract=y` | bit 5 | -| Assemble | `--assemble=y` | bit 11 | - -## Server API - -### Encrypt Endpoint - -``` -POST /forms/pdfengines/encrypt -``` - -Form fields: -- `files` - Single PDF file -- `userPassword` - Password required to open (optional) -- `ownerPassword` - Password to change permissions (optional) -- `algorithm` - "aes128" or "aes256" (default: aes256) -- `permissions` - Comma-separated list: - - `print`, `print-hq`, `modify`, `annotate`, `fill-forms`, `extract`, `assemble` - - Or `all` (default), `none`, `view-only` - -Response: -- Encrypted PDF -- `Content-Disposition: attachment; filename="result.pdf"` - -### Decrypt Endpoint - -``` -POST /forms/pdfengines/decrypt -``` - -Form fields: -- `files` - Encrypted PDF -- `password` - User 
or owner password - -Response: -- Decrypted PDF - -## Error Handling - -| Error | Condition | -|-------|-----------| -| `EngineError::InvalidInput` | No password provided | -| `EngineError::EncryptionFailed` | qpdf error | -| `EngineError::DecryptionFailed` | Wrong password | -| `EngineError::NotEncrypted` | Decrypt called on unencrypted PDF | - -## Testing - -Unit tests: -- Encrypt with user password, decrypt succeeds -- Encrypt with owner password only -- Permission verification (attempt restricted action) -- Wrong password rejection - -Integration tests: -- Gotenberg feature parity -- PDF/A compliance after encryption (should be preserved) - -## Dependencies - -Runtime: `qpdf` binary (already in Docker image) - -```toml -[dependencies] -# Shell execution -tokio = { workspace = true } -tempfile = { workspace = true } -``` - -## Security Notes - -1. **Passwords transmitted in form data** - Use HTTPS in production -2. **qpdf binary must be available** - Check at startup -3. **Temporary files** - Cleaned up after operation -4. **Memory safety** - Passwords not logged - -## References - -- qpdf encryption docs: https://qpdf.readthedocs.io/en/stable/encryption.html -- PDF 2.0 spec ISO 32000-2: Section 7.6 (Encryption) -- Gotenberg docs: https://gotenberg.dev/docs/routes#pdf-engines diff --git a/docs/specs/20-bdd-testing.md b/docs/specs/20-bdd-testing.md deleted file mode 100644 index b1dc10a..0000000 --- a/docs/specs/20-bdd-testing.md +++ /dev/null @@ -1,600 +0,0 @@ -# Spec 20 — BDD Testing with Cucumber (Detailed Implementation Guide) - -> Port Gotenberg's Gherkin integration tests to Folio. -> Step-by-step implementation for replicating Gotenberg's test infrastructure. - -## Overview - -This spec provides detailed instructions for porting Gotenberg's integration -tests from Go (Godog + testcontainers-go) to Rust (cucumber-rs + testcontainers-rs). 
- -## Gotenberg's Test Structure (Source) - -``` -gotenberg/test/integration/ -├── features/ # 26 .feature files (Gherkin) -│ ├── health.feature -│ ├── pdfengines_merge.feature -│ └── ... -├── scenario/ -│ ├── scenario.go # Step definitions (Go) -│ ├── containers.go # Docker container helpers -│ └── main_test.go # Test runner setup -└── testdata/ # PDF fixtures -``` - -## Folio Target Structure - -``` -crates/server/tests/bdd/ -├── features/ # Copied & adapted from Gotenberg -│ ├── health.feature -│ ├── pdfengines_merge.feature -│ └── ... (26 files) -├── steps/ -│ ├── mod.rs # Step registration -│ ├── container.rs # testcontainers-rs wrapper -│ ├── http.rs # HTTP client steps -│ ├── pdf.rs # PDF assertions -│ └── gotenberg_compat.rs # Go-to-Rust step mappings -├── support/ -│ ├── world.rs # Cucumber World struct -│ └── hooks.rs # Before/After hooks -├── testdata/ # Copied from Gotenberg -│ ├── page_1.pdf -│ ├── page_2.pdf -│ └── ... 
-└── main.rs # Test runner entry point -``` - -## Step 1: Dependencies (Cargo.toml) - -Add to `crates/server/Cargo.toml`: - -```toml -[dev-dependencies] -# BDD framework -cucumber = "0.21" - -# Docker testcontainers -testcontainers = "0.22" -testcontainers-modules = { version = "0.11", features = ["blocking"] } - -# HTTP client for tests -reqwest = { version = "0.12", features = ["multipart", "json"] } - -# PDF validation -lopdf = { workspace = true } -pdf-extract = "0.8" - -# Async runtime for tests -tokio = { workspace = true } - -# Temporary files -tempfile = { workspace = true } -``` - -## Step 2: Create Directory Structure - -```bash -mkdir -p crates/server/tests/bdd/{features,steps,support,testdata} -touch crates/server/tests/bdd/main.rs -touch crates/server/tests/bdd/steps/{mod.rs,container.rs,http.rs,pdf.rs} -touch crates/server/tests/bdd/support/{world.rs,hooks.rs} -``` - -## Step 3: Copy Gotenberg Test Data - -```bash -cp gotenberg/test/integration/testdata/*.pdf \ - crates/server/tests/bdd/testdata/ -``` - -## Step 4: Port Feature Files - -Copy and adapt each `.feature` file. 
Example adaptation: - -**Gotenberg (original):** -```gherkin -Given I have a Gotenberg container with the following environment variable(s): - | API_DISABLE_HEALTH_CHECK_ROUTE_TELEMETRY | false | -``` - -**Folio (adapted):** -```gherkin -Given I have a Folio container with the following environment variable(s): - | RUST_LOG | info | -``` - -## Step 5: Implement World (support/world.rs) - -The World holds test state across steps: - -```rust -use cucumber::World; -use reqwest::Client; -use std::collections::HashMap; -use testcontainers::Container; - -#[derive(Debug, World)] -pub struct FolioWorld { - // HTTP client for requests - pub client: Client, - - // Active container (if any) - pub container: Option<Container<GenericImage>>, - - // Last HTTP response - pub response: Option<reqwest::Response>, - - // Response body bytes - pub body: Option<Vec<u8>>, - - // Temporary directory for test files - pub temp_dir: tempfile::TempDir, - - // Container base URL - pub base_url: Option<String>, -} - -impl Default for FolioWorld { - fn default() -> Self { - Self { - client: Client::new(), - container: None, - response: None, - body: None, - temp_dir: tempfile::tempdir().unwrap(), - base_url: None, - } - } -} - -impl FolioWorld { - /// Start Folio container with environment variables - pub async fn start_container(&mut self, env: HashMap<String, String>) { - use testcontainers::{GenericImage, WaitFor}; - - let image = GenericImage::new("deesh2025/no-name", "latest") - .with_wait_for(WaitFor::message_on_stdout("Listening on")); - - // Add environment variables - for (key, value) in env { - let _ = image.with_env_var(key, value); - } - - let container = image.start().await.unwrap(); - let port = container.get_host_port_ipv4(3000).await.unwrap(); - - self.base_url = Some(format!("http://localhost:{}", port)); - self.container = Some(container); - } -} -``` - -## Step 6: Implement Steps (steps/mod.rs) - -Register all step definitions: - -```rust -use cucumber::Steps; -use 
crate::support::world::FolioWorld; - -mod container; -mod http; -mod pdf; - -pub fn steps() -> Steps<FolioWorld> { - let mut steps = Steps::new(); - - // Container steps - steps.given( - "I have a default Folio container", - container::default_container, - ); - steps.given( - "I have a Folio container with the following environment variable(s)", - container::container_with_env, - ); - - // HTTP steps - steps.when( - regex r#"I make a "(GET|POST)" request to "(.+)""#, - http::make_request, - ); - steps.then( - "the response status code should be {int}", - http::check_status_code, - ); - - // PDF steps - steps.then( - "there should be {int} PDF(s) in the response", - pdf::check_pdf_count, - ); - steps.then( - "the PDF should have {int} page(s)", - pdf::check_page_count, - ); - - steps -} -``` - -## Step 7: Container Steps (steps/container.rs) - -Map Gotenberg's container steps to Rust: - -| Gotenberg (Go) | Folio (Rust) | -|----------------|--------------| -| `iHaveADefaultGotenbergContainer` | `default_container` | -| `iHaveAGotenbergContainerWithEnv` | `container_with_env` | -| `startGotenbergContainer` | `testcontainers::GenericImage` | - -```rust -use std::collections::HashMap; -use cucumber::gherkin::Table; -use crate::support::world::FolioWorld; - -pub async fn default_container(world: &mut FolioWorld) { - world.start_container(HashMap::new()).await; -} - -pub async fn container_with_env(world: &mut FolioWorld, table: &Table) { - let mut env = HashMap::new(); - for row in table.rows.iter().skip(1) { // Skip header - let key = row.cells[0].value.clone(); - let value = row.cells[1].value.clone(); - env.insert(key, value); - } - world.start_container(env).await; -} -``` - -## Step 8: HTTP Steps (steps/http.rs) - -| Gotenberg (Go) | Folio (Rust) | -|----------------|--------------| -| `doRequest` | `reqwest::Client` | -| `s.resp` | `world.response` | -| `s.resp.Code` | `world.response.status().as_u16()` | - -```rust -use cucumber::gherkin::Table; -use 
crate::support::world::FolioWorld; - -pub async fn make_request( - world: &mut FolioWorld, - method: String, - endpoint: String, -) { - let url = format!("{}{}", world.base_url.as_ref().unwrap(), endpoint); - - let response = match method.as_str() { - "GET" => world.client.get(&url).send().await.unwrap(), - "POST" => world.client.post(&url).send().await.unwrap(), - _ => panic!("Unsupported method: {}", method), - }; - - world.response = Some(response); -} - -pub async fn check_status_code(world: &mut FolioWorld, expected: u16) { - let actual = world.response.as_ref().unwrap().status().as_u16(); - assert_eq!(actual, expected, "Status code mismatch"); -} -``` - -## Step 9: PDF Steps (steps/pdf.rs) - -| Gotenberg (Go) | Folio (Rust) | -|----------------|--------------| -| `assertPDFPageCount` | `lopdf::Document::get_pages()` | -| `assertPDFContent` | `pdf_extract::extract_text()` | - -```rust -use lopdf::Document; -use crate::support::world::FolioWorld; - -pub async fn check_pdf_count(world: &mut FolioWorld, expected: usize) { - // Implementation to count PDFs in multipart response -} - -pub async fn check_page_count(world: &mut FolioWorld, expected: usize) { - let body = world.body.as_ref().unwrap(); - let doc = Document::load_mem(body).unwrap(); - let actual = doc.get_pages().len(); - assert_eq!(actual, expected, "Page count mismatch"); -} -``` - -## Step 10: Test Runner (main.rs) - -```rust -use cucumber::Cucumber; -use std::path::PathBuf; - -mod support; -mod steps; - -use support::world::FolioWorld; -use steps::steps; - -#[tokio::main] -async fn main() { - let runner = Cucumber::<FolioWorld>::new() - .features(&[PathBuf::from("tests/bdd/features")]) - .steps(steps()) - .run_and_exit() - .await; -} -``` - -## Step 11: Run Tests - -```bash -# Build Docker image first -docker build -t deesh2025/no-name:latest . 
- -# Run all BDD tests -cargo test --test bdd - -# Run specific feature -cargo test --test bdd -- health - -# With debug output -cargo test --test bdd -- --nocapture - -# Generate HTML report -cargo test --test bdd -- --format html --output reports/ -``` - -## Mapping: Gotenberg Steps → Rust Steps - -Complete mapping of all 26 feature file step patterns: - -| Pattern | Go Function | Rust Function | Status | -|---------|-------------|---------------|--------| -| `I have a default Gotenberg container` | `iHaveADefaultGotenbergContainer` | `default_container` | ⬜ | -| `I have a Gotenberg container with env` | `iHaveAGotenbergContainerWithEnv` | `container_with_env` | ⬜ | -| `I make a "X" request to "Y"` | `iMakeARequestToGotenberg` | `make_request` | ⬜ | -| `the response status code should be X` | `theResponseStatusCodeShouldBe` | `check_status_code` | ⬜ | -| `the response header "X" should be "Y"` | `theResponseHeaderShouldBe` | `check_header` | ⬜ | -| `the response body should match JSON` | `theResponseBodyShouldMatchJSON` | `check_json_body` | ⬜ | -| `there should be X PDF(s) in the response` | `thereShouldBeXPDFs` | `check_pdf_count` | ⬜ | -| `the PDF should have X page(s)` | `thePDFShouldHaveXPages` | `check_page_count` | ⬜ | -| `the PDF content at page X should be` | `thePDFContentAtPageShouldBe` | `check_page_content` | ⬜ | -| `the container should log` | `theContainerShouldLog` | `check_logs` | ⬜ | - -## Feature Porting Priority - -Port features in this order: - -1. **Phase 1: Core (Week 1)** - - `health.feature` (simplest) - - `version.feature` - - `root.feature` - -2. **Phase 2: PDF Engines (Week 2)** - - `pdfengines_merge.feature` - - `pdfengines_split.feature` - - `pdfengines_flatten.feature` - - `pdfengines_rotate.feature` - -3. **Phase 3: Chromium (Week 3)** - - `chromium_convert_html.feature` - - `chromium_convert_url.feature` - - `chromium_screenshot_*.feature` - -4. 
**Phase 4: Advanced (Week 4)** - - `pdfengines_bookmarks.feature` - - `pdfengines_convert.feature` - - `webhook.feature` - - `pdfengines_encrypt.feature` - -## CI/CD Integration - -```yaml -# .github/workflows/bdd.yml -name: BDD Tests -on: [push, pull_request] -jobs: - bdd: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Build Docker image - run: docker build -t deesh2025/no-name:latest . - - - name: Install Chromium - run: sudo apt-get install -y chromium-browser - - - name: Run BDD tests - run: cargo test --test bdd -- --format junit > bdd-results.xml - - - name: Upload results - uses: actions/upload-artifact@v4 - with: - name: bdd-results - path: bdd-results.xml -``` - -## Debugging Tips - -1. **Container not starting**: Check Docker daemon, image tag -2. **Connection refused**: Wait for container healthy state -3. **PDF assertions failing**: Verify lopdf can parse the PDF -4. **Step not found**: Check regex pattern matches exactly - -## References - -- Gotenberg features: `gotenberg/test/integration/features/` -- Gotenberg steps: `gotenberg/test/integration/scenario/scenario.go` -- cucumber-rs docs: https://cucumber-rs.github.io/ -- testcontainers-rs: https://docs.rs/testcontainers/ - - -### Dependencies - -```toml -[dev-dependencies] -cucumber = "0.21" -testcontainers = "0.22" -reqwest = { workspace = true } -serde_json = { workspace = true } -tempfile = { workspace = true } -``` - -## Implementation Phases - -### Phase 1: Infrastructure (Week 1) - -- [ ] Add cucumber and testcontainers dependencies -- [ ] Create test directory structure -- [ ] Implement World struct with HTTP client and temp directory -- [ ] Implement container lifecycle hooks -- [ ] Create basic step definitions (Given/When/Then) - -### Phase 2: Core Feature Tests (Week 2) - -- [ ] Port `health.feature` tests -- [ ] Port `version.feature` tests -- [ ] Port `pdfengines_merge.feature` tests -- [ ] Port `pdfengines_split.feature` tests - -### Phase 3: Chromium Tests 
(Week 3) - -- [ ] Port `chromium_convert_html.feature` -- [ ] Port `chromium_convert_url.feature` -- [ ] Port `chromium_screenshot_*.feature` tests - -### Phase 4: Advanced Features (Week 4) - -- [ ] Port PDF/A conversion tests -- [ ] Port bookmark tests -- [ ] Port webhook tests (mock server) - -## Key Components - -### World Implementation - -```rust -pub struct World { - /// HTTP client for requests - client: reqwest::Client, - /// Folio container handle - container: Option<FolioContainer>, - /// Last HTTP response - response: Option<reqwest::Response>, - /// Response body bytes - body: Option<Vec<u8>>, - /// Temporary directory for test files - temp_dir: tempfile::TempDir, - /// Test data directory - testdata_dir: PathBuf, -} -``` - -### Step Definitions - -Common steps to implement: - -```rust -#[given("I have a default Folio container")] -async fn default_container(world: &mut World) { - world.start_container().await; -} - -#[when(regex = r#"I make a "(GET|POST)" request to "(.+)""#)] -async fn make_request(world: &mut World, method: String, path: String) { - world.request(&method, &path).await; -} - -#[then("the response status code should be {int}")] -async fn check_status(world: &mut World, expected: u16) { - let actual = world.response.as_ref().unwrap().status().as_u16(); - assert_eq!(actual, expected); -} -``` - -### Container Management - -Using testcontainers-rs: - -```rust -pub struct FolioContainer { - image: GenericImage, - container: Container<GenericImage>, - port: u16, -} - -impl FolioContainer { - pub async fn start() -> Result<Self, TestcontainersError> { - let image = GenericImage::new("deesh2025/no-name", "latest") - .with_wait_for(WaitFor::message_on_stdout("Listening on")); - - let container = image.start().await?; - let port = container.get_host_port_ipv4(3000).await?; - - Ok(Self { image, container, port }) - } - - pub fn base_url(&self) -> String { - format!("http://localhost:{}", self.port) - } -} -``` - -### PDF Assertions - -```rust 
-pub fn assert_pdf_page_count(pdf_bytes: &[u8], expected: u32) { - let doc = lopdf::Document::load_mem(pdf_bytes).unwrap(); - let pages = doc.get_pages().len() as u32; - assert_eq!(pages, expected, "PDF page count mismatch"); -} - -pub fn assert_pdf_contains_text(pdf_bytes: &[u8], text: &str) { - // Use pdf-extract or similar -} -``` - -## Running Tests - -```bash -# Run all BDD tests -cargo test --test bdd - -# Run specific feature -cargo test --test bdd -- health - -# With output for debugging -cargo test --test bdd -- --nocapture - -# Generate HTML report -cargo test --test bdd -- --format html --output reports/ -``` - -## CI Integration - -```yaml -# .github/workflows/bdd.yml -name: BDD Tests -on: [push, pull_request] -jobs: - bdd: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Build Docker image - run: docker build -t folio:test . - - name: Run BDD tests - run: cargo test --test bdd -``` - -## References - -- cucumber-rs docs: https://cucumber-rs.github.io/ -- testcontainers-rs: https://docs.rs/testcontainers/latest/testcontainers/ -- Gotenberg features: https://github.com/gotenberg/gotenberg/tree/main/test/integration/features diff --git a/docs/specs/20-cli.md b/docs/specs/20-cli.md deleted file mode 100644 index 2fb0cb1..0000000 --- a/docs/specs/20-cli.md +++ /dev/null @@ -1,362 +0,0 @@ -# Spec 20 — `cli` (`folio` binary) - -> User-facing command line for one-off conversions and PDF post-processing, -> built on `clap` derive and the `engine` crate. - -## Goal - -Provide a `folio` binary that exercises the engine for HTML / URL / -Markdown / Office conversions and basic PDF ops (merge / split), matching -the README usage in -`@/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/README.md:69-83`, -without needing the HTTP server. - -## Scope - -**In:** - -- `convert` — single-file or single-input-source conversion. -- `batch` — directory walker that converts many files in parallel. 
-- `merge`, `split`, `flatten`, `metadata` — direct wrappers over spec 13. -- Shell-completion generation. -- Stdin/stdout streaming for pipelines. -- Structured logging behind `RUST_LOG`. - -**Out:** - -- `serve` subcommand. Users invoke `folio-server` directly. (CLI may - later gain a thin `serve` shim, but not in the MVP.) -- Watermark / rotate / encrypt — exposed via the server first; CLI - follow-up once the server fronting them is solid. - -## Public surface - -``` -folio <COMMAND> - -Global options (apply to every command): - -v, --verbose Increase log verbosity (-v info, -vv debug, -vvv trace) - -q, --quiet Suppress log output (overrides -v) - --log-format <FORMAT> - text | json. Default: text on a TTY, json otherwise. - --chrome <PATH> Override Chrome executable (BrowserConfig::executable) - --no-sandbox Pass --no-sandbox to Chrome (default true on Linux) - --sandbox Force sandbox on (overrides --no-sandbox / Linux default) - --timeout <DUR> Per-render timeout, e.g. "60s", "2m". Default 60s. - -h, --help - -V, --version -``` - -### `folio convert` - -Exactly one of `--html`, `--url`, `--markdown`, `--office`, `--stdin`. -Exactly one `--output` (path or `-` for stdout). - -``` -folio convert - (--html <FILE> | --url <URL> | --markdown <FILE> | --office <FILE> - | --stdin --as <html|markdown>) - --output <FILE> FILE or '-' for stdout. Required. - - PdfOptions (apply to html/url/markdown; ignored for office): - --paper <SIZE> a4 | letter | legal | a3 | a5 | "WxH" - --landscape - --margin <SPEC> inches (e.g. "0.5") or "TOP,RIGHT,BOTTOM,LEFT" - default 0.39in (~1cm) - --scale <FLOAT> 0.1..=2.0 - --no-print-background - --emulate <print|screen> - --pages <RANGES> e.g. 
"1-3,5,7-" - --header-template <FILE> Path to HTML file - --footer-template <FILE> - --prefer-css-page-size - --wait <SPEC> load | domcontentloaded | networkidle - | selector:CSS | expr:JS | delay:DUR - - RequestContext (html/url/markdown): - --user-agent <STR> - --header "Name: Value" Repeatable - --cookie "name=value;..." Repeatable; ;-separated attrs - --fail-on-status <SPEC> Repeatable. e.g. "500", "5xx", "400-404" - --base-url <URL> For --html / --markdown / --stdin; ignored otherwise - - Office-only: - --pdf-a <a1b|a2b|a3b> - --pdf-ua - --quality <1..=100> - --max-image-resolution <DPI> -``` - -### `folio batch` - -``` -folio batch - --input-dir <DIR> Required. Walked recursively. - --output-dir <DIR> Required. Mirrors input directory tree. - --pattern <GLOB> Default: "**/*.{html,htm,md,markdown}" - --concurrency <N> Default: number of CPUs - --on-error <stop|skip> Default: skip - --dry-run Print planned conversions, do nothing - - + every PdfOptions / RequestContext flag from `convert` - -Each input is converted individually, with extension switched to .pdf -in the output tree. Office files are accepted iff `--pattern` includes -them; choose `--pattern "**/*.{docx,xlsx,pptx}"` etc. -``` - -### `folio merge` - -``` -folio merge --output <FILE> <INPUT>... - -INPUT may be a path or '-' (read PDF bytes from stdin). Order is preserved. -``` - -### `folio split` - -``` -folio split <INPUT> - --output-dir <DIR> Required. - --prefix <STR> Default: input basename without extension. - --mode <SPEC> ranges:1-3,5,7- | every-n:5 | one-per-page - Default: one-per-page - -Outputs: <prefix>-<NNN>.pdf, zero-padded. e.g. report-001.pdf. -``` - -### `folio flatten` - -``` -folio flatten <INPUT> --output <FILE> -INPUT or FILE may be '-' for stdio. -``` - -### `folio metadata` - -``` -folio metadata read <INPUT> # JSON to stdout -folio metadata write <INPUT> --output <FILE> [--from-json <FILE> | --set KEY=VALUE]... -``` - -`--set` repeatable. 
Special keys: `Title`, `Author`, `Subject`, -`Keywords`, `Creator`, `Producer`, `CreationDate`, `ModDate`. Anything -else lands in `Metadata::custom`. Empty value (`--set Title=`) deletes. - -### `folio completions <SHELL>` - -Emits completion script to stdout. SHELL ∈ `bash | zsh | fish | powershell`. - -## Behavior - -### Process model - -- One `tokio::runtime::Builder::new_multi_thread().enable_all().build()` - built in `main`. -- All commands are short-lived; the runtime is dropped at exit. -- Logging configured with `tracing_subscriber::fmt()` with the chosen - format. `--quiet` sets the level filter to `off`. `-v` to `info`, - `-vv` to `debug`, `-vvv` to `trace`. `RUST_LOG`, when set, takes - precedence (parsed by `tracing_subscriber::EnvFilter`). - -### Engine reuse - -- `convert`: launches one `ChromiumEngine` (or `LibreOfficeEngine`), - performs one render, calls `shutdown` on success path, returns. -- `batch`: launches one engine, gates renders with - `tokio::sync::Semaphore::new(concurrency)`, fans out via - `tokio::task::JoinSet`, calls `shutdown` once all are joined. -- `merge`, `split`, `flatten`, `metadata`: no engine launch — pdfops are - pure functions on byte buffers. - -### Stdin / stdout - -- `--stdin` reads raw bytes from `tokio::io::stdin` until EOF. - `--as html` (default) treats them as a single HTML document; `--as markdown` - feeds them to `markdown_to_pdf`. -- `--output -` writes PDF bytes to `stdout` *unbuffered* and disables - any other stdout output (including the success log line) — so - callers can pipe directly. -- `merge` accepts `-` as an input meaning "next chunk of bytes from - stdin". Multiple `-`s are not allowed; stdin can only be consumed once. - -### Option parsing helpers - -- `--paper`: `a4`/`letter`/`legal`/`a3`/`a5` map to `PaperSize` constants. - `WxH` parsed as two `f32`s separated by `x` (case-insensitive); both - values are inches. -- `--margin`: a single value sets all four; `T,R,B,L` sets each in turn. 
- Unit is inches. Examples: `--margin 0.5`, `--margin 1,0.5,1,0.5`. -- `--wait`: - - `load` / `domcontentloaded` / `networkidle` map to the matching - `WaitCondition` variant. - - `selector:<CSS>` → `WaitCondition::Selector { selector }`. - - `expr:<JS>` → `WaitCondition::Expression { expression }`. - - `delay:<DUR>` → `WaitCondition::Delay { duration: parse_dur }`. -- `--cookie`: `name=value` followed by `;`-separated attributes - `Domain=`, `Path=`, `Secure`, `HttpOnly`. Unknown attributes ignored. -- `--fail-on-status`: parses individual codes (`500`), wildcard families - (`5xx`, `4xx`), or ranges (`500-503`). Resolved into `Vec<u16>`. -- All durations parsed by `humantime::parse_duration` (e.g. `5s`, `2m`, - `500ms`). - -### Logging fields - -For each completed conversion: - -``` -INFO render - source = "html|url|markdown|office" - bytes_in = <usize> (skipped for url) - bytes_out = <usize> - duration_ms = <u64> - pages = <Option<u32>> (extracted via `lopdf` after the fact) -``` - -For each error: `error.code = "<EngineError variant>"` and `error.message`. - -### Exit codes - -| Code | Meaning | -|------|--------------------------------------------------------| -| 0 | Success. | -| 1 | Generic / unexpected error (last-resort fallthrough). | -| 2 | Usage / parse error (delegated to clap). | -| 3 | Engine error (anything mapping to `EngineError`). | -| 4 | Timeout (`EngineError::Timeout`). | -| 5 | I/O error reading inputs / writing outputs. | -| 6 | Multiple errors in `batch` with `--on-error skip`. | - -In `--on-error skip` mode, a non-zero count of failures yields exit code -6 and a one-line summary on stderr. - -### `batch` ordering - -Walks via `walkdir::WalkDir`, collects matching paths into a stable -sorted order, schedules conversions in that order. Reported errors carry -the input path so users can correlate. - -### `merge` / `split` correctness - -- `merge` reads each input fully into memory before delegating to - `engine::pdfops::merge`. 
Inputs validated as PDFs upon read; bad input - fails fast with the path in the error. -- `split` filenames are zero-padded to fit the chunk count - (`width = chunk_count.to_string().len()`, min 3). - -## Errors - -Mapped to exit codes per the table above. Error messages on stderr -follow this shape: - -``` -error: <one-line summary> - caused by: <next layer> - caused by: <leaf> -``` - -`anyhow`'s `{:#}` formatter is used. The error's source chain MUST -reach the originating `EngineError` variant. - -## Edge cases - -| Scenario | Required behavior | -|--------------------------------------------------------------|--------------------------------------------------------------------| -| `convert --html foo.html --url ...` | clap mutex group rejects → exit 2. | -| `convert --output -` on a TTY | Allowed. Bytes go to stdout. Stderr still receives logs. | -| `convert --output existing.pdf` | Overwrites. No prompt. | -| `batch --input-dir A --output-dir A` (same directory) | Refused; exit 2 with explanation. | -| `batch --output-dir <does-not-exist>` | Created recursively (`fs::create_dir_all`). | -| `batch --concurrency 0` | Treated as 1. | -| `--paper 0x0` | Caught by spec 10 `PaperSize::new`; exit 3. | -| `--margin "1, 2"` (only two values) | Exit 2 with usage hint. | -| `--cookie "novalue"` (no `=`) | Exit 2. | -| `--wait selector:` (empty) | Exit 2. | -| `merge` with one input | Allowed; bytes round-tripped through pdfops. | -| `merge` with zero inputs | Exit 2 (clap requires at least one). | -| `merge -` consumed twice | Exit 2 ("stdin can only be used once"). | -| `metadata read` on encrypted PDF | Exit 3 with the documented engine message. | -| Long-running conversion → SIGINT | Engine receives `shutdown` via `tokio::signal`; exit 130. | - -## Test plan - -Tests live in `crates/cli/tests/cli.rs` using `assert_cmd`, -`predicates`, and `tempfile`. Network-bound and Chrome-bound tests are -`#[ignore]`d by default. 
- -### Unit tests (option parsers) - -Exposed as `pub(crate)` for direct testing. - -- `parse_paper_named`, `parse_paper_dimensions`, `parse_paper_invalid`. -- `parse_margin_single_value_uniform`. -- `parse_margin_four_values_in_order`. -- `parse_margin_wrong_count`. -- `parse_wait_simple_keywords`. -- `parse_wait_selector`. -- `parse_wait_expression`. -- `parse_wait_delay`. -- `parse_cookie_with_attrs`. -- `parse_cookie_missing_value`. -- `parse_fail_on_status_codes_and_wildcards`. - -### Command-level tests (`assert_cmd`) - -Without engine: - -- `version_subcommand_outputs_semver_string`. -- `convert_requires_one_input_source`. -- `convert_requires_output`. -- `merge_with_no_inputs_exits_2`. -- `split_default_mode_one_per_page` — using a tiny canned PDF. -- `metadata_read_round_trips_via_write` — pure pdfops. -- `flatten_idempotent_via_cli` — pure pdfops. -- `completions_emits_bash_script` — output starts with `_folio()`. -- `usage_error_exits_2`. -- `engine_error_path_exits_3` — invoke `convert --html nonexistent.html`. - -With Chrome (`#[ignore]`): - -- `convert_html_to_stdout_pipes_bytes` — `… --output -` produces - bytes starting with `%PDF-`. -- `convert_url_to_pdf_against_local_axum`. -- `batch_smoke_two_files_into_two_pdfs`. -- `batch_skip_on_error_exits_6_with_summary`. - -With LibreOffice (`#[ignore]`): - -- `convert_office_writer_doc`. -- `convert_office_with_pdf_a_2b`. - -### Logging / output golden tests - -- `log_format_json_emits_valid_json_per_line` — capture stderr, parse - each line via `serde_json::from_str`. -- `log_format_text_does_not_emit_color_when_piped`. - -## Acceptance - -- [ ] `crates/cli/src/main.rs` compiles, plus `commands/`, `args/`, - `parse/` submodules as needed. -- [ ] `clap = { workspace = true, features = ["derive", "env"] }`, - `clap_complete`, `assert_cmd`, `predicates`, `humantime`, - `walkdir`, `tracing-subscriber` wired via `workspace.dependencies`. 
-- [ ] Top-level binary name is `folio` (already set in `crates/cli/Cargo.toml`). -- [ ] `folio convert --help` matches the surface above (golden test - against the rendered help). -- [ ] All listed unit tests pass. -- [ ] All non-ignored integration tests pass. -- [ ] `--ignored` integration tests pass on a host with Chrome and `soffice`. -- [ ] `cargo clippy -p cli -- -D warnings` clean. -- [ ] No `unwrap`/`expect` outside test code. -- [ ] Exit codes match the documented table (assert via `assert_cmd`). - -## Out of scope / follow-ups - -- `serve` subcommand fronting `folio-server`. -- Interactive TUI mode. -- Configuration file support (e.g. `folio.toml` discovered by ancestor - walk) — defer until users ask. -- Watermark / rotate / encrypt CLI subcommands — once spec 13 covers - them server-side. -- Progress bars in `batch` — defer; logs cover it. diff --git a/docs/specs/30-server.md b/docs/specs/30-server.md deleted file mode 100644 index 0b315a0..0000000 --- a/docs/specs/30-server.md +++ /dev/null @@ -1,478 +0,0 @@ -# Spec 30 — `server` (`folio-server` binary) - -> Gotenberg-compatible HTTP service backed by the `engine` crate. -> Drop-in replacement for Gotenberg's `/forms/chromium/*`, -> `/forms/libreoffice/*`, and `/forms/pdfengines/*` routes. - -## Goal - -Expose an HTTP API that mirrors Gotenberg's wire contract from -`@/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/docs/gotenberg-spec.md:48-90`, -so existing Gotenberg clients can switch by changing only the base URL. - -## Scope - -**In:** - -- The Phase-1/2 routes listed below (chromium html/url/markdown/screenshot, - libreoffice convert, pdfengines merge/split/flatten/metadata). -- Form-multipart parsing (matching Gotenberg field names). -- One shared `ChromiumEngine` and `LibreOfficeEngine` per process. -- Concurrency limit via global `Semaphore`. -- Per-request `request_id`, structured `tracing` logs, `/health`, `/version`. 
-- Graceful shutdown on `SIGINT` / `SIGTERM`: drain in-flight, close - engines, exit 0. - -**Out:** - -- Webhook routes (`/forms/webhook`). -- Screenshot routes (`/forms/chromium/screenshot/*`) — follow-up. -- Encrypt / watermark / stamp / rotate routes — wired once spec 13's - follow-ups land. -- Metrics (Prometheus / OpenTelemetry) — separate optional feature. -- Auth — none in MVP. Operators are expected to front this with a - reverse proxy when exposed publicly. - -## Public API - -### Routes - -| Method | Path | Handler | -|--------|------------------------------------------|----------------------------------| -| GET | `/health` | `health` | -| GET | `/version` | `version` | -| POST | `/forms/chromium/convert/html` | `chromium_html` | -| POST | `/forms/chromium/convert/url` | `chromium_url` | -| POST | `/forms/chromium/convert/markdown` | `chromium_markdown` | -| POST | `/forms/chromium/screenshot/html` | `chromium_screenshot_html` | -| POST | `/forms/chromium/screenshot/url` | `chromium_screenshot_url` | -| POST | `/forms/chromium/screenshot/markdown` | `chromium_screenshot_markdown` | -| POST | `/forms/libreoffice/convert` | `libreoffice_convert` | -| POST | `/forms/pdfengines/merge` | `pdfengines_merge` | -| POST | `/forms/pdfengines/split` | `pdfengines_split` | -| POST | `/forms/pdfengines/flatten` | `pdfengines_flatten` | -| POST | `/forms/pdfengines/metadata/read` | `pdfengines_metadata_read` | -| POST | `/forms/pdfengines/metadata/write` | `pdfengines_metadata_write` | - -All POST routes are `multipart/form-data`. JSON bodies are not accepted -(matches Gotenberg). 
- -### CLI surface - -``` -folio-server serve [OPTIONS] - -Options (env-overridable; flags take precedence): - --host <HOST> Default 0.0.0.0 (env FOLIO_HOST) - --port <PORT> Default 3000 (env FOLIO_PORT) - --concurrency <N> Default num_cpus (env FOLIO_CONCURRENCY) - --max-body-bytes <N> Default 50 MiB (env FOLIO_MAX_BODY) - --request-timeout <DUR> Default 120s (env FOLIO_REQUEST_TIMEOUT) - --chrome <PATH> Override Chrome path (env CHROME_PATH) - --no-sandbox / --sandbox (env FOLIO_NO_SANDBOX) - --soffice <PATH> Override soffice path (env LIBREOFFICE_PATH) - --log-level <LEVEL> Default "info" (env RUST_LOG) - --log-format <FORMAT> text | json. Default text on TTY, else json. - (env FOLIO_LOG_FORMAT) -``` - -## Behavior - -### App state - -```rust -pub struct AppState { - chromium: Arc<ChromiumEngine>, - libreoffice: Arc<LibreOfficeEngine>, - sem: Arc<tokio::sync::Semaphore>, // global concurrency cap - config: ServerConfig, // ports, timeouts, max body - started_at: std::time::Instant, -} -``` - -`AppState` is `Clone` (its fields are all `Arc`/`Copy`-friendly) and -attached via `axum::extract::State`. - -### Engine lifecycle - -1. On startup, build `ChromiumEngine::launch_with(BrowserConfig::from(cfg))` - in parallel with `LibreOfficeEngine::launch(LibreOfficeConfig::from(cfg))` - via `tokio::join!`. -2. On either engine failing to launch, log the error and exit 1. -3. On `SIGINT`/`SIGTERM` (`tokio::signal::ctrl_c()` + Unix signals): - 1. Stop accepting new connections (`axum::serve` graceful shutdown). - 2. Wait for in-flight requests up to a 30-second drain budget. - 3. `chromium.shutdown().await` and drop `libreoffice`. - 4. Exit 0. - -### Form-field parsing - -Files are extracted from the multipart body into a per-request -`tempfile::TempDir`. Non-file fields are collected into a -`HashMap<String, String>` (last wins on duplicates). 
- -Then a pure helper deserialises the map into the relevant request -struct using `serde_urlencoded` (after the map has been re-serialised). -This gives us camelCase field names for free via spec 10's `#[serde]` -annotations. - -### `chromium_html` - -Multipart fields: - -| Field name | Type | Maps to | -|---------------------------|----------|--------------------------------------| -| `files` (one .html file) | file | inlined as the HTML string | -| `files` (additional) | file(s) | written next to `index.html` for relative resolution; `base_url` set to a `file://<tmpdir>/index.html` | -| `paperWidth` | float | `PdfOptions::paper.width_in` | -| `paperHeight` | float | `PdfOptions::paper.height_in` | -| `marginTop` ... `marginRight` | float | `PdfOptions::margin.*` | -| `landscape` | bool | `PdfOptions::landscape` | -| `scale` | float | `PdfOptions::scale` | -| `printBackground` | bool | `PdfOptions::print_background` | -| `pageRanges` | string | `PdfOptions::page_ranges` | -| `headerTemplate` | string | `PdfOptions::header_template` | -| `footerTemplate` | string | `PdfOptions::footer_template` | -| `preferCssPageSize` | bool | `PdfOptions::prefer_css_page_size` | -| `emulateMediaType` | string | `PdfOptions::emulate_media` | -| `waitDelay` | duration | `WaitCondition::Delay` | -| `waitForSelector` | string | `WaitCondition::Selector` | -| `waitForExpression` | string | `WaitCondition::Expression` | -| `userAgent` | string | `RequestContext::user_agent` | -| `extraHttpHeaders` | json | `RequestContext::extra_headers` | -| `cookies` | json | `RequestContext::cookies` | -| `failOnHttpStatusCodes` | json | `RequestContext::fail_on_status` | - -Steps: - -1. Acquire a permit from `state.sem` (await; this is the back-pressure - point). -2. Parse multipart; require a file named `index.html` (Gotenberg - convention). -3. Build `PdfOptions` and `RequestContext` from the form map. -4. Validate via `PdfOptions::validate()`. -5. 
Call `state.chromium.html_to_pdf(html, base_url, &opts, &ctx)`. -6. Stream the bytes back as `application/pdf` with - `Content-Disposition: attachment; filename="result.pdf"` (matches - Gotenberg). Set `X-Request-Id` echo. - -### `chromium_url` - -Same as `chromium_html`, except instead of `files` there's a `url` -field (string, required), and the engine call is `url_to_pdf`. - -### `chromium_markdown` - -Multipart accepts: - -- An `index.html` file (a wrapper template). -- One or more `.md` files referenced by `<link rel="markdown" href="...">` - inside the wrapper. - -Implementation: - -1. Read all files into the per-request tempdir. -2. Read the wrapper `index.html`. Find all - `<link rel="markdown" href="...">` (or the simpler convention of - reading the *first* `.md` file when no wrapper is provided — both - supported, wrapper takes precedence). -3. For each referenced markdown, render via the engine's markdown→html - conversion (delegating to spec 11) and inline into the wrapper. -4. Send the resulting HTML to `html_to_pdf` with `base_url` set to the - tempdir. 
- -### `chromium_screenshot_html` - -Multipart fields: - -| Field name | Type | Maps to | -|---------------------------|----------|--------------------------------------| -| `files` (one .html file) | file | inlined as the HTML string | -| `format` | string | `ScreenshotOptions::format` (png/jpeg/webp) | -| `quality` | int | `ScreenshotOptions::quality` (0-100) | -| `fullPage` | bool | `ScreenshotOptions::full_page` | -| `clip.x`, `clip.y` | float | Clip rectangle position | -| `clip.width`, `clip.height` | float | Clip rectangle dimensions | -| `viewport.width` | int | `ScreenshotOptions::viewport_width` | -| `viewport.height` | int | `ScreenshotOptions::viewport_height` | -| `viewport.scale` | float | `ScreenshotOptions::scale` | -| `waitDelay` | duration | `WaitCondition::Delay` | -| `waitForSelector` | string | `WaitCondition::Selector` | -| `waitForExpression` | string | `WaitCondition::Expression` | -| `userAgent` | string | `RequestContext::user_agent` | -| `extraHttpHeaders` | json | `RequestContext::extra_headers` | -| `cookies` | json | `RequestContext::cookies` | -| `failOnHttpStatusCodes` | json | `RequestContext::fail_on_status` | - -Steps: - -1. Acquire semaphore permit. -2. Parse multipart; require a file named `index.html`. -3. Build `ScreenshotOptions` and `RequestContext` from form map. -4. Call `state.chromium.screenshot_html(html, base_url, &opts, &ctx)`. -5. Return bytes as `image/png`, `image/jpeg`, or `image/webp` with - `Content-Disposition: attachment; filename="result.{png|jpg|webp}"`. - -### `chromium_screenshot_url` - -Same as `chromium_screenshot_html`, except uses `url` field instead of -`files`, and calls `screenshot_url`. - -### `chromium_screenshot_markdown` - -Same pattern as `chromium_markdown` but renders to screenshot instead of -PDF. Calls `screenshot_markdown`. 
- -### `libreoffice_convert` - -Multipart fields: - -| Field | Type | Maps to | -|------------------------|----------|------------------------------------| -| `files` | file(s) | input documents | -| `landscape` | bool | `OfficeOptions::landscape` | -| `pageRanges` | string | `OfficeOptions::page_ranges` | -| `pdfa` | string | `OfficeOptions::pdf_a` | -| `pdfua` | bool | `OfficeOptions::pdf_ua` | -| `merge` | bool | post-process via `pdfops::merge` | -| `quality` | int | `OfficeOptions::quality` | -| `maxImageResolution` | int | `OfficeOptions::max_image_resolution` | -| `nativePageRanges` | string | alias of `pageRanges` (Gotenberg) | - -Steps: - -1. Permit + tempdir. -2. Save each `files` part to `tempdir/<name>`. -3. Call `libreoffice.convert_many(...)`. -4. If `merge = true`, pipe results into `pdfops::merge` (spec 13). -5. Return the single-file or zip-of-files response (when not merging - with multiple inputs, ZIP up the outputs as - `application/zip` β€” this matches Gotenberg's behavior). - -### `pdfengines_merge` - -Multipart `files`: two or more PDFs, in field order. Other fields: -`metadata` (json) β€” optional, applied via `pdfops::write_metadata` -after merge. - -### `pdfengines_split` - -Fields: - -- `files`: exactly one PDF. -- `splitMode`: `intervals` | `pages`. (Gotenberg uses `mode` β€” accept - both names.) -- `splitSpan`: integer for `intervals`. -- `splitUnify`: bool β€” when true and mode is `pages`, merge the chunks - back into a single PDF (matches Gotenberg quirk). -- `splitPages`: comma list of page-range chunks for `pages` mode. - -Returns: - -- Single chunk: `application/pdf`. -- Multiple chunks: `application/zip` containing - `result-001.pdf`, `result-002.pdf`, ... - -### `pdfengines_flatten` - -Fields: `files` β€” one or more PDFs. Each is flattened independently; -returns single PDF or ZIP per the same rule. - -### `pdfengines_metadata_read` - -Fields: `files` β€” one or more PDFs. 
Returns `application/json`: - -```json -{ - "input-1.pdf": { "title": "...", "author": "...", "custom": {...} }, - "input-2.pdf": { ... } -} -``` - -### `pdfengines_metadata_write` - -Fields: - -- `files`: one or more PDFs. -- `metadata`: required JSON. Merged into each input. - -Returns single PDF / ZIP per the standard rule. - -### `health` - -Returns `200 OK` with body: - -```json -{ - "status": "up", - "uptime_secs": 1234, - "chromium": "up" | "down", - "libreoffice": "up" | "down" -} -``` - -`chromium` reflects `ChromiumEngine::healthy().await`; same for -`libreoffice`. If either is `down`, the overall HTTP status is still -`200` (matches Gotenberg convention) but the body indicates the issue. - -### `version` - -Returns `text/plain` body with `env!("CARGO_PKG_VERSION")`. - -### Middleware stack (outer β†’ inner) - -1. `tower_http::trace::TraceLayer` with a custom span - (`request_id`, `method`, `uri`, `status`, `latency_ms`). -2. `tower_http::request_id::SetRequestIdLayer` (use `X-Request-Id` if - incoming, else generate a UUIDv4). -3. `tower_http::limit::RequestBodyLimitLayer::new(max_body_bytes)`. -4. `tower::timeout::TimeoutLayer::new(request_timeout)` β€” bypassed for - `/health` and `/version`. -5. `tower_http::cors::CorsLayer::permissive()` (operator-overridable - later via flag, MVP keeps it permissive). -6. The router. - -### Error mapping - -Single `IntoResponse` for `EngineError`: - -| Variant | Status | Body | -|----------------------------------|--------|---------------------------------------| -| `InvalidOption` | 400 | `{ "error": "...", "code": "INVALID_OPTION" }` | -| `InvalidPageRange` | 400 | `{ "error": "...", "code": "INVALID_PAGE_RANGE" }` | -| `Navigation { url, reason }` | 502 | `{ "error": "...", "code": "NAVIGATION", "url": "...", "reason": "..." 
}` | -| `Timeout(d)` | 504 | `{ "error": "...", "code": "TIMEOUT" }` | -| `ChromeNotFound | ChromeLaunch` | 500 | `{ "error": "...", "code": "ENGINE_UNAVAILABLE" }` | -| `Cdp | Internal | Io` | 500 | `{ "error": "...", "code": "INTERNAL" }` | - -All error responses are `application/json`. The originating -`EngineError` `Display` text becomes the `error` field; the chain (when -present) joins via `: `. - -### Concurrency model - -- Outer cap: `Semaphore::new(concurrency)`. Permit acquired in handler - prelude, dropped when the handler future ends (success or error). -- Inner: `ChromiumEngine` opens a fresh page per request (spec 11 - guarantees safe concurrency). -- LibreOffice: each `convert*` call serialises through the engine's - internal semaphore (spec 12). -- PDF ops are pure CPU; offload via `tokio::task::spawn_blocking` for - any input larger than 1 MiB so we don't block the runtime. - -## Errors - -See "Error mapping" above. The server's surface contains no error -variants of its own β€” every failure ultimately maps to an `EngineError` -or to one of the standard HTTP errors: - -- 400 β€” multipart parse failure, missing required field. -- 405 β€” wrong HTTP method on a known path. -- 413 β€” body exceeds `--max-body-bytes` (returned by tower-http layer). -- 415 β€” non-multipart `Content-Type`. - -## Edge cases - -| Scenario | Required behavior | -|---------------------------------------------------------------------|-------------------------------------------------------------------------| -| Multipart missing required `files` | 400 with `{"error":"missing required file 'index.html'"}`. | -| `files` includes a `..` path traversal | Reject; 400. | -| Body exactly at `--max-body-bytes` | Accepted. | -| Body 1 byte over | 413, structured error code `BODY_TOO_LARGE`. | -| Chrome dies mid-request | `EngineError::Cdp` β†’ 500. Server keeps running; next request triggers re-launch attempt (out of MVP β€” for now we exit). 
| -| `/health` while engines are down | 200 with `{ "status": "up", "chromium": "down" }`. Operator's monitor decides. | -| SIGINT during slow render | Graceful shutdown waits up to 30s, then forces engine shutdown and exits. In-flight client receives 503 (TimeoutLayer) or connection close. | -| Concurrent identical requests | Each gets its own page; results returned independently. | -| `extraHttpHeaders` not valid JSON | 400 `{"code":"INVALID_OPTION","error":"extraHttpHeaders is not valid JSON"}`. | -| `cookies` JSON has unknown attributes | Unknown attrs ignored; documented in OpenAPI later. | -| Output too large to fit in 4 GiB Vec | Hypothetical; 500 with internal error. Not optimised for in MVP. | - -## Test plan - -### Unit tests (`crates/server/src/...`) - -- `app_state_clone_is_cheap` β€” `static_assertions` for `Clone + Send + Sync`. -- `parse_pdf_options_from_form_map_round_trip`. -- `parse_request_context_from_form_map_round_trip`. -- `extra_http_headers_invalid_json_returns_invalid_option`. -- `cookies_with_attrs_parse`. -- `fail_on_status_codes_parse`. -- `error_mapping_table` β€” for each `EngineError` variant produce the - documented HTTP status + JSON shape. - -### Router-level tests (`tower::ServiceExt::oneshot` against `Router`) - -These do not launch real engines; they use a test double -(`ChromiumEngine` mocked behind a trait `PdfBackend`). The trait is -introduced *only* for the server's unit tests; production code uses the -concrete engine. - -- `health_returns_200_when_engines_up`. -- `version_returns_pkg_version`. -- `chromium_html_returns_pdf_bytes_on_success` β€” mock returns - `b"%PDF-1.7..."`. -- `chromium_html_400_on_missing_index_html`. -- `chromium_url_400_on_missing_url_field`. -- `chromium_html_504_when_backend_returns_timeout`. -- `chromium_html_502_when_backend_returns_navigation_error`. -- `chromium_screenshot_html_returns_png_on_success` β€” mock returns - PNG bytes (`\x89PNG`). 
-- `chromium_screenshot_url_returns_jpeg_when_format_set` — mock returns - JPEG bytes (`0xFF 0xD8`). -- `chromium_screenshot_markdown_returns_webp` — mock returns WebP. -- `body_too_large_returns_413`. -- `nonexistent_route_returns_404`. - -### Integration tests (`crates/server/tests/`) - -Marked `#[ignore]`, require Chrome and `soffice` on the host: - -- `e2e_chromium_html` — start server on ephemeral port, POST a tiny - HTML, assert PDF bytes returned. -- `e2e_chromium_url_against_local_axum_app`. -- `e2e_libreoffice_docx`. -- `e2e_pdfengines_merge_split_round_trip`. -- `graceful_shutdown_drains_inflight` — start a long render, send - SIGINT, assert the in-flight request completes (or 503s cleanly) and - the process exits within 35s. - -### Smoke - -A `crates/server/tests/smoke.sh` (or Rust harness) script `curl`s every -documented route against a launched server and asserts non-error -responses for a small fixture set. Runs in CI on Linux runners only. - -## Acceptance - -- [ ] All routes implemented per the table (including screenshot routes). -- [ ] Multipart parser handles repeated fields and named files - (Gotenberg-style: `files` repeated; the *file name* matters for - `index.html`). -- [ ] `axum`, `tower`, `tower-http`, `multer`, `tempfile`, `uuid`, - `serde`, `serde_json`, `serde_urlencoded`, `tracing-subscriber` - added via `workspace.dependencies`. -- [ ] Error mapping matches the table; covered by the dedicated unit test. -- [ ] CLI flags + env vars resolve in the documented precedence order - (flag > env > default). Verified by a unit test on - `ServerConfig::resolve(args, env)`. -- [ ] Graceful shutdown verified by the integration test. -- [ ] `cargo clippy -p server -- -D warnings` clean. -- [ ] No `unsafe`. No `unwrap`/`expect` outside `#[cfg(test)]`. -- [ ] Output content types: `application/pdf` for single PDFs, - `application/zip` for multi, `application/json` for metadata read, - `image/png`/`image/jpeg`/`image/webp` for screenshots. 
-- [ ] `Content-Disposition: attachment; filename="result.pdf"` (or - `result.zip` / `result.json` / `result.{png|jpg|webp}`) on success. -- [ ] Screenshot routes return correct image format based on `format` field. - -## Out of scope / follow-ups - -- Webhook routes (`/forms/webhook`) — Gotenberg has them; defer until - user demand. -- Full route set behind `/forms/pdfengines/*` (encrypt, watermark, - stamp, rotate, embed, bookmarks) — wired as their backing `pdfops` - functions land. -- Prometheus / OpenTelemetry exporters — separate optional feature. -- Multi-tenant API keys / quotas — assume reverse-proxy fronting. -- Hot-restart of crashed engines (today the process exits on engine - death; a supervisor is expected externally). diff --git a/docs/specs/36-chromium-wait-conditions.md b/docs/specs/36-chromium-wait-conditions.md deleted file mode 100644 index 4e46665..0000000 --- a/docs/specs/36-chromium-wait-conditions.md +++ /dev/null @@ -1,377 +0,0 @@ -# Spec 36 — Chromium Wait Conditions & Advanced Options - -> Advanced Chromium wait conditions and request context options -> that Folio is missing compared to Gotenberg. These fields provide -> finer control over page loading and resource handling. - -## Goal - -Implement missing Chromium form fields that control wait behavior, -resource validation, and rendering options. These are critical for -production use cases where precise timing and error handling are required. 
- -## Scope - -**In:** - -- `waitForSelector` - Wait for DOM element visibility -- `skipNetworkIdleEvent` - Skip network idle detection -- `skipNetworkAlmostIdleEvent` - Skip "almost idle" (≤2 connections) -- `waitWindowStatus` - Wait for `window.status` value -- `failOnResourceHttpStatusCodes` - Resource status code validation -- `ignoreResourceHttpStatusDomains` - Exclude domains from checks -- `failOnResourceLoadingFailed` - Fail on resource errors -- `failOnConsoleExceptions` - Fail on JS exceptions -- `omitBackground` - Transparent background rendering - -**Out:** - -- `failOnHttpStatusCodes` - Already implemented ✅ -- `failOnConsoleExceptions` - Future: capture console.error() calls - -## Form Fields - -### Wait Conditions (Missing in Folio) - -| Field | Type | Gotenberg Source | Description | -|-------|------|------------------|-------------| -| `waitForSelector` | string (CSS selector) | `pkg/modules/chromium/formfield.go:WaitForSelector` | Wait for element to be visible before rendering | -| `skipNetworkIdleEvent` | boolean | `pkg/modules/chromium/formfield.go:SkipNetworkIdleEvent` | Skip waiting for network idle (0 connections) | -| `skipNetworkAlmostIdleEvent` | boolean | `pkg/modules/chromium/formfield.go:SkipNetworkAlmostIdleEvent` | Skip "almost idle" (≤2 connections) | -| `waitWindowStatus` | string | `pkg/modules/chromium/formfield.go:WaitWindowStatus` | Wait for `window.status === value` | - -### Resource Validation (Missing in Folio) - -| Field | Type | Gotenberg Source | Description | -|-------|------|------------------|-------------| -| `failOnResourceHttpStatusCodes` | JSON array | `pkg/modules/chromium/formfield.go:FailOnResourceHttpStatusCodes` | HTTP status codes that fail the conversion | -| `ignoreResourceHttpStatusDomains` | JSON array | `pkg/modules/chromium/formfield.go:IgnoreResourceHttpStatusDomains` | Domains to exclude from status checks | -| `failOnResourceLoadingFailed` | boolean | 
`pkg/modules/chromium/formfield.go:FailOnResourceLoadingFailed` | Fail when any resource fails to load | -| `failOnConsoleExceptions` | boolean | `pkg/modules/chromium/formfield.go:FailOnConsoleExceptions` | Fail when `console.error()` is called | - -### Rendering Options (Missing in Folio) - -| Field | Type | Gotenberg Source | Description | -|-------|------|------------------|-------------| -| `omitBackground` | boolean | `pkg/modules/chromium/formfield.go:OmitBackground` | Omit background graphics (transparent background) | - -## Implementation - -### 1. Extend `PdfOptions` in `crates/engine/src/types.rs` - -```rust -pub struct PdfOptions { - // ... existing fields ... - - // Wait conditions - pub wait_for_selector: Option<String>, - pub skip_network_idle: bool, - pub skip_network_almost_idle: bool, - pub wait_window_status: Option<String>, - - // Resource validation - pub fail_on_resource_http_status_codes: Vec<u16>, - pub ignore_resource_http_status_domains: Vec<String>, - pub fail_on_resource_loading_failed: bool, - pub fail_on_console_exceptions: bool, - - // Rendering - pub omit_background: bool, -} -``` - -### 2. 
Update Form Field Parsing in `crates/server/src/routes/chromium.rs` - -```rust -// In parse_chromium_form function: -if let Some(selector) = form.get("waitForSelector") { - opts.wait_for_selector = Some(selector.clone()); -} - -if let Some(val) = form.get("skipNetworkIdleEvent") { - opts.skip_network_idle = val == "true"; -} - -if let Some(val) = form.get("skipNetworkAlmostIdleEvent") { - opts.skip_network_almost_idle = val == "true"; -} - -if let Some(status) = form.get("waitWindowStatus") { - opts.wait_window_status = Some(status.clone()); -} - -if let Some(codes) = form.get("failOnResourceHttpStatusCodes") { - // Parse JSON array: [404, 500, 502] - opts.fail_on_resource_http_status_codes = serde_json::from_str(codes) - .map_err(|e| EngineError::InvalidOption(...))?; -} - -if let Some(domains) = form.get("ignoreResourceHttpStatusDomains") { - // Parse JSON array: ["cdn.example.com", "*.cloudfront.net"] - opts.ignore_resource_http_status_domains = serde_json::from_str(domains) - .map_err(|e| EngineError::InvalidOption(...))?; -} - -if let Some(val) = form.get("failOnResourceLoadingFailed") { - opts.fail_on_resource_loading_failed = val == "true"; -} - -if let Some(val) = form.get("failOnConsoleExceptions") { - opts.fail_on_console_exceptions = val == "true"; -} - -if let Some(val) = form.get("omitBackground") { - opts.omit_background = val == "true"; -} -``` - -### 3. Implement in `ChromiumEngine` (`crates/engine/src/chromium/render.rs`) - -#### Wait for Selector - -```rust -use chromiumoxide::page::Page; - -async fn wait_for_selector(page: &Page, selector: &str) -> Result<(), EngineError> { - use chromiumoxide::cdp::browser_protocol::dom::*; - - // Wait for element to be visible - let cmd = GetElementById { - node_id: page.find_element(selector).await - .map_err(|e| EngineError::Navigation(...))? 
- }; - - // Poll until visible or timeout - let start = std::time::Instant::now(); - while start.elapsed() < Duration::from_secs(30) { - if page.is_visible(selector).await.unwrap_or(false) { - return Ok(()); - } - tokio::time::sleep(Duration::from_millis(100)).await; - } - - Err(EngineError::Timeout(Duration::from_secs(30))) -} -``` - -#### Skip Network Idle Events - -```rust -// In navigate_and_render: -if !opts.skip_network_idle { - // Wait for network idle (0 connections) - page.wait_for_network_idle().await?; -} - -if !opts.skip_network_almost_idle { - // Wait for "almost idle" (≀2 connections) - wait_for_almost_idle(page).await?; -} -``` - -#### Wait for Window Status - -```rust -async fn wait_for_window_status(page: &Page, status: &str) -> Result<(), EngineError> { - let start = std::time::Instant::now(); - while start.elapsed() < Duration::from_secs(30) { - let current: String = page.evaluate("window.status").await?; - if current == status { - return Ok(()); - } - tokio::time::sleep(Duration::from_millis(100)).await; - } - Err(EngineError::Timeout(Duration::from_secs(30))) -} -``` - -#### Resource Status Validation - -```rust -use chromiumoxide::handler::network::{RequestPaused, ResponseReceived}; - -struct ResourceValidator { - fail_codes: Vec<u16>, - ignore_domains: Vec<String>, - failed_resources: Vec<String>, -} - -impl ResourceValidator { - fn new(codes: Vec<u16>, domains: Vec<String>) -> Self { - Self { - fail_codes: codes, - ignore_domains: domains, - failed_resources: Vec::new(), - } - } - - fn check_response(&mut self, url: &str, status: u16) { - if self.fail_codes.contains(&status) { - if !self.should_ignore(url) { - self.failed_resources.push(format!("{}: {}", url, status)); - } - } - } - - fn should_ignore(&self, url: &str) -> bool { - self.ignore_domains.iter().any(|domain| url.contains(domain)) - } -} -``` - -#### Console Exceptions - -```rust -use chromiumoxide::cdp::browser_protocol::runtime::ExceptionThrown; - -fn 
enable_console_exception_detection(page: &Page) -> ConsoleExceptionDetector { - let detector = ConsoleExceptionDetector::new(); - page.enable_runtime().await.unwrap(); - // Listen for ExceptionThrown events - // If console.error() called, add to exceptions list - detector -} -``` - -#### Omit Background - -```rust -// In PDF printing options: -let mut print_opts = PrintToPdfParams::builder(); - -if opts.omit_background { - print_opts.background_graphics(false); -} -``` - -## References to Gotenberg Source - -| Feature | Gotenberg File | Line Numbers | -|---------|------------------|-------------| -| Form field definitions | `pkg/modules/chromium/formfield.go` | Full file | -| WaitForSelector handling | `pkg/modules/chromium/libreoffice.go` | ~L400-450 | -| Network idle logic | `pkg/modules/chromium/chromium.go` | ~L200-300 | -| Resource validation | `pkg/modules/chromium/chromium.go` | ~L300-400 | -| Window status wait | `pkg/modules/chromium/chromium.go` | ~L400-450 | -| Console exceptions | `pkg/modules/chromium/chromium.go` | ~L450-500 | -| Omit background | `pkg/modules/chromium/formfield.go` | ~L150-200 | - -To read Gotenberg source: -```bash -cd /Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg -cat pkg/modules/chromium/formfield.go | grep -A5 "WaitForSelector" -``` - -## Expected Behavior - -### `waitForSelector` -- Accept CSS selector string -- Wait until element is visible in DOM -- Timeout after 30s (configurable via `waitDelay`) -- Return error if element not found - -### `skipNetworkIdleEvent` -- When `true`, don't wait for network idle (0 connections) -- Speeds up conversion for pages with persistent connections -- Default: `false` (wait for idle) - -### `skipNetworkAlmostIdleEvent` -- When `true`, don't wait for "almost idle" (≀2 connections) -- Useful for pages with long-polling or websockets -- Default: `false` - -### `waitWindowStatus` -- Wait for `window.status` to equal specified value -- Poll every 100ms with 30s timeout 
-- Useful for SPA frameworks that set status on render complete - -### `failOnResourceHttpStatusCodes` -- Accept JSON array: `[404, 500, 502]` -- Check all subresource requests (images, scripts, XHR) -- Fail conversion if any resource matches -- Ignore domains in `ignoreResourceHttpStatusDomains` - -### `ignoreResourceHttpStatusDomains` -- Accept JSON array: `["cdn.example.com", "*.cloudfront.net"]` -- Supports wildcard `*` prefix -- Case-insensitive domain matching - -### `failOnResourceLoadingFailed` -- When `true`, fail if any resource fails to load (network error) -- Includes 4xx, 5xx, DNS failure, timeout, etc. -- Default: `false` - -### `failOnConsoleExceptions` -- When `true`, fail if `console.error()` is called -- Captures exceptions thrown in `window.onerror` -- Useful for catching JS errors during render -- Default: `false` - -### `omitBackground` -- When `true`, render with transparent background -- Sets `background-graphics: false` in print params -- Useful for overlaying PDF on other content -- Default: `false` - -## Test Plan - -### Unit Tests - -- `parse_wait_for_selector_from_form` -- `parse_skip_network_idle_from_form` -- `parse_fail_on_resource_codes_json_array` -- `parse_ignore_domains_wildcard` -- `omit_background_sets_print_param` - -### Integration Tests - -- `wait_for_selector_success` - Element appears after JS render -- `wait_for_selector_timeout` - Element never appears -- `skip_network_idle_speeds_up_conversion` -- `fail_on_resource_404` - Image 404 fails conversion -- `ignore_domain_cdn` - CDN 404 ignored -- `fail_on_console_error` - JS error fails conversion -- `omit_background_transparent` - PDF has no background - -### BDD Scenarios (Port from Gotenberg) - -```gherkin -Scenario: Wait for selector before rendering - Given Chromium is available - When I POST to "/forms/chromium/convert/url" with: - | url | http://example.com/dynamic | - | waitForSelector | #content | - | waitDelay | 5s | - Then I should receive a PDF - And the PDF 
should contain "Dynamic Content" - -Scenario: Fail on resource HTTP status - Given Chromium is available - When I POST to "/forms/chromium/convert/url" with: - | url | http://example.com/broken | - | failOnResourceHttpStatusCodes | [404, 500] | - Then the response status code should be 502 - And the error code should be "NAVIGATION" -``` - -## Acceptance - -- [ ] `PdfOptions` extended with all new fields -- [ ] Form field parsing in `chromium.rs` route handler -- [ ] `wait_for_selector` implemented in `ChromiumEngine` -- [ ] Network idle skip options implemented -- [ ] `wait_window_status` implemented -- [ ] Resource validation with domain ignore list -- [ ] Console exception detection -- [ ] `omit_background` sets print parameter -- [ ] Unit tests for all form field parsers -- [ ] Integration tests for each new feature -- [ ] BDD scenarios ported from Gotenberg -- [ ] `cargo clippy -p engine -- -D warnings` clean - -## References - -- Gotenberg form fields: `/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg/pkg/modules/chromium/formfield.go` -- Gotenberg Chromium module: `/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg/pkg/modules/chromium/` -- Chromiumoxide docs: https://docs.rs/chromiumoxide/ -- Chrome DevTools Protocol: https://chromedevtools.github.io/ diff --git a/docs/specs/37-libreoffice-advanced.md b/docs/specs/37-libreoffice-advanced.md deleted file mode 100644 index e84d897..0000000 --- a/docs/specs/37-libreoffice-advanced.md +++ /dev/null @@ -1,396 +0,0 @@ -# Spec 37 β€” LibreOffice Advanced Form Fields - -> Comprehensive list of LibreOffice form fields that Gotenberg -> supports but Folio is missing. These 30+ fields control PDF -> export options, bookmarks, notes, viewer preferences, and native -> watermarks. - -## Goal - -Implement all missing LibreOffice form fields to achieve full parity -with Gotenberg's LibreOffice conversion capabilities. 
- -## Scope - -**In:** - -All LibreOffice form fields from Gotenberg that Folio is missing: - -### Bookmarks & Index (5 fields) -- `exportBookmarks` - Export bookmarks to PDF -- `exportBookmarksToPdfDestination` - Export to Named Destination -- `updateIndexes` - Update document indexes -- `autoIndexBookmarks` - Auto-index bookmarks (merge) -- `bookmarks` (for merge) - Custom bookmarks with offsets - -### Form Fields & Placeholders (3 fields) -- `exportFormFields` - Export as interactive form widgets -- `allowDuplicateFieldNames` - Allow duplicate field names -- `exportPlaceholders` - Export placeholder markings - -### Notes & Margins (4 fields) -- `exportNotes` - Export notes to PDF -- `exportNotesPages` - Export notes pages (Impress) -- `exportOnlyNotesPages` - Export only notes pages -- `exportNotesInMargin` - Export notes in margin - -### Advanced Options (8 fields) -- `convertOooTargetToPdfTarget` - Convert .od* links to .pdf -- `exportLinksRelativeFsys` - Export links as relative -- `exportHiddenSlides` - Export hidden slides (Impress) -- `skipEmptyPages` - Suppress empty pages -- `addOriginalDocumentAsStream` - Add source doc as stream -- `singlePageSheets` - Single page sheets -- `losslessImageCompression` - Use lossless compression -- `reduceImageResolution` - Reduce image resolution - -### Native Watermarks (6 fields) -- `nativeWatermarkText` - Watermark text -- `nativeWatermarkColor` - RGB color -- `nativeWatermarkFontHeight` - Font height -- `nativeWatermarkRotateAngle` - Rotation angle -- `nativeWatermarkFontName` - Font name -- `nativeTiledWatermarkText` - Tiled watermark text - -### PDF Viewer Preferences (15 fields, Gotenberg 8.29.0+) -- `initialView` - Initial view mode -- `initialPage` - Initial page number -- `magnification` - Magnification level -- `zoom` - Zoom level -- `pageLayout` - Page layout -- `firstPageOnLeft` - First page on left -- `resizeWindowToInitialPage` - Resize to initial page -- `centerWindow` - Center window -- 
`openInFullScreenMode` - Open fullscreen -- `displayPDFDocumentTitle` - Display document title -- `hideViewerMenubar` - Hide menu bar -- `hideViewerToolbar` - Hide toolbar -- `hideViewerWindowControls` - Hide window controls -- `useTransitionEffects` - Use transition effects -- `openBookmarkLevels` - Open bookmark levels - -**Out:** - -- Fields that require LibreOffice API access beyond command-line flags -- Fields that are deprecated in LibreOffice 7.x+ - -## Form Fields (Missing in Folio) - -### 1. Bookmarks & Index - -| Field | Type | Gotenberg Source | LibreOffice Flag | Description | -|-------|------|------------------|-------------------|-------------| -| `exportBookmarks` | boolean | `pkg/modules/libreoffice/formfield.go:ExportBookmarks` | `--export-bookmarks` | Export bookmarks to PDF outline | -| `exportBookmarksToPdfDestination` | boolean | `pkg/modules/libreoffice/formfield.go:ExportBookmarksToPdfDestination` | `--export-bookmarks-to-pdf-destination` | Export to PDF Named Destination | -| `updateIndexes` | boolean | `pkg/modules/libreoffice/formfield.go:UpdateIndexes` | `--update-indexes` | Update document indexes | -| `autoIndexBookmarks` | boolean | `pkg/modules/libreoffice/formfield.go:AutoIndexBookmarks` | (merge only) | Auto-index when merging | -| `bookmarks` | JSON | `pkg/modules/libreoffice/formfield.go:Bookmarks` | (merge only) | Custom bookmarks with page offsets | - -#### `bookmarks` JSON Format - -```json -[ - { - "title": "Chapter 1", - "page": 1, - "children": [ - {"title": "Section 1.1", "page": 2, "children": []} - ] - } -] -``` - -### 2. 
Form Fields & Placeholders - -| Field | Type | Gotenberg Source | LibreOffice Flag | Description | -|-------|------|------------------|-------------------|-------------| -| `exportFormFields` | boolean | `pkg/modules/libreoffice/formfield.go:ExportFormFields` | `--export-form-fields` | Export as interactive form widgets | -| `allowDuplicateFieldNames` | boolean | `pkg/modules/libreoffice/formfield.go:AllowDuplicateFieldNames` | `--allow-duplicate-field-names` | Allow duplicate field names | -| `exportPlaceholders` | boolean | `pkg/modules/libreoffice/formfield.go:ExportPlaceholders` | `--export-placeholders` | Export placeholder markings | - -### 3. Notes & Margins - -| Field | Type | Gotenberg Source | LibreOffice Flag | Description | -|-------|------|------------------|-------------------|-------------| -| `exportNotes` | boolean | `pkg/modules/libreoffice/formfield.go:ExportNotes` | `--export-notes` | Export notes to PDF | -| `exportNotesPages` | boolean | `pkg/modules/libreoffice/formfield.go:ExportNotesPages` | `--export-notes-pages` | Export notes pages (Impress) | -| `exportOnlyNotesPages` | boolean | `pkg/modules/libreoffice/formfield.go:ExportOnlyNotesPages` | `--export-only-notes-pages` | Export only notes pages | -| `exportNotesInMargin` | boolean | `pkg/modules/libreoffice/formfield.go:ExportNotesInMargin` | `--export-notes-in-margin` | Export notes in margin | - -### 4. 
Advanced Options - -| Field | Type | Gotenberg Source | LibreOffice Flag | Description | -|-------|------|------------------|-------------------|-------------| -| `convertOooTargetToPdfTarget` | boolean | `pkg/modules/libreoffice/formfield.go:ConvertOooTargetToPdfTarget` | `--convert-ooo-target-to-pdf-target` | Convert .od* links to .pdf | -| `exportLinksRelativeFsys` | boolean | `pkg/modules/libreoffice/formfield.go:ExportLinksRelativeFsys` | `--export-links-relative-fsys` | Export links as relative | -| `exportHiddenSlides` | boolean | `pkg/modules/libreoffice/formfield.go:ExportHiddenSlides` | `--export-hidden-slides` | Export hidden slides (Impress) | -| `skipEmptyPages` | boolean | `pkg/modules/libreoffice/formfield.go:SkipEmptyPages` | `--skip-empty-pages` | Suppress empty pages | -| `addOriginalDocumentAsStream` | boolean | `pkg/modules/libreoffice/formfield.go:AddOriginalDocumentAsStream` | `--add-original-document-as-stream` | Add source doc as stream | -| `singlePageSheets` | boolean | `pkg/modules/libreoffice/formfield.go:SinglePageSheets` | `--single-page-sheets` | Single page sheets | -| `losslessImageCompression` | boolean | `pkg/modules/libreoffice/formfield.go:LosslessImageCompression` | `--lossless-image-compression` | Use lossless compression | -| `reduceImageResolution` | boolean | `pkg/modules/libreoffice/formfield.go:ReduceImageResolution` | `--reduce-image-resolution` | Reduce image resolution | - -### 5. 
Native Watermarks (LibreOffice-side) - -| Field | Type | Gotenberg Source | LibreOffice Flag | Description | -|-------|------|------------------|-------------------|-------------| -| `nativeWatermarkText` | string | `pkg/modules/libreoffice/formfield.go:NativeWatermarkText` | `--watermark-text` | Watermark text | -| `nativeWatermarkColor` | integer | `pkg/modules/libreoffice/formfield.go:NativeWatermarkColor` | `--watermark-color` | RGB color (0xRRGGBB) | -| `nativeWatermarkFontHeight` | integer | `pkg/modules/libreoffice/formfield.go:NativeWatermarkFontHeight` | `--watermark-font-height` | Font height in points | -| `nativeWatermarkRotateAngle` | integer | `pkg/modules/libreoffice/formfield.go:NativeWatermarkRotateAngle` | `--watermark-rotate-angle` | Rotation angle (degrees) | -| `nativeWatermarkFontName` | string | `pkg/modules/libreoffice/formfield.go:NativeWatermarkFontName` | `--watermark-font-name` | Font name | -| `nativeTiledWatermarkText` | string | `pkg/modules/libreoffice/formfield.go:NativeTiledWatermarkText` | `--tiled-watermark-text` | Tiled watermark text | - -### 6. 
PDF Viewer Preferences (Gotenberg 8.29.0+) - -| Field | Type | Gotenberg Source | Description | -|-------|------|------------------|-------------| -| `initialView` | integer | `pkg/modules/libreoffice/formfield.go:InitialView` | Initial view mode (0=Default, 1=Bookmarks, 2=Thumbnails, 3=Layers) | -| `initialPage` | integer | `pkg/modules/libreoffice/formfield.go:InitialPage` | Initial page number (1-indexed) | -| `magnification` | integer | `pkg/modules/libreoffice/formfield.go:Magnification` | Magnification level (0=Default, 1=Fit width, 2=Fit page, 3=10-400%) | -| `zoom` | integer | `pkg/modules/libreoffice/formfield.go:Zoom` | Zoom level (percentage) | -| `pageLayout` | integer | `pkg/modules/libreoffice/formfield.go:PageLayout` | Page layout (0=Default, 1=Single page, 2=Continuous, 3=Facing, 4=Continuous facing) | -| `firstPageOnLeft` | boolean | `pkg/modules/libreoffice/formfield.go:FirstPageOnLeft` | First page on left | -| `resizeWindowToInitialPage` | boolean | `pkg/modules/libreoffice/formfield.go:ResizeWindowToInitialPage` | Resize to initial page | -| `centerWindow` | boolean | `pkg/modules/libreoffice/formfield.go:CenterWindow` | Center window | -| `openInFullScreenMode` | boolean | `pkg/modules/libreoffice/formfield.go:OpenInFullScreenMode` | Open fullscreen | -| `displayPDFDocumentTitle` | boolean | `pkg/modules/libreoffice/formfield.go:DisplayPDFDocumentTitle` | Display document title | -| `hideViewerMenubar` | boolean | `pkg/modules/libreoffice/formfield.go:HideViewerMenubar` | Hide menu bar | -| `hideViewerToolbar` | boolean | `pkg/modules/libreoffice/formfield.go:HideViewerToolbar` | Hide toolbar | -| `hideViewerWindowControls` | boolean | `pkg/modules/libreoffice/formfield.go:HideViewerWindowControls` | Hide window controls | -| `useTransitionEffects` | boolean | `pkg/modules/libreoffice/formfield.go:UseTransitionEffects` | Use transition effects | -| `openBookmarkLevels` | integer | `pkg/modules/libreoffice/formfield.go:OpenBookmarkLevels` | 
Open bookmark levels (0=none, 1+=expand N levels) | - -## Implementation - -### 1. Extend `OfficeOptions` in `crates/engine/src/libreoffice/mod.rs` - -```rust -pub struct OfficeOptions { - // ... existing fields ... - - // Bookmarks & Index - pub export_bookmarks: bool, - pub export_bookmarks_to_pdf_destination: bool, - pub update_indexes: bool, - pub auto_index_bookmarks: bool, - pub bookmarks: Option<Vec<Bookmark>>, - - // Form Fields - pub export_form_fields: bool, - pub allow_duplicate_field_names: bool, - pub export_placeholders: bool, - - // Notes - pub export_notes: bool, - pub export_notes_pages: bool, - pub export_only_notes_pages: bool, - pub export_notes_in_margin: bool, - - // Advanced - pub convert_ooo_target_to_pdf_target: bool, - pub export_links_relative_fsys: bool, - pub export_hidden_slides: bool, - pub skip_empty_pages: bool, - pub add_original_document_as_stream: bool, - pub single_page_sheets: bool, - pub lossless_image_compression: bool, - pub reduce_image_resolution: bool, - - // Native Watermarks - pub native_watermark_text: Option<String>, - pub native_watermark_color: Option<u32>, // RGB as 0xRRGGBB - pub native_watermark_font_height: Option<u32>, - pub native_watermark_rotate_angle: Option<i32>, - pub native_watermark_font_name: Option<String>, - pub native_tiled_watermark_text: Option<String>, - - // PDF Viewer Preferences - pub initial_view: Option<i32>, - pub initial_page: Option<i32>, - pub magnification: Option<i32>, - pub zoom: Option<i32>, - pub page_layout: Option<i32>, - pub first_page_on_left: bool, - pub resize_window_to_initial_page: bool, - pub center_window: bool, - pub open_in_full_screen_mode: bool, - pub display_pdf_document_title: bool, - pub hide_viewer_menubar: bool, - pub hide_viewer_toolbar: bool, - pub hide_viewer_window_controls: bool, - pub use_transition_effects: bool, - pub open_bookmark_levels: Option<i32>, -} -``` - -### 2. 
Build LibreOffice Command Args - -```rust -impl OfficeOptions { - pub fn to_libreoffice_args(&self) -> Vec<String> { - let mut args = Vec::new(); - - // Bookmarks - if self.export_bookmarks { - args.push("--export-bookmarks".into()); - } - if self.export_bookmarks_to_pdf_destination { - args.push("--export-bookmarks-to-pdf-destination".into()); - } - if self.update_indexes { - args.push("--update-indexes".into()); - } - - // Form Fields - if self.export_form_fields { - args.push("--export-form-fields".into()); - } - if self.allow_duplicate_field_names { - args.push("--allow-duplicate-field-names".into()); - } - if self.export_placeholders { - args.push("--export-placeholders".into()); - } - - // Notes - if self.export_notes { - args.push("--export-notes".into()); - } - if self.export_notes_pages { - args.push("--export-notes-pages".into()); - } - if self.export_only_notes_pages { - args.push("--export-only-notes-pages".into()); - } - if self.export_notes_in_margin { - args.push("--export-notes-in-margin".into()); - } - - // Advanced - if self.convert_ooo_target_to_pdf_target { - args.push("--convert-ooo-target-to-pdf-target".into()); - } - if self.export_links_relative_fsys { - args.push("--export-links-relative-fsys".into()); - } - if self.export_hidden_slides { - args.push("--export-hidden-slides".into()); - } - if self.skip_empty_pages { - args.push("--skip-empty-pages".into()); - } - if self.add_original_document_as_stream { - args.push("--add-original-document-as-stream".into()); - } - if self.single_page_sheets { - args.push("--single-page-sheets".into()); - } - if self.lossless_image_compression { - args.push("--lossless-image-compression".into()); - } - if self.reduce_image_resolution { - args.push("--reduce-image-resolution".into()); - } - - // Native Watermarks - if let Some(ref text) = self.native_watermark_text { - args.push(format!("--watermark-text={}", text)); - } - if let Some(color) = self.native_watermark_color { - 
args.push(format!("--watermark-color={}", color)); - } - // ... etc. - - // Viewer Preferences - if let Some(view) = self.initial_view { - args.push(format!("--initial-view={}", view)); - } - // ... etc. - - args - } -} -``` - -### 3. Form Field Parsing in `crates/server/src/routes/libreoffice.rs` - -```rust -// Parse all new fields from form data: -if let Some(val) = form.get("exportBookmarks") { - opts.export_bookmarks = val == "true"; -} - -if let Some(json) = form.get("bookmarks") { - opts.bookmarks = serde_json::from_str(json).ok(); -} - -// ... parse all 30+ fields -``` - -## References to Gotenberg Source - -| Feature | Gotenberg File | Line Numbers | -|---------|------------------|-------------| -| All form fields | `pkg/modules/libreoffice/formfield.go` | Full file (300+ lines) | -| Command arg building | `pkg/modules/libreoffice/libreoffice.go` | ~L100-200 | -| Viewer preferences | `pkg/modules/libreoffice/formfield.go` | ~L200-300 | - -To read Gotenberg source: -```bash -cd /Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg -cat pkg/modules/libreoffice/formfield.go | grep -A3 "ExportBookmarks" -``` - -## Expected Behavior - -### Bookmarks -- `exportBookmarks=true` → PDF has outline/bookmarks panel open -- `bookmarks` JSON → Custom bookmark tree with page offsets -- `autoIndexBookmarks=true` → Auto-generate bookmarks when merging - -### Form Fields -- `exportFormFields=true` → PDF has interactive form widgets -- `allowDuplicateFieldNames=true` → Allow duplicate field names in forms - -### Notes -- `exportNotes=true` → Writer notes exported to PDF -- `exportNotesPages=true` → Impress notes pages included -- `exportNotesInMargin=true` → Notes appear in margin - -### Viewer Preferences -- `initialView=1` → Open with bookmarks panel visible -- `zoom=150` → Default zoom level 150% -- `openInFullScreenMode=true` → Open in fullscreen -- `hideViewerToolbar=true` → Hide toolbar - -## Test Plan - -### Unit Tests - -- 
`parse_export_bookmarks_from_form` -- `parse_native_watermark_text` -- `parse_viewer_preferences_all_fields` -- `bookmarks_json_deserializes_correctly` - -### Integration Tests - -- `export_bookmarks_creates_outline` -- `native_watermark_appears_in_pdf` -- `viewer_preference_initial_view` -- `export_notes_pages_impress` - -## Acceptance - -- [ ] `OfficeOptions` extended with all 30+ fields -- [ ] Form field parsing in `libreoffice.rs` route -- [ ] LibreOffice command args built correctly -- [ ] Unit tests for all parsers -- [ ] Integration tests for key features -- [ ] `cargo clippy -p engine -- -D warnings` clean - -## References - -- Gotenberg LibreOffice form fields: `/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg/pkg/modules/libreoffice/formfield.go` -- LibreOffice CLI options: https://help.libreoffice.org/latest/en-US/text/shared/guide/pdf_params.html -- PDF viewer preferences: PDF spec ISO 32000-2, clauses 12.2 (viewer preferences) and 12.3 (document-level navigation, incl. outlines) diff --git a/docs/specs/38-pdfengines-backends.md b/docs/specs/38-pdfengines-backends.md deleted file mode 100644 index 6047e96..0000000 --- a/docs/specs/38-pdfengines-backends.md +++ /dev/null @@ -1,295 +0,0 @@ -# Spec 38 — PDF Engine Backends - -> Support multiple PDF engine backends (QPDF, PDFCPU, pdftk) -> for different operations. Gotenberg allows selecting which backend -> to use for merge, split, flatten, etc. - -## Goal - -Implement support for multiple PDF engine backends, allowing -operators to choose the best tool for each operation. -Matches Gotenberg's `--pdfengines-*-engines` flags. 
- -## Scope - -**In:** - -- Configurable backends per operation type: - - Merge engines: QPDF, PDFCPU, pdftk - - Split engines: QPDF, PDFCPU - - Flatten engines: QPDF, PDFCPU, pdftk - - Convert engines: QPDF (PDF/A) - - Encrypt engines: QPDF, pdftk - - Metadata engines: QPDF, pdftk - - Bookmark engines: QPDF, pdftk - - Watermark engines: PDFCPU, pdftk - - Stamp engines: PDFCPU, pdftk - - Rotate engines: QPDF, pdftk - -**Out:** - -- Auto-detection of available backends -- Fallback to lopdf when no external tool available -- Custom backends via plugin system - -## Configuration Flags - -| Flag | Env Variable | Gotenberg Source | Description | -|------|-------------|------------------|-------------| -| `--pdfengines-merge-engines` | `PDFENGINES_MERGE_ENGINES` | `pkg/modules/pdfengines/config.go:MergeEngines` | Comma-separated list (qpdf,pdfcpu,pdftk) | -| `--pdfengines-split-engines` | `PDFENGINES_SPLIT_ENGINES` | `pkg/modules/pdfengines/config.go:SplitEngines` | Comma-separated list | -| `--pdfengines-flatten-engines` | `PDFENGINES_FLATTEN_ENGINES` | `pkg/modules/pdfengines/config.go:FlattenEngines` | Comma-separated list | -| `--pdfengines-convert-engines` | `PDFENGINES_CONVERT_ENGINES` | `pkg/modules/pdfengines/config.go:ConvertEngines` | Usually just qpdf | -| `--pdfengines-read-metadata-engines` | `PDFENGINES_READ_METADATA_ENGINES` | `pkg/modules/pdfengines/config.go:ReadMetadataEngines` | QPDF, pdftk | -| `--pdfengines-write-metadata-engines` | `PDFENGINES_WRITE_METADATA_ENGINES` | `pkg/modules/pdfengines/config.go:WriteMetadataEngines` | QPDF, pdftk | -| `--pdfengines-encrypt-engines` | `PDFENGINES_ENCRYPT_ENGINES` | `pkg/modules/pdfengines/config.go:EncryptEngines` | QPDF, pdftk | -| `--pdfengines-decrypt-engines` | `PDFENGINES_DECRYPT_ENGINES` | `pkg/modules/pdfengines/config.go:DecryptEngines` | QPDF, pdftk | -| `--pdfengines-embed-engines` | `PDFENGINES_EMBED_ENGINES` | `pkg/modules/pdfengines/config.go:EmbedEngines` | QPDF | -| 
`--pdfengines-read-bookmarks-engines` | `PDFENGINES_READ_BOOKMARKS_ENGINES` | `pkg/modules/pdfengines/config.go:ReadBookmarksEngines` | QPDF, pdftk | -| `--pdfengines-write-bookmarks-engines` | `PDFENGINES_WRITE_BOOKMARKS_ENGINES` | `pkg/modules/pdfengines/config.go:WriteBookmarksEngines` | QPDF, pdftk | -| `--pdfengines-watermark-engines` | `PDFENGINES_WATERMARK_ENGINES` | `pkg/modules/pdfengines/config.go:WatermarkEngines` | PDFCPU, pdftk | -| `--pdfengines-stamp-engines` | `PDFENGINES_STAMP_ENGINES` | `pkg/modules/pdfengines/config.go:StampEngines` | PDFCPU, pdftk | -| `--pdfengines-rotate-engines` | `PDFENGINES_ROTATE_ENGINES` | `pkg/modules/pdfengines/config.go:RotateEngines` | QPDF, pdftk | - -## Engine Capabilities Matrix - -| Operation | QPDF | PDFCPU | pdftk | lopdf (Folio native) | -|-----------|------|--------|-------|---------------------| -| Merge | ✅ | ✅ | ✅ | ✅ | -| Split | ✅ | ✅ | ❌ | ✅ | -| Flatten | ✅ | ✅ | ✅ | ✅ | -| PDF/A Convert | ✅ | ❌ | ❌ | Partial | -| Encrypt | ✅ | ❌ | ✅ | ✅ | -| Decrypt | ✅ | ❌ | ✅ | ✅ | -| Read Metadata | ✅ | ❌ | ✅ | ✅ | -| Write Metadata | ✅ | ❌ | ✅ | ✅ | -| Read Bookmarks | ✅ | ❌ | ✅ | ✅ | -| Write Bookmarks | ✅ | ❌ | ✅ | ✅ | -| Watermark | ❌ | ✅ | ✅ | ✅ | -| Stamp | ❌ | ✅ | ✅ | ✅ | -| Rotate | ✅ | ❌ | ✅ | ✅ | -| Embed Files | ✅ | ❌ | ❌ | ✅ | - -## Implementation - -### 1. 
Enum for Engine Type - -```rust -// crates/engine/src/pdfops/mod.rs - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum PdfEngineType { - Qpdf, - PdfCpu, - PdfTk, - LoPdf, // Folio native -} - -impl PdfEngineType { - pub fn from_str(s: &str) -> Option<Self> { - match s.to_lowercase().as_str() { - "qpdf" => Some(Self::Qpdf), - "pdfcpu" => Some(Self::PdfCpu), - "pdftk" => Some(Self::PdfTk), - "lopdf" => Some(Self::LoPdf), - _ => None, - } - } - - pub fn binary_name(&self) -> &'static str { - match self { - Self::Qpdf => "qpdf", - Self::PdfCpu => "pdfcpu", - Self::PdfTk => "pdftk", - Self::LoPdf => "lopdf (built-in)", - } - } - - pub fn is_available(&self) -> bool { - match self { - Self::LoPdf => true, // Always available - _ => which::which(self.binary_name()).is_ok(), - } - } -} -``` - -### 2. Configuration Struct - -```rust -// crates/server/src/config.rs - -pub struct PdfEnginesConfig { - pub merge_engines: Vec<PdfEngineType>, - pub split_engines: Vec<PdfEngineType>, - pub flatten_engines: Vec<PdfEngineType>, - pub convert_engines: Vec<PdfEngineType>, - pub read_metadata_engines: Vec<PdfEngineType>, - pub write_metadata_engines: Vec<PdfEngineType>, - pub encrypt_engines: Vec<PdfEngineType>, - pub decrypt_engines: Vec<PdfEngineType>, - pub embed_engines: Vec<PdfEngineType>, - pub read_bookmarks_engines: Vec<PdfEngineType>, - pub write_bookmarks_engines: Vec<PdfEngineType>, - pub watermark_engines: Vec<PdfEngineType>, - pub stamp_engines: Vec<PdfEngineType>, - pub rotate_engines: Vec<PdfEngineType>, -} - -impl Default for PdfEnginesConfig { - fn default() -> Self { - Self { - merge_engines: vec![PdfEngineType::Qpdf, PdfEngineType::PdfCpu, PdfEngineType::PdfTk], - split_engines: vec![PdfEngineType::Qpdf, PdfEngineType::PdfCpu], - // ... etc. - } - } -} -``` - -### 3. Engine Selection Logic - -```rust -// crates/engine/src/pdfops/mod.rs - -pub struct PdfOps { - config: PdfEnginesConfig, -} - -impl PdfOps { - /// Select first available engine for operation. 
- fn select_engine(&self, engines: &[PdfEngineType]) -> Option<PdfEngineType> { - engines.iter() - .find(|e| e.is_available()) - .copied() - } - - pub fn merge(&self, inputs: &[PathBuf]) -> Result<Vec<u8>, EngineError> { - let engine = self.select_engine(&self.config.merge_engines) - .ok_or_else(|| EngineError::Internal("No merge engine available".into()))?; - - match engine { - PdfEngineType::Qpdf => self.merge_qpdf(inputs), - PdfEngineType::PdfCpu => self.merge_pdfcpu(inputs), - PdfEngineType::PdfTk => self.merge_pdftk(inputs), - PdfEngineType::LoPdf => self.merge_lopdf(inputs), - } - } - - fn merge_qpdf(&self, inputs: &[PathBuf]) -> Result<Vec<u8>, EngineError> { - let mut cmd = std::process::Command::new("qpdf"); - cmd.arg("--empty").arg("output.pdf"); - - for input in inputs { - cmd.arg("--pages").arg(input).arg("1-z").arg("--"); - } - - // ... execute command - todo!() - } - - fn merge_pdfcpu(&self, inputs: &[PathBuf]) -> Result<Vec<u8>, EngineError> { - let mut cmd = std::process::Command::new("pdfcpu"); - cmd.arg("import"); - - for input in inputs { - cmd.arg(input); - } - - // ... execute command - todo!() - } -} -``` - -### 4. CLI Flags Parsing - -```rust -// crates/server/src/config.rs - -impl ServerConfig { - fn parse_pdfengines_args(args: &Args) -> PdfEnginesConfig { - let parse_engines = |arg: Option<&str>| { - arg.unwrap_or("") - .split(',') - .filter_map(PdfEngineType::from_str) - .collect::<Vec<_>>() - }; - - PdfEnginesConfig { - merge_engines: parse_engines(args.value_of("pdfengines-merge-engines")), - // ... 
parse all 14 engine lists - } - } -} -``` - -## References to Gotenberg Source - -| Feature | Gotenberg File | Line Numbers | -|---------|------------------|-------------| -| Engine config struct | `pkg/modules/pdfengines/config.go` | Full file (~100 lines) | -| Engine selection | `pkg/modules/pdfengines/pdfengines.go` | ~L200-300 | -| QPDF wrapper | `pkg/modules/pdfengines/qpdf.go` | Full file | -| PDFCPU wrapper | `pkg/modules/pdfengines/pdfcpu.go` | Full file | -| pdftk wrapper | `pkg/modules/pdfengines/pdftk.go` | Full file | - -To read Gotenberg source: -```bash -cd /Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg -cat pkg/modules/pdfengines/config.go | grep -A3 "MergeEngines" -``` - -## Expected Behavior - -### Engine Priority -1. Try first engine in list -2. If not available (not installed), try next -3. If none available, return error - -### Default Behavior (No Flags) -- Use all available engines in order: qpdf, pdfcpu, pdftk, lopdf - -### Custom Engine Selection -```bash -# Use only QPDF for merge (fast, reliable) ---pdfengines-merge-engines=qpdf - -# Try PDFCPU first, fall back to QPDF (pdftk cannot split; see the capabilities matrix) ---pdfengines-split-engines=pdfcpu,qpdf -``` - -## Test Plan - -### Unit Tests - -- `engine_type_from_str_parses_correctly` -- `engine_type_is_available_qpdf_installed` -- `select_engine_returns_first_available` -- `select_engine_falls_back_to_next` - -### Integration Tests - -- `merge_uses_qpdf_when_available` -- `merge_falls_back_to_pdfcpu` -- `merge_uses_lopdf_as_last_resort` - -## Acceptance - -- [ ] `PdfEngineType` enum with all 4 types -- [ ] `PdfEnginesConfig` with 14 engine lists -- [ ] CLI flags for all engine selections -- [ ] Engine selection logic with fallback -- [ ] QPDF wrapper for merge/split/encrypt -- [ ] PDFCPU wrapper for merge/split/watermark -- [ ] pdftk wrapper for merge/encrypt/bookmarks -- [ ] Unit tests for engine selection -- [ ] Integration tests with real tools -- [ ] `cargo clippy -p engine -- -D warnings` clean - -## 
References - -- Gotenberg PDF engines: `/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg/pkg/modules/pdfengines/` -- QPDF documentation: https://qpdf.readthedocs.io/ -- PDFCPU documentation: https://pdfcpu.io/ -- pdftk documentation: https://www.pdftk.com/ diff --git a/docs/specs/39-config-flags.md b/docs/specs/39-config-flags.md deleted file mode 100644 index ece866c..0000000 --- a/docs/specs/39-config-flags.md +++ /dev/null @@ -1,276 +0,0 @@ -# Spec 39 — Configuration CLI Flags - -> Comprehensive list of CLI flags and environment variables -> that Gotenberg supports but Folio is missing. These control -> Chromium, LibreOffice, API server, and PDF engine behavior. - -## Goal - -Implement all missing CLI flags and environment variables to achieve -full configuration parity with Gotenberg. - -## Scope - -**In:** - -All missing CLI flags from Gotenberg: - -### Chromium Options (17 flags) - -| Flag | Env Variable | Gotenberg Source | Default | Description | -|------|-------------|------------------|---------|-------------| -| `--chromium-restart-after` | `CHROMIUM_RESTART_AFTER` | `pkg/modules/chromium/config.go:RestartAfter` | 0 (never) | Restart after N conversions | -| `--chromium-max-queue-size` | `CHROMIUM_MAX_QUEUE_SIZE` | `pkg/modules/chromium/config.go:MaxQueueSize` | 0 (unlimited) | Max queue size | -| `--chromium-max-concurrency` | `CHROMIUM_MAX_CONCURRENCY` | `pkg/modules/chromium/config.go:MaxConcurrency` | NumCPUs | Max concurrent renders | -| `--chromium-auto-start` | `CHROMIUM_AUTO_START` | `pkg/modules/chromium/config.go:AutoStart` | true | Auto-start Chromium | -| `--chromium-start-timeout` | `CHROMIUM_START_TIMEOUT` | `pkg/modules/chromium/config.go:StartTimeout` | 20s | Start timeout | -| `--chromium-allow-list` | `CHROMIUM_ALLOW_LIST` | `pkg/modules/chromium/config.go:AllowList` | (none) | Allowed URL patterns (regex) | -| `--chromium-deny-list` | `CHROMIUM_DENY_LIST` | `pkg/modules/chromium/config.go:DenyList` | (none) | 
Denied URL patterns (regex) | -| `--chromium-clear-cache` | `CHROMIUM_CLEAR_CACHE` | `pkg/modules/chromium/config.go:ClearCache` | false | Clear cache on restart | -| `--chromium-clear-cookies` | `CHROMIUM_CLEAR_COOKIES` | `pkg/modules/chromium/config.go:ClearCookies` | false | Clear cookies on restart | -| `--chromium-disable-javascript` | `CHROMIUM_DISABLE_JAVASCRIPT` | `pkg/modules/chromium/config.go:DisableJavascript` | false | Disable JavaScript | -| `--chromium-allow-insecure-localhost` | `CHROMIUM_ALLOW_INSECURE_LOCALHOST` | `pkg/modules/chromium/config.go:AllowInsecureLocalhost` | false | Allow insecure localhost | -| `--chromium-ignore-certificate-errors` | `CHROMIUM_IGNORE_CERTIFICATE_ERRORS` | `pkg/modules/chromium/config.go:IgnoreCertificateErrors` | false | Ignore cert errors | -| `--chromium-disable-web-security` | `CHROMIUM_DISABLE_WEB_SECURITY` | `pkg/modules/chromium/config.go:DisableWebSecurity` | false | Disable web security | -| `--chromium-allow-file-access-from-files` | `CHROMIUM_ALLOW_FILE_ACCESS_FROM_FILES` | `pkg/modules/chromium/config.go:AllowFileAccessFromFile` | false | Allow file access | -| `--chromium-host-resolver-rules` | `CHROMIUM_HOST_RESOLVER_RULES` | `pkg/modules/chromium/config.go:HostResolverRules` | (none) | Custom DNS rules | -| `--chromium-proxy-server` | `CHROMIUM_PROXY_SERVER` | `pkg/modules/chromium/config.go:ProxyServer` | (none) | Proxy server | -| `--chromium-idle-shutdown-timeout` | `CHROMIUM_IDLE_SHUTDOWN_TIMEOUT` | `pkg/modules/chromium/config.go:IdleShutdownTimeout` | 0 (disabled) | Idle shutdown timeout | - -### LibreOffice Options (6 flags) - -| Flag | Env Variable | Gotenberg Source | Default | Description | -|------|-------------|------------------|---------|-------------| -| `--libreoffice-restart-after` | `LIBREOFFICE_RESTART_AFTER` | `pkg/modules/libreoffice/config.go:RestartAfter` | 0 (never) | Restart after N conversions | -| `--libreoffice-max-queue-size` | `LIBREOFFICE_MAX_QUEUE_SIZE` | 
`pkg/modules/libreoffice/config.go:MaxQueueSize` | 0 (unlimited) | Max queue size | -| `--libreoffice-auto-start` | `LIBREOFFICE_AUTO_START` | `pkg/modules/libreoffice/config.go:AutoStart` | true | Auto-start LibreOffice | -| `--libreoffice-start-timeout` | `LIBREOFFICE_START_TIMEOUT` | `pkg/modules/libreoffice/config.go:StartTimeout` | 20s | Start timeout | -| `--libreoffice-disable-routes` | `LIBREOFFICE_DISABLE_ROUTES` | `pkg/modules/libreoffice/config.go:DisableRoutes` | false | Disable LibreOffice routes | -| `--libreoffice-idle-shutdown-timeout` | `LIBREOFFICE_IDLE_SHUTDOWN_TIMEOUT` | `pkg/modules/libreoffice/config.go:IdleShutdownTimeout` | 0 (disabled) | Idle shutdown timeout | - -### API Server Options (9 flags) - -| Flag | Env Variable | Gotenberg Source | Default | Description | -|------|-------------|------------------|---------|-------------| -| `--api-disable-health-route-telemetry` | `API_DISABLE_HEALTH_ROUTE_TELEMETRY` | `pkg/modules/api/config.go:DisableHealthRouteTelemetry` | false | Disable health telemetry | -| `--api-disable-root-route-telemetry` | `API_DISABLE_ROOT_ROUTE_TELEMETRY` | `pkg/modules/api/config.go:DisableRootRouteTelemetry` | false | Disable root telemetry | -| `--api-disable-debug-route-telemetry` | `API_DISABLE_DEBUG_ROUTE_TELEMETRY` | `pkg/modules/api/config.go:DisableDebugRouteTelemetry` | false | Disable debug telemetry | -| `--api-disable-version-route-telemetry` | `API_DISABLE_VERSION_ROUTE_TELEMETRY` | `pkg/modules/api/config.go:DisableVersionRouteTelemetry` | false | Disable version telemetry | -| `--api-enable-debug-route` | `API_ENABLE_DEBUG_ROUTE` | `pkg/modules/api/config.go:EnableDebugRoute` | false | Enable debug route | -| Basic auth username | `API_BASIC_AUTH_USERNAME` | `pkg/modules/api/config.go:BasicAuthUsername` | (none) | HTTP basic auth username | -| Basic auth password | `API_BASIC_AUTH_PASSWORD` | `pkg/modules/api/config.go:BasicAuthPassword` | (none) | HTTP basic auth password | -| TLS cert file | 
`API_TLS_CERT_FILE` | `pkg/modules/api/config.go:TlsCertFile` | (none) | TLS certificate file | -| TLS key file | `API_TLS_KEY_FILE` | `pkg/modules/api/config.go:TlsKeyFile` | (none) | TLS key file | - -### PDF Engines Options (14 flags) - -Already documented in spec-38, but need CLI flags: - -| Flag | Env Variable | -|------|-------------| -| `--pdfengines-disable-routes` | `PDFENGINES_DISABLE_ROUTES` | -| `--pdfengines-merge-engines` | `PDFENGINES_MERGE_ENGINES` | -| `--pdfengines-split-engines` | `PDFENGINES_SPLIT_ENGINES` | -| (14 total, see spec-38) | - -## Implementation - -### 1. Extend `BrowserConfig` in `crates/engine/src/chromium/mod.rs` - -```rust -pub struct BrowserConfig { - // ... existing fields ... - - // Supervision - pub restart_after: u32, // --chromium-restart-after - pub max_queue_size: usize, // --chromium-max-queue-size - pub max_concurrency: usize, // --chromium-max-concurrency - - // Lifecycle - pub auto_start: bool, // --chromium-auto-start - pub start_timeout: Duration, // --chromium-start-timeout - - // Security - pub allow_list: Vec<String>, // --chromium-allow-list (regex) - pub deny_list: Vec<String>, // --chromium-deny-list (regex) - pub clear_cache: bool, // --chromium-clear-cache - pub clear_cookies: bool, // --chromium-clear-cookies - pub disable_javascript: bool, // --chromium-disable-javascript - pub allow_insecure_localhost: bool, // --chromium-allow-insecure-localhost - pub ignore_certificate_errors: bool, // --chromium-ignore-certificate-errors - pub disable_web_security: bool, // --chromium-disable-web-security - pub allow_file_access_from_files: bool, // --chromium-allow-file-access-from-files - - // Network - pub host_resolver_rules: Option<String>, // --chromium-host-resolver-rules - pub proxy_server: Option<String>, // --chromium-proxy-server - - // Idle - pub idle_shutdown_timeout: Option<Duration>, // --chromium-idle-shutdown-timeout -} -``` - -### 2. 
Extend `LibreOfficeConfig` in `crates/engine/src/libreoffice/mod.rs` - -```rust -pub struct LibreOfficeConfig { - // ... existing fields ... - - // Supervision - pub restart_after: u32, - pub max_queue_size: usize, - - // Lifecycle - pub auto_start: bool, - pub start_timeout: Duration, - - // Routes - pub disable_routes: bool, - - // Idle - pub idle_shutdown_timeout: Option<Duration>, -} -``` - -### 3. Extend `ServerConfig` in `crates/server/src/config.rs` - -```rust -pub struct ServerConfig { - // ... existing fields ... - - // API telemetry - pub disable_health_route_telemetry: bool, - pub disable_root_route_telemetry: bool, - pub disable_debug_route_telemetry: bool, - pub disable_version_route_telemetry: bool, - pub enable_debug_route: bool, - - // Basic auth - pub basic_auth_username: Option<String>, - pub basic_auth_password: Option<String>, - - // TLS - pub tls_cert_file: Option<PathBuf>, - pub tls_key_file: Option<PathBuf>, - - // PDF engines config - pub pdfengines: PdfEnginesConfig, // from spec-38 -} -``` - -### 4. CLI Flag Definitions - -```rust -// crates/server/src/config.rs - -pub fn clap_app() -> Command { - Command::new("folio-server") - // ... existing flags ... - - // Chromium flags - .arg(Arg::new("chromium-restart-after") - .long("chromium-restart-after") - .env("CHROMIUM_RESTART_AFTER") - .default_value("0")) - .arg(Arg::new("chromium-max-queue-size") - .long("chromium-max-queue-size") - .env("CHROMIUM_MAX_QUEUE_SIZE") - .default_value("0")) - // ... all 16 chromium flags - - // LibreOffice flags - .arg(Arg::new("libreoffice-restart-after") - .long("libreoffice-restart-after") - .env("LIBREOFFICE_RESTART_AFTER") - .default_value("0")) - // ... all 6 libreoffice flags - - // API flags - .arg(Arg::new("api-disable-health-route-telemetry") - .long("api-disable-health-route-telemetry") - .env("API_DISABLE_HEALTH_ROUTE_TELEMETRY") - .action(clap::ArgAction::SetTrue)) - // ... 
all 9 API flags -} -``` - -## References to Gotenberg Source - -| Feature | Gotenberg File | Line Numbers | -|---------|------------------|-------------| -| Chromium config | `pkg/modules/chromium/config.go` | Full file (~150 lines) | -| LibreOffice config | `pkg/modules/libreoffice/config.go` | Full file (~80 lines) | -| API config | `pkg/modules/api/config.go` | Full file (~120 lines) | -| PDF engines config | `pkg/modules/pdfengines/config.go` | Full file (~100 lines) | - -To read Gotenberg source: -```bash -cd /Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg -cat pkg/modules/chromium/config.go | grep -A2 "RestartAfter" -``` - -## Expected Behavior - -### Flag Priority -1. CLI flag (highest priority) -2. Environment variable -3. Default value (lowest priority) - -### URL Allow/Deny Lists -```bash -# Only allow subdomains of example.com (the pattern requires a dot before example.com, so the bare domain does not match) ---chromium-allow-list="^https://.*\.example\.com" - -# Deny tracking domains ---chromium-deny-list="^https://.*\.google-analytics\.com" -``` - -### Idle Shutdown -```bash -# Shutdown Chromium after 10 minutes idle ---chromium-idle-shutdown-timeout=10m - -# Disable idle shutdown ---chromium-idle-shutdown-timeout=0 -``` - -### Basic Auth -```bash -# Enable HTTP basic auth ---api-basic-auth-username=admin --api-basic-auth-password=secret -``` - -## Test Plan - -### Unit Tests - -- `chromium_restart_after_parses_correctly` -- `url_allow_list_regex_matches` -- `url_deny_list_blocks_tracking` -- `basic_auth_credentials_parsed` - -### Integration Tests - -- `idle_shutdown_stops_chromium` -- `url_allow_list_blocks_denied` -- `basic_auth_rejects_unauthorized` - -## Acceptance - -- [ ] `BrowserConfig` extended with all 17 Chromium flags -- [ ] `LibreOfficeConfig` extended with all 6 LibreOffice flags -- [ ] `ServerConfig` extended with all 9 API flags -- [ ] CLI flag parsing with env var fallback -- [ ] Flag priority: CLI > env > default -- [ ] URL allow/deny list regex matching -- [ ] Basic auth middleware -- [ ] 
TLS support in Axum -- [ ] Unit tests for all flag parsers -- [ ] `cargo clippy -p server -- -D warnings` clean - -## References - -- Gotenberg config files: `/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg/pkg/modules/*/config.go` -- clap crate: https://docs.rs/clap/ -- Axum TLS: https://docs.rs/axum/latest/axum/#tls -- HTTP basic auth: https://docs.rs/axum/latest/axum/middleware/#basic-auth diff --git a/docs/specs/40-bindings-py.md b/docs/specs/40-bindings-py.md deleted file mode 100644 index 0308d22..0000000 --- a/docs/specs/40-bindings-py.md +++ /dev/null @@ -1,425 +0,0 @@ -# Spec 40 — Python bindings (`py` crate) - -> Self-contained PyO3 wrapper exposing `import folio` to Python users. -> No external HTTP service required at runtime. - -## Goal - -Allow Python users to convert HTML / URL / Markdown to PDF in-process via -the same `engine` crate the server uses, matching the README example in -`@/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/README.md:99-114`. - -## Scope - -**In:** - -- `ChromiumEngine` Python class with `html_to_pdf`, `url_to_pdf`, - `markdown_to_pdf`, `shutdown`, `healthy`. -- Exception hierarchy mapping each `EngineError` variant. -- `PdfOptions`, `RequestContext`, `BrowserConfig` exposed as Python - `@dataclass`-style classes (constructed positionally or via kwargs). -- Type stubs (`folio.pyi`) shipped with the wheel. -- Wheels built by CI for cp3.9..cp3.13 on linux-x64/aarch64, - macos-x64/arm64, win-x64. - -**Out:** - -- LibreOffice and pdfops surfaces — Python users for those use the HTTP - server today. Follow-up spec. -- Async Python (`async def`) — Python remains synchronous; we - `block_on` internally. Async support is a follow-up. -- Streaming PDF output (chunked writes) — return a single `bytes` for - MVP. 
- -## Public API - -### Python surface (excerpt of `folio.pyi`) - -```python -from typing import Any, Optional, Mapping, Sequence - -class FolioError(Exception): - """Base class for all engine errors raised by folio.""" - code: str # e.g. "INVALID_OPTION", "TIMEOUT", "NAVIGATION", ... - -class InvalidOptionError(FolioError): ... -class InvalidPageRangeError(FolioError): ... -class ChromeNotFoundError(FolioError): ... -class ChromeLaunchError(FolioError): ... -class CdpError(FolioError): ... -class NavigationError(FolioError): - url: str - reason: str -class TimeoutError(FolioError): ... -class IoError(FolioError): ... -class InternalError(FolioError): ... - -class PaperSize: - A4: "PaperSize" - LETTER: "PaperSize" - LEGAL: "PaperSize" - A3: "PaperSize" - A5: "PaperSize" - def __init__(self, width_in: float, height_in: float) -> None: ... - width_in: float - height_in: float - -class Margins: - ZERO: "Margins" - DEFAULT: "Margins" - @staticmethod - def uniform(inches: float) -> "Margins": ... - def __init__(self, top: float, right: float, bottom: float, left: float) -> None: ... - top: float - right: float - bottom: float - left: float - -class WaitCondition: - @staticmethod - def load() -> "WaitCondition": ... - @staticmethod - def dom_content_loaded() -> "WaitCondition": ... - @staticmethod - def network_idle() -> "WaitCondition": ... - @staticmethod - def selector(css: str) -> "WaitCondition": ... - @staticmethod - def expression(js: str) -> "WaitCondition": ... - @staticmethod - def delay(seconds: float) -> "WaitCondition": ... - -class PdfOptions: - def __init__( - self, *, - paper: PaperSize = ..., - margin: Margins = ..., - landscape: bool = False, - scale: float = 1.0, - print_background: bool = True, - prefer_css_page_size: bool = False, - emulate_media: str = "print", # "print" | "screen" - page_ranges: Optional[str] = None, - header_template: Optional[str] = None, - footer_template: Optional[str] = None, - wait: WaitCondition = ..., - ) -> None: ... 
- -class Cookie: - def __init__( - self, name: str, value: str, *, - domain: Optional[str] = None, - path: Optional[str] = None, - secure: bool = False, - http_only: bool = False, - ) -> None: ... - -class RequestContext: - def __init__( - self, *, - user_agent: Optional[str] = None, - extra_headers: Optional[Mapping[str, str]] = None, - cookies: Optional[Sequence[Cookie]] = None, - fail_on_status: Optional[Sequence[int]] = None, - ) -> None: ... - -class BrowserConfig: - def __init__( - self, *, - executable: Optional[str] = None, - headless: bool = True, - extra_args: Sequence[str] = (), - no_sandbox: Optional[bool] = None, # None = platform default - timeout_secs: float = 60.0, - ) -> None: ... - -class ChromiumEngine: - def __init__(self, config: Optional[BrowserConfig] = None) -> None: ... - - def html_to_pdf( - self, html: str, *, - base_url: Optional[str] = None, - options: Optional[PdfOptions] = None, - request: Optional[RequestContext] = None, - ) -> bytes: ... - - def url_to_pdf( - self, url: str, *, - options: Optional[PdfOptions] = None, - request: Optional[RequestContext] = None, - ) -> bytes: ... - - def markdown_to_pdf( - self, markdown: str, *, - options: Optional[PdfOptions] = None, - request: Optional[RequestContext] = None, - ) -> bytes: ... - - def healthy(self) -> bool: ... - - def shutdown(self) -> None: ... - - # Context manager support (calls shutdown on exit): - def __enter__(self) -> "ChromiumEngine": ... - def __exit__(self, *exc_info: Any) -> None: ... 
- -__version__: str -``` - -### Rust surface (`crates/py/src/lib.rs`) - -```rust -use pyo3::prelude::*; - -#[pymodule] -fn folio(py: Python<'_>, m: &PyModule) -> PyResult<()> { - m.add("__version__", env!("CARGO_PKG_VERSION"))?; - m.add_class::<py_types::PaperSize>()?; - m.add_class::<py_types::Margins>()?; - m.add_class::<py_types::WaitCondition>()?; - m.add_class::<py_types::PdfOptions>()?; - m.add_class::<py_types::Cookie>()?; - m.add_class::<py_types::RequestContext>()?; - m.add_class::<py_types::BrowserConfig>()?; - m.add_class::<py_engine::ChromiumEngine>()?; - py_errors::register(py, m)?; - Ok(()) -} -``` - -Internal modules: - -- `py_types` β€” `#[pyclass]` wrappers around the engine's value types. -- `py_engine::ChromiumEngine` β€” wraps `Arc<engine::ChromiumEngine>` and a - shared `tokio::runtime::Runtime`. -- `py_errors` β€” defines and registers the exception hierarchy. - -## Behavior - -### Runtime ownership - -A single multi-thread tokio runtime is built lazily on first use and -reused across all engines in the process: - -```rust -static RUNTIME: OnceLock<tokio::runtime::Runtime> = OnceLock::new(); -fn rt() -> &'static tokio::runtime::Runtime { - RUNTIME.get_or_init(|| { - tokio::runtime::Builder::new_multi_thread() - .enable_all() - .thread_name("folio-py") - .build() - .expect("tokio runtime build") - }) -} -``` - -Rationale: PyO3 modules are loaded once per process, so a `OnceLock` is -the standard idiom; multiple `ChromiumEngine` instances all share the -runtime. - -### `ChromiumEngine.__init__` - -1. Resolve config: `config or BrowserConfig()`. -2. Convert to `engine::types::BrowserConfig`. -3. `rt().block_on(engine::ChromiumEngine::launch_with(cfg))`. -4. Store `Arc<engine::ChromiumEngine>` inside the `#[pyclass]`. 
- -### `html_to_pdf` / `url_to_pdf` / `markdown_to_pdf` - -```rust -fn html_to_pdf( - &self, - py: Python<'_>, - html: &str, - base_url: Option<&str>, - options: Option<&PdfOptions>, - request: Option<&RequestContext>, -) -> PyResult<Py<PyBytes>> { - let opts = options.map(|o| o.to_native()).unwrap_or_default(); - let req = request.map(|r| r.to_native()).unwrap_or_default(); - let engine = self.inner.clone(); - let html_owned = html.to_owned(); - let base = base_url.map(str::to_owned); - - py.allow_threads(|| { - rt().block_on(async move { - engine.html_to_pdf(&html_owned, base.as_deref(), &opts, &req).await - }) - }) - .map_err(into_py_err) - .map(|bytes| PyBytes::new(py, &bytes).into()) -} -``` - -Critical points: - -- `Python::allow_threads` releases the GIL during the async work. -- All inputs cloned into owned `String`s so the closure is `Send`. -- Output `Vec<u8>` re-acquires the GIL and is wrapped in `PyBytes` - (`PyBytes::new` copies; that's acceptable in MVP). - -### `markdown_to_pdf` - -Same pattern as `html_to_pdf` but no `base_url` parameter. - -### `healthy()` - -`rt().block_on(self.inner.healthy())`. Holds the GIL across the call β€” -acceptable since `healthy` is bounded by `BrowserConfig::timeout`. - -### `shutdown()` and context manager - -- `shutdown` is idempotent. After the first successful call, subsequent - calls raise nothing. -- `__exit__` calls `shutdown` and never re-raises engine errors when - another exception is already in flight (logs at `warn` instead). - -### Error mapping - -All `EngineError`s convert to a corresponding Python exception. Each -exception class: - -- Inherits from `FolioError`. -- Carries a string `code` attribute equal to the variant name (e.g. - `"INVALID_OPTION"`). -- Preserves source-chain text in `__cause__` via - `PyErr::set_cause` when the engine error has a `source()`. 
- -Mapping table: - -| `EngineError` | Python class | Extra attributes | -|------------------------------|---------------------------|-------------------| -| `InvalidOption` | `InvalidOptionError` | β€” | -| `InvalidPageRange` | `InvalidPageRangeError` | β€” | -| `ChromeNotFound { searched }`| `ChromeNotFoundError` | `searched: list[str]` | -| `ChromeLaunch(msg)` | `ChromeLaunchError` | β€” | -| `Cdp(msg)` | `CdpError` | β€” | -| `Navigation { url, reason }` | `NavigationError` | `url`, `reason` | -| `Timeout(d)` | `TimeoutError` | `seconds: float` | -| `Io(_)` | `IoError` | β€” | -| `Internal(msg)` | `InternalError` | β€” | - -Note: `folio.TimeoutError` shadows Python's builtin name *only* inside -the `folio` module's namespace; users who do `from folio import -TimeoutError` accept that. The class is importable as -`folio.TimeoutError`. - -### Python type conversion - -| Engine Rust type | Python wrapper | Conversion | -|------------------------|---------------------------------|-----------------------| -| `PaperSize` | `PaperSize` `#[pyclass(frozen)]`| `to_native` cheap copy | -| `Margins` | `Margins` | same | -| `WaitCondition` | tagged enum mirrored in Python | factory functions | -| `MediaType` | string ("print"/"screen") | parsed in `PdfOptions::__init__` | -| `PageRanges` | `Optional[str]` | parsed via spec 10's `PageRanges::parse` and re-stringified | -| `Cookie` | `Cookie` | direct field copy | -| `RequestContext` | `RequestContext` | dict-like | -| `BrowserConfig` | `BrowserConfig` | direct | - -Wrapper types implement `__repr__` returning a stable form like -`PaperSize(width_in=8.27, height_in=11.69)` and `__eq__` based on -field equality. They are NOT mutable from Python (`#[pyclass(frozen)]`). - -### Threading - -- Python instances are safe to share across threads (the wrapped - `Arc<ChromiumEngine>` is `Sync`). -- The wrapper class is annotated with `#[pyclass(unsendable = false)]` - and asserted via `static_assertions::assert_impl_all!`. 
- -### Cleanup - -- `__del__` is **not** implemented (avoids the GIL/destructor pitfall). -- `__exit__` covers the deterministic-cleanup path. -- If a `ChromiumEngine` is dropped without `shutdown`, the underlying - Chrome process exits when the last `Arc` clone drops (chromiumoxide - semantics). A `tracing::warn!` records this. - -## Errors - -Every public Python method only raises subclasses of `FolioError`, -`TypeError` (for misused kwargs caught by PyO3 type extraction), or -`ValueError` (for `PaperSize.__init__` etc. failures translated from -`EngineError::InvalidOption`). - -## Edge cases - -| Scenario | Required behavior | -|--------------------------------------------------------------|--------------------------------------------------------------------| -| `ChromiumEngine()` while no Chrome is on PATH | Raises `ChromeNotFoundError(searched=[...])`. | -| `html_to_pdf("")` with default options | Returns valid PDF bytes (delegates to engine). | -| Calling `html_to_pdf` after `shutdown()` | Raises `InternalError` with the documented engine message. | -| Multiple Python threads calling concurrently | Allowed; GIL released during each call; engine handles concurrency.| -| `with ChromiumEngine(...) as e: raise RuntimeError` | `__exit__` runs shutdown but does not mask the user exception. | -| Garbage collection while a render is in flight | The wrapper holds an `Arc` so the engine is alive until the future resolves. | -| `PdfOptions(emulate_media="invalid")` | `ValueError("emulate_media must be 'print' or 'screen'")`. | -| `Cookie(name="", value="x")` | `ValueError("cookie name must not be empty")`. | -| Passing a dict where a wrapper class is expected | Allowed in MVP only for `RequestContext.extra_headers`. Other params require typed instances. | - -## Test plan - -### Rust unit tests (`crates/py/src/...`) - -- `paper_size_constants_match_engine`. -- `wait_condition_factory_round_trip`. -- `request_context_extra_headers_dict_to_native`. 
-- `error_conversion_table` — for each `EngineError` variant, build a - `PyErr` and assert its class name and `code` attribute. - -### Python integration tests (`crates/py/tests/test_folio.py`) - -Run via `pytest` against the built wheel (or `maturin develop`). - -Without Chrome (skipped if absent): - -- `test_module_has_version`. -- `test_paper_size_constants`. -- `test_pdf_options_kwargs_round_trip`. -- `test_invalid_emulate_media_raises_valueerror`. -- `test_chromium_engine_constructs_and_reports_chrome_not_found_when_path_unset` - (setting a bogus `LIBREOFFICE_PATH` is irrelevant; the test uses a bogus - `BrowserConfig(executable="/no/such")`). - -With Chrome (`pytest.mark.skipif(not has_chrome())`): - -- `test_html_to_pdf_returns_pdf_bytes` — bytes start with `b"%PDF-"`. -- `test_url_to_pdf_against_local_http_server`. -- `test_markdown_to_pdf_renders_table`. -- `test_concurrent_calls_from_threads`. -- `test_context_manager_shuts_down_on_exit`. -- `test_shutdown_is_idempotent`. -- `test_navigation_error_carries_url_and_reason`. -- `test_timeout_error_raised_when_selector_never_appears`. - -### Stub validation - -- `mypy --strict crates/py/python/folio/__init__.pyi` runs as part of CI. -- `pyright` smoke check against the same stubs. - -## Acceptance - -- [ ] `crates/py/Cargo.toml` declares `[lib] crate-type = ["cdylib"]`, - `name = "folio"`, depends on `pyo3` and `engine` (workspace). -- [ ] `crates/py/pyproject.toml` configures `maturin` builds with the - target Python ABIs and platform list. -- [ ] `crates/py/python/folio/__init__.pyi` shipped in the wheel, - exact signatures matching *Public API*. -- [ ] All listed Rust unit tests pass with `cargo test -p py`. -- [ ] All Python tests pass with `maturin develop` + `pytest`. -- [ ] `mypy --strict` passes against the stub. -- [ ] `cargo clippy -p py -- -D warnings` clean. -- [ ] No `unsafe` outside what PyO3 macros generate. -- [ ] `__version__` matches the workspace package version.
-- [ ] Wheel size < 30 MiB on linux-x64 (sanity). - -## Out of scope / follow-ups - -- LibreOffice + pdfops Python surfaces — separate spec. -- Async Python API (`async def html_to_pdf`) — likely a `pyo3-async` - follow-up; non-trivial because of the GIL/runtime dance. -- Streaming output via a Python file-like protocol. -- Type protocol exports for non-engine types (e.g. `Sequence[Cookie]` - Protocols). -- Deeper structural typing (`TypedDict` for headers) once API stabilises. diff --git a/docs/specs/40-special-features.md b/docs/specs/40-special-features.md deleted file mode 100644 index 6a51532..0000000 --- a/docs/specs/40-special-features.md +++ /dev/null @@ -1,408 +0,0 @@ -# Spec 40 — Special Features - -> Advanced features that Gotenberg supports but Folio is missing: -> downloading files from remote URLs, Basic Authentication, TLS, -> Cloud Run/Lambda support, and URL allow/deny lists. - -## Goal - -Implement special features that enable Folio to be deployed -in production environments with security, cloud integration, -and remote file access capabilities. - -## Scope - -**In:** - -### 1. Download from Remote URLs - -- Download files from HTTP/HTTPS URLs for conversion -- Support S3, GCS, Azure Blob URLs -- Timeout and retry logic -- Size limit for downloads - -### 2. Basic Authentication - -- HTTP basic auth for API endpoints -- Configurable username/password -- Exempt health/version endpoints - -### 3. TLS Support - -- HTTPS listener with cert/key -- Auto-redirect HTTP to HTTPS -- TLS version configuration - -### 4. Cloud Deployment - -- Cloud Run (GCP) configuration -- AWS Lambda handler -- Health check endpoints for load balancers - -### 5. URL Allow/Deny Lists (Security) - -- Regex-based URL filtering -- Separate allow and deny lists -- Deny list takes precedence - -**Out:** - -- OAuth2/OpenID Connect (complex, separate feature) -- mTLS client certificates (nice to have) -- Rate limiting (separate feature) - -## 1.
Download from Remote URLs - -### Gotenberg Implementation - -| Field | Gotenberg Source | Description | -|-------|------------------|-------------| -| Download from URL | `pkg/modules/chromium/chromium.go:~L500-600` | Uses `download.FromURL()` | - -### Implementation - -#### New Endpoint: `POST /forms/chromium/convert/url` (extend existing) - -Already accepts `url` field. Need to: -1. Download URL content to temp file -2. Convert downloaded file - -#### New Feature: Download Files from URLs in Multipart - -```rust -// crates/server/src/routes/chromium.rs - -use reqwest::Client; - -async fn download_url(url: &str, max_size: u64) -> Result<Vec<u8>, EngineError> { - let client = Client::new(); - let response = client.get(url) - .send() - .await - .map_err(|e| EngineError::Navigation { - url: url.into(), - reason: format!("Download failed: {}", e), - })?; - - // Check content length - if let Some(len) = response.content_length() { - if len > max_size { - return Err(EngineError::InvalidOption( - format!("File too large: {} bytes", len) - )); - } - } - - let bytes = response.bytes() - .await - .map_err(|e| EngineError::Navigation { - url: url.into(), - reason: format!("Download failed: {}", e), - })?; - - Ok(bytes.to_vec()) -} -``` - -#### Form Field: `downloadFiles` - -| Field | Type | Description | -|-------|------|-------------| -| `downloadFiles` | JSON array | URLs to download and include in conversion | - -Example: -```json -[ - "https://example.com/image.png", - "https://s3.amazonaws.com/bucket/document.pdf" -] -``` - -## 2. 
Basic Authentication - -### Gotenberg Implementation - -| Flag | Gotenberg Source | Description | -|------|------------------|-------------| -| `--api-basic-auth-username` | `pkg/modules/api/config.go:BasicAuthUsername` | Username | -| `--api-basic-auth-password` | `pkg/modules/api/config.go:BasicAuthPassword` | Password | - -### Implementation - -#### Middleware for Axum - -```rust -// crates/server/src/auth.rs - -use axum::middleware::Next; -use axum::http::{Request, StatusCode}; -use base64::{engine::general_purpose, Engine as _}; - -pub async fn basic_auth_middleware( - request: Request, - next: Next, - username: Option<String>, - password: Option<String>, -) -> Result<(), StatusCode> { - // Skip auth for health/version endpoints - if request.uri().path() == "/health" || request.uri().path() == "/version" { - return Ok(()); - } - - let Some(auth_header) = request.headers().get("Authorization") else { - return Err(StatusCode::UNAUTHORIZED); - }; - - let Some(auth_str) = auth_header.to_str().ok() else { - return Err(StatusCode::UNAUTHORIZED); - }; - - if !auth_str.starts_with("Basic ") { - return Err(StatusCode::UNAUTHORIZED); - } - - let encoded = &auth_str[6..]; - let Ok(decoded) = general_purpose::STANDARD.decode(encoded) else { - return Err(StatusCode::UNAUTHORIZED); - }; - - let Ok(credentials) = String::from_utf8(decoded) else { - return Err(StatusCode::UNAUTHORIZED); - }; - - let Some((user, pass)) = credentials.split_once(':') else { - return Err(StatusCode::UNAUTHORIZED); - }; - - if Some(user.to_string()) == username && Some(pass.to_string()) == password { - Ok(()) - } else { - Err(StatusCode::UNAUTHORIZED) - } -} -``` - -## 3. 
TLS Support - -### Gotenberg Implementation - -| Flag | Gotenberg Source | Description | -|------|------------------|-------------| -| `--api-tls-cert-file` | `pkg/modules/api/config.go:TlsCertFile` | TLS certificate | -| `--api-tls-key-file` | `pkg/modules/api/config.go:TlsKeyFile` | TLS private key | - -### Implementation - -#### TLS in Axum with `tokio-rustls` - -```rust -// crates/server/src/tls.rs - -use tokio_rustls::TlsAcceptor; -use rustls::{Certificate, PrivateKey, ServerConfig}; -use std::fs::File; -use std::io::Read; - -pub fn load_tls_config(cert_path: &Path, key_path: &Path) -> Result<ServerConfig, Box<dyn std::error::Error>> { - // Load certificate - let mut cert_file = File::open(cert_path)?; - let mut cert_buf = Vec::new(); - cert_file.read_to_end(&mut cert_buf)?; - let cert = Certificate(cert_buf); - - // Load private key - let mut key_file = File::open(key_path)?; - let mut key_buf = Vec::new(); - key_file.read_to_end(&mut key_buf)?; - let key = PrivateKey(key_buf); - - let config = ServerConfig::builder() - .with_safe_defaults() - .with_no_client_auth() - .with_single_cert(vec![cert], key)?; - - Ok(config) -} -``` - -#### Server Startup with TLS - -```rust -// crates/server/src/main.rs - -if let (Some(cert), Some(key)) = (&config.tls_cert_file, &config.tls_key_file) { - // TLS mode - let tls_config = load_tls_config(cert, key)?; - // Bind with TLS -} else { - // Plain HTTP mode (existing) -} -``` - -## 4. Cloud Deployment - -### Cloud Run (GCP) - -#### Gotenberg Reference - -Gotenberg has pre-built Docker images for Cloud Run: -- `gcr.io/gotenberg/gotenberg:latest` -- Health check endpoint: `/health` - -#### Folio Implementation - -```dockerfile -# Dockerfile.cloudrun -FROM rust:1.75 as builder -WORKDIR /app -COPY . . 
-RUN cargo build --release -p server - -FROM debian:bullseye -COPY --from=builder /app/target/release/folio-server /usr/local/bin/ -RUN apt-get update && apt-get install -y chromium libreoffice -EXPOSE 8080 -CMD ["folio-server", "--port", "8080"] -``` - -Environment variables for Cloud Run: -- `PORT=8080` (Cloud Run sets this automatically) - -### AWS Lambda - -#### Gotenberg Reference - -Gotenberg has Lambda runtime support via `github.com/aws/aws-lambda-go`. - -#### Folio Implementation (Future) - -Use `lambda_runtime` crate for Rust Lambda support. - -## 5. URL Allow/Deny Lists - -### Gotenberg Implementation - -| Flag | Gotenberg Source | Description | -|------|------------------|-------------| -| `--chromium-allow-list` | `pkg/modules/chromium/config.go:AllowList` | Allowed URL patterns | -| `--chromium-deny-list` | `pkg/modules/chromium/config.go:DenyList` | Denied URL patterns | - -### Implementation - -#### URL Validation - -```rust -// crates/server/src/url_filter.rs - -use regex::Regex; - -pub struct UrlFilter { - allow_list: Vec<Regex>, - deny_list: Vec<Regex>, -} - -impl UrlFilter { - pub fn new(allow: &[String], deny: &[String]) -> Result<Self, regex::Error> { - let allow_list = allow.iter() - .map(|p| Regex::new(p)) - .collect::<Result<Vec<_>, _>>()?; - - let deny_list = deny.iter() - .map(|p| Regex::new(p)) - .collect::<Result<Vec<_>, _>>()?; - - Ok(Self { allow_list, deny_list }) - } - - pub fn is_allowed(&self, url: &str) -> bool { - // Check deny list first (takes precedence) - if self.deny_list.iter().any(|re| re.is_match(url)) { - return false; - } - - // If allow list is empty, allow all (that aren't denied) - if self.allow_list.is_empty() { - return true; - } - - // Otherwise, must be in allow list - self.allow_list.iter().any(|re| re.is_match(url)) - } -} -``` - -## References to Gotenberg Source - -| Feature | Gotenberg File | Line Numbers | -|---------|------------------|-------------| -| Download URLs | `pkg/modules/chromium/chromium.go` | 
~L500-600 | -| Basic auth | `pkg/modules/api/api.go` | ~L100-150 | -| TLS support | `pkg/modules/api/api.go` | ~L150-200 | -| URL filter | `pkg/modules/chromium/chromium.go` | ~L600-700 | -| Cloud Run | `Dockerfile` | Full file | - -To read Gotenberg source: -```bash -cd /Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg -cat pkg/modules/chromium/chromium.go | grep -A10 "FromURL" -``` - -## Expected Behavior - -### Download from URLs -- Accept HTTP/HTTPS URLs in `downloadFiles` field -- Download to temp directory -- Apply size limit (default 50 MiB) -- Return error if download fails - -### Basic Auth -- Return `401 Unauthorized` if no credentials -- Return `401` if wrong credentials -- Skip auth for `/health` and `/version` - -### TLS -- Load cert/key from files -- Accept HTTPS connections -- Reject non-TLS connections (or redirect) - -### URL Filtering -- Deny list checked first (higher priority) -- Allow list empty = allow all (except denied) -- Regex patterns matched against full URL - -## Test Plan - -### Unit Tests - -- `download_url_returns_bytes` -- `download_url_exceeds_size_limit` -- `basic_auth_validates_credentials` -- `basic_auth_exempts_health_endpoint` -- `url_filter_deny_list_blocks` -- `url_filter_allow_list_permits` - -### Integration Tests - -- `download_and_convert_remote_html` -- `basic_auth_rejects_unauthorized_request` -- `tls_accepts_https_connections` -- `url_deny_list_blocks_navigation` - -## Acceptance - -- [ ] Download from remote URLs in multipart -- [ ] Basic auth middleware with exemption list -- [ ] TLS support with cert/key loading -- [ ] URL allow/deny lists with regex -- [ ] Cloud Run Dockerfile -- [ ] Unit tests for all features -- [ ] Integration tests for key scenarios -- [ ] `cargo clippy -p server -- -D warnings` clean - -## References - -- Gotenberg source: `/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/gotenberg/pkg/modules/` -- reqwest crate: https://docs.rs/reqwest/ -- Axum TLS: 
https://docs.rs/axum/latest/axum/#tls -- Cloud Run: https://cloud.google.com/run/docs -- AWS Lambda Rust: https://github.com/awslabs/aws-lambda-rust-runtime diff --git a/docs/specs/41-bindings-js.md b/docs/specs/41-bindings-js.md deleted file mode 100644 index cdf856a..0000000 --- a/docs/specs/41-bindings-js.md +++ /dev/null @@ -1,360 +0,0 @@ -# Spec 41 — Node bindings (`js` crate) - -> Self-contained napi-rs wrapper exposing `require('folio')` (or -> `import folio from 'folio'`) to Node.js users. - -## Goal - -Allow Node.js users to convert HTML / URL / Markdown to PDF in-process -via the same `engine` crate, returning real `Promise`s without -`block_on`, matching the README example in -`@/Users/__deesh_reddy__/projects/personal_git/rust_builds/folio/README.md:125-137`. - -## Scope - -**In:** - -- `ChromiumEngine` JS class with async methods `htmlToPdf`, `urlToPdf`, - `markdownToPdf`, `healthy`, `shutdown`. -- Plain TS objects (interfaces) for `PdfOptions`, `RequestContext`, - `BrowserConfig`, `Cookie`, `WaitCondition` (discriminated union). -- Auto-generated `.d.ts` shipped in the npm package. -- Prebuilt binaries on darwin-x64, darwin-arm64, linux-x64-gnu, - linux-arm64-gnu, win32-x64-msvc. -- Node ≥ 18 (`napi8`). - -**Out:** - -- LibreOffice and pdfops surfaces — Node users use the HTTP server today. - Follow-up. -- ESM-first published surface — package supports both CJS and ESM via - `"exports"`, default export is the `ChromiumEngine` class. -- Streaming output / chunked Buffer responses — return one `Buffer` for MVP. -- Worker-thread isolation helpers — out of MVP.
- -## Public API - -### TypeScript surface (auto-generated `index.d.ts`) - -```ts -export type EmulateMedia = 'print' | 'screen'; - -export interface PaperSize { - widthIn: number; - heightIn: number; -} -export const PAPER_A4: PaperSize; -export const PAPER_LETTER: PaperSize; -export const PAPER_LEGAL: PaperSize; -export const PAPER_A3: PaperSize; -export const PAPER_A5: PaperSize; - -export interface Margins { - top: number; right: number; bottom: number; left: number; -} -export const MARGINS_ZERO: Margins; -export const MARGINS_DEFAULT: Margins; - -export type WaitCondition = - | { kind: 'load' } - | { kind: 'domContentLoaded' } - | { kind: 'networkIdle' } - | { kind: 'selector'; selector: string } - | { kind: 'expression'; expression: string } - | { kind: 'delay'; durationMs: number }; - -export interface PdfOptions { - paper?: PaperSize; - margin?: Margins; - landscape?: boolean; - scale?: number; - printBackground?: boolean; - preferCssPageSize?: boolean; - emulateMedia?: EmulateMedia; - pageRanges?: string; - headerTemplate?: string; - footerTemplate?: string; - wait?: WaitCondition; -} - -export interface Cookie { - name: string; - value: string; - domain?: string; - path?: string; - secure?: boolean; - httpOnly?: boolean; -} - -export interface RequestContext { - userAgent?: string; - extraHeaders?: Record<string, string>; - cookies?: Cookie[]; - failOnStatus?: number[]; -} - -export interface BrowserConfig { - executable?: string; - headless?: boolean; - extraArgs?: string[]; - noSandbox?: boolean; - timeoutMs?: number; -} - -export class ChromiumEngine { - constructor(config?: BrowserConfig); - htmlToPdf(html: string, opts?: { baseUrl?: string; options?: PdfOptions; request?: RequestContext }): Promise<Buffer>; - urlToPdf(url: string, opts?: { options?: PdfOptions; request?: RequestContext }): Promise<Buffer>; - markdownToPdf(markdown: string, opts?: { options?: PdfOptions; request?: RequestContext }): Promise<Buffer>; - healthy(): Promise<boolean>; - 
shutdown(): Promise<void>; -} - -export class FolioError extends Error { - code: string; // e.g. 'INVALID_OPTION', 'TIMEOUT', 'NAVIGATION' - /** Present only when code === 'NAVIGATION'. */ - url?: string; - /** Present only when code === 'NAVIGATION'. */ - reason?: string; - /** Present only when code === 'CHROME_NOT_FOUND'. */ - searched?: string[]; -} - -export const VERSION: string; -``` - -### Rust surface (`crates/js/src/lib.rs`) - -```rust -use napi_derive::napi; - -#[napi] -pub struct ChromiumEngine { /* Arc<engine::ChromiumEngine> */ } - -#[napi] -impl ChromiumEngine { - #[napi(constructor)] - pub fn new(config: Option<BrowserConfigJs>) -> napi::Result<Self>; - - #[napi] - pub async fn html_to_pdf( - &self, - html: String, - opts: Option<HtmlToPdfArgs>, - ) -> napi::Result<napi::bindgen_prelude::Buffer>; - - #[napi] - pub async fn url_to_pdf( - &self, - url: String, - opts: Option<UrlToPdfArgs>, - ) -> napi::Result<napi::bindgen_prelude::Buffer>; - - #[napi] - pub async fn markdown_to_pdf( - &self, - markdown: String, - opts: Option<MarkdownToPdfArgs>, - ) -> napi::Result<napi::bindgen_prelude::Buffer>; - - #[napi] - pub async fn healthy(&self) -> bool; - - #[napi] - pub async fn shutdown(&self) -> napi::Result<()>; -} -``` - -`BrowserConfigJs`, `PdfOptionsJs`, etc. are `#[napi(object)]` plain -structs that map directly to the TS interfaces above. Field names are -camelCase via `#[napi(js_name = "...")]` where rename is needed. - -## Behavior - -### Runtime / async - -napi-rs ships with a built-in tokio integration: any `async fn` -annotated with `#[napi]` is converted into a JS `Promise` automatically. -**No** `block_on` is needed β€” napi-rs schedules futures on its own -runtime and resolves the JS Promise when the future completes. - -To use the same engine across many calls efficiently we keep an -`Arc<engine::ChromiumEngine>` inside the napi class. - -### `ChromiumEngine.constructor` - -The constructor cannot be `async` in napi-rs; instead: - -1. 
Build `engine::types::BrowserConfig` from the provided `BrowserConfigJs`. -2. Synchronously call `engine::ChromiumEngine::launch_with` via a small - helper that uses `napi::tokio::block_on` (napi-rs exposes this for - construction-time work). -3. Store the resulting engine in `Arc`. - -If launch fails, throw a `FolioError` (see *Error mapping*). JS callers -see a thrown error from `new ChromiumEngine(...)`. - -### `htmlToPdf` / `urlToPdf` / `markdownToPdf` - -Each: - -1. Convert `Option<*Args>` into the engine's owned types - (`PdfOptions`, `RequestContext`, optional `base_url`). -2. Validate: `opts.options.validate()?`. Validation errors throw a - `FolioError` with code `INVALID_OPTION`. -3. Call the corresponding `engine::ChromiumEngine` method. -4. Wrap the resulting `Vec<u8>` in `napi::bindgen_prelude::Buffer` (this - is zero-copy: napi-rs hands ownership of the Rust `Vec` to V8). - -### `healthy` / `shutdown` - -- `healthy` mirrors the engine's method. -- `shutdown` is idempotent. Subsequent calls return `Ok(())` quickly. - After shutdown, other methods reject with `FolioError(code = 'INTERNAL', message = 'engine shut down')`. - -### Error mapping - -Each `EngineError` variant produces a `napi::Error` with both: - -- A `code` (also exposed as a property on the JS `Error` object). -- A `reason` string (used as the JS `Error.message`). 
- -Mapping table: - -| `EngineError` | `code` (string) | Extra props on `Error` | -|------------------------------|------------------------|--------------------------------| -| `InvalidOption` | `INVALID_OPTION` | β€” | -| `InvalidPageRange` | `INVALID_PAGE_RANGE` | β€” | -| `ChromeNotFound { searched }`| `CHROME_NOT_FOUND` | `searched: string[]` | -| `ChromeLaunch(msg)` | `CHROME_LAUNCH` | β€” | -| `Cdp(msg)` | `CDP` | β€” | -| `Navigation { url, reason }` | `NAVIGATION` | `url: string`, `reason: string`| -| `Timeout(d)` | `TIMEOUT` | `seconds: number` | -| `Io(_)` | `IO` | β€” | -| `Internal(msg)` | `INTERNAL` | β€” | - -A small helper `into_napi_err(e: engine::EngineError) -> napi::Error` -handles this. Extra properties are attached via -`napi::Error::with_status` / `napi_create_error` and a JS-side wrapper -(`makeFolioError(rawErr)`) that copies fields onto a real `FolioError` -class instance. The JS wrapper lives in `crates/js/index.js` (or the -generated stub augmented post-build). - -### Concurrency - -A single `ChromiumEngine` instance is safe to use from any number of -concurrent JS calls (the underlying engine handles parallelism). Workers -created via `worker_threads` each get their own native instance β€” they -do not share state across the Worker boundary (this matches V8 isolation -guarantees and napi-rs's runtime model). - -### Module shape - -`require('folio')` returns the auto-generated module object with: - -- `ChromiumEngine` class. -- `FolioError` class (defined in JS to allow `instanceof`). -- Constants (`PAPER_A4`, `MARGINS_DEFAULT`, etc.). -- `VERSION` string. - -Distribution: - -- `crates/js/package.json` is the published npm package, name `folio`. -- The Rust artifact is loaded via `@napi-rs/cli`'s host loader pattern; - prebuilt binaries are downloaded by the post-install script per - platform. - -## Errors - -Every public method throws (sync) or rejects (async) only with -`FolioError` instances. 
Type errors arising from incorrect JS argument -shapes produce `TypeError` (napi-rs default). - -## Edge cases - -| Scenario | Required behavior | -|--------------------------------------------------------------|--------------------------------------------------------------------| -| `new ChromiumEngine()` with no Chrome installed | Throws `FolioError(code='CHROME_NOT_FOUND', searched=[...])`. | -| `htmlToPdf("")` | Resolves with a valid PDF Buffer. | -| `htmlToPdf` after `await shutdown()` | Rejects with `FolioError(code='INTERNAL')`. | -| Many parallel `htmlToPdf` from event loop | All resolve; engine handles concurrency. | -| Caller passes `delay: { durationMs: -1 }` | `INVALID_OPTION` error. | -| Caller passes `paper: { widthIn: 0, heightIn: 11 }` | `INVALID_OPTION` error. | -| User cancels by dropping the Promise | The render runs to completion (engine doesn't cancel mid-render in MVP); response is dropped harmlessly. | -| Large PDF (>1 GiB) | Buffer transfer succeeds but allocation may fail; rejects with `INTERNAL`. Not optimised for in MVP. | -| GC of `ChromiumEngine` without `await shutdown()` | The `Arc` keeps Chrome alive until last clone drops; emits a `tracing::warn!`. | -| Use from a `worker_thread` | Each worker has its own instance; no cross-worker sharing. | - -## Test plan - -### Rust unit tests (`crates/js/src/...`) - -- `browser_config_js_to_native_round_trip`. -- `pdf_options_js_to_native_round_trip` β€” every field defaulted vs set. -- `wait_condition_discriminated_union_to_native` β€” every variant. -- `cookie_js_to_native_round_trip`. -- `error_mapping_table` β€” for each `EngineError` variant, build a - `napi::Error`, assert `code` string and extra fields. - -### JS integration tests (`crates/js/__tests__/folio.test.ts`) - -Run via `vitest` against the built native module. - -Without Chrome (skipped if absent): - -- `module exports VERSION as semver`. -- `paper and margin constants frozen`. 
-- `creates ChromiumEngine and reports CHROME_NOT_FOUND when path is bogus`. -- `pdfOptions with invalid scale rejects`. - -With Chrome (`describe.skipIf(!hasChrome())`): - -- `htmlToPdf returns a Buffer starting with %PDF-`. -- `urlToPdf against a local http server`. -- `markdownToPdf renders a table`. -- `parallel calls all resolve`. -- `failOnStatus rejects with NAVIGATION carrying url and reason`. -- `selector wait timeout rejects with TIMEOUT carrying seconds`. -- `shutdown is idempotent and subsequent calls reject with INTERNAL`. -- `error.instanceof FolioError`. - -### Type-level tests - -- `tsd` snapshots assert that the generated `.d.ts` types match the - documented surface; CI fails if the snapshot drifts. - -### Build sanity - -A CI job per platform builds the addon and runs the test suite. -Prebuilt binaries are uploaded via `@napi-rs/cli artifacts`. - -## Acceptance - -- [ ] `crates/js/Cargo.toml` declares `[lib] crate-type = ["cdylib"]`, - `name = "folio"`, depends on `napi`, `napi-derive`, `engine`. -- [ ] `crates/js/package.json` is configured for `@napi-rs/cli` build, - with platform-specific optional dependencies (`@folio/folio-darwin-arm64` - style scoped sub-packages, or whatever the chosen distribution - pattern is β€” to be finalised before publish). -- [ ] Auto-generated `index.d.ts` matches the documented surface - (verified by `tsd` snapshot). -- [ ] All Rust unit tests pass with `cargo test -p js`. -- [ ] All JS tests pass with `npm test`. -- [ ] `cargo clippy -p js -- -D warnings` clean. -- [ ] `FolioError` JS class has subclass-friendly `instanceof` semantics - (verified by test). -- [ ] No `unsafe` outside what `#[napi]` macros generate. -- [ ] Released package publishes a CJS entry point (`require('folio')`) - and an ESM entry point (`import folio from 'folio'`). -- [ ] Wheel/binary size is reasonable (< 30 MiB per platform). - -## Out of scope / follow-ups - -- LibreOffice + pdfops surfaces β€” separate spec. 
-- AbortSignal cancellation of in-flight renders. -- Worker-thread shared engine handles via SharedArrayBuffer / message - passing. -- Streaming output: writable-stream-friendly responses. -- ESM-only re-architecture once Node 22 is the floor. -- Direct N-API zero-copy when the engine learns to write into a - pre-allocated buffer. diff --git a/docs/specs/41-github-issues-analysis.md b/docs/specs/41-github-issues-analysis.md deleted file mode 100644 index 4edcd52..0000000 --- a/docs/specs/41-github-issues-analysis.md +++ /dev/null @@ -1,358 +0,0 @@ -# GitHub Issues Analysis: PDF Generation Pain Points - -> Analysis of user complaints and feature requests from Gotenberg, -> wkhtmltopdf, and WeasyPrint GitHub issues. Reveals what -> users hate and what they want in PDF generation tools. - -## Executive Summary - -Based on 200+ GitHub issues analyzed across Gotenberg, wkhtmltopdf, -and WeasyPrint, the top user complaints are: - -1. **Large PDF file sizes** (2-10x larger than expected) -2. **Font rendering problems** (webfonts, missing system fonts) -3. **Image rendering failures** in HTML→PDF conversion -4. **Chromium version regressions** breaking existing workflows -5. **Performance degradation** after upgrades -6. **Poor error messages** (generic 500 errors) -7. **Header/footer crashes** with certain content - -Folio (Rust) has inherent advantages over Gotenberg (Go/Chromium) -and wkhtmltopdf (unmaintained WebKit). - ---- - -## 1. 
Gotenberg Issues Analysis - -### 1.1 File Size Problems (Critical) - -| Issue | Title | Pain Level | -|-------|-------|------------| -| #521 | Gotenberg generates larger PDFs than Chromium | πŸ”₯ High | -| #1056 | HTML to PDF file size 8X larger than wkhtmltopdf | πŸ”₯ High | -| #1067 | Generated PDF sizes v8.x 2-3x larger than v7.x | πŸ”₯ High | - -**Root Causes:** -- Webfonts embedded in PDF (264KB β†’ 131KB with local fonts) -- White background paths always rendered (Chromium bug) -- Chromium generates bloated PDF structure - -**User Workarounds:** -```bash -# Install fonts locally in Docker -apt-get install ttf-mscorefonts-installer - -# Post-process with Ghostscript -gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 \ - -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET \ - -sOutputFile=output.pdf input.pdf -``` - -**Folio Advantage:** -- βœ… Could use lopdf directly (no Chromium bloat) -- βœ… Native font subsetting -- βœ… No white background bug - ---- - -### 1.2 Font Rendering Issues (High) - -| Issue | Title | Pain Level | -|-------|-------|------------| -| #921 | Numbers deformed converting HTML to PDF | πŸ”₯ High | -| #1371 | Custom fonts not working on versions >8.21.1 | πŸ”₯ High | -| #861 | How to debug intermittent font/text rendering? 
| πŸ”₯ High | -| #1356 | Webfonts in header/footer cause 500 error | πŸ”₯ High | - -**Root Causes:** -- Chromium doesn't wait for webfonts to load -- `waitForSelector` / `waitWindowStatus` not used correctly -- Header/footer don't load external assets - -**User Complaints:** -> "Every so often a PDF generated with Gotenberg 8 will lack all fonts loaded with CSS @font-face" - -> "Numbers 6 and 8 get a bigger font size than other numbers" - -> "Including webfonts in header or footer will cause 500 Error" - -**Folio Advantage:** -- βœ… `waitForSelector` spec'ed (spec-36) -- βœ… Better font loading detection -- βœ… No header/footer crash (Rust safety) - ---- - -### 1.3 Image Rendering Failures (Medium-High) - -| Issue | Title | Pain Level | -|-------|-------|------------| -| #1178 | HTML conversion images not converted v8+ | πŸ”₯ High | -| #1356 | Webfonts cause 500 error | πŸ”₯ High | - -**Root Cause:** -```html -<!-- loading="lazy" breaks Chromium rendering --> -<img src="image.png" loading="lazy"> -``` - -**User Quote:** -> "In version 7.4.3: images display correctly. In version 8.20.1: images are not shown" - -**Folio Advantage:** -- βœ… Could auto-strip `loading="lazy"` attribute -- βœ… Better error messages (which image failed?) 
- ---- - -### 1.4 Chromium Regressions (Upgrade Blockers) - -| Issue | Title | Pain Level | -|-------|-------|------------| -| #1491 | backdrop-filter: blur() renders blank sections | πŸ”₯ High | -| #1397 | Increased conversion times after upgrade | πŸ”₯ High | - -**User Pain:** -> "We can't upgrade from v7 to v8 because of PDF size increase" - -> "Conversion times went from 2s to 15s after upgrading" - -**Folio Advantage:** -- βœ… Not dependent on Chromium version -- βœ… Consistent performance (no GC pauses like Go) - ---- - -### 1.5 Feature Requests (What Users Want) - -| Issue | Title | Priority | -|-------|-------|----------| -| #1454 | Add OCR support | πŸ”₯ High | -| #1484 | Switch from unoconv to LibreOfficeKit | πŸ”₯ High | -| #1390 | Landscape single page generation - auto cropping | πŸ”₯ Medium | -| #1482 | LibreOffice image preview | πŸ”₯ Medium | -| #1350 | Flatten configuration/qpdf expansion | πŸ”₯ Medium | - ---- - -## 2. wkhtmltopdf Issues (Archived 2023 - Unmaintained) - -### 2.1 Why Users Are Leaving - -| Issue | Title | Pain Level | -|-------|-------|------------| -| #4705 | Generates unportable PDF (font names blank) | πŸ”₯ Critical | -| #1926 | Testing HTML/CSS fails to render correctly | πŸ”₯ Critical | -| #5295 | Doesn't recognize justify-content | πŸ”₯ High | -| #5288 | Q: why does the font look so bad? | πŸ”₯ High | -| #2234 | SVG rendering problem | πŸ”₯ High | - -**Root Causes:** -- **Old WebKit (2012)** - No modern CSS support -- **No JavaScript** (ES3 only) -- **Poor font handling** - Generates blank font names -- **SVG broken** - `stroke-width: 1` causes black text - -**User Migration:** -> "I used to use wkhtmltopdf, but the project has been archived as the webkit binary hasn't been updated since 2015, so I have been looking for a replacement" - -**Folio Advantage:** -- βœ… Modern CSS support (via Chromium) -- βœ… Full JavaScript support -- βœ… Better font handling (system font detection) - ---- - -## 3. 
WeasyPrint Issues (Limited CSS Engine) - -| Issue | Title | Pain Level | -|-------|-------|------------| -| #1926 | Testing HTML/CSS fails to render correctly | πŸ”₯ Critical | -| #2234 | SVG rendering problem | πŸ”₯ High | - -**Root Causes:** -- **Custom engine** (not browser-grade) -- **No JavaScript at all** -- **Limited CSS** - Doesn't support `paged` media well - -**User Complaint:** -> "WeasyPrint got borked by CSS relative positioning. After I changed to absolute positioning the page comes out." - -**Folio Advantage:** -- βœ… Browser-grade rendering (Chromium) -- βœ… Full CSS support -- βœ… JavaScript support - ---- - -## 4. Common Pain Points (All Tools) - -### 4.1 Font Problems (Universal) - -| Problem | Gotenberg | wkhtmltopdf | WeasyPrint | Folio | -|---------|-----------|-------------|------------|-------| -| Webfont size bloat | πŸ”₯ Yes | πŸ”₯ Yes | ⚠️ Maybe | βœ… No (native) | -| Missing system fonts | πŸ”₯ Yes | πŸ”₯ Yes | πŸ”₯ Yes | ⚠️ Needs improvement | -| Custom font loading | πŸ”₯ Yes | πŸ”₯ Yes | πŸ”₯ Yes | βœ… Better | -| Font rendering bugs | πŸ”₯ Yes | πŸ”₯ Yes | ⚠️ Some | βœ… No (direct) | - -### 4.2 Performance Issues - -| Problem | Gotenberg (Go) | wkhtmltopdf | WeasyPrint | Folio (Rust) | -|---------|----------------|-------------|------------|---------------| -| GC pauses | πŸ”₯ Yes | ❌ No | ❌ No | βœ… No GC | -| Memory bloat | πŸ”₯ Yes (Chromium) | ⚠️ Medium | ⚠️ Medium | βœ… Lower | -| Slow upgrades | πŸ”₯ Yes | πŸ”₯ Yes (dead) | ⚠️ Some | βœ… Fast Rust | - -### 4.3 Error Handling - -| Problem | Gotenberg | wkhtmltopdf | WeasyPrint | Folio | -|---------|-----------|-------------|------------|-------| -| Generic 500 errors | πŸ”₯ Yes | πŸ”₯ Yes | πŸ”₯ Yes | ⚠️ Partial | -| No debug info | πŸ”₯ Yes | πŸ”₯ Yes | πŸ”₯ Yes | βœ… Structured logs | -| Opaque failures | πŸ”₯ Yes | πŸ”₯ Yes | πŸ”₯ Yes | βœ… Tracing | - ---- - -## 5. What Users Wish Existed - -Based on 200+ issues, here's what users want: - -### 5.1 Must-Have Features - -1. 
**OCR Support** - "We need to convert scanned PDFs to searchable PDFs" -2. **Better Font Handling** - "Auto-detect and embed system fonts" -3. **PDF Size Optimization** - "Why is my PDF 10x larger than expected?" -4. **Better Error Messages** - "500 error with no details is useless" -5. **LibreOfficeKit Integration** - "unoconv is slow and buggy" - -### 5.2 Nice-to-Have Features - -6. **Landscape Auto-Crop** - "Single page landscape generation" -7. **Image Preview for LibreOffice** - "See what's being converted" -8. **Flatten Config** - "Better control over qpdf options" -9. **Debug Mode for Fonts** - "Why is my font not loading?" -10. **PDF/A-3 Embed Files** - "Need to embed XML with PDF/A-3" - ---- - -## 6. Folio's Competitive Advantages - -### 6.1 Technical Advantages - -| Feature | Gotenberg (Go) | wkhtmltopdf | WeasyPrint | Folio (Rust) | -|---------|----------------|-------------|------------|---------------| -| **Memory Safety** | ⚠️ GC | βœ… C++ | βœ… Python | βœ… Compile-time | -| **Modern CSS** | βœ… Yes | ❌ No | ⚠️ Limited | βœ… Yes | -| **JavaScript** | βœ… Yes | ❌ No | ❌ No | βœ… Yes | -| **Multiple Modes** | ❌ Server only | ❌ CLI only | ❌ Library | βœ… 4 modes | -| **Bindings** | ❌ No | ❌ No | ❌ No | βœ… Python/Node | - -### 6.2 Solving User Pain Points - -| Pain Point | How Folio Solves It | -|-------------|----------------------| -| Large PDFs | Native lopdf + font subsetting | -| Font issues | Direct PDF manipulation, no Chromium bloat | -| Image failures | Better error messages + `loading="lazy"` strip | -| GC pauses | No GC (Rust) | -| Generic errors | Structured logging + tracing | -| Upgrade blockers | Semver + stable API | - ---- - -## 7. Recommendations for Folio - -### High Priority (Based on User Pain) - -1. **Implement OCR support** (Gotenberg #1454) - - Use `tesseract` or `ocrs` crate - - Endpoint: `POST /forms/ocr/recognize` - -2. 
**Improve font handling** - - Auto-detect system fonts - - Warn if webfont might bloat PDF - - Spec: `spec-36-chromium-wait-conditions.md` - -3. **PDF size optimization** - - Post-process with Ghostscript/qpdf - - Warn if PDF > threshold - - Add `optimize` field to endpoints - -4. **Better error messages** - - Structured error responses - - Include which resource failed - - Spec: `spec-35-logging.md` βœ… - -### Medium Priority - -5. **LibreOfficeKit integration** (Gotenberg #1484) - - Faster than unoconv - - Better font handling - -6. **Landscape auto-crop** (Gotenberg #1390) - - Detect content bounds - - Trim whitespace - -7. **Debug mode for fonts** - - Log which fonts are loaded - - Warn if fallback font used - ---- - -## 8. References - -### Gotenberg Issues Analyzed - -| Issue | Title | Impact | -|-------|-------|--------| -| #521 | Larger PDFs than Chromium/AthenaPDF | πŸ”₯ High | -| #1056 | 8X larger than wkhtmltopdf | πŸ”₯ High | -| #1067 | v8.x 2-3x larger than v7.x | πŸ”₯ High | -| #921 | Numbers deformed in PDF | πŸ”₯ High | -| #1371 | Custom fonts not working | πŸ”₯ High | -| #861 | Intermittent font rendering | πŸ”₯ High | -| #1178 | Images not converted v8+ | πŸ”₯ High | -| #1356 | Webfonts cause 500 error | πŸ”₯ High | -| #1491 | backdrop-filter blank sections | πŸ”₯ High | -| #1397 | Increased conversion times | πŸ”₯ High | -| #1454 | Add OCR support | πŸ”₯ High | -| #1484 | Switch to LibreOfficeKit | πŸ”₯ High | -| #1390 | Landscape auto-crop | πŸ”₯ Medium | -| #1482 | LibreOffice image preview | πŸ”₯ Medium | - -### wkhtmltopdf Issues Analyzed - -| Issue | Title | Impact | -|-------|-------|--------| -| #4705 | Unportable PDF (blank font names) | πŸ”₯ Critical | -| #1926 | CSS fails to render | πŸ”₯ Critical | -| #5295 | Doesn't recognize justify-content | πŸ”₯ High | -| #5288 | Font looks bad | πŸ”₯ High | -| #2234 | SVG rendering problem | πŸ”₯ High | - -### WeasyPrint Issues Analyzed - -| Issue | Title | Impact | -|-------|-------|--------| -| 
#1926 | Testing HTML/CSS fails | πŸ”₯ Critical | -| #2234 | SVG rendering problem | πŸ”₯ High | - ---- - -## 9. Conclusion - -**Users are desperate for:** -1. A **maintained** tool (wkhtmltopdf is dead) -2. **Smaller PDFs** (Gotenberg's #1 complaint) -3. **Better font handling** (universal pain point) -4. **Clearer error messages** (debuggability) -5. **OCR support** (emerging requirement) - -**Folio is well-positioned to solve these** with: -- βœ… Rust's memory safety + performance -- βœ… Modern Chromium rendering -- βœ… Multiple interface modes -- βœ… Active development (unlike wkhtmltopdf) - -**Next steps:** Implement OCR (#1454), improve font handling, add PDF optimization. diff --git a/docs/specs/42-smart-pdf-optimiser.md b/docs/specs/42-smart-pdf-optimiser.md deleted file mode 100644 index bfd8f30..0000000 --- a/docs/specs/42-smart-pdf-optimiser.md +++ /dev/null @@ -1,368 +0,0 @@ -# Spec 42 β€” Smart PDF Optimiser - -> Automatically detect and reduce oversized PDFs generated from -> HTML/URL conversions. Solves the #1 complaint: "PDFs 8x larger -> than expected" (Gotenberg issues #521, #1056, #1067). - -## Goal - -Create an intelligent PDF optimisation system that automatically -detects bloated PDFs and offers one-click compression -with multiple quality presets. This directly addresses the -top user complaint across all PDF generation tools. - -## Problem Analysis - -### Gotenberg Issues (Real User Quotes) - -> "We recently switched from AthenaPDF to Gotenberg... noticed a -> significant increase of file size... broke our integration with -> other tools which enforce a file size limit." -> β€” Issue #521 - -> "Generated PDF sizes with v8.x are ~2-3x larger than -> same generated PDF on v7.x... 286kb vs 795kb" -> β€” Issue #1067 - -> "With Google web font: 264 KB. With locally installed -> version of that font: 131 KB... Ghostscript can reduce -> even more... 
27 MB → 12 MB → 1.1 MB" -> — Issue #521 - -### Root Causes Identified - -| Cause | Impact | Solution | -|------|--------|----------| -| Web fonts embedded in PDF | +200% size | Detect & warn, suggest local install | -| White background paths (Chromium bug) | +50% size | Strip background paths | -| No compression applied | +300% size | Apply Ghostscript/qpdf compression | -| Duplicate images (Chromium bug #1077) | +100% size | Deduplicate images | -| Unused fonts subset not applied | +150% size | Proper font subsetting | - -## Scope - -**In:** - -- `POST /forms/pdfengines/optimise` endpoint -- Auto-detection of bloated PDFs (>5MB threshold) -- Three quality presets: `screen`, `ebook`, `printer` -- Backend selection: Ghostscript (best), qpdf, pdfcpu -- Pre-conversion size estimation endpoint -- Size warning headers in responses -- Image deduplication (Chromium bug #1077) -- Font subsetting verification - -**Out:** - -- Automatic optimisation without user request (too magic) -- PDF/A compliance breaking (document in spec-22) -- Lossy image compression (separate feature) - -## Implementation - -### 1. New Endpoint: `POST /forms/pdfengines/optimise` - -```rust -// crates/server/src/routes/pdfengines.rs - -/// Optimise PDF file size. 
-pub async fn optimise( - State(state): State<AppState>, - mp: Multipart, -) -> ApiResult<impl IntoResponse> { - let start = Instant::now(); - let form = parse_multipart(mp).await?; - - // Extract options - let preset = form.get("preset").unwrap_or("screen").to_string(); - let files = extract_files(&form)?; - - if files.len() != 1 { - return Err(ApiError::InvalidOption( - "optimise requires exactly one PDF file".into() - )); - } - - // Optimise - let result = state - .pdfops - .as_ref() - .unwrap() - .optimise(&files[0], &preset) - .await?; - - let duration = start.elapsed().as_secs_f64(); - - // Log optimisation stats - tracing::info!( - bytes_in = files[0].len(), - bytes_out = result.len(), - ratio = result.len() as f64 / files[0].len() as f64, - duration_ms = duration * 1000.0, - "PDF optimised" - ); - - pdf_response(result, "result.pdf") -} -``` - -### 2. PDF Ops Implementation - -```rust -// crates/engine/src/pdfops/optimise.rs - -use std::process::{Command, Stdio}; - -pub struct OptimiseOptions { - pub preset: OptimisePreset, - pub backend: OptimiseBackend, -} - -#[derive(Debug, Clone, Copy)] -pub enum OptimisePreset { - Screen, // Low quality, 72 DPI, heavy compression - Ebook, // Medium quality, 150 DPI - Printer, // High quality, 300 DPI, light compression -} - -#[derive(Debug, Clone, Copy)] -pub enum OptimiseBackend { - Ghostscript, // Best compression, slow - Qpdf, // Medium compression, fast - PdfCpu, // Light compression, fastest -} - -impl PdfOps { - pub async fn optimise( - &self, - input: &[u8], - preset: &str, - ) -> Result<Vec<u8>, EngineError> { - let preset = match preset.to_lowercase().as_str() { - "screen" => OptimisePreset::Screen, - "ebook" => OptimisePreset::Ebook, - "printer" => OptimisePreset::Printer, - _ => return Err(EngineError::InvalidOption( - format!("Unknown preset: {}, use screen/ebook/printer", preset) - )), - }; - - // Try backends in order of compression quality - let backends: Vec<OptimiseBackend> = vec![ - 
OptimiseBackend::Ghostscript, - OptimiseBackend::Qpdf, - OptimiseBackend::PdfCpu, - ]; - - for backend in backends { - if backend.is_available() { - tracing::info!(?backend, "Using backend for optimisation"); - return self.optimise_with_backend(input, &preset, backend).await; - } - } - - Err(EngineError::Internal( - "No optimisation backend available (install ghostscript/qpdf/pdfcpu)".into() - )) - } - - async fn optimise_with_backend( - &self, - input: &[u8], - preset: &OptimisePreset, - backend: OptimiseBackend, - ) -> Result<Vec<u8>, EngineError> { - match backend { - OptimiseBackend::Ghostscript => self.optimise_ghostscript(input, preset).await, - OptimiseBackend::Qpdf => self.optimise_qpdf(input, preset).await, - OptimiseBackend::PdfCpu => self.optimise_pdfcpu(input, preset).await, - } - } - - async fn optimise_ghostscript( - &self, - input: &[u8], - preset: &OptimisePreset, - ) -> Result<Vec<u8>, EngineError> { - let preset_args = match preset { - OptimisePreset::Screen => vec![ - "-dPDFSETTINGS=/screen", - "-dCompatibilityLevel=1.4", - "-dDownsampleColorImages=true", - "-dColorImageResolution=72", - "-dAutoFilterColorImages=false", - "-dColorImageFilter=/DCTEncode", - ], - OptimisePreset::Ebook => vec![ - "-dPDFSETTINGS=/ebook", - "-dCompatibilityLevel=1.5", - "-dDownsampleColorImages=true", - "-dColorImageResolution=150", - ], - OptimisePreset::Printer => vec![ - "-dPDFSETTINGS=/printer", - "-dCompatibilityLevel=1.6", - "-dColorImageResolution=300", - ], - }; - - let mut cmd = Command::new("gs"); - cmd.arg("-sDEVICE=pdfwrite") - .arg("-dNOPAUSE") - .arg("-dQUIET") - .arg(format!("-sOutputFile={}", output_path.display())) - .args(&preset_args) - .arg(input_path.display()); - - let output = cmd.output() - .map_err(|e| EngineError::Internal( - format!("Ghostscript failed: {}", e) - ))?; - - if !output.status.success() { - return Err(EngineError::Internal( - format!("Ghostscript error: {}", String::from_utf8_lossy(&output.stderr)) - )); - } - - 
tokio::fs::read(&output_path).await - .map_err(|e| EngineError::Internal(e.to_string())) - } -} -``` - -### 3. Size Estimation Endpoint - -```rust -// New endpoint: POST /estimate - -pub async fn estimate_size( - State(state): State<AppState>, - mp: Multipart, -) -> ApiResult<impl IntoResponse> { - let form = parse_multipart(mp).await?; - - // Parse the conversion request - let options = parse_chromium_options(&form)?; - - // Estimate size based on inputs - let estimate = SizeEstimate { - estimated_mb: calculate_estimate(&form).await?, - warnings: vec![], - }; - - // Check for web fonts - if has_web_fonts(&form) { - estimate.warnings.push( - "Uses web fonts - may increase size by 200%".into() - ); - } - - // Check for images - if has_large_images(&form) { - estimate.warnings.push( - "Contains large images - consider optimisation".into() - ); - } - - Ok(Json(estimate)) -} - -#[derive(Serialize)] -struct SizeEstimate { - estimated_mb: f64, - warnings: Vec<String>, -} -``` - -### 4. Response Headers (Size Warnings) - -```rust -// Add to all PDF conversion responses - -if let Some(ref response) = result { - let size_mb = response.body().len() as f64 / 1_000_000.0; - - if size_mb > 5.0 { - response.headers_mut().insert( - HeaderName::from_static("X-Size-Warning"), - HeaderValue::from_str(&format!( - "PDF size {:.1} MB exceeds recommended 5 MB. Consider POST /forms/pdfengines/optimise", - size_mb - )).unwrap(), - ); - } -} -``` - -## Form Fields - -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `files` | file | required | PDF file to optimise | -| `preset` | string | "screen" | Compression preset: screen/ebook/printer | -| `backend` | string | "auto" | Force backend: ghostscript/qpdf/pdfcpu | - -## Expected Behaviour - -### Optimise Endpoint - -1. Accept PDF file + preset -2. Detect best available backend (Ghostscript > qpdf > pdfcpu) -3. Apply compression based on preset -4. Return optimised PDF -5. 
Include compression stats in response headers - -### Size Estimation - -1. Accept same form data as conversion endpoints -2. Analyse inputs (HTML, CSS, images, fonts) -3. Return estimated output size -4. Warn about web fonts, large images - -### Response Headers - -``` -X-Original-Size: 10240 (10 MB) -X-Optimised-Size: 2048 (2 MB) -X-Compression-Ratio: 20% (80% reduction) -X-Warnings: Uses web fonts -``` - -## Test Plan - -### Unit Tests - -- `optimise_ghostscript_screen_preset` -- `optimise_qpdf_fallback_when_ghostscript_missing` -- `estimate_size_with_web_fonts` -- `parse_preset_from_form` - -### Integration Tests - -- `optimise_10mb_pdf_to_2mb` - Real compression -- `optimise_presets_produce_different_sizes` -- `estimate_warns_about_web_fonts` -- `response_header_includes_size_warning` - -### Performance Tests - -- `optimise_100mb_pdf_completes_in_30s` - -## Acceptance - -- [ ] `POST /forms/pdfengines/optimise` endpoint -- [ ] Three presets: screen/ebook/printer -- [ ] Auto backend selection (Ghostscript first) -- [ ] `POST /estimate` endpoint for size estimation -- [ ] Response headers with size warnings -- [ ] Unit tests for all functions -- [ ] Integration tests with real PDFs -- [ ] `cargo clippy -p engine -- -D warnings` clean - -## References - -- Gotenberg issue #521: https://github.com/gotenberg/gotenberg/issues/521 -- Gotenberg issue #1056: https://github.com/gotenberg/gotenberg/issues/1056 -- Ghostscript documentation: https://www.ghostscript.com/doc/9.56.1/Use.htm -- qpdf documentation: https://qpdf.readthedocs.io/ diff --git a/docs/specs/43-font-doctor.md b/docs/specs/43-font-doctor.md deleted file mode 100644 index 72b2796..0000000 --- a/docs/specs/43-font-doctor.md +++ /dev/null @@ -1,391 +0,0 @@ -# Spec 43 β€” Font Doctor - -> Diagnose and fix font-related rendering issues, the #2 -> complaint across PDF generation tools. Provides endpoints to -> detect missing fonts, suggest fixes, and validate font loading. 
- -## Goal - -Create a comprehensive font diagnostics system that detects, -diagnoses, and helps fix font-related issues in PDF -generation. Addresses Gotenberg issues #921, #1371, #861 -where users struggle with deformed numbers, missing fonts, and -intermittent rendering failures. - -## Problem Analysis - -### Real User Quotes (Gotenberg Issues) - -> "Numbers 6 and 8 get a bigger font size than other -> numbers after conversion... The problem isn't with the HTML, -> everything renders just fine. After conversion the resulted -> PDF file shows this problem." -> — Issue #921 - -> "Every so often a PDF generated with Gotenberg 8 will -> lack all fonts loaded with CSS @font-face... It seems -> standard fonts work, the header and footer are both using -> font-family: 'Helvetica Neue', Helvetica, Roboto, Arial, -> sans-serif; I suppose a workaround could be to rebuild -> the Docker container" -> — Discussion #861 - -> "Custom fonts not working on versions >8.21.1... -> After upgrading to 8.30.0: The font stack was -> simplified from 30+ packages to 8. Documents relying on -> Microsoft Core Fonts now use metric-compatible replacements." 
-> β€” Issue #1371 - -### Root Causes - -| Problem | Impact | Detection Method | -|----------|--------|-------------------| -| Font not installed in container | Deformed text, wrong fonts | Check system fonts | -| Web fonts not loaded in time | Missing text | `waitForSelector` + font check | -| Chromium font cache issues | Intermittent failures | Clear cache, retry | -| Fallback fonts used | Layout shifts | Compare requested vs actual | -| Large web fonts | 10x PDF size | Check font file sizes | - -## Scope - -**In:** - -- `GET /debug/fonts` - List all system fonts -- `POST /debug/validate-fonts` - Check if fonts will render -- `POST /debug/diagnose-html` - Full font diagnostics for HTML -- Font loading wait mechanism (extend spec-36) -- Auto-suggestion for missing fonts -- Dockerfile generator for custom fonts - -**Out:** - -- Font installation via API (security risk) -- Automatic font downloading (copyright concerns) -- Font substitution algorithm (too complex) - -## Implementation - -### 1. Font Detection (`GET /debug/fonts`) - -```rust -// crates/server/src/routes/debug.rs - -use font_kit::source::SystemSource; - -/// List all system fonts with metadata. -pub async fn list_fonts() -> ApiResult<impl IntoResponse> { - let source = SystemSource::new(); - let fonts = source.all_fonts().map_err(|e| { - ApiError::Internal(format!("Failed to list fonts: {}", e)) - })?; - - let font_list: Vec<FontInfo> = fonts - .iter() - .map(|(path, font)| FontInfo { - name: font.name().to_string(), - family: font.family_name().to_string(), - style: format!("{:?}", font.style()), - path: path.to_string_lossy().to_string(), - size_bytes: std::fs::metadata(path) - .map(|m| m.len()) - .unwrap_or(0), - }) - .collect(); - - Ok(Json(FontList { fonts: font_list })) -} - -#[derive(Serialize)] -struct FontInfo { - name: String, - family: String, - style: String, - path: String, - size_bytes: u64, -} - -#[derive(Serialize)] -struct FontList { - fonts: Vec<FontInfo>, -} -``` - -### 2. 
Font Validation (`POST /debug/validate-fonts`) - -```rust -// Validate that fonts in CSS will render correctly - -pub async fn validate_fonts( - mp: Multipart, -) -> ApiResult<impl IntoResponse> { - let form = parse_multipart(mp).await?; - - let mut html = form.get("html").cloned(); - let mut css = form.get("css").cloned(); - let url = form.get("url").cloned(); - - // Extract font families from CSS/HTML - let font_families = extract_font_families(html, css, url).await?; - - // Check each font - let mut results = Vec::new(); - for family in font_families { - let status = check_font_availability(&family).await; - results.push(FontValidation { - family: family.clone(), - available: status.available, - installed_font: status.installed_font, - suggestion: status.suggestion, - }); - } - - Ok(Json(FontValidationResponse { fonts: results })) -} - -struct FontAvailability { - available: bool, - installed_font: Option<String>, - suggestion: Option<String>, -} - -async fn check_font_availability(family: &str) -> FontAvailability { - let source = SystemSource::new(); - - // Check if font is installed - if let Ok(fonts) = source.select_family_by_name(family) { - if !fonts.is_empty() { - return FontAvailability { - available: true, - installed_font: Some(fonts[0].name().to_string()), - suggestion: None, - }; - } - } - - // Not installed - suggest similar or default - let suggestion = find_similar_font(family); - - FontAvailability { - available: false, - installed_font: None, - suggestion: Some(format!( - "Font '{}' not installed. {}", - family, - suggestion.unwrap_or_else(|| "Install via: apt-get install ttf-mscorefonts-installer".into()) - )), - } -} -``` - -### 3. 
HTML Diagnostics (`POST /debug/diagnose-html`) - -```rust -// Full diagnostics for an HTML file - -pub async fn diagnose_html( - State(state): State<AppState>, - mp: Multipart, -) -> ApiResult<impl IntoResponse> { - let form = parse_multipart(mp).await?; - - let html = form.get("html").ok_or_else(|| { - ApiError::InvalidOption("html field required".into()) - })?; - - let mut diagnostics = HtmlDiagnostics { - fonts: Vec::new(), - warnings: Vec::new(), - suggestions: Vec::new(), - }; - - // 1. Extract all font families - let font_families = extract_font_families_from_html(&html); - for family in font_families { - let available = check_font_availability(&family).await; - if !available.available { - diagnostics.warnings.push(format!( - "Font '{}' not installed", - family - )); - if let Some(suggestion) = available.suggestion { - diagnostics.suggestions.push(suggestion); - } - } - diagnostics.fonts.push(FontDetail { - family: family, - installed: available.available, - path: available.installed_font, - }); - } - - // 2. Check for web fonts (will bloat PDF) - if has_web_fonts(&html) { - diagnostics.warnings.push( - "HTML uses web fonts - PDF size may increase by 200%".into() - ); - diagnostics.suggestions.push( - "Install fonts locally in Docker: apt-get install ttf-mscorefonts-installer".into() - ); - } - - // 3. Validate CSS @font-face declarations - let font_face_issues = validate_font_face(&html).await?; - diagnostics.warnings.extend(font_face_issues); - - Ok(Json(diagnostics)) -} - -#[derive(Serialize)] -struct HtmlDiagnostics { - fonts: Vec<FontDetail>, - warnings: Vec<String>, - suggestions: Vec<String>, -} -``` - -### 4. 
Font Wait Mechanism (Chromium) - -```rust -// Extend spec-36: wait for fonts to load - -// In chromium/mod.rs render function -if let Some(ref font_wait) = opts.wait_for_fonts { - // Wait for fonts to be loaded - let js = format!( - r#" - const fontsLoaded = await document.fonts.ready; - return fontsLoaded; - "# - ); - - page.evaluate(&js).await.map_err(|e| { - EngineError::Navigation { - url: "font-wait".into(), - reason: format!("Font loading timeout: {}", e), - } - })?; -} -``` - -### 5. Dockerfile Generator - -```bash -# Generated Dockerfile for custom fonts - -# Usage: POST /debug/generate-dockerfile -# Body: { "fonts": ["Comic Sans", "Helvetica Neue"] } - -pub async fn generate_dockerfile( - Json(request): Json<DockerfileRequest>, -) -> ApiResult<impl IntoResponse> { - let mut dockerfile = vec![ - "FROM gotenberg/gotenberg:latest".to_string(), - ]; - - for font in &request.fonts { - match font.as_str() { - "Comic Sans" => { - dockerfile.push("RUN apt-get update && apt-get install -y fonts-comic-sans".into()); - } - "Helvetica Neue" => { - dockerfile.push( - "COPY helvetica-neue.ttf /usr/share/fonts/truetype/".into() - ); - } - _ => { - dockerfile.push(format!( - "# TODO: Add installation command for {}", - font - )); - } - } - } - - Ok(TextResponse(dockerfile.join("\n"))) -} -``` - -## Expected Behaviour - -### `GET /debug/fonts` - -```json -{ - "fonts": [ - { - "name": "Arial", - "family": "Arial", - "style": "Normal", - "path": "/usr/share/fonts/truetype/arial.ttf", - "size_bytes": 786432 - } - ] -} -``` - -### `POST /debug/validate-fonts` - -```json -{ - "fonts": [ - { - "family": "Comic Sans", - "available": false, - "installed_font": null, - "suggestion": "Font 'Comic Sans' not installed. 
Install via: apt-get install fonts-comic-sans" - } - ] -} -``` - -### `POST /debug/diagnose-html` - -```json -{ - "fonts": [ - {"family": "Arial", "installed": true, "path": "/usr/share/fonts/arial.ttf"} - ], - "warnings": [ - "Font 'Helvetica Neue' not installed", - "HTML uses web fonts - PDF size may increase by 200%" - ], - "suggestions": [ - "Install fonts locally in Docker: apt-get install ttf-mscorefonts-installer" - ] -} -``` - -## Test Plan - -### Unit Tests - -- `list_fonts_returns_system_fonts` -- `check_font_availability_detects_missing` -- `extract_font_families_from_css` -- `validate_font_face_returns_errors` - -### Integration Tests - -- `diagnose_html_finds_missing_fonts` -- `validate_fonts_returns_suggestions` -- `dockerfile_generator_creates_valid_dockerfile` - -## Acceptance - -- [ ] `GET /debug/fonts` endpoint -- [ ] `POST /debug/validate-fonts` endpoint -- [ ] `POST /debug/diagnose-html` endpoint -- [ ] Font availability checking with suggestions -- [ ] Web font detection and warnings -- [ ] Dockerfile generator for custom fonts -- [ ] Unit tests for all font functions -- [ ] Integration tests with real HTML/CSS -- [ ] `cargo clippy -p server -- -D warnings` clean - -## References - -- Gotenberg issue #921: https://github.com/gotenberg/gotenberg/issues/921 -- Gotenberg issue #1371: https://github.com/gotenberg/gotenberg/issues/1371 -- Gotenberg discussion #861: https://github.com/gotenberg/gotenberg/discussions/861 -- font-kit crate: https://docs.rs/font-kit/ -- CSS @font-face spec: https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face diff --git a/docs/specs/44-crystal-clear-errors.md b/docs/specs/44-crystal-clear-errors.md deleted file mode 100644 index 97d0841..0000000 --- a/docs/specs/44-crystal-clear-errors.md +++ /dev/null @@ -1,389 +0,0 @@ -# Spec 44 — Crystal-Clear Error Messages - -> Replace generic "500 Internal Server Error" with actionable, -> structured error responses.
Addresses Gotenberg issues #1356, -> #921 and WeasyPrint issue #1926, where users get opaque errors with no guidance. - -## Goal - -Transform error handling from generic HTTP status codes to -rich, actionable error responses that tell users exactly -what went wrong and how to fix it. This is the #3 complaint -across all PDF generation tools. - -## Problem Analysis - -### Real User Quotes - -> "Including web fonts in header or footer will cause 500 -> Error / Printing failed (-32000)... I feel Gotenberg should -> ignore it without performance impact or we should update the -> docs to reflect that." -> — Issue #1356 - -> "I've noticed some problems with converting html to pdf: for -> some reason the numbers 6 and 8 get a bigger font size -> than other numbers... I suppose a workaround could be to -> rebuild the Docker container" -> — Issue #921 - -> "Testing HTML / CSS fails to render correctly... it fails -> to render correctly. I am not sure where to start because -> it generated no error messages." -> — WeasyPrint issue #1926 - -### Current State (Bad) - -```json -{ - "error": "Printing failed (-32000)", - "code": "INTERNAL" -} -``` - -### Desired State (Good) - -```json -{ - "error": "PDF generation failed: image not loaded", - "code": "RESOURCE_TIMEOUT", - "details": { - "url": "https://cdn.example.com/image.png", - "timeout_ms": 30000, - "suggestion": "Add --form 'waitDelay=5s' or check URL accessibility" - }, - "documentation": "https://folio.dev/docs/troubleshooting#image-not-loaded" -} -``` - -## Scope - -**In:** - -- Structured error responses with suggestions -- Error code taxonomy (not just INTERNAL) -- Suggestions field with fix instructions -- Documentation links for each error type -- Field-level validation errors -- Resource-level error details (which URL failed) -- Stack trace in debug mode only - -**Out:** - -- Exposing internal paths (security risk) -- Full Chromium logs in production -- Arbitrary error message from engine (sanitisation needed) - -## Error Code
Taxonomy - -### Conversion Errors - -| Code | HTTP Status | Description | Suggestion | -|------|-------------|-------------|------------| -| `NAVIGATION` | 502 | Failed to navigate to URL | Check URL accessibility | -| `TIMEOUT` | 504 | Conversion timed out | Increase `--request-timeout` | -| `INVALID_OPTION` | 400 | Bad form field value | Check field format | -| `INVALID_PAGE_RANGE` | 400 | Bad page range syntax | Use format "1-5,7" | -| `RESOURCE_TIMEOUT` | 502 | Sub-resource failed to load | Check CDN/network | -| `RESOURCE_404` | 502 | Sub-resource not found | Fix missing images/CSS | -| `CHROMIUM_CRASH` | 503 | Chromium process died | Restart or check memory | -| `LIBREOFFICE_CRASH` | 503 | LibreOffice failed | Check document format | -| `FONT_MISSING` | 200 + warning | Font not installed | Install font in Docker | -| `WEB_FONT_BLOAT` | 200 + warning | Web font increases size | Use local fonts | - -### Validation Errors - -| Code | Description | Suggestion | -|------|-------------|------------| -| `MISSING_FIELD` | Required field not provided | Add `files` or `url` field | -| `INVALID_PAPER_SIZE` | Bad paper dimensions | Use format "8.5,11" or "A4" | -| `INVALID_MARGIN` | Bad margin value | Use float like "1.0" | -| `INVALID_BOOL` | Not true/false | Use "true" or "false" | -| `INVALID_JSON` | Bad JSON in field | Check JSON syntax | - -## Implementation - -### 1. 
Enhanced Error Type - -```rust -// crates/engine/src/error.rs - -#[derive(Debug, Clone, Default, Serialize)] -pub struct ApiErrorResponse { - pub error: String, - pub code: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub details: Option<ErrorDetails>, - #[serde(skip_serializing_if = "Option::is_none")] - pub suggestion: Option<String>, - #[serde(skip_serializing_if = "Option::is_none")] - pub documentation: Option<String>, -} - -#[derive(Debug, Clone, Default, Serialize)] -pub struct ErrorDetails { - pub url: Option<String>, - pub timeout_ms: Option<u64>, - pub field: Option<String>, - pub value: Option<String>, - pub resource_errors: Option<Vec<ResourceError>>, -} - -#[derive(Debug, Clone, Serialize)] -pub struct ResourceError { - pub url: String, - pub status_code: Option<u16>, - pub error: String, -} - -impl ApiError { - pub fn to_response(&self) -> (StatusCode, Json<ApiErrorResponse>) { - match self { - ApiError::Navigation { url, reason } => ( - StatusCode::BAD_GATEWAY, - Json(ApiErrorResponse { - error: format!("Navigation failed: {}", reason), - code: "NAVIGATION".into(), - details: Some(ErrorDetails { - url: Some(url.clone()), - ..Default::default() - }), - suggestion: Some(format!( - "Check that {} is accessible.
Try with waitDelay=5s", - url - )), - documentation: Some( - "https://folio.dev/docs/troubleshooting#navigation-failed".into() - ), - }) - ), - - ApiError::Timeout(duration) => ( - StatusCode::GATEWAY_TIMEOUT, - Json(ApiErrorResponse { - error: "Conversion timed out".into(), - code: "TIMEOUT".into(), - details: Some(ErrorDetails { - timeout_ms: Some(duration.as_millis() as u64), - ..Default::default() - }), - suggestion: Some(format!( - "Increase timeout: --request-timeout {}s", - duration.as_secs() * 2 - )), - documentation: Some( - "https://folio.dev/docs/troubleshooting#timeout".into() - ), - }) - ), - - ApiError::InvalidOption(msg) => ( - StatusCode::BAD_REQUEST, - Json(ApiErrorResponse { - error: msg.clone(), - code: "INVALID_OPTION".into(), - suggestion: Some( - "Check field format in documentation".into() - ), - documentation: Some( - "https://folio.dev/docs/api#form-fields".into() - ), - ..Default::default() - }) - ), - - // ... handle all error variants - } - } -} -``` - -### 2. Resource Error Collection - -```rust -// In chromium/mod.rs - collect resource errors - -struct ResourceErrorCollector { - errors: Vec<ResourceError>, -} - -impl ResourceErrorCollector { - fn new() -> Self { - Self { errors: Vec::new() } - } - - async fn monitor_page(&mut self, page: &Page) { - // Listen for failed requests - page.event_listener::<RequestFailed>() - .await - .for_each(|event| { - if let Some(status) = event.response_status { - if status >= 400 { - self.errors.push(ResourceError { - url: event.request_url.unwrap_or_default(), - status_code: Some(status), - error: format!("HTTP {}", status), - }); - } - } - }); - } - - fn into_api_error(self) -> Option<ApiError> { - if self.errors.is_empty() { - None - } else { - Some(ApiError::ResourceErrors(self.errors)) - } - } -} -``` - -### 3. 
Field-Level Validation - -```rust -// Improved form parsing with field-level errors - -pub fn parse_paper_size(form: &HashMap<String, String>) -> Result<(f64, f64), ApiError> { - let value = form.get("paperSize").ok_or_else(|| { - ApiError::InvalidOption( - "paperSize field is required".into() - ) - })?; - - // Try named sizes - let dimensions = match value.as_str() { - "A4" => (210.0, 297.0), - "Letter" => (215.9, 279.4), - "Legal" => (215.9, 355.6), - _ => { - // Try "W,H" format - let parts: Vec<&str> = value.split(',').collect(); - if parts.len() != 2 { - return Err(ApiError::InvalidOption( - format!( - "Invalid paperSize: '{}'. Use 'A4', 'Letter', or 'W,H' format (e.g., '8.5,11')", - value - ) - )); - } - - let w = parts[0].parse::<f64>().map_err(|_| { - ApiError::InvalidOption(format!( - "Invalid paperSize width: '{}'. Must be a number", - parts[0] - )) - })?; - - let h = parts[1].parse::<f64>().map_err(|_| { - ApiError::InvalidOption(format!( - "Invalid paperSize height: '{}'. Must be a number", - parts[1] - )) - })?; - - (w, h) - } - }; - - Ok(dimensions) -} -``` - -### 4. Documentation Links - -```rust -// Auto-generate documentation links - -fn documentation_link(error_code: &str) -> String { - match error_code { - "NAVIGATION" => "https://folio.dev/docs/troubleshooting#navigation-failed", - "TIMEOUT" => "https://folio.dev/docs/troubleshooting#timeout", - "INVALID_OPTION" => "https://folio.dev/docs/api#form-fields", - "RESOURCE_TIMEOUT" => "https://folio.dev/docs/troubleshooting#resource-failed", - "CHROMIUM_CRASH" => "https://folio.dev/docs/troubleshooting#chromium-crash", - _ => "https://folio.dev/docs/troubleshooting", - }.into() -} -``` - -## Expected Behaviour - -### Good Error (Resource Failed) - -```json -{ - "error": "Image not loaded", - "code": "RESOURCE_TIMEOUT", - "details": { - "url": "https://cdn.example.com/image.png", - "timeout_ms": 30000 - }, - "suggestion": "Add --form 'waitDelay=5s' or check URL accessibility. 
CDN may be blocking requests.", - "documentation": "https://folio.dev/docs/troubleshooting#resource-timeout" -} -``` - -### Good Error (Invalid Option) - -```json -{ - "error": "Invalid paperSize: 'A5'. Use 'A4', 'Letter', or 'W,H' format (e.g., '8.5,11')", - "code": "INVALID_OPTION", - "details": { - "field": "paperSize", - "value": "A5" - }, - "suggestion": "Valid values: A4, Letter, Legal, or 'W,H' (e.g., '8.5,11')", - "documentation": "https://folio.dev/docs/api#form-fields" -} -``` - -### Warning (Not Error) - -```json -{ - "result": "ok", - "warnings": [ - { - "code": "FONT_MISSING", - "message": "Font 'Comic Sans' not installed", - "suggestion": "Install in Docker: apt-get install fonts-comic-sans" - } - ] -} -``` - -## Test Plan - -### Unit Tests - -- `error_response_has_suggestion_field` -- `resource_error_collection_captures_failed_requests` -- `field_validation_returns_helpful_message` -- `documentation_link_matches_error_code` - -### Integration Tests - -- `navigation_error_returns_url_in_details` -- `timeout_error_suggests_increasing_timeout` -- `invalid_option_error_shows_valid_values` -- `resource_errors_list_all_failed_urls` - -## Acceptance - -- [ ] `ApiErrorResponse` struct with all fields -- [ ] All error variants return structured responses -- [ ] Resource error collection in Chromium -- [ ] Field-level validation with suggestions -- [ ] Documentation links for each error type -- [ ] Unit tests for error formatting -- [ ] Integration tests for all error scenarios -- [ ] `cargo clippy -p server -- -D warnings` clean - -## References - -- Gotenberg issue #1356: https://github.com/gotenberg/gotenberg/issues/1356 -- Gotenberg issue #921: https://github.com/gotenberg/gotenberg/issues/921 -- WeasyPrint issue #1926: https://github.com/Kozea/WeasyPrint/issues/1926 -- RFC 7807: Problem Details for HTTP APIs: https://tools.ietf.org/html/rfc7807 diff --git a/docs/specs/45-live-preview-mode.md b/docs/specs/45-live-preview-mode.md deleted file mode 100644 
index 94b6609..0000000 --- a/docs/specs/45-live-preview-mode.md +++ /dev/null @@ -1,292 +0,0 @@ -# Spec 45 — Live Preview Mode - -> Provide lightweight preview of HTML/URL before full PDF -> generation. Helps debug rendering issues - a unique Folio -> feature that Gotenberg cannot easily replicate. - -## Goal - -Create a live preview system that renders HTML/URL to -lightweight images for quick debugging. Solves the "why does -my PDF look bad?" problem (Gotenberg issue #921, discussion #861). - -## Problem Analysis - -### User Complaints (Gotenberg Discussions) - -> "Every so often a PDF generated with Gotenberg 8 will -> lack all fonts loaded with CSS @font-face... Tryed -> implementing waitForExpression as 'document.readyState === -> \"complete\"'... No idea what's going on" -> — Discussion #861 - -> "Numbers 6 and 8 get a bigger font size than other -> numbers after conversion... I suppose a workaround could -> be to rebuild the Docker container" -> — Issue #921 - -### Root Cause - -Users have no way to see what the browser is rendering BEFORE -generating the full PDF. They're flying blind. - -## Scope - -**In:** - -- `GET /preview/url?url=...` - Preview URL as image -- `POST /preview/html` - Preview HTML as image -- `GET /preview/markdown?url=...` - Preview Markdown -- Multiple preview formats: png, jpeg, webp -- Preview dimensions: viewport size, clip region -- Auto-refresh for iterative debugging -- Compare mode: before/after changes - -**Out:** - -- Full PDF preview (too heavy) -- Interactive browser session (complex) -- Screenshot comparison (separate tool) - -## Implementation - -### 1.
Preview Endpoints# - -```rust -// crates/server/src/routes/preview.rs - -use axum::extract::Query; - -#[derive(Deserialize)] -struct PreviewQuery { - url: String, - format: Option<String>, // png, jpeg, webp - width: Option<u32>, // viewport width - height: Option<u32>, // viewport height - clip_x: Option<f64>, - clip_y: Option<f64>, - clip_width: Option<f64>, - clip_height: Option<f64>, -} - -/// Preview URL as image. -pub async fn preview_url( - State(state): State<AppState>, - Query(query): Query<PreviewQuery>, -) -> ApiResult<impl IntoResponse> { - let start = Instant::now(); - - // Validate format - let format = query.format.as_deref().unwrap_or("png"); - if !["png", "jpeg", "webp"].contains(&format) { - return Err(ApiError::InvalidOption( - format!("Invalid format: '{}'. Use png/jpeg/webp", format) - )); - } - - // Build screenshot options - let mut opts = ScreenshotOptions::default(); - if let Some(w) = query.width { - opts.viewport_width = w; - } - if let Some(h) = query.height { - opts.viewport_height = h; - } - - // Capture screenshot - let result = state - .chromium - .as_ref() - .unwrap() - .screenshot_url(&query.url, &opts) - .await - .map_err(|e| ApiError::from(e))?; - - let duration = start.elapsed().as_secs_f64(); - tracing::info!( - url = %query.url, - format = %format, - duration_ms = duration * 1000.0, - "Preview generated" - ); - - // Return image - let content_type = match format { - "jpeg" => "image/jpeg", - "webp" => "image/webp", - _ => "image/png", - }; - - Ok(( - [(header::CONTENT_TYPE, HeaderValue::from_static(content_type))], - result, - )) -} -``` - -### 2. HTML Preview with Form# - -```rust -/// Preview HTML file as image. 
-pub async fn preview_html( - State(state): State<AppState>, - mp: Multipart, -) -> ApiResult<impl IntoResponse> { - let form = parse_multipart(mp).await?; - - let html = form.get("files") - .ok_or_else(|| ApiError::InvalidOption("HTML file required".into()))?; - - let mut opts = ScreenshotOptions::default(); - if let Some(format) = form.get("format") { - opts.format = format.clone(); - } - - let result = state - .chromium - .as_ref() - .unwrap() - .screenshot_html(html, None, &opts) - .await - .map_err(|e| ApiError::from(e))?; - - image_response(result, &opts.format) -} -``` - -### 3. Preview Options# - -```rust -// crates/engine/src/chromium/screenshot.rs - -pub struct ScreenshotOptions { - pub format: String, // png, jpeg, webp - pub quality: u8, // 1-100 for jpeg/webp - pub viewport_width: u32, // Default 1920 - pub viewport_height: u32, // Default 1080 - pub clip: Option<ClipRect>, - pub full_page: bool, // Screenshot full scrollable page -} - -pub struct ClipRect { - pub x: f64, - pub y: f64, - pub width: f64, - pub height: f64, -} -``` - -### 4. Compare Mode (Advanced)# - -```rust -/// Compare two versions side by side. 
-pub async fn preview_compare( - State(state): State<AppState>, - mp: Multipart, -) -> ApiResult<impl IntoResponse> { - let form = parse_multipart(mp).await?; - - let before = form.get("before") - .ok_or_else(|| ApiError::InvalidOption("'before' required".into()))?; - let after = form.get("after") - .ok_or_else(|| ApiError::InvalidOption("'after' required".into()))?; - - // Screenshot both - let img1 = state.chromium.as_ref().unwrap() - .screenshot_html(before, None, &Default::default()) - .await?; - let img2 = state.chromium.as_ref().unwrap() - .screenshot_html(after, None, &Default::default()) - .await?; - - // Create side-by-side comparison image - let comparison = create_comparison_image(&img1, &img2)?; - - image_response(comparison, "png") -} -``` - -## Form Fields# - -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `url` | string | required | URL to preview | -| `files` | file | required | HTML file to preview | -| `format` | string | "png" | Output format: png/jpeg/webp | -| `quality` | int | 90 | JPEG/WebP quality (1-100) | -| `width` | int | 1920 | Viewport width | -| `height` | int | 1080 | Viewport height | -| `fullPage` | bool | false | Capture full scrollable page | -| `clip.x` | float | 0 | Clip rectangle X | -| `clip.y` | float | 0 | Clip rectangle Y | -| `clip.width` | float | viewport | Clip width | -| `clip.height` | float | viewport | Clip height | - -## Expected Behaviour# - -### Preview URL - -```bash -# Quick preview -curl "http://localhost:3000/preview/url?url=https://example.com" -o preview.png - -# High-quality JPEG -curl "http://localhost:3000/preview/url?url=https://example.com&format=jpeg&quality=95" -o preview.jpg - -# Custom viewport -curl "http://localhost:3000/preview/url?url=https://example.com&width=375&height=667" -o mobile.png -``` - -### Preview HTML - -```bash -curl -X POST http://localhost:3000/preview/html \ - --form files=@index.html \ - --form format=png \ - -o preview.png -``` - -### 
Compare Mode - -```bash -curl -X POST http://localhost:3000/preview/compare \ - --form before=@old.html \ - --form after=@new.html \ - -o comparison.png -``` - -## Test Plan# - -### Unit Tests - -- `preview_url_returns_png_by_default` -- `preview_html_with_jpeg_format` -- `invalid_format_returns_400` -- `viewport_dimensions_applied` - -### Integration Tests# - -- `preview_url_returns_valid_image` -- `preview_html_screenshot_matches_viewport` -- `compare_mode_creates_side_by_side` -- `full_page_captures_scrollable_content` - -## Acceptance# - -- [ ] `GET /preview/url` endpoint -- [ ] `POST /preview/html` endpoint -- [ ] `GET /preview/markdown` endpoint -- [ ] Format selection: png/jpeg/webp -- [ ] Viewport dimensions applied -- [ ] Clip rectangle support -- [ ] Compare mode for debugging -- [ ] Unit tests for all endpoints -- [ ] Integration tests with real browser -- [ ] `cargo clippy -p server -- -D warnings` clean - -## References# - -- Gotenberg discussion #861: https://github.com/gotenberg/gotenberg/discussions/861 -- Gotenberg issue #921: https://github.com/gotenberg/gotenberg/issues/921 -- Chromium screenshot API: https://chromedevtools.github.io/devtools-protocol/1-3/Page/#method-captureScreenshot -- axum response handling: https://docs.rs/axum/latest/axum/response/ diff --git a/docs/specs/46-pdf-size-estimator.md b/docs/specs/46-pdf-size-estimator.md deleted file mode 100644 index 84a2ccf..0000000 --- a/docs/specs/46-pdf-size-estimator.md +++ /dev/null @@ -1,301 +0,0 @@ -# Spec 46 β€” PDF Size Estimator - -> Proactively warn users about PDF size before conversion. -> Solves the #1 complaint: "PDFs 8x larger than -> wkhtmltopdf" (Gotenberg issues #521, #1056, #1067). - -## Goal - -Create a pre-flight estimation system that analyses -HTML/CSS/fonts/images and predicts output PDF size. -Gives users actionable warnings BEFORE they waste -time converting a document that will be too large. 
- -## Problem Analysis - -### User Quotes (Gotenberg Issues) - -> "Gotenberg generates larger PDFs than Chromium, AthenaPDF -> and Firefox... noticed a significant increase of file -> size... This unfortunately broke our integration with other -> tools, which enforce a file size limit" -> — Issue #521 - -> "HTML to PDF file size 8X larger than wkhtmltopdf... -> We recently switched from wkhtmltopdf to Gotenberg..." -> — Issue #1056 - -> "Generated PDF sizes with v8.x are ~2-3x larger -> than same generated PDF on v7.x... 286kb vs 795kb" -> — Issue #1067 - -### Root Causes Identified - -| Factor | Size Impact | Detection Method | -|--------|------------|-------------------| -| Web fonts (Google Fonts) | +200% | Scan CSS for @font-face | -| White background paths (Chromium bug) | +50% | Check printBackground=false | -| Images not optimised | +300% | Check image dimensions | -| Font not installed locally | +100% | Compare with system fonts | -| No compression applied | +400% | Check if Ghostscript needed | - -## Scope - -**In:** - -- `POST /estimate` - Analyse HTML/URL and return size prediction -- `POST /estimate/batch` - Estimate multiple URLs -- Size breakdown: fonts, images, markup, overhead -- Warning thresholds: 5MB (warn), 10MB (error) -- Suggestions: install fonts, optimise images, use Ghostscript -- Factor analysis: what contributes most to size -- Comparison: vs Gotenberg, vs wkhtmltopdf - -**Out:** - -- Actual conversion (that's other endpoints) -- File size limits (policy, not estimation) -- Automatic optimisation (see spec-42) - -## Implementation - -### 1.
Estimation Endpoint# - -```rust -// crates/server/src/routes/estimate.rs - -#[derive(Deserialize)] -struct EstimateRequest { - url: Option<String>, - html: Option<String>, - files: Option<Vec<String>>, -} - -#[derive(Serialize)] -struct EstimateResponse { - estimated_size_mb: f64, - confidence: String, // "high", "medium", "low" - breakdown: SizeBreakdown, - warnings: Vec<String>, - suggestions: Vec<String>, - comparison: Option<Comparison>, -} - -#[derive(Serialize)] -struct SizeBreakdown { - fonts_mb: f64, - images_mb: f64, - markup_mb: f64, - overhead_mb: f64, -} - -pub async fn estimate( - State(state): State<AppState>, - Json(req): Json<EstimateRequest>, -) -> ApiResult<impl IntoResponse> { - let mut breakdown = SizeBreakdown { - fonts_mb: 0.0, - images_mb: 0.0, - markup_mb: 0.0, - overhead_mb: 0.5, // Base PDF overhead - }; - - let mut warnings = Vec::new(); - let mut suggestions = Vec::new(); - - // Analyse HTML/CSS - if let Some(ref html) = req.html { - let analysis = analyse_html(html).await?; - breakdown.markup_mb += analysis.markup_size_mb; - breakdown.fonts_mb += analysis.font_size_mb; - breakdown.images_mb += analysis.image_size_mb; - - if analysis.has_web_fonts { - warnings.push( - "Uses web fonts - may increase size by 200%".into() - ); - suggestions.push( - "Install fonts locally: apt-get install ttf-mscorefonts-installer".into() - ); - } - - if analysis.large_images { - warnings.push( - "Contains large images - consider optimisation".into() - ); - } - } - - // Estimate total - let estimated_mb = breakdown.fonts_mb - + breakdown.images_mb - + breakdown.markup_mb - + breakdown.overhead_mb; - - // Add warnings based on thresholds - if estimated_mb > 10.0 { - warnings.push(format!( - "Estimated size {:.1} MB exceeds 10 MB limit", - estimated_mb - )); - suggestions.push( - "Consider POST /forms/pdfengines/optimise after conversion".into() - ); - } else if estimated_mb > 5.0 { - warnings.push(format!( - "Estimated size {:.1} MB is quite large", - 
estimated_mb - )); - } - - Ok(Json(EstimateResponse { - estimated_size_mb: estimated_mb, - confidence: "medium".into(), - breakdown, - warnings, - suggestions, - comparison: None, // TODO: compare with Gotenberg - })) -} -``` - -### 2. HTML Analysis# - -```rust -// crates/server/src/analysis/html.rs - -struct HtmlAnalysis { - markup_size_mb: f64, - font_size_mb: f64, - image_size_mb: f64, - has_web_fonts: bool, - large_images: bool, -} - -async fn analyse_html(html: &str) -> Result<HtmlAnalysis, EngineError> { - let mut result = HtmlAnalysis { - markup_size_mb: (html.len() as f64) / 1_000_000.0, - font_size_mb: 0.0, - image_size_mb: 0.0, - has_web_fonts: false, - large_images: false, - }; - - // Check for web fonts - if html.contains("@font-face") { - result.has_web_fonts = true; - // Estimate: each web font ~500KB - let font_count = html.matches("@font-face").count(); - result.font_size_mb += font_count as f64 * 0.5; - } - - // Check for images - let img_pattern = regex::Regex::new(r#"img[^>]+src="([^"]+)""#).unwrap(); - for cap in img_pattern.captures_iter(html) { - let src = &cap[1]; - if src.starts_with("http") || src.starts_with("data:") { - result.large_images = true; - result.image_size_mb += 1.0; // Estimate - } - } - - Ok(result) -} -``` - -### 3. Batch Estimation# - -```rust -/// Estimate multiple URLs at once. 
-pub async fn estimate_batch( - State(state): State<AppState>, - Json(req): Json<Vec<String>>, -) -> ApiResult<impl IntoResponse> { - let mut results = Vec::new(); - - for url in req { - let estimate = estimate_single_url(&state, &url).await; - results.push((url, estimate)); - } - - Ok(Json(BatchEstimateResponse { results })) -} -``` - -## Expected Behaviour# - -### Estimation Request# - -```json -POST /estimate -{ - "html": "<html><head><style>@font-face { font-family: 'Comic Sans'; src: url(font.woff2); }</style></head><body><p>Hello</p><img src=\"large.jpg\"></body></html>" -} -``` - -### Estimation Response# - -```json -{ - "estimated_size_mb": 3.5, - "confidence": "medium", - "breakdown": { - "fonts_mb": 2.0, - "images_mb": 1.0, - "markup_mb": 0.002, - "overhead_mb": 0.5 - }, - "warnings": [ - "Uses web fonts - may increase size by 200%", - "Contains large images - consider optimisation" - ], - "suggestions": [ - "Install fonts locally: apt-get install ttf-mscorefonts-installer", - "Consider POST /forms/pdfengines/optimise after conversion" - ] -} -``` - -### Size Thresholds# - -| Estimated Size | Action | -|---------------|--------| -| <5 MB | βœ… Proceed (no warning) | -| 5-10 MB | ⚠️ Warning in response | -| >10 MB | πŸ”₯ Error suggestion + optimisation tip | - -## Test Plan# - -### Unit Tests# - -- `estimate_html_with_web_fonts` -- `estimate_html_with_large_images` -- `breakdown_calculates_correctly` -- `threshold_warnings_triggered` - -### Integration Tests# - -- `estimate_url_returns_valid_prediction` -- `batch_estimate_handles_10_urls` -- `web_fonts_warning_included` -- `optimisation_suggestion_provided` - -## Acceptance# - -- [ ] `POST /estimate` endpoint -- [ ] `POST /estimate/batch` endpoint -- [ ] Size breakdown: fonts/images/markup/overhead -- [ ] Warning thresholds: 5MB/10MB -- [ ] Web font detection -- [ ] Large image detection -- [ ] Suggestions for optimisation -- [ ] Unit tests for analysis functions -- [ ] Integration tests with real HTML -- 
[ ] `cargo clippy -p server -- -D warnings` clean - -## References - -- Gotenberg issue #521: https://github.com/gotenberg/gotenberg/issues/521 -- Gotenberg issue #1056: https://github.com/gotenberg/gotenberg/issues/1056 -- Gotenberg issue #1067: https://github.com/gotenberg/gotenberg/issues/1067 -- Web font size impact: https://github.com/puppeteer/puppeteer/issues/3939 diff --git a/docs/specs/47-one-command-install.md b/docs/specs/47-one-command-install.md deleted file mode 100644 index d7d8eaf..0000000 --- a/docs/specs/47-one-command-install.md +++ /dev/null @@ -1,430 +0,0 @@ -# Spec 47 — One-Command Install - -> Make Folio the easiest PDF generation tool to install. -> Gotenberg requires Docker + Chrome + LibreOffice setup. -> Folio should be: `curl -sSL https://folio.dev/install.sh | bash` - -## Goal - -Create a frictionless installation experience that gets -users from "nothing" to "first PDF in 30 seconds". -This is critical for adoption (see wkhtmltopdf, archived in 2023 -due to installation complexity). - -## Problem Analysis - -### Current State (Painful) - -#### Gotenberg (Requires Docker) - -```bash -# Gotenberg installation (complex) -docker pull gotenberg/gotenberg:8 -docker run -p 3000:3000 gotenberg/gotenberg:8 - -# Need Chrome + LibreOffice in container -# Custom fonts? Edit Dockerfile -# Upgrade? Re-pull image -``` - -#### Folio (Current State) - -```bash -# Install Rust (if not installed) -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh - -# Clone repo -git clone https://github.com/yourusername/folio.git -cd folio - -# Build (long!)
-cargo build --release -p server - -# Install Chrome + LibreOffice -apt-get install chromium libreoffice # Linux -brew install chromium libreoffice # macOS -``` - -### Desired State (One Command)# - -```bash -# The dream -curl -sSL https://folio.dev/install.sh | bash - -# Or via package managers -brew install folio -npm install -g folio -pip install folio -``` - -## Scope# - -**In:** - -- **Install scripts** for Linux (apt/yum), macOS (brew), Windows (chocolatey) -- **Pre-built binaries** for all platforms (GitHub Releases) -- **Package manager support**: Homebrew, npm, pip, cargo -- **Docker images** (slim + full variants) -- **Auto-detection** of Chrome/LibreOffice paths -- **Font installation** helper in install script -- **Post-install test**: verify conversion works - -**Out:** - -- Auto-update mechanism (security risk) -- In-app installation of Chrome/LibreOffice (complex) -- Cloud deployment (separate: spec-40) - -## Implementation# - -### 1. Install Script (Unix)# - -```bash -#!/bin/bash -# install.sh - One-command Folio installer -# Usage: curl -sSL https://folio.dev/install.sh | bash - -set -e - -FOLIO_VERSION="latest" -INSTALL_DIR="/usr/local/bin" -REPO="yourusername/folio" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -info() { - echo -e "${GREEN}[INFO]${NC} $1" -} - -warn() { - echo -e "${YELLOW}[WARN]${NC} $1" -} - -error() { - echo -e "${RED}[ERROR]${NC} $1" - exit 1 -} - -# Detect OS -detect_os() { - if [[ "$OSTYPE" == "linux-gnu"* ]]; then - echo "linux" - elif [[ "$OSTYPE" == "darwin"* ]]; then - echo "macos" - elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]]; then - echo "windows" - else - error "Unsupported OS: $OSTYPE" - fi -} - -OS=$(detect_os) -info "Detected OS: $OS" - -# Check for required tools -check_dependencies() { - if ! command -v curl &> /dev/null; then - error "curl is required but not installed" - fi - - if ! 
command -v tar &> /dev/null; then - error "tar is required but not installed" - fi -} - -# Download and install binary -install_folio() { - info "Downloading Folio $FOLIO_VERSION..." - - ARCH=$(uname -m) - case "$ARCH" in - x86_64) - ARCH="amd64" - ;; - aarch64|arm64) - ARCH="arm64" - ;; - *) - error "Unsupported architecture: $ARCH" - ;; - esac - - BINARY="folio-server-${OS}-${ARCH}.tar.gz" - DOWNLOAD_URL="https://github.com/${REPO}/releases/${FOLIO_VERSION}/download/${BINARY}" - - info "Downloading from $DOWNLOAD_URL" - curl -sSL -o /tmp/folio.tar.gz "$DOWNLOAD_URL" || error "Download failed" - - info "Installing to $INSTALL_DIR" - tar -xzf /tmp/folio.tar.gz -C "$INSTALL_DIR" - chmod +x "$INSTALL_DIR/folio-server" - - rm /tmp/folio.tar.gz -} - -# Check for Chrome/Chromium -check_chromium() { - if command -v chromium-browser &> /dev/null; then - info "Found Chromium: $(which chromium-browser)" - elif command -v chromium &> /dev/null; then - info "Found Chromium: $(which chromium)" - elif command -v google-chrome &> /dev/null; then - info "Found Chrome: $(which google-chrome)" - else - warn "Chromium/Chrome not found. Installing..." - if [[ "$OS" == "linux" ]]; then - if command -v apt-get &> /dev/null; then - sudo apt-get update && sudo apt-get install -y chromium-browser - elif command -v yum &> /dev/null; then - sudo yum install -y chromium - fi - elif [[ "$OS" == "macos" ]]; then - brew install chromium - fi - fi -} - -# Check for LibreOffice -check_libreoffice() { - if command -v soffice &> /dev/null; then - info "Found LibreOffice: $(which soffice)" - else - warn "LibreOffice not found. Installing..." 
- if [[ "$OS" == "linux" ]]; then - if command -v apt-get &> /dev/null; then - sudo apt-get update && sudo apt-get install -y libreoffice - elif command -v yum &> /dev/null; then - sudo yum install -y libreoffice - fi - elif [[ "$OS" == "macos" ]]; then - brew install libreoffice - fi - fi -} - -# Install common fonts -install_fonts() { - info "Installing common fonts..." - if [[ "$OS" == "linux" ]]; then - if command -v apt-get &> /dev/null; then - sudo apt-get install -y ttf-mscorefonts-installer || warn "Failed to install MS fonts" - fi - fi -} - -# Post-install test -test_installation() { - info "Testing installation..." - - # Start Folio in background - folio-server --port 13000 & - PID=$! - - sleep 3 - - # Test health endpoint - if curl -s http://localhost:13000/health | grep -q "up"; then - info "βœ… Folio is working!" - else - warn "Health check failed" - fi - - # Test conversion - echo "<h1>Test</h1>" > /tmp/test.html - if curl -s -X POST http://localhost:13000/forms/chromium/convert/html \ - --form files=@/tmp/test.html -o /tmp/test.pdf; then - info "βœ… PDF conversion works!" - else - warn "PDF conversion failed" - fi - - # Cleanup - kill $PID 2>/dev/null || true - rm /tmp/test.html /tmp/test.pdf 2>/dev/null || true -} - -# Main -main() { - info "Installing Folio..." - - check_dependencies - install_folio - check_chromium - check_libreoffice - install_fonts - test_installation - - info "βœ… Folio installation complete!" - info "Start Folio: folio-server --port 3000" - info "Convert HTML: curl -X POST http://localhost:3000/forms/chromium/convert/html --form files=@file.html" -} - -main -``` - -### 2. Package Manager Configs# - -#### Homebrew (macOS)# - -```ruby -# Formula/folio.rb -class Folio < Formula - desc "Modern, Rust-native PDF generation engine" - homepage "https://folio.dev" - url "https://github.com/yourusername/folio/releases/download/v0.1.0/folio-server-darwin-amd64.tar.gz" - sha256 "..." 
- - depends_on "chromium" - depends_on "libreoffice" - - def install - bin.install "folio-server" - (bin/"folio-server").chmod 0755 - end - - test do - system "#{bin}/folio-server", "--version" - end -end -``` - -#### npm (Node.js)# - -```json -{ - "name": "folio", - "version": "0.1.0", - "description": "Folio PDF generation - Gotenberg-compatible API", - "bin": { - "folio-server": "./bin/folio-server.js" - }, - "scripts": { - "postinstall": "node install.js" - }, - "dependencies": {} -} -``` - -#### PyPI (Python)# - -```python -# setup.py -from setuptools import setup - -setup( - name="folio", - version="0.1.0", - description="Folio PDF generation - Gotenberg-compatible API", - scripts=["bin/folio-server"], - install_requires=[], -) -``` - -### 3. GitHub Actions (Auto-release)# - -```yaml -# .github/workflows/release.yml -name: Release - -on: - push: - tags: - - 'v*' - -jobs: - release: - runs-on: ubuntu-latest - strategy: - matrix: - os: [linux, macos, windows] - arch: [amd64, arm64] - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-rust@v1 - - name: Build - run: cargo build --release -p server - - name: Package - run: | - tar -czf folio-server-${{ matrix.os }}-${{ matrix.arch }}.tar.gz \ - -C target/release folio-server - - name: Release - uses: softprops/action-gh-release@v1 - with: - files: folio-server-*.tar.gz -``` - -## Expected Behaviour# - -### One-Command Install# - -```bash -# Linux/macOS -curl -sSL https://folio.dev/install.sh | bash - -# Homebrew -brew install folio - -# npm -npm install -g folio - -# Python -pip install folio - -# Cargo -cargo install folio-server -``` - -### Post-Install Test# - -```bash -$ curl -sSL https://folio.dev/install.sh | bash -[INFO] Detected OS: linux -[INFO] Downloading Folio latest... -[INFO] Installing to /usr/local/bin -[INFO] Found Chromium: /usr/bin/chromium-browser -[INFO] Found LibreOffice: /usr/bin/soffice -[INFO] Installing common fonts... -[INFO] Testing installation... 
-[INFO] βœ… Folio is working! -[INFO] βœ… PDF conversion works! -[INFO] βœ… Folio installation complete! -[INFO] Start Folio: folio-server --port 3000 -``` - -## Test Plan# - -### Unit Tests# - -- `install_script_detects_linux` -- `install_script_detects_macos` -- `post_install_test_passes` - -### Integration Tests# - -- `one_command_install_linux` -- `one_command_install_macos` -- `homebrew_install_works` -- `npm_install_works` - -## Acceptance# - -- [ ] `install.sh` script for Unix-like systems -- [ ] Homebrew formula (macOS) -- [ ] npm package (Node.js) -- [ ] PyPI package (Python) -- [ ] GitHub Actions for auto-release -- [ ] Pre-built binaries for all platforms -- [ ] Auto-detection of Chrome/LibreOffice -- [ ] Post-install test suite -- [ ] `cargo clippy -p server -- -D warnings` clean - -## References# - -- Gotenberg Docker install: https://gotenberg.dev/docs/getting-started/installation -- Homebrew formula guide: https://docs.brew.sh/Formula-Cookbook/ -- npm package creation: https://docs.npmjs.com/creating-and-publishing-unscoped-public-packages -- PyPI packaging: https://packaging.python.org/tutorials/packaging-projects/ diff --git a/docs/specs/48-interactive-docs.md b/docs/specs/48-interactive-docs.md deleted file mode 100644 index 71ff274..0000000 --- a/docs/specs/48-interactive-docs.md +++ /dev/null @@ -1,312 +0,0 @@ -# Spec 48 β€” Interactive Documentation# - -> Built-in API explorer and interactive docs. Gotenberg -> has static docs only. Folio should have "Try it now" -> buttons, live testing, and interactive API exploration. - -## Goal# - -Create an interactive documentation system that lets users -test Folio endpoints directly from the browser. -No external tools needed - just visit `/docs` and start -converting. This dramatically lowers the barrier to entry. 
- -## Problem Analysis# - -### Current State (Bad)# - -#### Gotenberg# -- Static docs at `gotenberg.dev/docs` -- Users need `curl`/`postman` to test -- No way to "try before install" -- **User complaint**: *"I wish I could test if my HTML works before installing"* - -#### Folio (Current)# -- Static docs in `/docs/` -- Same problems as Gotenberg - -### Desired State (Good)# - -- Visit `http://localhost:3000/docs` -- See all endpoints with examples -- Click "Try it" β†’ auto-fills the form -- Submit β†’ see live response -- Share example URLs with team - -## Scope# - -**In:** - -- `GET /docs` - Interactive API explorer (HTML UI) -- `GET /docs/api/openapi.json` - OpenAPI/Swagger spec -- Live "Try it now" buttons on every endpoint -- Code samples in curl, Python, Node.js -- Response preview (PDF, JSON, image) -- Shareable example URLs -- Dark mode support - -**Out:** - -- Full Swagger UI (too heavy, build custom) -- API key management (separate feature) -- Rate limiting display (not needed for docs) - -## Implementation# - -### 1. OpenAPI Spec Generation# - -```rust -// crates/server/src/docs/openapi.rs# - -use serde::Serialize; - -#[derive(Serialize)] -struct OpenApiSpec { - openapi: String, - info: Info, - servers: Vec<Server>, - paths: HashMap<String, PathItem>, -} - -#[derive(Serialize)] -struct Info { - title: String, - version: String, - description: String, -} - -/// Generate OpenAPI 3.0 spec. -pub fn generate_openapi() -> OpenApiSpec { - let mut paths = HashMap::new(); - - // Chromium endpoints - paths.insert( - "/forms/chromium/convert/url".into(), - PathItem { - post: Some(Operation { - summary: "Convert URL to PDF".into(), - operation_id: Some("convertUrl".into()), - request_body: Some(RequestBody { - content: hashmap! { - "multipart/form-data" => MediaType { - schema: Some(schema_for_chromium_convert()) - } - }, - }), - responses: responses_for_pdf(), - .. - }), - } - ); - - // ... 
add all endpoints - - OpenApiSpec { - openapi: "3.0.0".into(), - info: Info { - title: "Folio API".into(), - version: env!("CARGO_PKG_VERSION").into(), - description: "Gotenberg-compatible PDF generation API".into(), - }, - servers: vec![ - Server { - url: "http://localhost:3000".into(), - description: Some("Local development".into()), - } - ], - paths, - } -} -``` - -### 2. Interactive HTML UI# - -```html -<!-- crates/server/assets/docs/index.html --> - -<!DOCTYPE html> -<html> -<head> - <title>Folio API Docs - - - -

📄 Folio API Documentation

- -
-

POST /forms/chromium/convert/url

-

Convert any URL to PDF

- - - -
- - -
-
-
- - - - -``` - -### 3. Endpoint Handler# - -```rust -// crates/server/src/routes/docs.rs# - -use axum::response::Html; - -/// Serve interactive API documentation. -pub async fn docs_handler() -> Html<&'static str> { - let html = include_str!("../../assets/docs/index.html"); - Html(html) -} - -/// Serve OpenAPI spec as JSON. -pub async fn openapi_handler() -> Json { - Json(generate_openapi()) -} -``` - -### 4. Router Integration# - -```rust -// crates/server/src/app.rs# - -Router::new() - .route("/docs", get(docs_handler)) - .route("/docs/api/openapi.json", get(openapi_handler)) - // ... other routes -``` - -### 5. "Try it Now" Code Samples# - -```javascript -// Code sample generator -function generateCurl(endpoint, fields) { - let cmd = `curl -X POST http://localhost:3000${endpoint} \\\n`; - for (let [key, value] of Object.entries(fields)) { - cmd += ` --form ${key}="${value}" \\\n`; - } - return cmd + ' -o output.pdf'; -} - -function generatePython(endpoint, fields) { - return `import requests - -response = requests.post( - "http://localhost:3000${endpoint}", - files={${Object.entries(fields).map(([k,v]) => `"${k}": open("${v}")`).join(', ')} -) -open("output.pdf", "wb").write(response.content)`; -} - -function generateNode(endpoint, fields) { - return `const axios = require('axios'); -const fs = require('fs'); - -const form = new FormData(); -${Object.entries(fields).map(([k,v]) => `form.append('${k}', '${v}');`).join('\n')} - -axios.post('http://localhost:3000${endpoint}', form) - .then(response => fs.writeFileSync('output.pdf', response.data));`; -} -``` - -## Expected Behaviour# - -### Visit `/docs`# - -``` -πŸ“„ Folio API Documentation - -[Endpoint List] -- POST /forms/chromium/convert/url [Try it now] -- POST /forms/chromium/convert/html [Try it now] -- ... 
- -[Interactive Tester] -URL: [https://example.com ] -[Convert] [View cURL] [View Python] [View Node] -``` - -### Response Preview# - -- PDF: Auto-downloads and opens in new tab -- JSON: Pretty-printed with syntax highlighting -- Image: Rendered inline - -### Shareable URLs# - -``` -http://localhost:3000/docs#endpoint=chromium-url&url=https://example.com -``` - -## Test Plan# - -### Unit Tests# - -- `openapi_spec_generates_valid_json` -- `code_sample_generator_curl` -- `code_sample_generator_python` - -### Integration Tests# - -- `docs_page_loads` -- `try_it_now_returns_pdf` -- `openapi_json_valid` - -## Acceptance# - -- [ ] `GET /docs` serves interactive UI -- [ ] `GET /docs/api/openapi.json` returns spec -- [ ] "Try it now" buttons on all endpoints -- [ ] Code samples in 3 languages -- [ ] PDF/JSON/image preview -- [ ] Dark mode support -- [ ] Shareable URLs -- [ ] Unit tests for OpenAPI generation -- [ ] Integration tests for docs page -- [ ] `cargo clippy -p server -- -D warnings` clean - -## References# - -- Swagger UI: https://swagger.io/tools/swagger-ui/ -- OpenAPI 3.0: https://spec.openapis.org/oas/v3.0.3 -- Gotenberg docs (static): https://gotenberg.dev/docs/ diff --git a/docs/specs/49-template-library.md b/docs/specs/49-template-library.md deleted file mode 100644 index 159de80..0000000 --- a/docs/specs/49-template-library.md +++ /dev/null @@ -1,363 +0,0 @@ -# Spec 49 β€” Template Library# - -> Pre-built document templates for common use cases. -> Users don't need to write HTML from scratch - just -> pick a template, fill in data, and get a perfect PDF. -> Unique to Folio (Gotenberg doesn't have this). - -## Goal# - -Create a library of professional document templates -that users can customize with their data. Solves the -"I don't know how to write HTML invoices" problem. - -## Problem Analysis# - -### Current State (Painful)# - -**User workflows:** -1. User needs an invoice PDF -2. Searches web for "HTML invoice template" -3. 
Downloads sketchy HTML from questionable sites -4. Struggles to customize it -5. Converts to PDF β†’ "Why does it look bad?" - -**Quote from Gotenberg Discussion:** -> "I wish there was an invoice template. I spent 3 hours -> tweaking HTML/CSS before getting a decent PDF." -> β€” Gotenberg Discussion #850 - -### Desired State (Easy)# - -1. User picks "Invoice Standard" template -2. Fills in JSON data: `{"company": "Acme", "amount": 1000}` -3. Gets perfect PDF in 2 seconds - -## Scope# - -**In:** - -- Template library at `GET /templates` -- Pre-built templates: - - Invoice (3 variants) - - Report (2 variants) - - Receipt (compact, thermal-printer friendly) - - Letter (business, personal) - - Certificate (award, completion) -- Template preview images at `GET /templates/{id}/preview` -- Data injection via JSON: `POST /forms/templates/{id}/render` -- Custom templates support (user-provided HTML) -- Template variables validation - -**Out:** - -- Template editor (too complex, use external tools) -- Drag-and-drop builder (separate product) -- Template marketplace (legal concerns) - -## Implementation# - -### 1. Template Definition# - -```rust -// crates/server/src/templates/mod.rs# - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Template { - pub id: String, - pub name: String, - pub description: String, - pub category: TemplateCategory, - pub thumbnail: String, // URL to preview image - pub fields: Vec, - pub html_template: String, // Mustache/Handlebars template -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum TemplateCategory { - Invoice, - Report, - Receipt, - Letter, - Certificate, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TemplateField { - pub name: String, - pub label: String, - pub field_type: FieldType, - pub required: bool, - pub default: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum FieldType { - String, - Number, - Date, - Boolean, - Image, // Base64 or URL -} -``` - -### 2. 
Built-in Templates# - -```rust -// crates/server/src/templates/builtin.rs# - -pub fn get_templates() -> Vec