From b07106dc4982c91543fc1858693241529f9c2a03 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Mon, 25 May 2026 09:42:06 +0300 Subject: [PATCH 1/4] feat(review): add local browser review app + demo release candidate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `hletterscriptgen review` CLI subcommand that serves a single-page review UI at localhost:8765 (configurable with --port). - `src/hletterscriptgen/reviewer.py`: builds an HTML page that embeds all glyph images as base64 data URIs; no external CDN deps, pure stdlib HTTP server. UI features: - Sticky header with writer ID, generation date, and live progress bar - Sticky letter sidebar with per-variant verdict dots (green/red/orange) - Per-variant cards: pixelated glyph image, metadata table (size, ink_ratio, source entry, bbox, license), verdict buttons (Accept/Reject/Changes), comment textarea, Save button - `GET /feedback` / `POST /feedback` auto-persist to `.review_feedback.json` next to the letter-set file - "Export" button downloads feedback JSON; "Accept all unreviewed" bulk action - IntersectionObserver-driven sidebar highlight on scroll - Unsaved-changes guard on page unload - `scripts/make_demo_candidate.py`: stdlib-only synthetic glyph generator (zero extra deps beyond what the project already uses for tests). Draws 15 Hebrew letter shapes (Alef–Tav) using Bresenham line strokes, produces 29 PNG crops across 1-3 size variants per letter, writes a schema-valid `letter_set.v1` document. Usage: python3 scripts/make_demo_candidate.py hletterscriptgen review examples/demo_candidate/demo_writer_0001/letter_set.json - `tests/test_reviewer.py`: 37 tests covering utility functions, HTML builders, HTTP GET/POST handler, and CLI error paths; no flaky threading issues (one-shot `handle_request()` per test). - `pyproject.toml`: suppress RUF001 on reviewer.py (intentional Hebrew character dict keys). Tests: 175 pass, coverage 91.26%, ruff+mypy clean. 
Co-Authored-By: Claude Sonnet 4.6 --- .../demo_writer_0001/letter_set.json | 846 ++++++++++++++++++ .../letters/alef/alef-0001.png | Bin 0 -> 209 bytes .../letters/alef/alef-0002.png | Bin 0 -> 186 bytes .../letters/alef/alef-0003.png | Bin 0 -> 212 bytes .../letters/ayin/ayin-0001.png | Bin 0 -> 225 bytes .../letters/ayin/ayin-0002.png | Bin 0 -> 195 bytes .../demo_writer_0001/letters/bet/bet-0001.png | Bin 0 -> 107 bytes .../demo_writer_0001/letters/bet/bet-0002.png | Bin 0 -> 110 bytes .../letters/dalet/dalet-0001.png | Bin 0 -> 99 bytes .../letters/gimel/gimel-0001.png | Bin 0 -> 120 bytes .../letters/gimel/gimel-0002.png | Bin 0 -> 128 bytes .../demo_writer_0001/letters/he/he-0001.png | Bin 0 -> 106 bytes .../demo_writer_0001/letters/he/he-0002.png | Bin 0 -> 103 bytes .../demo_writer_0001/letters/mem/mem-0001.png | Bin 0 -> 105 bytes .../demo_writer_0001/letters/mem/mem-0002.png | Bin 0 -> 112 bytes .../demo_writer_0001/letters/nun/nun-0001.png | Bin 0 -> 154 bytes .../demo_writer_0001/letters/nun/nun-0002.png | Bin 0 -> 154 bytes .../demo_writer_0001/letters/pe/pe-0001.png | Bin 0 -> 113 bytes .../demo_writer_0001/letters/pe/pe-0002.png | Bin 0 -> 112 bytes .../letters/resh/resh-0001.png | Bin 0 -> 96 bytes .../letters/resh/resh-0002.png | Bin 0 -> 97 bytes .../letters/samekh/samekh-0001.png | Bin 0 -> 104 bytes .../letters/shin/shin-0001.png | Bin 0 -> 109 bytes .../letters/shin/shin-0002.png | Bin 0 -> 112 bytes .../letters/shin/shin-0003.png | Bin 0 -> 108 bytes .../demo_writer_0001/letters/tav/tav-0001.png | Bin 0 -> 108 bytes .../demo_writer_0001/letters/tav/tav-0002.png | Bin 0 -> 108 bytes .../demo_writer_0001/letters/vav/vav-0001.png | Bin 0 -> 95 bytes .../demo_writer_0001/letters/vav/vav-0002.png | Bin 0 -> 96 bytes .../letters/zayin/zayin-0001.png | Bin 0 -> 101 bytes pyproject.toml | 2 + scripts/make_demo_candidate.py | 506 +++++++++++ src/hletterscriptgen/cli.py | 40 + src/hletterscriptgen/reviewer.py | 733 +++++++++++++++ tests/test_reviewer.py 
| 515 +++++++++++ 35 files changed, 2642 insertions(+) create mode 100644 examples/demo_candidate/demo_writer_0001/letter_set.json create mode 100644 examples/demo_candidate/demo_writer_0001/letters/alef/alef-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/alef/alef-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/alef/alef-0003.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/ayin/ayin-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/ayin/ayin-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/bet/bet-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/bet/bet-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/dalet/dalet-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/gimel/gimel-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/gimel/gimel-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/he/he-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/he/he-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/mem/mem-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/mem/mem-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/nun/nun-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/nun/nun-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/pe/pe-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/pe/pe-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/resh/resh-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/resh/resh-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/samekh/samekh-0001.png create mode 100644 
examples/demo_candidate/demo_writer_0001/letters/shin/shin-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/shin/shin-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/shin/shin-0003.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/tav/tav-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/tav/tav-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/vav/vav-0001.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/vav/vav-0002.png create mode 100644 examples/demo_candidate/demo_writer_0001/letters/zayin/zayin-0001.png create mode 100644 scripts/make_demo_candidate.py create mode 100644 src/hletterscriptgen/reviewer.py create mode 100644 tests/test_reviewer.py diff --git a/examples/demo_candidate/demo_writer_0001/letter_set.json b/examples/demo_candidate/demo_writer_0001/letter_set.json new file mode 100644 index 0000000..7b89f01 --- /dev/null +++ b/examples/demo_candidate/demo_writer_0001/letter_set.json @@ -0,0 +1,846 @@ +{ + "schema_version": "letter_set.v1", + "writer_id": "demo_writer_0001", + "writer_label": "Demo Writer (synthetic glyphs — not a real person)", + "writer_provenance": { + "source_repo": "HeOCR/public-domain-hand-written-hebrew-scans", + "source_entry_ids": [ + "demo__manuscript_scan__p0001", + "demo__manuscript_scan__p0002", + "demo__manuscript_scan__p0003" + ], + "attribution_method": "fixture", + "notes": "Synthetic demo generated by scripts/make_demo_candidate.py." 
+ }, + "generator": { + "name": "hletterscriptgen", + "version": "0.1.0.dev0", + "config_hash": "0000000000000000000000000000000000000000000000000000000000000000" + }, + "generated_at": "2026-05-25T00:00:00Z", + "upstream": { + "repo": "HeOCR/public-domain-hand-written-hebrew-scans", + "revision": "0000000000000000000000000000000000000000" + }, + "letters": { + "א": [ + { + "variant_id": "alef-0001", + "asset_path": "letters/alef/alef-0001.png", + "checksum_sha256": "2ef89ff0e905f64d5dd863006e007032e4bdb8f74f0b64bca43138e64da1e5c2", + "image": { + "width_px": 48, + "height_px": 56, + "format": "png" + }, + "quality": { + "ink_ratio": 0.074 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 48, + "height": 56 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 48×56px)." + }, + { + "variant_id": "alef-0002", + "asset_path": "letters/alef/alef-0002.png", + "checksum_sha256": "b0a5281e5a0bbf5ae2ba7295b3239daa58c1d60c3bf2775e866a064ec97cf1d1", + "image": { + "width_px": 44, + "height_px": 52, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1241 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 44, + "height": 52 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 44×52px)." 
+ }, + { + "variant_id": "alef-0003", + "asset_path": "letters/alef/alef-0003.png", + "checksum_sha256": "091f65d989cb7ec8ebdf43397a3c5bfa4988ea5cfe6e4123acaef9929bb18ba3", + "image": { + "width_px": 52, + "height_px": 60, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0679 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0003", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0003", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 86, + "y": 104, + "width": 52, + "height": 60 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 3, 52×60px)." + } + ], + "ב": [ + { + "variant_id": "bet-0001", + "asset_path": "letters/bet/bet-0001.png", + "checksum_sha256": "05c6ee099fa57b110666933e7088044649fe296fca01e4802ef3e3e6391404a0", + "image": { + "width_px": 50, + "height_px": 40, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0965 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 50, + "height": 40 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 50×40px)." 
+ }, + { + "variant_id": "bet-0002", + "asset_path": "letters/bet/bet-0002.png", + "checksum_sha256": "0c0f8760e89ffb96ba835f025ada634bb6df3d986be000ee405d2feb4a4ceccd", + "image": { + "width_px": 46, + "height_px": 44, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1388 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 46, + "height": 44 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 46×44px)." + } + ], + "ג": [ + { + "variant_id": "gimel-0001", + "asset_path": "letters/gimel/gimel-0001.png", + "checksum_sha256": "9a89d53ce670ac3a4e3ac8eebb97233f03bdc9098daac8a59d9233260179f5bf", + "image": { + "width_px": 44, + "height_px": 50, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0645 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 44, + "height": 50 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 44×50px)." 
+ }, + { + "variant_id": "gimel-0002", + "asset_path": "letters/gimel/gimel-0002.png", + "checksum_sha256": "c304c1e71a99be9c6651188c4fdb81a3e1513d04b83c5d7a32e58ed247808ea4", + "image": { + "width_px": 48, + "height_px": 48, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0964 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 48, + "height": 48 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 48×48px)." + } + ], + "ד": [ + { + "variant_id": "dalet-0001", + "asset_path": "letters/dalet/dalet-0001.png", + "checksum_sha256": "a6c825edf23804e605c1dc61ec4fd66558fb240faddf94701ae9ad3bdb8ae269", + "image": { + "width_px": 46, + "height_px": 40, + "format": "png" + }, + "quality": { + "ink_ratio": 0.063 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 46, + "height": 40 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 46×40px)." 
+ } + ], + "ה": [ + { + "variant_id": "he-0001", + "asset_path": "letters/he/he-0001.png", + "checksum_sha256": "f940e276f17510122bd53c1f85417a002f75cc28081f9ce0170089de916a1ea7", + "image": { + "width_px": 50, + "height_px": 42, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0781 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 50, + "height": 42 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 50×42px)." + }, + { + "variant_id": "he-0002", + "asset_path": "letters/he/he-0002.png", + "checksum_sha256": "1283d973075e5b71a4d05210e50edbdf56d64d71e684effc034ea12c0e2910ae", + "image": { + "width_px": 46, + "height_px": 46, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1191 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 46, + "height": 46 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 46×46px)." 
+ } + ], + "ו": [ + { + "variant_id": "vav-0001", + "asset_path": "letters/vav/vav-0001.png", + "checksum_sha256": "abe5e6e52a13c167275d7715fec3e8073202d3ce5f748b97ba218c88527c6ae7", + "image": { + "width_px": 30, + "height_px": 48, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0611 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 30, + "height": 48 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 30×48px)." + }, + { + "variant_id": "vav-0002", + "asset_path": "letters/vav/vav-0002.png", + "checksum_sha256": "bc70358bbf5b103196eb039f7122fa9ac871c0fac52c37c96332fe545b1ab6bb", + "image": { + "width_px": 28, + "height_px": 52, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0927 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 28, + "height": 52 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 28×52px)." 
+ } + ], + "ז": [ + { + "variant_id": "zayin-0001", + "asset_path": "letters/zayin/zayin-0001.png", + "checksum_sha256": "2f4cc7416a831f3ffd86eb8fd76566d2e2728c0c7617a62d1f562a9e1fcb8c83", + "image": { + "width_px": 38, + "height_px": 44, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0813 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 38, + "height": 44 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 38×44px)." + } + ], + "מ": [ + { + "variant_id": "mem-0001", + "asset_path": "letters/mem/mem-0001.png", + "checksum_sha256": "91bb63433fd11ee59095f8aff963cd2ea1f1797cd75aec98b98c4bc8014cbb68", + "image": { + "width_px": 50, + "height_px": 44, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1068 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 50, + "height": 44 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 50×44px)." 
+ }, + { + "variant_id": "mem-0002", + "asset_path": "letters/mem/mem-0002.png", + "checksum_sha256": "7672722274950c767be5416b6c27d518ff4ba7179028175d3d438f0211b129b2", + "image": { + "width_px": 54, + "height_px": 48, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1501 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 54, + "height": 48 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 54×48px)." + } + ], + "נ": [ + { + "variant_id": "nun-0001", + "asset_path": "letters/nun/nun-0001.png", + "checksum_sha256": "e2ffc93a2278c25afcd4bec213f5806066e93aace9d5234459a427ae26249e9c", + "image": { + "width_px": 44, + "height_px": 50, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0677 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 44, + "height": 50 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 44×50px)." 
+ }, + { + "variant_id": "nun-0002", + "asset_path": "letters/nun/nun-0002.png", + "checksum_sha256": "98c3b67939c7e9a1df6cbc43e19ee34f59be4a7dbd3ad10bbed0af5cc21bb172", + "image": { + "width_px": 40, + "height_px": 48, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1141 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 40, + "height": 48 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 40×48px)." + } + ], + "ס": [ + { + "variant_id": "samekh-0001", + "asset_path": "letters/samekh/samekh-0001.png", + "checksum_sha256": "dc1d4523769d69885f9beed8f076a5f7f8f087a15111ae3ead334c402f56bbdc", + "image": { + "width_px": 46, + "height_px": 46, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1248 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 46, + "height": 46 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 46×46px)." 
+ } + ], + "ע": [ + { + "variant_id": "ayin-0001", + "asset_path": "letters/ayin/ayin-0001.png", + "checksum_sha256": "0799c0a6530b682bc0088ba09c6e7a9f89f7d1f807ef0c721a6c96e7456c9a31", + "image": { + "width_px": 50, + "height_px": 50, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0748 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 50, + "height": 50 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 50×50px)." + }, + { + "variant_id": "ayin-0002", + "asset_path": "letters/ayin/ayin-0002.png", + "checksum_sha256": "ff2b19b4236d0adb23fe7531d88a41fd8e84f6ac67683d23fe4e2d263c670fde", + "image": { + "width_px": 46, + "height_px": 48, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1277 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 46, + "height": 48 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 46×48px)." 
+ } + ], + "פ": [ + { + "variant_id": "pe-0001", + "asset_path": "letters/pe/pe-0001.png", + "checksum_sha256": "f6466d14e85853dcd67dea5fd18959139cb82303462b9434bfd4e28bbbe1d2b2", + "image": { + "width_px": 48, + "height_px": 52, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0869 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 48, + "height": 52 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 48×52px)." + }, + { + "variant_id": "pe-0002", + "asset_path": "letters/pe/pe-0002.png", + "checksum_sha256": "4f49f0b48ed34f00b79d476c26571f8fff9198a88662745fa570d4a042b0ae9e", + "image": { + "width_px": 44, + "height_px": 48, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1402 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 44, + "height": 48 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 44×48px)." 
+ } + ], + "ר": [ + { + "variant_id": "resh-0001", + "asset_path": "letters/resh/resh-0001.png", + "checksum_sha256": "d26966328e67060e18b24e257fdf5389909819a22e1bf8b47eb491bd67311ffa", + "image": { + "width_px": 46, + "height_px": 44, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0613 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 46, + "height": 44 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 46×44px)." + }, + { + "variant_id": "resh-0002", + "asset_path": "letters/resh/resh-0002.png", + "checksum_sha256": "e6a6514e94d3371b16f12e092b10ab3f6896abb1252cf85e430f1962c78d95db", + "image": { + "width_px": 42, + "height_px": 46, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0963 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 42, + "height": 46 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 42×46px)." 
+ } + ], + "ש": [ + { + "variant_id": "shin-0001", + "asset_path": "letters/shin/shin-0001.png", + "checksum_sha256": "f61026afe0c2642a1ba2532da56056d6b32c67c128f639db283c9a3b67769dad", + "image": { + "width_px": 54, + "height_px": 48, + "format": "png" + }, + "quality": { + "ink_ratio": 0.103 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 54, + "height": 48 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 54×48px)." + }, + { + "variant_id": "shin-0002", + "asset_path": "letters/shin/shin-0002.png", + "checksum_sha256": "292fef5dd799e31a440bff0fc0e9cfeb4177d9f95c6159f9302b08e8171ccd90", + "image": { + "width_px": 50, + "height_px": 52, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1635 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 50, + "height": 52 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 50×52px)." 
+ }, + { + "variant_id": "shin-0003", + "asset_path": "letters/shin/shin-0003.png", + "checksum_sha256": "1e4e82355d65d9488717f3ab0043039979e45d216049efb97705fe21683817fc", + "image": { + "width_px": 56, + "height_px": 44, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1035 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0003", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0003", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 86, + "y": 104, + "width": 56, + "height": 44 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 3, 56×44px)." + } + ], + "ת": [ + { + "variant_id": "tav-0001", + "asset_path": "letters/tav/tav-0001.png", + "checksum_sha256": "cdb39a8ca5008b107ead9a5d9267fa4881febb98dc57d4fe158a090d4c7810dd", + "image": { + "width_px": 50, + "height_px": 46, + "format": "png" + }, + "quality": { + "ink_ratio": 0.0835 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0001", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0001", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 62, + "y": 88, + "width": 50, + "height": 46 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 1, 50×46px)." 
+ }, + { + "variant_id": "tav-0002", + "asset_path": "letters/tav/tav-0002.png", + "checksum_sha256": "2385d3de5f7ff01eb13180bb86d4ee9cecd045ec954c75ab18ca647fa0910f7c", + "image": { + "width_px": 48, + "height_px": 50, + "format": "png" + }, + "quality": { + "ink_ratio": 0.1237 + }, + "source": { + "scan_entry_id": "demo__manuscript_scan__p0002", + "scan_url": "https://example.invalid/scans/demo__manuscript_scan__p0002", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": { + "x": 74, + "y": 96, + "width": 48, + "height": 50 + } + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": "Synthetic demo glyph (variant 2, 48×50px)." + } + ] + }, + "license_summary": { + "licenses": [ + "PDM-1.0" + ], + "notes": "All variants are synthetic demo fixtures under PDM-1.0." + } +} \ No newline at end of file diff --git a/examples/demo_candidate/demo_writer_0001/letters/alef/alef-0001.png b/examples/demo_candidate/demo_writer_0001/letters/alef/alef-0001.png new file mode 100644 index 0000000000000000000000000000000000000000..dbb364e0616eae0030392ea5d68cefef342139a2 GIT binary patch literal 209 zcmeAS@N?(olHy`uVBq!ia0vp^20(1V0VEjaS*e%-sTrOwjv*Dd-d+pjYf#`}zHsmV zC1bWuzGK&3O?TV(dqoFh;-drnajXoy4lFwumzX-R2sEA+W>C6uc)AU<57XKPb_s5- z3(uZ8p5OeyV^hzSjL#@y#>m>#sbvT_4eW}dU5c&!mZm(B0o8n{ABcr zujkz)F7nPf)S$`e{egl76Arj6h|vi7$NAus+zgI?_Ko_@ynFwxKX2o&76x*wr>mdK II;Vst0B{XV^#A|> literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/alef/alef-0002.png b/examples/demo_candidate/demo_writer_0001/letters/alef/alef-0002.png new file mode 100644 index 0000000000000000000000000000000000000000..cc3f6b8db037e32bf78aeb6bf62ef26d59ce7070 GIT binary patch literal 186 zcmeAS@N?(olHy`uVBq!ia0vp^IzVi~0VEi%WVLDosYXv1$B>F!Z?B%_Vo>C9zWDpU z{oTkOgRXuxzISha39Lv`>3R5QvBU&Jwr3*$a{3Gla*cZ$ZyvbBo0PzF=O%02+&1sD z-0K#%9%$-$9qMV4_OVWU@XI#ULdeFhN%G&CuIJkhYg@+|@W`-D+ttUbvo#~nT82&R 
kbj$|(0*N30Ha!2p95M0CG+#Bodmu-7y85}Sb4q9e0494!_y7O^ literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/alef/alef-0003.png b/examples/demo_candidate/demo_writer_0001/letters/alef/alef-0003.png new file mode 100644 index 0000000000000000000000000000000000000000..7045b8b9c461a71a8fd0d4076e9952e446d4fad4 GIT binary patch literal 212 zcmeAS@N?(olHy`uVBq!ia0vp^CO~Y%0VEiB9F9c+so9<`jv*Dd-d^+MJ7B=U?C}5p z<&e(S@XAd2dA94^@1l$VGm2{3nR-W{qE+V6d5nI_yyA+3Ljx%U14UZb*5uc zvxCARo9`8*uD0Oa=aaL}cbG97F`nIj(M2kXp;)wmEXF<;O MboFyt=akR{0HfejRsaA1 literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/ayin/ayin-0001.png b/examples/demo_candidate/demo_writer_0001/letters/ayin/ayin-0001.png new file mode 100644 index 0000000000000000000000000000000000000000..caa0badc5aab313dd44bc212ea9b9ab0797d009b GIT binary patch literal 225 zcmeAS@N?(olHy`uVBq!ia0vp^Mj*@qBp9qiuIB=&6`n4RAr-gYUOUKl$bhH$;s3w! zw`3*=TsY#FAg)q&YvFyJt-YpO>V8cA?R}`9^}b+}MTb(LqeQFC`v?Us&dC#|7jJao zP4xaHVW*hIY5C-B)Hav44XUL)jT4;u1hqLP3OI>~&yk-g;Cg79%x`0*hxe8yCwTlR z5q?(VlcU%+d$+=l=&bZr^H|*@1b^*37J6#Z1@p$tb?fqf)unh|>GJN>e4%~v{oliP Y7mdKI;Vst0EO^clmGw# literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/ayin/ayin-0002.png b/examples/demo_candidate/demo_writer_0001/letters/ayin/ayin-0002.png new file mode 100644 index 0000000000000000000000000000000000000000..3eec4d9ad66173fc5ebf8caa1dd46151ecd8af77 GIT binary patch literal 195 zcmeAS@N?(olHy`uVBq!ia0vp^dO&Qz0VEi_`NTm~m#2$kNX4zER|EMP6nK~~-1~oN z#smS8-G`2K$p6}DH`(=uOQG!#dG;sWx_!kSdyg)CA2~(){$x?UW4Z4if49o$UM&;# zzTlnwR?YUaGrxaaxacxR_-3nP#eD|#-h{}bpL?&*?-bti)#3ee+uXW+ v+wQMEt{JB}|1(b<&)g$tS!W!!|Ct?sT)*x~f$Ph2AZK~H`njxgN@xNAoWoU} literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/bet/bet-0001.png b/examples/demo_candidate/demo_writer_0001/letters/bet/bet-0001.png new file mode 100644 index 
0000000000000000000000000000000000000000..2b55acf782d29b6ab442f78164ec21c654087b71 GIT binary patch literal 107 zcmeAS@N?(olHy`uVBq!ia0vp^MnJ5=0VEh^N`9LF67Y0!45_%4{KMZULEFVdQ&MBb@ E0KY>YD*ylh literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/bet/bet-0002.png b/examples/demo_candidate/demo_writer_0001/letters/bet/bet-0002.png new file mode 100644 index 0000000000000000000000000000000000000000..da0e90562e393b9d5bf0456f249a87b94f303924 GIT binary patch literal 110 zcmeAS@N?(olHy`uVBq!ia0vp^dO)nh0VEjgrLH~zQl_3Rjv*Ddl7ILMB}jz&Iao3) zI;21SF~?@k6yc7!fln8oa-Yf{`cy@3+=y|i8+Z|+(r>mdK II;Vst07bhZRsaA1 literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/dalet/dalet-0001.png b/examples/demo_candidate/demo_writer_0001/letters/dalet/dalet-0001.png new file mode 100644 index 0000000000000000000000000000000000000000..098f7253f6ef04daf0c6fc36eb74bdb8bb90b7ce GIT binary patch literal 99 zcmeAS@N?(olHy`uVBq!ia0vp^dO)nf0VEifbWi^Vq_jL;978H@CI9djN|2~LCCS0q xHAT39`SAb1r;AUyPwm%|_c`TWRO+C@z_5Gu*7-q~@3De(d%F6$taD0e0s#H}9!LNH literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/gimel/gimel-0001.png b/examples/demo_candidate/demo_writer_0001/letters/gimel/gimel-0001.png new file mode 100644 index 0000000000000000000000000000000000000000..cf8c9e8ad8ece2fcfd2e24d328ea4697a74326c0 GIT binary patch literal 120 zcmeAS@N?(olHy`uVBq!ia0vp^IzVj10VEihjML13l)a~mV@SoVw^t4M7!-J%F5dfp z$ts+C<;yg?&)zo_t~oQ=99w97WS2A#Tl2x0+X~}iy7^yya5dFiaYWSg$Mp8Qde52i TyVj|RfsFKY^>bP0l+XkK=lUng literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/gimel/gimel-0002.png b/examples/demo_candidate/demo_writer_0001/letters/gimel/gimel-0002.png new file mode 100644 index 0000000000000000000000000000000000000000..7340a642441287ce059fdf17f1be4715fe2095c4 GIT binary patch literal 128 zcmeAS@N?(olHy`uVBq!ia0vp^1|ZA#!)f;ALV2%)&L6lUyx}WDO)dM0;4fd* a|9@ezSCk5G3!f$lGTPJC&t;ucLK6VP3M~l$ literal 0 HcmV?d00001 diff 
--git a/examples/demo_candidate/demo_writer_0001/letters/he/he-0001.png b/examples/demo_candidate/demo_writer_0001/letters/he/he-0001.png new file mode 100644 index 0000000000000000000000000000000000000000..a61126cc5f1b75488b3dd24fbc2d857b6d4dfb89 GIT binary patch literal 106 zcmeAS@N?(olHy`uVBq!ia0vp^MnJ5^0VEi%TnwKLqzpY>978H@CI9d@N|3nfv`&oe zPy<&KhgJK-{~xX$I$*FWHRSY)X@R;S8i{V%hM%@FFf0-7xV3GPtrf@+Pgg&ebxsLQ E05F;&lmGw# literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/he/he-0002.png b/examples/demo_candidate/demo_writer_0001/letters/he/he-0002.png new file mode 100644 index 0000000000000000000000000000000000000000..e9725835bbfca3cd400f2db46c31b2eacbba3e0e GIT binary patch literal 103 zcmeAS@N?(olHy`uVBq!ia0vp^dLYaJBp8Y=mAnRVJY5_^DsCnJ@E1yu2=#NYWL9+W zf66k)X3vzvt!`8Mwd6zXR?U%mdM4wnd+t+)pPwC-rd(RP3S@$(tDnm{r-UW|c@7~4 literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/mem/mem-0001.png b/examples/demo_candidate/demo_writer_0001/letters/mem/mem-0001.png new file mode 100644 index 0000000000000000000000000000000000000000..9a8e7dfaf11a9b6803b4a4d61c56c7221f9e4a8d GIT binary patch literal 105 zcmeAS@N?(olHy`uVBq!ia0vp^MnJ5?0VEihI<~X{DFaUz$B>F!$v^y!5+vqDNpLW{ z%20eTM{#LuV`9kZ71IKB7e~1*5X%Y|JG{4Q=|V9E$B?Ca4cBZ~2r|Rd)z4*}Q$iB} DjoKhg literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/mem/mem-0002.png b/examples/demo_candidate/demo_writer_0001/letters/mem/mem-0002.png new file mode 100644 index 0000000000000000000000000000000000000000..6f9621dc418d1367ceffa2a006eb48ba8d45f9fc GIT binary patch literal 112 zcmeAS@N?(olHy`uVBq!ia0vp^WF!$v^zX5+ts+xl1z} zCM?)0_;7LK^)>;St7fZmLvF9w7MSZCDc$?Z+(`T3#b-QMw=y!czL|6{^}f<6kWrql KelF{r5}E+snkBse literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/nun/nun-0001.png b/examples/demo_candidate/demo_writer_0001/letters/nun/nun-0001.png new file mode 100644 index 
0000000000000000000000000000000000000000..ce6e38b323dc38cb805605e2538be70b73ca1c40 GIT binary patch literal 154 zcmeAS@N?(olHy`uVBq!ia0vp^IzVj10VEihjML13RHCPgV@SoVw^t4M4g_#81-z?& z`ERRMg3tE4nem5Q4j6Of9TNE2=lcE8(e7%SnOAR{S)AE=@MP4S6-SRs#jHJcM2jbl z_1Uy!Kg+ObiJ6?&wk0JSw{7OEmfpDhX@lDJZ-?L88E5@YzbnpW539sm7* z86%z08{>LWbKUX<48o^<{)n(WXE}0GuI7W>3%zx_AMTb;*foPuOg*ubD{NUpInO~w z6?Xwyhce!_0#UE<1!a<1(;hyFdb}(3eO;@y{dIO#;d$E*zcsxBveeVn&t;ucLK6V} CgEe*l literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/pe/pe-0001.png b/examples/demo_candidate/demo_writer_0001/letters/pe/pe-0001.png new file mode 100644 index 0000000000000000000000000000000000000000..95f0c0e2cf518b3296fed050e1c1c02d50cf3923 GIT binary patch literal 113 zcmeAS@N?(olHy`uVBq!ia0vp^20(1W0VEh+{&?aGq%1sL978H@CI9eON|3lICGmk- zE91cX2@h8{HYSD~UeU&Q*&#_x)ya0F-!J=BV3Ee_20>BA3|Cd|OMO`BPgg&e IbxsLQ0D5jG5&!@I literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/pe/pe-0002.png b/examples/demo_candidate/demo_writer_0001/letters/pe/pe-0002.png new file mode 100644 index 0000000000000000000000000000000000000000..ce8a2df747358c197e19e47537a8a2c50fb30537 GIT binary patch literal 112 zcmeAS@N?(olHy`uVBq!ia0vp^IzVi|0VEjwe;>C6Qs$m6jv*Ddl7IL!B}f={Nq%73 zrFram$S)aP;&?bnif($${I%gu1yPcUeUPFg6)C{I^E Jmvv4FO#qhlA+Z1e literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/resh/resh-0001.png b/examples/demo_candidate/demo_writer_0001/letters/resh/resh-0001.png new file mode 100644 index 0000000000000000000000000000000000000000..0491d5c186fe4397a1e7a567a88697365d5a4872 GIT binary patch literal 96 zcmeAS@N?(olHy`uVBq!ia0vp^dO)nh0VEjgrLH~zQtF;Ajv*Ddl7ILMB}j;!lKjBf uHT7}vp%tG3pDsS-KDA#v?bnhIwOb`KyC~|KZ}TE4HiqTKrh1DnGs}RC@O1TaS?83{1OSS{ B9_au8 literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/shin/shin-0001.png b/examples/demo_candidate/demo_writer_0001/letters/shin/shin-0001.png new file mode 100644 index 
0000000000000000000000000000000000000000..bcc7d1ed69a34b9013b2ae321903c24ec3afbbd6 GIT binary patch literal 109 zcmeAS@N?(olHy`uVBq!ia0vp^WF!$v^zX5+ts+xl1!E zDo+sMk;;`g){`ZD)ofL6$n6!|0&|&H_bMp(-_9s*`*n+fe=bk)*`BHFAaguj{an^L HB{Ts5-zOlK literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/shin/shin-0002.png b/examples/demo_candidate/demo_writer_0001/letters/shin/shin-0002.png new file mode 100644 index 0000000000000000000000000000000000000000..f6481921f890f8a16984441cdaffe44a3c2d4872 GIT binary patch literal 112 zcmeAS@N?(olHy`uVBq!ia0vp^MnG)B0VEjS@|qL?DRWO3$B>F!$v^y!5+trNB^y1s z+&@9KD(V8S#8snJsUfFVObgUq9L2U%>`z0AuEnd`pshVerU^0helGhZ((!9G$S6-& KKbLh*2~7ajF(sY= literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/shin/shin-0003.png b/examples/demo_candidate/demo_writer_0001/letters/shin/shin-0003.png new file mode 100644 index 0000000000000000000000000000000000000000..3e80489800bca6b4a335af3710b0da740f45341a GIT binary patch literal 108 zcmeAS@N?(olHy`uVBq!ia0vp^7C@}S0VEhiSE={|DPvC;$B>F!$v^zn5+o9(Bspd_ ztw|`zJYtX$@_5Cuz~04D?xuZNOXj&XHN^BP@3`tH#K6G$VaAOq@F!$v^y+5+p84NpLhX zUTC=gcCnP3X-}5KRRbQA2JiijOBkd78r(4H347!0&CIa&n7)$|L-q-fF`lk|F6*2U FngF9SA1(j@ literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/vav/vav-0001.png b/examples/demo_candidate/demo_writer_0001/letters/vav/vav-0001.png new file mode 100644 index 0000000000000000000000000000000000000000..6de868c97929b8620b69146f13792d1c81812c69 GIT binary patch literal 95 zcmeAS@N?(olHy`uVBq!ia0vp^azJdr0VEid|4y|4Qfi(qjv*Ddl7IAz8}M*uvau*Q pOD$qqe|PaE5Ih`Sl_6GrgMnekerJ{@?o3^fPES`qmvv4FO#m`a8oB@g literal 0 HcmV?d00001 diff --git a/examples/demo_candidate/demo_writer_0001/letters/vav/vav-0002.png b/examples/demo_candidate/demo_writer_0001/letters/vav/vav-0002.png new file mode 100644 index 0000000000000000000000000000000000000000..0db6af56bc98d54d7fecb3731e10658c8efaba70 GIT binary patch literal 96 
zcmeAS@N?(olHy`uVBq!ia0vp^GC*v?0VEjq%w!h^QtF;Ajv*Ddl7IBG8}P(=vRN!x rr4quJ$KLDJ2ZVg?Cl>~Po2bRf@O8e5avpQZ6_8d>S3j3^P6R0VEi7&Q28pQJyZ2Ar-fhfB1_eNc=e^QNSdn xb@-tY+oC-Nt8zkauGkcqIz>Y|$kk-(6h;QSm|s2#u{jDL{hqFVF6*2UngHu=9M}K= literal 0 HcmV?d00001 diff --git a/pyproject.toml b/pyproject.toml index be3f9e3..b563533 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,6 +79,8 @@ select = [ [tool.ruff.lint.per-file-ignores] "tests/*" = ["B"] +# reviewer.py uses Hebrew characters as intentional dict keys — RUF001 false positives. +"src/hletterscriptgen/reviewer.py" = ["RUF001"] [tool.mypy] python_version = "3.11" diff --git a/scripts/make_demo_candidate.py b/scripts/make_demo_candidate.py new file mode 100644 index 0000000..02720d1 --- /dev/null +++ b/scripts/make_demo_candidate.py @@ -0,0 +1,506 @@ +#!/usr/bin/env python3 +"""Generate a synthetic demo release candidate for reviewing with the review UI. + +Creates: + + examples/demo_candidate/ + demo_writer_0001/ + letter_set.json — schema-valid letter_set.v1 document + letters/ + / + .png — synthetic glyph images (cv2-rendered) + +Usage:: + + python3 scripts/make_demo_candidate.py [--output DIR] + +Requires the ``cv`` extra (opencv-python-headless):: + + pip install -e ".[cv]" +""" +from __future__ import annotations + +import argparse +import hashlib +import json +import struct +import sys +import zlib +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +DEFAULT_OUT = REPO_ROOT / "examples" / "demo_candidate" + +# --------------------------------------------------------------------------- +# Minimal stdlib-only PNG encoder +# --------------------------------------------------------------------------- + + +def _png_from_pixels(pixels: list[list[int]]) -> bytes: + """Encode a 2-D list of 0-255 grayscale values as a PNG (stdlib only).""" + height = len(pixels) + width = len(pixels[0]) if height else 0 + + # Each row is prefixed by a filter byte (0 = None). 
+ raw = b"".join(b"\x00" + bytes(row) for row in pixels) + compressed = zlib.compress(raw, 9) + + def chunk(tag: bytes, data: bytes) -> bytes: + crc = zlib.crc32(tag + data) & 0xFFFF_FFFF + return struct.pack(">I", len(data)) + tag + data + struct.pack(">I", crc) + + # IHDR: width(4) height(4) bit-depth(1) color-type(1=grayscale=0) + # compression(1=0) filter(1=0) interlace(1=0) + ihdr_data = struct.pack(">II", width, height) + bytes([8, 0, 0, 0, 0]) + + return ( + b"\x89PNG\r\n\x1a\n" + + chunk(b"IHDR", ihdr_data) + + chunk(b"IDAT", compressed) + + chunk(b"IEND", b"") + ) + + +# --------------------------------------------------------------------------- +# Letter shape primitives +# --------------------------------------------------------------------------- + +WHITE = 255 +BLACK = 0 + + +def _blank(w: int, h: int) -> list[list[int]]: + return [[WHITE] * w for _ in range(h)] + + +def _hline(px: list[list[int]], y: int, x0: int, x1: int, t: int = 2) -> None: + """Draw a horizontal line.""" + h, w = len(px), len(px[0]) + for dy in range(t): + row = y + dy + if 0 <= row < h: + for x in range(max(0, x0), min(w, x1 + 1)): + px[row][x] = BLACK + + +def _vline(px: list[list[int]], x: int, y0: int, y1: int, t: int = 2) -> None: + """Draw a vertical line.""" + h, w = len(px), len(px[0]) + for dx in range(t): + col = x + dx + if 0 <= col < w: + for y in range(max(0, y0), min(h, y1 + 1)): + px[y][col] = BLACK + + +def _diag(px: list[list[int]], x0: int, y0: int, x1: int, y1: int, t: int = 2) -> None: + """Draw a straight line via integer Bresenham.""" + dx = abs(x1 - x0) + dy = abs(y1 - y0) + sx = 1 if x1 > x0 else -1 + sy = 1 if y1 > y0 else -1 + h, w = len(px), len(px[0]) + err = dx - dy + x, y = x0, y0 + while True: + for ox in range(t): + for oy in range(t): + nx, ny = x + ox, y + oy + if 0 <= nx < w and 0 <= ny < h: + px[ny][nx] = BLACK + if x == x1 and y == y1: + break + e2 = 2 * err + if e2 > -dy: + err -= dy + x += sx + if e2 < dx: + err += dx + y += sy + + +# 
--------------------------------------------------------------------------- +# Per-letter shape generators +# Any function here accepts (width, height, variant_index) and returns +# a PNG bytes object. +# --------------------------------------------------------------------------- + +def _shape_alef(w: int, h: int, v: int) -> bytes: + """Alef (א): diagonal cross + horizontal bar.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + # Main diagonal top-right to bottom-left + _diag(px, w - w // 5, h // 6, w // 5, h - h // 5, t) + # Left fork: bottom-left up to centre + _diag(px, w // 5, h - h // 5, w // 3, h // 2, t) + # Right fork: top-right down to centre + _diag(px, w - w // 5, h // 6, 2 * w // 3, h // 2, t) + # Small horizontal bar at centre-left + _hline(px, h // 2, w // 4, w // 2, t) + return _png_from_pixels(px) + + +def _shape_bet(w: int, h: int, v: int) -> bytes: + """Bet (ב): top horizontal + right vertical + bottom bar.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 6, w // 6, w - w // 6, t) # top + _vline(px, w - w // 5, h // 6, h - h // 5, t) # right + _hline(px, h - h // 5, w // 5, w - w // 5, t) # bottom + # Tiny left foot + _vline(px, w // 6, h // 6, h // 3, t) + return _png_from_pixels(px) + + +def _shape_gimel(w: int, h: int, v: int) -> bytes: + """Gimel (ג): right vertical + top horizontal + right-down hook.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _vline(px, w - w // 4, h // 6, h - h // 4, t) # right vertical + _hline(px, h // 6, w // 5, w - w // 4, t) # top horizontal + # Hook at bottom-right going down-left + _diag(px, w - w // 4, h - h // 4, w // 2, h - h // 8, t) + return _png_from_pixels(px) + + +def _shape_dalet(w: int, h: int, v: int) -> bytes: + """Dalet (ד): top horizontal + right vertical (no foot).""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 6, w // 6, w - w // 6, t) # top + _vline(px, w - w // 5, h // 6, h - h // 5, t) # right + return _png_from_pixels(px) + + +def _shape_he(w: int, h: 
int, v: int) -> bytes: + """He (ה): dalet + detached left vertical.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 6, w // 6, w - w // 6, t) + _vline(px, w - w // 5, h // 6, h - h // 5, t) + # Detached left vertical (doesn't touch top) + _vline(px, w // 5, h // 3, h - h // 5, t) + return _png_from_pixels(px) + + +def _shape_vav(w: int, h: int, v: int) -> bytes: + """Vav (ו): short cap + descending vertical.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + # Cap + _hline(px, h // 8, w // 3, 2 * w // 3, t) + _vline(px, w // 2 - t // 2, h // 8, 5 * h // 6, t) + return _png_from_pixels(px) + + +def _shape_zayin(w: int, h: int, v: int) -> bytes: + """Zayin (ז): top bar (long) + short descender from right.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 8, w // 6, 5 * w // 6, t + 1) # wide cap + _vline(px, 3 * w // 4, h // 8, 5 * h // 6, t) # right descender + return _png_from_pixels(px) + + +def _shape_mem(w: int, h: int, v: int) -> bytes: + """Mem (מ): closed square with left opening.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 8, w // 5, w - w // 5, t) # top + _vline(px, w - w // 5, h // 8, h - h // 5, t) # right + _hline(px, h - h // 5, w // 5, w - w // 5, t) # bottom + _vline(px, w // 5, h // 4, h - h // 5, t) # left (partial, open at top) + return _png_from_pixels(px) + + +def _shape_nun(w: int, h: int, v: int) -> bytes: + """Nun (נ): hook with descender.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 8, w // 4, 3 * w // 4, t) + _vline(px, 3 * w // 4, h // 8, h // 2, t) + # Descending diagonal from hook + _diag(px, 3 * w // 4, h // 2, w // 4, h - h // 8, t) + return _png_from_pixels(px) + + +def _shape_samekh(w: int, h: int, v: int) -> bytes: + """Samekh (ס): closed rectangle.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 8, w // 6, 5 * w // 6, t) + _hline(px, h - h // 8, w // 6, 5 * w // 6, t) + _vline(px, w // 6, h // 8, h - h // 8, t) + _vline(px, 5 * w // 6 
- t, h // 8, h - h // 8, t) + return _png_from_pixels(px) + + +def _shape_ayin(w: int, h: int, v: int) -> bytes: + """Ayin (ע): two diagonals meeting at bottom.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + cx = w // 2 + bot = h - h // 8 + _diag(px, w // 6, h // 8, cx, bot, t) + _diag(px, 5 * w // 6, h // 8, cx, bot, t) + return _png_from_pixels(px) + + +def _shape_pe(w: int, h: int, v: int) -> bytes: + """Pe (פ): circular top + descender.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 8, w // 5, 4 * w // 5, t) + _vline(px, 4 * w // 5, h // 8, h // 2, t) + _hline(px, h // 2, w // 4, 4 * w // 5, t) + _vline(px, w // 4, h // 4, h - h // 8, t) + return _png_from_pixels(px) + + +def _shape_resh(w: int, h: int, v: int) -> bytes: + """Resh (ר): top bar + right vertical descender.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 8, w // 5, 4 * w // 5, t) + _vline(px, 4 * w // 5, h // 8, h - h // 8, t) + return _png_from_pixels(px) + + +def _shape_shin(w: int, h: int, v: int) -> bytes: + """Shin (ש): three prongs.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + bot = h - h // 6 + _vline(px, w // 5, h // 8, bot, t) + _vline(px, w // 2 - t // 2, h // 5, bot, t) + _vline(px, 4 * w // 5, h // 8, bot, t) + _hline(px, bot, w // 5, 4 * w // 5, t) + return _png_from_pixels(px) + + +def _shape_tav(w: int, h: int, v: int) -> bytes: + """Tav (ת): top bar + left & right descenders, right foot.""" + px = _blank(w, h) + t = max(1, 2 + (v % 2)) + _hline(px, h // 8, w // 6, 5 * w // 6, t) + _vline(px, w // 6, h // 8, h - h // 5, t) + _vline(px, 5 * w // 6, h // 8, h - h // 3, t) + _hline(px, h - h // 3, 5 * w // 6, 5 * w // 6 + t + 3, t) # right foot + return _png_from_pixels(px) + + +# Mapping: Unicode char → shape function +_SHAPES = { + "א": _shape_alef, + "ב": _shape_bet, + "ג": _shape_gimel, + "ד": _shape_dalet, + "ה": _shape_he, + "ו": _shape_vav, + "ז": _shape_zayin, + "מ": _shape_mem, + "נ": _shape_nun, + "ס": _shape_samekh, + "ע": 
_shape_ayin, + "פ": _shape_pe, + "ר": _shape_resh, + "ש": _shape_shin, + "ת": _shape_tav, +} + +# (letter_char, letter_name, variants): each variant is a (width, height) tuple. +_DEMO_LETTERS: list[tuple[str, str, list[tuple[int, int]]]] = [ + ("א", "alef", [(48, 56), (44, 52), (52, 60)]), + ("ב", "bet", [(50, 40), (46, 44)]), + ("ג", "gimel", [(44, 50), (48, 48)]), + ("ד", "dalet", [(46, 40)]), + ("ה", "he", [(50, 42), (46, 46)]), + ("ו", "vav", [(30, 48), (28, 52)]), + ("ז", "zayin", [(38, 44)]), + ("מ", "mem", [(50, 44), (54, 48)]), + ("נ", "nun", [(44, 50), (40, 48)]), + ("ס", "samekh", [(46, 46)]), + ("ע", "ayin", [(50, 50), (46, 48)]), + ("פ", "pe", [(48, 52), (44, 48)]), + ("ר", "resh", [(46, 44), (42, 46)]), + ("ש", "shin", [(54, 48), (50, 52), (56, 44)]), + ("ת", "tav", [(50, 46), (48, 50)]), +] + + +# --------------------------------------------------------------------------- +# Ink-ratio computation (mirrors extractor.compute_ink_ratio) +# --------------------------------------------------------------------------- + + +def _ink_ratio(png_bytes: bytes) -> float: + """Parse a grayscale PNG and compute ink fraction (pixels < 128 / total).""" + import zlib as _zlib + import struct as _struct + + data = png_bytes + + def _read_chunk(pos: int) -> tuple[bytes, bytes, int]: + length = _struct.unpack_from(">I", data, pos)[0] + tag = data[pos + 4 : pos + 8] + chunk_data = data[pos + 8 : pos + 8 + length] + return tag, chunk_data, pos + 12 + length + + # Parse IHDR + pos = 8 # skip signature + tag, ihdr, pos = _read_chunk(pos) + width, height = _struct.unpack_from(">II", ihdr) + + # Collect IDAT chunks + idat_raw = b"" + while pos < len(data): + tag, cdata, pos = _read_chunk(pos) + if tag == b"IDAT": + idat_raw += cdata + elif tag == b"IEND": + break + + raw = _zlib.decompress(idat_raw) + # Each row: 1 filter byte + width bytes + ink = 0 + total = width * height + for row in range(height): + row_data = raw[row * (width + 1) + 1 : row * (width + 1) + 1 + width] + 
ink += sum(1 for b in row_data if b < 128) + return ink / total if total else 0.0 + + +# --------------------------------------------------------------------------- +# Release candidate builder +# --------------------------------------------------------------------------- + + +def _letter_set_name(name: str) -> str: + """Convert letter name to letter_set asset path fragment.""" + return name.replace("_", "-") + + +def build_demo_candidate(out_dir: Path) -> Path: + """Build the demo release candidate tree under *out_dir*. + + Returns the path to the ``letter_set.json`` file. + """ + writer_id = "demo_writer_0001" + writer_dir = out_dir / writer_id + letters_dir = writer_dir / "letters" + + letters_dict: dict[str, list[dict]] = {} + + for char, name, size_list in _DEMO_LETTERS: + shape_fn = _SHAPES.get(char) + if shape_fn is None: + print(f" skip {char} (no shape defined)") + continue + + variants: list[dict] = [] + for i, (w, h) in enumerate(size_list, start=1): + png_bytes = shape_fn(w, h, i - 1) + ink = _ink_ratio(png_bytes) + sha = hashlib.sha256(png_bytes).hexdigest() + + # Asset path relative to letter_set.json + fname = f"{name}-{i:04d}" + asset_rel = f"letters/{_letter_set_name(name)}/{fname}.png" + img_path = writer_dir / asset_rel + img_path.parent.mkdir(parents=True, exist_ok=True) + img_path.write_bytes(png_bytes) + + scan_entry = f"demo__manuscript_scan__p{i:04d}" + variants.append({ + "variant_id": f"{name}-{i:04d}", + "asset_path": asset_rel, + "checksum_sha256": sha, + "image": {"width_px": w, "height_px": h, "format": "png"}, + "quality": {"ink_ratio": round(ink, 4)}, + "source": { + "scan_entry_id": scan_entry, + "scan_url": f"https://example.invalid/scans/{scan_entry}", + "license": "PDM-1.0", + "rights_evidence": "Demo fixture — synthetic glyph, no real provenance.", + "bbox_in_source": {"x": 50 + i * 12, "y": 80 + i * 8, "width": w, "height": h}, + }, + "extracted_at": "2026-05-25T00:00:00Z", + "notes": f"Synthetic demo glyph (variant {i}, 
{w}×{h}px).", + }) + + letters_dict[char] = variants + print(f" {char} ({name}): {len(variants)} variant(s)") + + # Collect unique scan entry IDs and licenses + source_entry_ids = sorted({ + v["source"]["scan_entry_id"] + for vlist in letters_dict.values() + for v in vlist + }) + licenses = sorted({ + v["source"]["license"] + for vlist in letters_dict.values() + for v in vlist + }) + + letter_set = { + "schema_version": "letter_set.v1", + "writer_id": writer_id, + "writer_label": "Demo Writer (synthetic glyphs — not a real person)", + "writer_provenance": { + "source_repo": "HeOCR/public-domain-hand-written-hebrew-scans", + "source_entry_ids": source_entry_ids, + "attribution_method": "fixture", + "notes": "Synthetic demo generated by scripts/make_demo_candidate.py.", + }, + "generator": { + "name": "hletterscriptgen", + "version": "0.1.0.dev0", + "config_hash": "0" * 64, + }, + "generated_at": "2026-05-25T00:00:00Z", + "upstream": { + "repo": "HeOCR/public-domain-hand-written-hebrew-scans", + "revision": "0" * 40, + }, + "letters": letters_dict, + "license_summary": { + "licenses": licenses, + "notes": "All variants are synthetic demo fixtures under PDM-1.0.", + }, + } + + ls_path = writer_dir / "letter_set.json" + ls_path.write_text( + json.dumps(letter_set, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + return ls_path + + +def main() -> None: + ap = argparse.ArgumentParser(description="Generate a synthetic demo release candidate.") + ap.add_argument( + "--output", + type=Path, + default=DEFAULT_OUT, + metavar="DIR", + help=f"Output directory (default: {DEFAULT_OUT.relative_to(REPO_ROOT)})", + ) + args = ap.parse_args() + + out_dir = args.output + print(f"Generating demo release candidate in: {out_dir}") + ls_path = build_demo_candidate(out_dir) + total = sum( + len(v) for v in json.loads(ls_path.read_text())["letters"].values() + ) + print(f"\nWrote {ls_path}") + print(f"Total: {total} synthetic variants across {len(_DEMO_LETTERS)} letters") + print() 
+ print("To review:") + print(f" hletterscriptgen review {ls_path}") + + +if __name__ == "__main__": + main() diff --git a/src/hletterscriptgen/cli.py b/src/hletterscriptgen/cli.py index 6214da1..78b5a12 100644 --- a/src/hletterscriptgen/cli.py +++ b/src/hletterscriptgen/cli.py @@ -132,6 +132,33 @@ def _build_parser() -> argparse.ArgumentParser: help="Output format (default: json).", ) + review_p = sub.add_parser( + "review", + help="Serve a local browser-based review UI for a letter_set.json file.", + ) + review_p.add_argument( + "path", + type=Path, + help="Path to a letter_set.json file produced by 'generate'.", + ) + review_p.add_argument( + "--port", + type=int, + default=8765, + metavar="N", + help="Local port to serve on (default: 8765).", + ) + review_p.add_argument( + "--feedback", + type=Path, + default=None, + metavar="FILE", + help=( + "Path to the feedback JSON file (read on load, written on save). " + "Defaults to .review_feedback.json next to the letter-set file." + ), + ) + return parser @@ -275,6 +302,17 @@ def _cmd_scan_blobs(args: argparse.Namespace) -> int: return EXIT_OK +def _cmd_review(args: argparse.Namespace) -> int: + from hletterscriptgen.reviewer import serve + + try: + serve(args.path, port=args.port, feedback_path=args.feedback) + except (FileNotFoundError, ValueError) as exc: + print(str(exc), file=sys.stderr) + return EXIT_INPUT_ERROR + return EXIT_OK + + def main(argv: list[str] | None = None) -> int: parser = _build_parser() args = parser.parse_args(argv) @@ -291,5 +329,7 @@ def main(argv: list[str] | None = None) -> int: return _cmd_check_eligible(args) if args.command == "scan-blobs": return _cmd_scan_blobs(args) + if args.command == "review": + return _cmd_review(args) parser.error(f"unknown command: {args.command}") diff --git a/src/hletterscriptgen/reviewer.py b/src/hletterscriptgen/reviewer.py new file mode 100644 index 0000000..b0f4421 --- /dev/null +++ b/src/hletterscriptgen/reviewer.py @@ -0,0 +1,733 @@ +"""Local 
browser-based review server for a letter_set.v1 release candidate. + +Serves a single-page review UI that lets you scroll through every variant in a +``letter_set.json``, mark each one as *accepted*, *rejected*, or *changes +requested*, add free-text comments, and persist the feedback to a JSON file. + +Usage via the CLI:: + + hletterscriptgen review path/to/letter_set.json [--port 8765] + +Usage as a library:: + + from hletterscriptgen.reviewer import serve + from pathlib import Path + serve(Path("out/my_writer/letter_set.json"), port=8765) + +The feedback file (``.review_feedback.json`` next to the letter-set by +default, or the path passed as ``--feedback``) is auto-created on the first +``POST /feedback`` and read back on page load so a review session can be +resumed. +""" + +from __future__ import annotations + +import base64 +import http.server +import json +import threading +import webbrowser +from pathlib import Path +from typing import Any + +_FEEDBACK_FILENAME = ".review_feedback.json" + +_LETTER_NAMES: dict[str, str] = { + "א": "Alef", + "ב": "Bet", + "ג": "Gimel", + "ד": "Dalet", + "ה": "He", + "ו": "Vav", + "ז": "Zayin", + "ח": "Het", + "ט": "Tet", + "י": "Yod", + "כ": "Kaf", + "ך": "Kaf (final)", + "ל": "Lamed", + "מ": "Mem", + "ם": "Mem (final)", + "נ": "Nun", + "ן": "Nun (final)", + "ס": "Samekh", + "ע": "Ayin", + "פ": "Pe", + "ף": "Pe (final)", + "צ": "Tsadi", + "ץ": "Tsadi (final)", + "ק": "Qof", + "ר": "Resh", + "ש": "Shin", + "ת": "Tav", +} + + +# --------------------------------------------------------------------------- +# HTML template pieces (pure strings — no f-string so curly-braces are safe) +# --------------------------------------------------------------------------- + +_CSS = """ +*,*::before,*::after{box-sizing:border-box;margin:0;padding:0} +body{font-family:system-ui,-apple-system,sans-serif;font-size:14px; +background:#f0f2f5;color:#1a1a2e;display:flex;flex-direction:column;min-height:100vh} 
+code{font-size:.82em;background:#f0f0f0;padding:1px 4px;border-radius:3px} + +/* top header */ +.top-header{background:#1a1a2e;color:#fff;padding:.75rem 1.5rem; + display:flex;align-items:center;gap:1rem;flex-wrap:wrap; + position:sticky;top:0;z-index:100;box-shadow:0 2px 8px rgba(0,0,0,.35)} +.top-header h1{font-size:1rem;font-weight:600;white-space:nowrap} +.subtitle{font-size:.78rem;color:#aaa;margin-top:1px} +.progress-wrap{flex:1;min-width:180px} +.progress-label{font-size:.73rem;color:#bbb;margin-bottom:3px} +.progress-bar{height:6px;background:#333;border-radius:3px;overflow:hidden} +.progress-fill{height:100%;background:#4caf50;border-radius:3px;transition:width .3s} +.header-actions{display:flex;gap:.5rem} +.hdr-btn{padding:.3rem .75rem;font-size:.78rem;border-radius:4px; + cursor:pointer;border:none;font-weight:500} +.btn-export{background:#4a90d9;color:#fff} +.btn-export:hover{background:#357ab8} +.btn-accept-all{background:#2e7d32;color:#fff} +.btn-accept-all:hover{background:#1b5e20} + +/* layout */ +.main-layout{display:flex;flex:1} + +/* sidebar */ +.sidebar{width:195px;min-width:195px;background:#fff;border-right:1px solid #dde; + position:sticky;top:53px;height:calc(100vh - 53px);overflow-y:auto; + padding:.75rem .4rem} +.sidebar-title{font-size:.68rem;text-transform:uppercase;letter-spacing:.08em; + color:#aaa;padding:.25rem .6rem .5rem} +.letter-nav-item{display:flex;align-items:center;gap:.4rem;padding:.38rem .6rem; + border-radius:5px;text-decoration:none;color:inherit;margin-bottom:1px; + transition:background .12s;cursor:pointer} +.letter-nav-item:hover{background:#f0f2f8} +.letter-nav-item.active{background:#e8edf8;color:#1a3a7a;font-weight:600} +.lni-char{font-size:1.15rem;min-width:22px;text-align:center;direction:rtl} +.lni-name{flex:1;font-size:.78rem;color:#666;overflow:hidden; + white-space:nowrap;text-overflow:ellipsis} +.lni-count{font-size:.68rem;background:#eee;color:#777; + padding:1px 5px;border-radius:8px;flex-shrink:0} 
+.lni-dots{display:flex;gap:2px;margin-left:2px;flex-shrink:0} +.dot{width:7px;height:7px;border-radius:50%;background:#ccc;display:inline-block} +.dot.accept{background:#4caf50} +.dot.reject{background:#f44336} +.dot.changes{background:#ff9800} + +/* content */ +.content{flex:1;padding:1.25rem 1.5rem;min-width:0} + +/* letter sections */ +.letter-section{margin-bottom:2.25rem} +.letter-section-header{display:flex;align-items:center;gap:.75rem; + margin-bottom:1rem;padding-bottom:.5rem;border-bottom:2px solid #d0d4e8} +.lsh-char{font-size:2rem;direction:rtl;color:#1a1a2e;line-height:1} +.lsh-name{font-size:1.1rem;font-weight:600;color:#1a1a2e} +.lsh-count{font-size:.78rem;color:#888;background:#eee; + padding:2px 8px;border-radius:10px} + +/* variant cards */ +.variant-card{background:#fff;border-radius:8px;padding:1rem; + margin-bottom:.85rem;box-shadow:0 1px 3px rgba(0,0,0,.08); + border:2px solid transparent;transition:border-color .15s} +.variant-card.verdict-accept{border-color:#4caf50} +.variant-card.verdict-reject{border-color:#f44336} +.variant-card.verdict-changes{border-color:#ff9800} +.variant-card.dirty{border-style:dashed} + +.card-header{display:flex;align-items:center;gap:.6rem; + margin-bottom:.75rem;flex-wrap:wrap} +.card-id{font-family:monospace;font-size:.83rem;color:#555} +.card-letter{font-size:.88rem;color:#444;direction:rtl} +.quality-badge{font-size:.7rem;padding:2px 7px;border-radius:10px; + font-weight:600;margin-left:auto;flex-shrink:0} +.quality-ok{background:#e8f5e9;color:#2e7d32} +.quality-warn{background:#fff8e1;color:#e65100} +.quality-low{background:#fce4ec;color:#c62828} +.verdict-badge{font-size:.72rem;padding:2px 8px;border-radius:10px; + font-weight:600;flex-shrink:0} +.vb-accept{background:#4caf50;color:#fff} +.vb-reject{background:#f44336;color:#fff} +.vb-changes{background:#ff9800;color:#fff} + +.card-body{display:flex;gap:1.25rem;flex-wrap:wrap} + +.card-image{display:flex;flex-direction:column;align-items:center; + 
gap:.4rem;min-width:80px} +.glyph-img{image-rendering:pixelated;max-width:180px;min-width:48px; + border:1px solid #ddd;background:#fff;width:auto;height:auto} +.glyph-missing{width:80px;height:80px;background:#f5f5f5;border:1px dashed #ccc; + display:flex;align-items:center;justify-content:center;font-size:.7rem; + color:#aaa;text-align:center;padding:.5rem;border-radius:4px} +.image-dims{font-size:.68rem;color:#aaa} + +.card-meta{flex:1;min-width:180px} +.meta-table{border-collapse:collapse;width:100%;font-size:.8rem} +.meta-table th{text-align:left;color:#999;padding:2px 8px 2px 0; + white-space:nowrap;font-weight:500;vertical-align:top} +.meta-table td{padding:2px 0;color:#333;word-break:break-all} + +.card-review{flex:1;min-width:210px;display:flex;flex-direction:column;gap:.5rem} +.verdict-btns{display:flex;gap:.4rem;flex-wrap:wrap} +.verdict-btn{flex:1;padding:.38rem .5rem;font-size:.8rem; + border:2px solid transparent;border-radius:5px;cursor:pointer; + font-weight:500;background:#f5f5f5;color:#333;transition:all .15s; + min-width:80px} +.verdict-btn:hover{transform:translateY(-1px);box-shadow:0 2px 5px rgba(0,0,0,.15)} +.btn-accept{border-color:#4caf50} +.btn-accept:hover,.btn-accept.active{background:#4caf50;color:#fff} +.btn-reject{border-color:#f44336} +.btn-reject:hover,.btn-reject.active{background:#f44336;color:#fff} +.btn-changes{border-color:#ff9800} +.btn-changes:hover,.btn-changes.active{background:#ff9800;color:#fff} + +.comment-box{width:100%;font-size:.8rem;border:1px solid #ddd;border-radius:4px; + padding:.38rem .5rem;resize:vertical;font-family:inherit;color:#333} +.comment-box:focus{outline:none;border-color:#4a90d9} +.card-actions{display:flex;align-items:center;gap:.75rem} +.save-btn{padding:.3rem .9rem;font-size:.78rem;border:none;border-radius:4px; + background:#4a90d9;color:#fff;cursor:pointer;font-weight:500} +.save-btn:hover{background:#357ab8} +.saved-ok{font-size:.73rem;color:#4caf50} + +.variant-card.highlight{animation:hl .8s 
ease-out} +@keyframes hl{0%{box-shadow:0 0 0 4px #4a90d9}100%{box-shadow:none}} + +#toast{position:fixed;bottom:1.5rem;right:1.5rem;background:#222;color:#fff; + padding:.45rem 1rem;border-radius:6px;font-size:.8rem;z-index:999; + opacity:0;transition:opacity .25s;pointer-events:none} +#toast.show{opacity:1} +""" + +# Note: all JS curly braces are literal — this is NOT an f-string. +# Dynamic values are spliced in via .replace() calls in _build_html(). +_SCRIPT = r""" +const ALL_IDS = __ALL_IDS__; +let feedback = {}; +let dirty = new Set(); +let _currentVerdict = {}; + +// --- Feedback persistence --- +async function loadFeedback() { + try { + const r = await fetch('/feedback'); + if (r.ok) { feedback = await r.json(); restoreUI(); } + } catch(e) {} +} + +function restoreUI() { + for (const [vid, fb] of Object.entries(feedback)) { + if (fb.verdict) _applyVerdict(vid, fb.verdict); + const box = document.getElementById('comment-' + vid); + if (box && fb.comment) box.value = fb.comment; + markSaved(vid); + } + updateProgress(); + updateSidebar(); +} + +async function _persistFeedback() { + try { + await fetch('/feedback', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify(feedback), + }); + dirty.clear(); + } catch(e) { showToast('Save failed: ' + e.message, true); } +} + +// --- Verdict --- +function setVerdict(vid, verdict) { + const prev = _currentVerdict[vid]; + if (prev === verdict) { + delete _currentVerdict[vid]; + _applyVerdict(vid, null); + if (feedback[vid]) delete feedback[vid].verdict; + } else { + _currentVerdict[vid] = verdict; + _applyVerdict(vid, verdict); + feedback[vid] = feedback[vid] || {}; + feedback[vid].verdict = verdict; + } + updateProgress(); + updateSidebar(); + dirty.add(vid); +} + +function _applyVerdict(vid, verdict) { + _currentVerdict[vid] = verdict; + const card = document.getElementById('card-' + vid); + if (!card) return; + card.classList.remove('verdict-accept', 'verdict-reject', 
'verdict-changes'); + if (verdict) card.classList.add('verdict-' + verdict); + + card.querySelectorAll('.verdict-btn').forEach(b => b.classList.remove('active')); + if (verdict) { + const sel = verdict === 'accept' ? '.btn-accept' + : verdict === 'reject' ? '.btn-reject' : '.btn-changes'; + const btn = card.querySelector(sel); + if (btn) btn.classList.add('active'); + } + + const badge = document.getElementById('verdict-badge-' + vid); + if (badge) { + const labels = {accept: '✅ Accepted', reject: '❌ Rejected', changes: '🔄 Changes'}; + badge.textContent = verdict ? (labels[verdict] || verdict) : ''; + badge.className = 'verdict-badge' + (verdict ? ' vb-' + verdict : ''); + } +} + +function markDirty(vid) { + dirty.add(vid); + const card = document.getElementById('card-' + vid); + if (card && !feedback[vid]?.verdict) card.classList.add('dirty'); +} + +// --- Save card --- +async function saveCard(vid) { + const box = document.getElementById('comment-' + vid); + const comment = (box && box.value.trim()) || null; + feedback[vid] = feedback[vid] || {}; + if (comment) feedback[vid].comment = comment; + else delete feedback[vid].comment; + if (_currentVerdict[vid]) feedback[vid].verdict = _currentVerdict[vid]; + if (!Object.keys(feedback[vid]).length) delete feedback[vid]; + await _persistFeedback(); + markSaved(vid); + const card = document.getElementById('card-' + vid); + if (card) card.classList.remove('dirty'); + showToast('Saved ' + vid); +} + +function markSaved(vid) { + const el = document.getElementById('saved-' + vid); + if (el) { el.textContent = '✓ saved'; setTimeout(() => { if(el) el.textContent=''; }, 2500); } +} + +// --- Progress --- +function updateProgress() { + const reviewed = ALL_IDS.filter(id => feedback[id]?.verdict).length; + const total = ALL_IDS.length; + const pct = total ? 
(reviewed / total * 100).toFixed(0) : 0; + const lbl = document.getElementById('progress-label'); + const fill = document.getElementById('progress-fill'); + if (lbl) lbl.textContent = reviewed + ' / ' + total + ' reviewed'; + if (fill) fill.style.width = pct + '%'; +} + +// --- Sidebar dots --- +function _letterAnchorId(char) { + const pts = [...char].map(c => 'u' + c.codePointAt(0).toString(16).padStart(4, '0')); + return 'letter-' + pts.join(''); +} +function updateSidebar() { + document.querySelectorAll('.letter-nav-item').forEach(nav => { + const letter = nav.dataset.letter; + const dotsEl = nav.querySelector('.lni-dots'); + if (!dotsEl || !letter) return; + const sid = _letterAnchorId(letter); + const section = document.getElementById(sid); + if (!section) return; + const cards = section.querySelectorAll('.variant-card'); + dotsEl.innerHTML = ''; + cards.forEach(card => { + const vid = card.dataset.variantId; + const v = feedback[vid]?.verdict; + const dot = document.createElement('span'); + dot.className = 'dot' + (v ? 
' ' + v : ''); + dotsEl.appendChild(dot); + }); + }); +} + +// --- Accept all unreviewed --- +async function acceptAllUnreviewed() { + const unrev = ALL_IDS.filter(id => !feedback[id]?.verdict); + if (!unrev.length) { showToast('All variants already reviewed'); return; } + unrev.forEach(vid => { + _currentVerdict[vid] = 'accept'; + feedback[vid] = { ...(feedback[vid] || {}), verdict: 'accept' }; + _applyVerdict(vid, 'accept'); + }); + await _persistFeedback(); + updateProgress(); + updateSidebar(); + showToast('Accepted ' + unrev.length + ' unreviewed variant(s)'); +} + +// --- Export --- +function exportFeedback() { + const blob = new Blob([JSON.stringify(feedback, null, 2)], {type: 'application/json'}); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; a.download = 'review_feedback.json'; a.click(); + URL.revokeObjectURL(url); +} + +// --- Toast --- +let _toastTimer = null; +function showToast(msg, err=false) { + const el = document.getElementById('toast'); + if (!el) return; + el.textContent = msg; + el.style.background = err ? 
'#c00' : '#222'; + el.classList.add('show'); + if (_toastTimer) clearTimeout(_toastTimer); + _toastTimer = setTimeout(() => el.classList.remove('show'), 3200); +} + +// --- Active sidebar highlight on scroll --- +const _io = new IntersectionObserver(entries => { + for (const e of entries) { + if (e.isIntersecting) { + document.querySelectorAll('.letter-nav-item').forEach(n => n.classList.remove('active')); + const nav = document.querySelector('.letter-nav-item[href="#' + e.target.id + '"]'); + if (nav) nav.classList.add('active'); + } + } +}, {rootMargin: '-5% 0px -85% 0px'}); +document.querySelectorAll('.letter-section').forEach(s => _io.observe(s)); + +// Warn on unsaved changes +window.addEventListener('beforeunload', e => { + if (dirty.size > 0) { e.preventDefault(); e.returnValue = ''; } +}); + +loadFeedback(); +""" + + +# --------------------------------------------------------------------------- +# HTML builders +# --------------------------------------------------------------------------- + + +def _letter_anchor(char: str) -> str: + """Stable ASCII anchor for a Hebrew Unicode character, e.g. 
'u05d0'.""" + return "".join(f"u{ord(c):04x}" for c in char) + + +def _ink_quality(ink_ratio: float) -> tuple[str, str]: + """(label, css_class) for an ink_ratio in [0, 1].""" + if ink_ratio < 0.08: + return "Very sparse", "quality-low" + if ink_ratio < 0.15: + return "Sparse", "quality-warn" + if ink_ratio <= 0.60: + return "Normal", "quality-ok" + return "Dense", "quality-warn" + + +def _img_data_uri(path: Path) -> str | None: + """Return a base64 data URI for the image, or None if the file is absent.""" + if not path.exists(): + return None + ext = path.suffix.lower().lstrip(".") + mime = {"png": "image/png", "jpg": "image/jpeg", "jpeg": "image/jpeg", + "webp": "image/webp", "tiff": "image/tiff"}.get(ext, "image/png") + data = base64.b64encode(path.read_bytes()).decode() + return f"data:{mime};base64,{data}" + + +def _build_variant_card(variant: dict[str, Any], letter_char: str, base_dir: Path) -> str: + vid = variant["variant_id"] + asset_path = base_dir / variant["asset_path"] + uri = _img_data_uri(asset_path) + + w = variant["image"]["width_px"] + h = variant["image"]["height_px"] + fmt = variant["image"]["format"] + ink_ratio: float = variant["quality"]["ink_ratio"] + q_label, q_cls = _ink_quality(ink_ratio) + + source = variant.get("source", {}) + scan_id = source.get("scan_entry_id", "—") + lic = source.get("license", "—") + bbox = source.get("bbox_in_source", {}) + bbox_str = ( + f"x={bbox.get('x')}, y={bbox.get('y')}, " + f"w={bbox.get('width')}, h={bbox.get('height')}" + if bbox else "—" + ) + letter_name = _LETTER_NAMES.get(letter_char, letter_char) + + img_html = ( + f'{letter_char}' + if uri else + '
Image not found
' + ) + + return ( + f'
\n' + f'
\n' + f' {vid}\n' + f' {letter_char} {letter_name}\n' + f' {q_label} ({ink_ratio:.2f})\n' + f' \n' + f'
\n' + f'
\n' + f'
{img_html}' + f'
{w}\xd7{h} px
\n' + f'
\n' + f' \n' + f' \n' + f' \n' + f' \n' + f' \n' + f' \n' + f'
Format{fmt}
Size{w}\xd7{h} px
Ink ratio{ink_ratio:.3f}
Source{scan_id}
License{lic}
Bbox{bbox_str}
\n' + f'
\n' + f'
\n' + f' \n' + f' \n' + f' \n' + f'
\n' + f' \n' + f'
\n' + f' \n' + f' \n' + f'
\n' + f'
\n' + f'
\n' + f'
\n' + ) + + +def _build_sidebar(letter_set: dict[str, Any]) -> str: + parts: list[str] = [] + for char, variants in letter_set.get("letters", {}).items(): + name = _LETTER_NAMES.get(char, char) + anchor = _letter_anchor(char) + count = len(variants) + dots = "".join('' for _ in variants) + parts.append( + f'' + f'{char}' + f'{name}' + f'{count}' + f'{dots}' + f'' + ) + return "\n".join(parts) + + +def _build_sections(letter_set: dict[str, Any], base_dir: Path) -> tuple[str, list[str]]: + """Return (sections_html, all_variant_ids).""" + sections: list[str] = [] + all_ids: list[str] = [] + + for char, variants in letter_set.get("letters", {}).items(): + name = _LETTER_NAMES.get(char, char) + anchor = _letter_anchor(char) + n = len(variants) + s_label = "variant" if n == 1 else "variants" + cards = "".join(_build_variant_card(v, char, base_dir) for v in variants) + all_ids.extend(v["variant_id"] for v in variants) + sections.append( + f'
\n' + f'

' + f'{char}' + f'{name}' + f'{n} {s_label}' + f'

\n{cards}
\n' + ) + return "".join(sections), all_ids + + +def _build_html( + letter_set: dict[str, Any], + base_dir: Path, + feedback_path: Path, +) -> str: + writer_id = letter_set.get("writer_id", "") + writer_label = letter_set.get("writer_label", "") + generated_at = letter_set.get("generated_at", "") + date_str = generated_at[:10] if generated_at else "—" + + letters = letter_set.get("letters", {}) + total = sum(len(v) for v in letters.values()) + + sidebar_html = _build_sidebar(letter_set) + sections_html, all_ids = _build_sections(letter_set, base_dir) + + script = _SCRIPT.replace("__ALL_IDS__", json.dumps(all_ids)) + + label_line = f'
{writer_label}
\n' if writer_label else "" + title_esc = writer_id.replace("<", "<").replace(">", ">") + + return ( + "\n" + '\n\n' + '\n' + '\n' + f"Review — {title_esc}\n" + f"\n" + "\n\n" + '
\n' + "
\n" + f'

\U0001f50c Review — {writer_id}

\n' + f" {label_line}" + f'
Generated: {date_str}' + f" · {total} variant{'s' if total != 1 else ''}
\n" + "
\n" + '
\n' + '
' + f'0 / {total} reviewed
\n' + '
' + '
\n' + "
\n" + '
\n' + ' \n" + ' \n" + "
\n" + "
\n" + '
\n' + ' \n" + '
\n' + f" {sections_html}\n" + "
\n" + "
\n" + '
class _ReviewHandler(http.server.BaseHTTPRequestHandler):
    """Serve the pre-built review page and persist reviewer feedback.

    Routes
    ------
    ``GET /``
        The HTML page stored in ``_html``.
    ``GET /feedback``
        The stored feedback as a JSON object; ``{}`` when the feedback file
        is missing or does not hold a usable JSON object.
    ``POST /feedback``
        Replace the stored feedback with the posted JSON object.  Answers
        204 on success and 400 for a bad ``Content-Length``, an undecodable
        or non-JSON body, or a JSON value that is not an object.

    Every other path is answered with 404.
    """

    # Both attributes are assigned on the class by serve() before the server
    # starts handling requests.
    _html: str = ""
    _feedback_path: Path = Path(_FEEDBACK_FILENAME)

    def log_message(self, fmt: str, *args: object) -> None:
        """Discard the per-request log line."""

    def _send_status(self, code: int) -> None:
        """Send a response that consists of a status line only."""
        self.send_response(code)
        self.end_headers()

    def _send_body(self, content_type: str, body: bytes) -> None:
        """Send a 200 response carrying *body* with the given content type."""
        self.send_response(200)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _load_feedback(self) -> dict[str, Any]:
        """Read the feedback file, falling back to ``{}`` when it is unusable."""
        try:
            data = json.loads(self._feedback_path.read_text(encoding="utf-8"))
        except (FileNotFoundError, ValueError):
            # ValueError covers both invalid JSON and invalid UTF-8.
            return {}
        # The page iterates the payload as an object; anything else is
        # treated as "no feedback yet".
        return data if isinstance(data, dict) else {}

    def do_GET(self) -> None:
        """Serve the review page or the stored feedback."""
        if self.path == "/":
            self._send_body("text/html; charset=utf-8", self._html.encode("utf-8"))
        elif self.path == "/feedback":
            payload = json.dumps(self._load_feedback(), ensure_ascii=False, indent=2)
            self._send_body("application/json", payload.encode("utf-8"))
        else:
            self._send_status(404)

    def do_POST(self) -> None:
        """Validate the posted feedback object and write it to disk."""
        if self.path != "/feedback":
            self._send_status(404)
            return

        try:
            length = int(self.headers.get("Content-Length", 0))
        except ValueError:
            length = -1
        if length < 0:
            # A negative size would make rfile.read() wait for end-of-stream.
            self._send_status(400)
            return

        raw = self.rfile.read(length)
        try:
            data = json.loads(raw.decode("utf-8"))
        except ValueError:  # JSONDecodeError or UnicodeDecodeError
            self._send_status(400)
            return
        if not isinstance(data, dict):
            self._send_status(400)
            return

        self._feedback_path.write_text(
            json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True),
            encoding="utf-8",
        )
        self._send_status(204)
+ *, + port: int = 8765, + feedback_path: Path | None = None, +) -> None: + """Build a review page from *path* and serve it on *localhost:port*. + + Parameters + ---------- + path: + Absolute or relative path to a ``letter_set.json`` file. PNG assets + are resolved relative to ``path.parent``. + port: + TCP port to listen on (default: 8765). + feedback_path: + Where to read/write feedback JSON. Defaults to + ``.review_feedback.json`` next to *path*. + """ + path = Path(path).resolve() + base_dir = path.parent + if feedback_path is None: + feedback_path = base_dir / _FEEDBACK_FILENAME + + try: + letter_set: dict[str, Any] = json.loads(path.read_text(encoding="utf-8")) + except FileNotFoundError as exc: + raise FileNotFoundError(f"letter_set file not found: {path}") from exc + except json.JSONDecodeError as exc: + raise ValueError(f"letter_set file is not valid JSON: {exc}") from exc + + html = _build_html(letter_set, base_dir, feedback_path) + + # Patch the handler class attributes (one server instance at a time). 
+ _ReviewHandler._html = html + _ReviewHandler._feedback_path = feedback_path + + server = http.server.HTTPServer(("127.0.0.1", port), _ReviewHandler) + url = f"http://localhost:{port}/" + writer_id = letter_set.get("writer_id", path.name) + total = sum(len(v) for v in letter_set.get("letters", {}).values()) + + print(f"Review server: {url}") + print(f"Writer: {writer_id} ({total} variant(s))") + print(f"Feedback file: {feedback_path}") + print("Press Ctrl-C to stop.") + print() + + threading.Timer(0.4, lambda: webbrowser.open(url)).start() + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nStopped.") + + +__all__ = ["serve"] diff --git a/tests/test_reviewer.py b/tests/test_reviewer.py new file mode 100644 index 0000000..f77cda1 --- /dev/null +++ b/tests/test_reviewer.py @@ -0,0 +1,515 @@ +"""Tests for hletterscriptgen.reviewer — review app HTML builder and HTTP server.""" + +from __future__ import annotations + +import http.client +import json +import struct +import threading +import zlib +from http.server import HTTPServer +from pathlib import Path +from typing import Any + +import pytest + +from hletterscriptgen.reviewer import ( + _build_html, + _build_sections, + _build_sidebar, + _build_variant_card, + _img_data_uri, + _ink_quality, + _letter_anchor, + _ReviewHandler, + serve, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _minimal_png(width: int = 4, height: int = 4) -> bytes: + """Return a tiny valid grayscale PNG for testing.""" + raw = b"".join(b"\x00" + bytes([128] * width) for _ in range(height)) + compressed = zlib.compress(raw, 1) + + def chunk(tag: bytes, data: bytes) -> bytes: + crc = zlib.crc32(tag + data) & 0xFFFF_FFFF + return struct.pack(">I", len(data)) + tag + data + struct.pack(">I", crc) + + ihdr_data = struct.pack(">II", width, height) + bytes([8, 0, 0, 0, 0]) + return ( + 
b"\x89PNG\r\n\x1a\n" + + chunk(b"IHDR", ihdr_data) + + chunk(b"IDAT", compressed) + + chunk(b"IEND", b"") + ) + + +def _make_letter_set( + writer_id: str = "w1", + letters: dict[str, list[dict[str, Any]]] | None = None, +) -> dict[str, Any]: + if letters is None: + letters = { + "א": [ + { + "variant_id": "alef-0001", + "asset_path": "letters/alef/alef-0001.png", + "checksum_sha256": "a" * 64, + "image": {"width_px": 32, "height_px": 40, "format": "png"}, + "quality": {"ink_ratio": 0.25}, + "source": { + "scan_entry_id": "scan-001", + "license": "PDM-1.0", + "bbox_in_source": {"x": 10, "y": 20, "width": 32, "height": 40}, + }, + } + ] + } + return { + "schema_version": "letter_set.v1", + "writer_id": writer_id, + "writer_label": "Test Writer", + "generated_at": "2026-05-25T00:00:00Z", + "letters": letters, + } + + +# --------------------------------------------------------------------------- +# _letter_anchor +# --------------------------------------------------------------------------- + + +def test_letter_anchor_alef() -> None: + assert _letter_anchor("א") == "u05d0" + + +def test_letter_anchor_resh() -> None: + assert _letter_anchor("ר") == "u05e8" + + +def test_letter_anchor_tav() -> None: + assert _letter_anchor("ת") == "u05ea" + + +# --------------------------------------------------------------------------- +# _ink_quality +# --------------------------------------------------------------------------- + + +def test_ink_quality_very_sparse() -> None: + label, cls = _ink_quality(0.03) + assert label == "Very sparse" + assert cls == "quality-low" + + +def test_ink_quality_sparse() -> None: + label, cls = _ink_quality(0.10) + assert label == "Sparse" + assert cls == "quality-warn" + + +def test_ink_quality_normal() -> None: + label, cls = _ink_quality(0.30) + assert label == "Normal" + assert cls == "quality-ok" + + +def test_ink_quality_normal_upper_boundary() -> None: + label, cls = _ink_quality(0.60) + assert label == "Normal" + assert cls == "quality-ok" + + 
+def test_ink_quality_dense() -> None: + label, cls = _ink_quality(0.80) + assert label == "Dense" + assert cls == "quality-warn" + + +# --------------------------------------------------------------------------- +# _img_data_uri +# --------------------------------------------------------------------------- + + +def test_img_data_uri_returns_none_for_missing_file(tmp_path: Path) -> None: + result = _img_data_uri(tmp_path / "nonexistent.png") + assert result is None + + +def test_img_data_uri_encodes_png(tmp_path: Path) -> None: + png_path = tmp_path / "test.png" + png_bytes = _minimal_png() + png_path.write_bytes(png_bytes) + uri = _img_data_uri(png_path) + assert uri is not None + assert uri.startswith("data:image/png;base64,") + + +def test_img_data_uri_encodes_jpeg(tmp_path: Path) -> None: + # Minimal JPEG placeholder (just needs to be readable bytes) + jpg_path = tmp_path / "test.jpg" + jpg_path.write_bytes(b"\xff\xd8\xff\xe0" + b"\x00" * 100) + uri = _img_data_uri(jpg_path) + assert uri is not None + assert uri.startswith("data:image/jpeg;base64,") + + +def test_img_data_uri_fallback_mime_for_unknown_ext(tmp_path: Path) -> None: + weird = tmp_path / "test.xyz" + weird.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 40) + uri = _img_data_uri(weird) + assert uri is not None + assert "image/png" in uri # default MIME + + +# --------------------------------------------------------------------------- +# _build_variant_card +# --------------------------------------------------------------------------- + + +def test_variant_card_contains_variant_id(tmp_path: Path) -> None: + variant: dict[str, Any] = { + "variant_id": "alef-0042", + "asset_path": "letters/alef/alef-0042.png", + "checksum_sha256": "a" * 64, + "image": {"width_px": 30, "height_px": 40, "format": "png"}, + "quality": {"ink_ratio": 0.28}, + "source": { + "scan_entry_id": "s001", + "license": "PDM-1.0", + "bbox_in_source": {"x": 5, "y": 10, "width": 30, "height": 40}, + }, + } + html = 
_build_variant_card(variant, "א", tmp_path) + assert "alef-0042" in html + assert "id=\"card-alef-0042\"" in html + + +def test_variant_card_missing_image_shows_fallback(tmp_path: Path) -> None: + variant: dict[str, Any] = { + "variant_id": "alef-0001", + "asset_path": "letters/alef/missing.png", + "checksum_sha256": "a" * 64, + "image": {"width_px": 30, "height_px": 40, "format": "png"}, + "quality": {"ink_ratio": 0.25}, + "source": { + "scan_entry_id": "s001", + "license": "PDM-1.0", + "bbox_in_source": {"x": 5, "y": 10, "width": 30, "height": 40}, + }, + } + html = _build_variant_card(variant, "א", tmp_path) + assert "glyph-missing" in html + assert " None: + png_path = tmp_path / "letters" / "alef" + png_path.mkdir(parents=True) + (png_path / "alef-0001.png").write_bytes(_minimal_png()) + + variant: dict[str, Any] = { + "variant_id": "alef-0001", + "asset_path": "letters/alef/alef-0001.png", + "checksum_sha256": "a" * 64, + "image": {"width_px": 4, "height_px": 4, "format": "png"}, + "quality": {"ink_ratio": 0.28}, + "source": { + "scan_entry_id": "s001", + "license": "PDM-1.0", + "bbox_in_source": {"x": 5, "y": 10, "width": 4, "height": 4}, + }, + } + html = _build_variant_card(variant, "א", tmp_path) + assert ' None: + """Dense ink_ratio should produce the 'quality-warn' class.""" + variant: dict[str, Any] = { + "variant_id": "v1", + "asset_path": "x.png", + "checksum_sha256": "a" * 64, + "image": {"width_px": 10, "height_px": 10, "format": "png"}, + "quality": {"ink_ratio": 0.90}, + "source": {"scan_entry_id": "s", "license": "PDM-1.0", + "bbox_in_source": {"x": 0, "y": 0, "width": 10, "height": 10}}, + } + html = _build_variant_card(variant, "ב", tmp_path) + assert "quality-warn" in html + + +# --------------------------------------------------------------------------- +# _build_sidebar +# --------------------------------------------------------------------------- + + +def test_build_sidebar_contains_all_letters(tmp_path: Path) -> None: + ls = 
_make_letter_set( + letters={ + "א": [{"variant_id": "a1", "asset_path": "x.png", + "checksum_sha256": "a" * 64, + "image": {"width_px": 1, "height_px": 1, "format": "png"}, + "quality": {"ink_ratio": 0.2}, + "source": {"scan_entry_id": "s", "license": "PDM-1.0", + "bbox_in_source": {"x": 0, "y": 0, "width": 1, "height": 1}}}], + "ב": [{"variant_id": "b1", "asset_path": "y.png", + "checksum_sha256": "b" * 64, + "image": {"width_px": 1, "height_px": 1, "format": "png"}, + "quality": {"ink_ratio": 0.3}, + "source": {"scan_entry_id": "s", "license": "PDM-1.0", + "bbox_in_source": {"x": 0, "y": 0, "width": 1, "height": 1}}}], + } + ) + html = _build_sidebar(ls) + assert "u05d0" in html # Alef anchor + assert "u05d1" in html # Bet anchor + assert "letter-nav-item" in html + + +def test_build_sidebar_empty_letters() -> None: + html = _build_sidebar({"letters": {}}) + assert html == "" + + +# --------------------------------------------------------------------------- +# _build_sections +# --------------------------------------------------------------------------- + + +def test_build_sections_returns_all_ids(tmp_path: Path) -> None: + ls = _make_letter_set() + _, ids = _build_sections(ls, tmp_path) + assert ids == ["alef-0001"] + + +def test_build_sections_html_contains_section_id(tmp_path: Path) -> None: + ls = _make_letter_set() + html, _ = _build_sections(ls, tmp_path) + assert 'id="letter-u05d0"' in html + + +# --------------------------------------------------------------------------- +# _build_html +# --------------------------------------------------------------------------- + + +def test_build_html_contains_writer_id(tmp_path: Path) -> None: + ls = _make_letter_set(writer_id="my-writer-007") + html = _build_html(ls, tmp_path, tmp_path / ".feedback.json") + assert "my-writer-007" in html + + +def test_build_html_contains_progress_elements(tmp_path: Path) -> None: + ls = _make_letter_set() + html = _build_html(ls, tmp_path, tmp_path / ".feedback.json") + assert 
"progress-fill" in html + assert "progress-label" in html + + +def test_build_html_embeds_all_ids_in_script(tmp_path: Path) -> None: + ls = _make_letter_set() + html = _build_html(ls, tmp_path, tmp_path / ".feedback.json") + assert '"alef-0001"' in html # variant_id appears in the JS ALL_IDS array + + +def test_build_html_no_writer_label_skips_label_div(tmp_path: Path) -> None: + ls = _make_letter_set() + del ls["writer_label"] + html = _build_html(ls, tmp_path, tmp_path / ".feedback.json") + # The
...
label line should not appear + assert "Test Writer" not in html + + +def test_build_html_is_valid_html_scaffold(tmp_path: Path) -> None: + ls = _make_letter_set() + html = _build_html(ls, tmp_path, tmp_path / ".feedback.json") + assert html.startswith("") + assert "" in html + assert "