diff --git a/CHANGELOG.md b/CHANGELOG.md index 77cedc5..9749b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## Unreleased + +### Added + +- Unit and public PDF smoke coverage for length-changing replacements when the + match appears at the beginning, middle, or end of a one-glyph-per-line text + object. +- Bbox alignment assertions now identify the checked coordinate and measured + delta without embedding decoded document text. + ## v0.1.5 | Public Length-Changing Fixture Maintenance release that replaces private positive length-changing smoke diff --git a/README.md b/README.md index d70f79f..d3a030e 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,9 @@ artifact paths, sizes, short hashes, and warnings, but not extracted bbox text. If `pdftotext` is missing or bbox extraction fails, mutation still succeeds and the report records a layout-evidence warning. For direct writes, exact mode records before/after extraction counts, while `--align left` and `--align right` -record numeric bbox edge deltas and pass/fail assertions. +record numeric bbox edge deltas and pass/fail assertions. Failed edge +assertions name the checked coordinate (`x_min` for left alignment, `x_max` for +right alignment) and the measured delta without embedding decoded document text. ## Synthetic Fixtures @@ -162,7 +164,9 @@ pdftotext work/public-length-right.pdf - | rg '13846|3734' The public length-changing smoke should report `layout_evidence.status: "ok"` and `alignment_assertions.status: "ok"` for both `--align left` and -`--align right`. +`--align right`. The test suite also exercises the same public fixture shape +with the replacement target at the beginning, middle, and end of a +one-glyph-per-line text object. The same helper is available from Python: diff --git a/ROADMAP.md b/ROADMAP.md index 846c8ce..6c037cf 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -263,7 +263,7 @@ Completed: ### M14 | Start-Glyph Alignment Hardening -Status: PLANNED +Status: DONE Goal: Broaden deterministic layout coverage around match positions inside a text object. @@ -281,6 +281,14 @@ Acceptance Criteria: - Failed alignment evidence points to the coordinate that violated the active contract without embedding decoded document text. +Completed: +- Added unit coverage for length-changing matches at the beginning, middle, + and end of one-glyph-per-line text objects. +- Expanded the public synthetic PDF smoke to validate left and right bbox + alignment for all three match positions. +- Added non-sensitive assertion fields that name the checked coordinate and + measured delta for faster failed-alignment triage. + ## Supporting Infrastructure Lane The following work is useful, but should not displace mutation-engine progress: diff --git a/pdf_mutation/layout.py b/pdf_mutation/layout.py index 174ff1e..63f3371 100644 --- a/pdf_mutation/layout.py +++ b/pdf_mutation/layout.py @@ -134,14 +134,20 @@ def bbox_alignment_assertions( if align == "right": passed = abs(right_delta) <= tolerance contract = "right_edge" + checked_edge = "x_max" + checked_delta = right_delta else: passed = abs(left_delta) <= tolerance contract = "left_edge" + checked_edge = "x_min" + checked_delta = left_delta assertions.append( { "index": index, "contract": contract, "passed": passed, + "checked_edge": checked_edge, + "checked_delta": decimal_report(checked_delta), "left_delta": decimal_report(left_delta), "right_delta": decimal_report(right_delta), "before": { diff --git a/tests/test_pdf_glyph_replace.py b/tests/test_pdf_glyph_replace.py index 6fdc49e..3ec7f9c 100644 --- a/tests/test_pdf_glyph_replace.py +++ b/tests/test_pdf_glyph_replace.py @@ -260,62 +260,98 @@ def test_left_aligned_replacement_at_text_start_does_not_insert_leading_td(self) self.assertIn(b"1 0 0 -1 653.375 1370 Tm\n<002B> Tj", edited) self.assertNotIn(b"Tm\n9.6 0 Td <002B> Tj", edited) + def test_length_changing_replacement_handles_match_positions(self): + cases = { + "start": ("3734 A", b"Tm\n<002B> Tj"), + "middle": ("A 3734 A", b"9.6 0 Td <002B> Tj"), + "end": ("A 3734", b"9.6 0 Td <002B> Tj"), + } + + for position, (text, first_replacement_line) in cases.items(): + with self.subTest(position=position, align="left"): + qdf = f.synthetic_qdf(text, one_glyph_per_line=True, x="653.375", y="1370") + edited, count = p.replace_qdf(qdf, "3734", "13846", align="left") + + self.assertEqual(count, 1) + self.assertIn(b"1 0 0 -1 653.375 1370 Tm", edited) + self.assertIn(first_replacement_line, edited) + + with self.subTest(position=position, align="right"): + qdf = f.synthetic_qdf(text, one_glyph_per_line=True, x="653.375", y="1370") + edited, count = p.replace_qdf(qdf, "3734", "13846", align="right") + + self.assertEqual(count, 1) + self.assertIn(b"1 0 0 -1 643.775 1370 Tm", edited) + self.assertIn(first_replacement_line, edited) + @unittest.skipUnless( all(shutil.which(tool) for tool in ("qpdf", "fix-qdf", "pdftotext")), "requires qpdf, fix-qdf, and pdftotext", ) - def test_public_pdf_fixture_smokes_left_and_right_bbox_alignment(self): + def test_public_pdf_fixture_smokes_positioned_left_and_right_bbox_alignment(self): + cases = { + "start": "3734 A", + "middle": "A 3734 A", + "end": "A 3734", + } with p.tempfile.TemporaryDirectory() as tmp: root = p.Path(tmp) - input_pdf = root / "public-length.pdf" - input_pdf.write_bytes( - f.synthetic_pdf("3734", one_glyph_per_line=True, x="653.375", y="1370") - ) - - for align in ("left", "right"): - output_pdf = root / f"public-length-{align}.pdf" - report_path = root / f"public-length-{align}.json" - bbox_dir = root / f"bbox-{align}" - - result = subprocess.run( - [ - sys.executable, - "pdf_glyph_replace.py", - str(input_pdf), - "3734", - "13846", - "--align", - align, - "-o", - str(output_pdf), - "--report", - str(report_path), - "--bbox-dir", - str(bbox_dir), - ], - cwd=p.Path(__file__).resolve().parents[1], - check=False, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + for position, text in cases.items(): + input_pdf = root / f"public-length-{position}.pdf" + input_pdf.write_bytes( + f.synthetic_pdf(text, one_glyph_per_line=True, x="653.375", y="1370") ) - self.assertEqual(result.returncode, 0, result.stderr.decode("utf-8")) - subprocess.run(["qpdf", "--check", str(output_pdf)], check=True) - extracted = subprocess.run( - ["pdftotext", str(output_pdf), "-"], - check=True, - stdout=subprocess.PIPE, - ).stdout.decode("utf-8") - self.assertIn("13846", extracted) - self.assertNotIn("3734", extracted) - - report = json.loads(report_path.read_text(encoding="utf-8")) - assertions = report["layout_evidence"]["alignment_assertions"] - self.assertEqual(assertions["status"], "ok") - self.assertEqual(assertions["align"], align) - self.assertEqual(assertions["checked_pairs"], 1) - self.assertTrue(assertions["assertions"][0]["passed"]) - self.assertNotIn("3734", json.dumps(report)) - self.assertNotIn("13846", json.dumps(report)) + + for align in ("left", "right"): + output_pdf = root / f"public-length-{position}-{align}.pdf" + report_path = root / f"public-length-{position}-{align}.json" + bbox_dir = root / f"bbox-{position}-{align}" + + result = subprocess.run( + [ + sys.executable, + "pdf_glyph_replace.py", + str(input_pdf), + "3734", + "13846", + "--align", + align, + "-o", + str(output_pdf), + "--report", + str(report_path), + "--bbox-dir", + str(bbox_dir), + ], + cwd=p.Path(__file__).resolve().parents[1], + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + self.assertEqual(result.returncode, 0, result.stderr.decode("utf-8")) + subprocess.run(["qpdf", "--check", str(output_pdf)], check=True) + extracted = subprocess.run( + ["pdftotext", str(output_pdf), "-"], + check=True, + stdout=subprocess.PIPE, + ).stdout.decode("utf-8") + self.assertIn("13846", extracted) + self.assertNotIn("3734", extracted) + + report = json.loads(report_path.read_text(encoding="utf-8")) + assertions = report["layout_evidence"]["alignment_assertions"] + assertion = assertions["assertions"][0] + self.assertEqual(assertions["status"], "ok") + self.assertEqual(assertions["align"], align) + self.assertEqual(assertions["checked_pairs"], 1) + self.assertTrue(assertion["passed"]) + self.assertEqual( + assertion["checked_edge"], + "x_min" if align == "left" else "x_max", + ) + self.assertEqual(assertion["checked_delta"], "0") + self.assertNotIn("3734", json.dumps(report)) + self.assertNotIn("13846", json.dumps(report)) def test_analyze_qdf_reports_feasibility(self): qdf = f.synthetic_qdf("3807", one_glyph_per_line=True) @@ -771,10 +807,14 @@ def test_bbox_alignment_assertions_check_left_and_right_edges_without_literal_te self.assertEqual(left["status"], "ok") self.assertEqual(left["assertions"][0]["contract"], "left_edge") + self.assertEqual(left["assertions"][0]["checked_edge"], "x_min") + self.assertEqual(left["assertions"][0]["checked_delta"], "0.2") self.assertEqual(left["assertions"][0]["left_delta"], "0.2") self.assertTrue(left["assertions"][0]["passed"]) self.assertEqual(right["status"], "ok") self.assertEqual(right["assertions"][0]["contract"], "right_edge") + self.assertEqual(right["assertions"][0]["checked_edge"], "x_max") + self.assertEqual(right["assertions"][0]["checked_delta"], "0.3") self.assertEqual(right["assertions"][0]["right_delta"], "0.3") self.assertTrue(right["assertions"][0]["passed"]) self.assertNotIn("37.34", str(left)) @@ -804,6 +844,8 @@ def test_bbox_alignment_assertions_warn_on_failed_contract(self): self.assertEqual(payload["status"], "warning") self.assertFalse(payload["assertions"][0]["passed"]) + self.assertEqual(payload["assertions"][0]["checked_edge"], "x_min") + self.assertEqual(payload["assertions"][0]["checked_delta"], "12") self.assertIn("failed", payload["warnings"][0]) def test_collect_bbox_evidence_warns_when_pdftotext_is_missing(self):