Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Changelog

## Unreleased

### Added

- Unit tests and public PDF smoke tests covering length-changing replacements
when the match appears at the beginning, middle, or end of a
one-glyph-per-line text object.
- Bbox alignment assertions now identify the checked coordinate and measured
delta without embedding decoded document text.

## v0.1.5 | Public Length-Changing Fixture

Maintenance release that replaces private positive length-changing smoke
Expand Down
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ artifact paths, sizes, short hashes, and warnings, but not extracted bbox text.
If `pdftotext` is missing or bbox extraction fails, mutation still succeeds and
the report records a layout-evidence warning. For direct writes, exact mode
records before/after extraction counts, while `--align left` and `--align right`
record numeric bbox edge deltas and pass/fail assertions.
record numeric bbox edge deltas and pass/fail assertions. Failed edge
assertions name the checked coordinate (`x_min` for left alignment, `x_max` for
right alignment) and the measured delta without embedding decoded document text.

## Synthetic Fixtures

Expand Down Expand Up @@ -162,7 +164,9 @@ pdftotext work/public-length-right.pdf - | rg '13846|3734'

The public length-changing smoke should report `layout_evidence.status: "ok"`
and `alignment_assertions.status: "ok"` for both `--align left` and
`--align right`.
`--align right`. The test suite also exercises the same public fixture shape
with the replacement target at the beginning, middle, and end of a
one-glyph-per-line text object.

The same helper is available from Python:

Expand Down
10 changes: 9 additions & 1 deletion ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ Completed:

### M14 | Start-Glyph Alignment Hardening

Status: PLANNED
Status: DONE

Goal: Broaden deterministic layout coverage around match positions inside a
text object.
Expand All @@ -281,6 +281,14 @@ Acceptance Criteria:
- Failed alignment evidence points to the coordinate that violated the active
contract without embedding decoded document text.

Completed:
- Added unit coverage for length-changing matches at the beginning, middle,
and end of one-glyph-per-line text objects.
- Expanded the public synthetic PDF smoke to validate left and right bbox
alignment for all three match positions.
- Added non-sensitive assertion fields that name the checked coordinate and
measured delta for faster failed-alignment triage.

## Supporting Infrastructure Lane

The following work is useful, but should not displace mutation-engine progress:
Expand Down
6 changes: 6 additions & 0 deletions pdf_mutation/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,20 @@ def bbox_alignment_assertions(
if align == "right":
passed = abs(right_delta) <= tolerance
contract = "right_edge"
checked_edge = "x_max"
checked_delta = right_delta
else:
passed = abs(left_delta) <= tolerance
contract = "left_edge"
checked_edge = "x_min"
checked_delta = left_delta
assertions.append(
{
"index": index,
"contract": contract,
"passed": passed,
"checked_edge": checked_edge,
"checked_delta": decimal_report(checked_delta),
"left_delta": decimal_report(left_delta),
"right_delta": decimal_report(right_delta),
"before": {
Expand Down
140 changes: 91 additions & 49 deletions tests/test_pdf_glyph_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,62 +260,98 @@ def test_left_aligned_replacement_at_text_start_does_not_insert_leading_td(self)
self.assertIn(b"1 0 0 -1 653.375 1370 Tm\n<002B> Tj", edited)
self.assertNotIn(b"Tm\n9.6 0 Td <002B> Tj", edited)

def test_length_changing_replacement_handles_match_positions(self):
    """Replace "3734" with "13846" at the start, middle, and end of a text object.

    For every match position and for both ``left`` and ``right`` alignment,
    the edit must report exactly one replacement, emit the expected ``Tm``
    line for that alignment, and contain the expected first replacement
    glyph line for that position.
    """
    # (position label, source text, first replacement glyph line expected in output)
    scenarios = (
        ("start", "3734 A", b"Tm\n<002B> Tj"),
        ("middle", "A 3734 A", b"9.6 0 Td <002B> Tj"),
        ("end", "A 3734", b"9.6 0 Td <002B> Tj"),
    )
    # (align mode, Tm line expected in output); right alignment expects the
    # x origin at 643.775 instead of the fixture's 653.375.
    expected_tm_by_align = (
        ("left", b"1 0 0 -1 653.375 1370 Tm"),
        ("right", b"1 0 0 -1 643.775 1370 Tm"),
    )

    for position, text, first_replacement_line in scenarios:
        for align, expected_tm in expected_tm_by_align:
            with self.subTest(position=position, align=align):
                # Build a fresh fixture per sub-test so cases stay independent.
                qdf = f.synthetic_qdf(
                    text,
                    one_glyph_per_line=True,
                    x="653.375",
                    y="1370",
                )
                edited, count = p.replace_qdf(qdf, "3734", "13846", align=align)

                self.assertEqual(count, 1)
                self.assertIn(expected_tm, edited)
                self.assertIn(first_replacement_line, edited)

@unittest.skipUnless(
all(shutil.which(tool) for tool in ("qpdf", "fix-qdf", "pdftotext")),
"requires qpdf, fix-qdf, and pdftotext",
)
def test_public_pdf_fixture_smokes_left_and_right_bbox_alignment(self):
def test_public_pdf_fixture_smokes_positioned_left_and_right_bbox_alignment(self):
cases = {
"start": "3734 A",
"middle": "A 3734 A",
"end": "A 3734",
}
with p.tempfile.TemporaryDirectory() as tmp:
root = p.Path(tmp)
input_pdf = root / "public-length.pdf"
input_pdf.write_bytes(
f.synthetic_pdf("3734", one_glyph_per_line=True, x="653.375", y="1370")
)

for align in ("left", "right"):
output_pdf = root / f"public-length-{align}.pdf"
report_path = root / f"public-length-{align}.json"
bbox_dir = root / f"bbox-{align}"

result = subprocess.run(
[
sys.executable,
"pdf_glyph_replace.py",
str(input_pdf),
"3734",
"13846",
"--align",
align,
"-o",
str(output_pdf),
"--report",
str(report_path),
"--bbox-dir",
str(bbox_dir),
],
cwd=p.Path(__file__).resolve().parents[1],
check=False,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
for position, text in cases.items():
input_pdf = root / f"public-length-{position}.pdf"
input_pdf.write_bytes(
f.synthetic_pdf(text, one_glyph_per_line=True, x="653.375", y="1370")
)
self.assertEqual(result.returncode, 0, result.stderr.decode("utf-8"))
subprocess.run(["qpdf", "--check", str(output_pdf)], check=True)
extracted = subprocess.run(
["pdftotext", str(output_pdf), "-"],
check=True,
stdout=subprocess.PIPE,
).stdout.decode("utf-8")
self.assertIn("13846", extracted)
self.assertNotIn("3734", extracted)

report = json.loads(report_path.read_text(encoding="utf-8"))
assertions = report["layout_evidence"]["alignment_assertions"]
self.assertEqual(assertions["status"], "ok")
self.assertEqual(assertions["align"], align)
self.assertEqual(assertions["checked_pairs"], 1)
self.assertTrue(assertions["assertions"][0]["passed"])
self.assertNotIn("3734", json.dumps(report))
self.assertNotIn("13846", json.dumps(report))

for align in ("left", "right"):
output_pdf = root / f"public-length-{position}-{align}.pdf"
report_path = root / f"public-length-{position}-{align}.json"
bbox_dir = root / f"bbox-{position}-{align}"

result = subprocess.run(
[
sys.executable,
"pdf_glyph_replace.py",
str(input_pdf),
"3734",
"13846",
"--align",
align,
"-o",
str(output_pdf),
"--report",
str(report_path),
"--bbox-dir",
str(bbox_dir),
],
cwd=p.Path(__file__).resolve().parents[1],
check=False,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
self.assertEqual(result.returncode, 0, result.stderr.decode("utf-8"))
subprocess.run(["qpdf", "--check", str(output_pdf)], check=True)
extracted = subprocess.run(
["pdftotext", str(output_pdf), "-"],
check=True,
stdout=subprocess.PIPE,
).stdout.decode("utf-8")
self.assertIn("13846", extracted)
self.assertNotIn("3734", extracted)

report = json.loads(report_path.read_text(encoding="utf-8"))
assertions = report["layout_evidence"]["alignment_assertions"]
assertion = assertions["assertions"][0]
self.assertEqual(assertions["status"], "ok")
self.assertEqual(assertions["align"], align)
self.assertEqual(assertions["checked_pairs"], 1)
self.assertTrue(assertion["passed"])
self.assertEqual(
assertion["checked_edge"],
"x_min" if align == "left" else "x_max",
)
self.assertEqual(assertion["checked_delta"], "0")
self.assertNotIn("3734", json.dumps(report))
self.assertNotIn("13846", json.dumps(report))

def test_analyze_qdf_reports_feasibility(self):
qdf = f.synthetic_qdf("3807", one_glyph_per_line=True)
Expand Down Expand Up @@ -771,10 +807,14 @@ def test_bbox_alignment_assertions_check_left_and_right_edges_without_literal_te

self.assertEqual(left["status"], "ok")
self.assertEqual(left["assertions"][0]["contract"], "left_edge")
self.assertEqual(left["assertions"][0]["checked_edge"], "x_min")
self.assertEqual(left["assertions"][0]["checked_delta"], "0.2")
self.assertEqual(left["assertions"][0]["left_delta"], "0.2")
self.assertTrue(left["assertions"][0]["passed"])
self.assertEqual(right["status"], "ok")
self.assertEqual(right["assertions"][0]["contract"], "right_edge")
self.assertEqual(right["assertions"][0]["checked_edge"], "x_max")
self.assertEqual(right["assertions"][0]["checked_delta"], "0.3")
self.assertEqual(right["assertions"][0]["right_delta"], "0.3")
self.assertTrue(right["assertions"][0]["passed"])
self.assertNotIn("37.34", str(left))
Expand Down Expand Up @@ -804,6 +844,8 @@ def test_bbox_alignment_assertions_warn_on_failed_contract(self):

self.assertEqual(payload["status"], "warning")
self.assertFalse(payload["assertions"][0]["passed"])
self.assertEqual(payload["assertions"][0]["checked_edge"], "x_min")
self.assertEqual(payload["assertions"][0]["checked_delta"], "12")
self.assertIn("failed", payload["warnings"][0])

def test_collect_bbox_evidence_warns_when_pdftotext_is_missing(self):
Expand Down