Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/tutorials/_quarto.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
project:
title: "PyRenew Tutorials"
pre-render: uv run ../../docs_scripts/cleanup_generated_md.py . # remove generated .md files
post-render: uv run ../../docs_scripts/add_markdown_to_divs.py . # see https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown/#markdown-in-html
post-render: uv run ../../docs_scripts/postprocess_generated_markdown.py . # adds markdown="1" to divs (see https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown/#markdown-in-html), drops img width/height, and adds alt-text paragraphs

format: gfm
engine: jupyter
32 changes: 0 additions & 32 deletions docs_scripts/add_markdown_to_divs.py

This file was deleted.

62 changes: 62 additions & 0 deletions docs_scripts/postprocess_generated_markdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# numpydoc ignore=GL08

import sys
from pathlib import Path

from bs4 import BeautifulSoup
from bs4.element import Tag


def _next_tag(element: Tag) -> Tag | None:  # numpydoc ignore=GL08
    # Return the tag that directly follows ``element`` among its siblings,
    # looking past whitespace-only nodes. If a node with visible text comes
    # first, or there are no further siblings, the result is None.
    significant_siblings = (
        node
        for node in element.next_siblings
        if isinstance(node, Tag) or str(node).strip()
    )
    # Only the first significant sibling matters: it is either the adjacent
    # tag or the text that separates ``element`` from any later tag.
    first = next(significant_siblings, None)
    return first if isinstance(first, Tag) else None


def _add_alt_text_paragraphs(soup: BeautifulSoup) -> None:  # numpydoc ignore=GL08
    # Insert a <p> holding each image's alt text right after the image,
    # modifying ``soup`` in place. Images whose alt text is missing or blank
    # are left untouched.
    for image in soup.find_all("img"):
        caption_text = image.get("alt", "").strip()
        if caption_text == "":
            continue

        # Skip images already followed by a paragraph with the same text, so
        # running the postprocessor again does not add a second caption.
        neighbor = _next_tag(image)
        already_captioned = (
            neighbor is not None
            and neighbor.name == "p"
            and neighbor.get_text(strip=True) == caption_text
        )
        if already_captioned:
            continue

        paragraph = soup.new_tag("p")
        paragraph.string = caption_text
        image.insert_after(paragraph)


def postprocess_generated_markdown(html: str) -> str:  # numpydoc ignore=GL08
    # Parse ``html``, apply three fixes, and return the re-serialized text:
    #   1. give every <div> that lacks one a markdown="1" attribute,
    #   2. drop the width and height attributes from every <img>,
    #   3. add a paragraph with the alt text after each <img> that has any.
    document = BeautifulSoup(html, "html.parser")

    for block in document.find_all("div"):
        # setdefault leaves an existing markdown attribute value untouched.
        block.attrs.setdefault("markdown", "1")

    for image in document.find_all("img"):
        for dimension in ("width", "height"):
            image.attrs.pop(dimension, None)

    _add_alt_text_paragraphs(document)

    # formatter=None serializes strings without entity substitution.
    return document.decode(formatter=None)


def _process_file(path: Path) -> None:  # numpydoc ignore=GL08
    # Rewrite ``path`` in place with its postprocessed contents and report it.
    original = path.read_text(encoding="utf-8")
    path.write_text(postprocess_generated_markdown(original), encoding="utf-8")
    print(f"Processed {path}")


if __name__ == "__main__":
    # Command-line entry point: takes one argument, either a single file to
    # process or a directory whose ``*.md`` files are processed recursively.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {Path(sys.argv[0]).name} <file-or-directory>")

    target = Path(sys.argv[1])
    if target.is_file():
        _process_file(target)
    elif target.is_dir():
        # Collect and sort the listing before rewriting anything, so the
        # order is stable and the walk is not affected by the rewrites.
        for markdown_file in sorted(target.rglob("*.md")):
            _process_file(markdown_file)
    else:
        # A mistyped path used to be skipped silently with exit status 0.
        sys.exit(f"{target} is not a file or directory")
39 changes: 39 additions & 0 deletions test/test_docs_postprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# numpydoc ignore=GL08

import importlib.util
from pathlib import Path


def _load_postprocessor():  # numpydoc ignore=GL08
    # Load docs_scripts/postprocess_generated_markdown.py directly from its
    # file path and return the executed module object. The script is located
    # relative to the directory one level above this test file's folder.
    module_name = "postprocess_generated_markdown"
    base_dir = Path(__file__).parents[1]
    script_path = base_dir / "docs_scripts" / "postprocess_generated_markdown.py"

    spec = importlib.util.spec_from_file_location(module_name, script_path)
    loaded = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(loaded)
    return loaded


def test_postprocess_generated_markdown_adds_alt_text_paragraph():  # numpydoc ignore=GL08
    # An image with alt text inside a div: the div gains markdown="1", the
    # width/height attributes are dropped, and the alt text follows as a <p>.
    source = (
        '<div><img alt="Figure caption." height="400" src="plot.png" width="600"/></div>'
    )
    expected = (
        '<div markdown="1"><img alt="Figure caption." src="plot.png"/>'
        "<p>Figure caption.</p></div>"
    )

    module = _load_postprocessor()

    assert module.postprocess_generated_markdown(source) == expected


def test_postprocess_generated_markdown_does_not_duplicate_alt_text_paragraph():  # numpydoc ignore=GL08
    # The input already has the caption paragraph right after the image, so
    # the output must be identical to the input.
    already_captioned = (
        '<img alt="Figure caption." src="plot.png"/><p>Figure caption.</p>'
    )

    module = _load_postprocessor()

    assert module.postprocess_generated_markdown(already_captioned) == already_captioned
Loading