From 37af33603ec669efefd5fe87dfb6c0c1ea26eada Mon Sep 17 00:00:00 2001 From: Damon Bayer Date: Mon, 18 May 2026 16:24:29 -0500 Subject: [PATCH 1/2] fig-cap as paragraph --- docs_scripts/add_markdown_to_divs.py | 30 ++++++++++++++++++++++++ test/test_docs_postprocessing.py | 35 ++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 test/test_docs_postprocessing.py diff --git a/docs_scripts/add_markdown_to_divs.py b/docs_scripts/add_markdown_to_divs.py index f13aee48..fbcf6e46 100644 --- a/docs_scripts/add_markdown_to_divs.py +++ b/docs_scripts/add_markdown_to_divs.py @@ -4,6 +4,35 @@ from pathlib import Path from bs4 import BeautifulSoup +from bs4.element import Tag + + +def _next_tag(element: Tag) -> Tag | None: # numpydoc ignore=GL08 + for sibling in element.next_siblings: + if isinstance(sibling, Tag): + return sibling + if str(sibling).strip(): + return None + return None + + +def _add_alt_text_paragraphs(soup: BeautifulSoup) -> None: # numpydoc ignore=GL08 + for img in soup.find_all("img"): + alt_text = img.get("alt", "").strip() + if not alt_text: + continue + + next_tag = _next_tag(img) + if ( + next_tag + and next_tag.name == "p" + and next_tag.get_text(strip=True) == alt_text + ): + continue + + caption = soup.new_tag("p") + caption.string = alt_text + img.insert_after(caption) def add_markdown_to_divs(html: str) -> str: # numpydoc ignore=GL08 @@ -14,6 +43,7 @@ def add_markdown_to_divs(html: str) -> str: # numpydoc ignore=GL08 for img in soup.find_all("img"): img.attrs.pop("width", None) img.attrs.pop("height", None) + _add_alt_text_paragraphs(soup) return soup.decode(formatter=None) diff --git a/test/test_docs_postprocessing.py b/test/test_docs_postprocessing.py new file mode 100644 index 00000000..8e669672 --- /dev/null +++ b/test/test_docs_postprocessing.py @@ -0,0 +1,35 @@ +# numpydoc ignore=GL08 + +import importlib.util +from pathlib import Path + + +def _load_postprocessor(): # numpydoc ignore=GL08 + script_path = Path(__file__).parents[1] / "docs_scripts" / "add_markdown_to_divs.py" + spec = importlib.util.spec_from_file_location("add_markdown_to_divs", script_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_add_markdown_to_divs_adds_alt_text_paragraph(): # numpydoc ignore=GL08 + postprocessor = _load_postprocessor() + + result = postprocessor.add_markdown_to_divs( + '
Figure caption.
' + ) + + assert result == ( + '
Figure caption.' + "

Figure caption.

" + ) + + +def test_add_markdown_to_divs_does_not_duplicate_alt_text_paragraph(): # numpydoc ignore=GL08 + postprocessor = _load_postprocessor() + + result = postprocessor.add_markdown_to_divs( + 'Figure caption.

Figure caption.

' + ) + + assert result == 'Figure caption.

Figure caption.

' From b296a3b6e16842160327596bf0b06d8ee4099b4d Mon Sep 17 00:00:00 2001 From: Damon Bayer Date: Mon, 18 May 2026 16:26:41 -0500 Subject: [PATCH 2/2] rename --- docs/tutorials/_quarto.yml | 2 +- ...divs.py => postprocess_generated_markdown.py} | 6 +++--- test/test_docs_postprocessing.py | 16 ++++++++++------ 3 files changed, 14 insertions(+), 10 deletions(-) rename docs_scripts/{add_markdown_to_divs.py => postprocess_generated_markdown.py} (89%) diff --git a/docs/tutorials/_quarto.yml b/docs/tutorials/_quarto.yml index a48d2b44..2eed4b30 100644 --- a/docs/tutorials/_quarto.yml +++ b/docs/tutorials/_quarto.yml @@ -1,7 +1,7 @@ project: title: "PyRenew Tutorials" pre-render: uv run ../../docs_scripts/cleanup_generated_md.py . # remove generated .md files - post-render: uv run ../../docs_scripts/add_markdown_to_divs.py . # see https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown/#markdown-in-html + post-render: uv run ../../docs_scripts/postprocess_generated_markdown.py . # see https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown/#markdown-in-html format: gfm engine: jupyter diff --git a/docs_scripts/add_markdown_to_divs.py b/docs_scripts/postprocess_generated_markdown.py similarity index 89% rename from docs_scripts/add_markdown_to_divs.py rename to docs_scripts/postprocess_generated_markdown.py index fbcf6e46..5f4a90ef 100644 --- a/docs_scripts/add_markdown_to_divs.py +++ b/docs_scripts/postprocess_generated_markdown.py @@ -35,7 +35,7 @@ def _add_alt_text_paragraphs(soup: BeautifulSoup) -> None: # numpydoc ignore=GL img.insert_after(caption) -def add_markdown_to_divs(html: str) -> str: # numpydoc ignore=GL08 +def postprocess_generated_markdown(html: str) -> str: # numpydoc ignore=GL08 soup = BeautifulSoup(html, "html.parser") for div in soup.find_all("div"): if "markdown" not in div.attrs: @@ -51,12 +51,12 @@ def add_markdown_to_divs(html: str) -> str: # numpydoc ignore=GL08 target = Path(sys.argv[1]) if target.is_file(): text = target.read_text(encoding="utf-8") - updated = add_markdown_to_divs(text) + updated = postprocess_generated_markdown(text) target.write_text(updated, encoding="utf-8") print(f"Processed {target}") elif target.is_dir(): for f in target.rglob("*.md"): text = f.read_text(encoding="utf-8") - updated = add_markdown_to_divs(text) + updated = postprocess_generated_markdown(text) f.write_text(updated, encoding="utf-8") print(f"Processed {f}") diff --git a/test/test_docs_postprocessing.py b/test/test_docs_postprocessing.py index 8e669672..417280bf 100644 --- a/test/test_docs_postprocessing.py +++ b/test/test_docs_postprocessing.py @@ -5,17 +5,21 @@ def _load_postprocessor(): # numpydoc ignore=GL08 - script_path = Path(__file__).parents[1] / "docs_scripts" / "add_markdown_to_divs.py" - spec = importlib.util.spec_from_file_location("add_markdown_to_divs", script_path) + script_path = ( + Path(__file__).parents[1] / "docs_scripts" / "postprocess_generated_markdown.py" + ) + spec = importlib.util.spec_from_file_location( + "postprocess_generated_markdown", script_path + ) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) return module -def test_add_markdown_to_divs_adds_alt_text_paragraph(): # numpydoc ignore=GL08 +def test_postprocess_generated_markdown_adds_alt_text_paragraph(): # numpydoc ignore=GL08 postprocessor = _load_postprocessor() - result = postprocessor.add_markdown_to_divs( + result = postprocessor.postprocess_generated_markdown( '
Figure caption.
' ) @@ -25,10 +29,10 @@ def test_add_markdown_to_divs_adds_alt_text_paragraph(): # numpydoc ignore=GL08 ) -def test_add_markdown_to_divs_does_not_duplicate_alt_text_paragraph(): # numpydoc ignore=GL08 +def test_postprocess_generated_markdown_does_not_duplicate_alt_text_paragraph(): # numpydoc ignore=GL08 postprocessor = _load_postprocessor() - result = postprocessor.add_markdown_to_divs( + result = postprocessor.postprocess_generated_markdown( 'Figure caption.

Figure caption.

' )