diff --git a/docs/tutorials/_quarto.yml b/docs/tutorials/_quarto.yml index a48d2b44..2eed4b30 100644 --- a/docs/tutorials/_quarto.yml +++ b/docs/tutorials/_quarto.yml @@ -1,7 +1,7 @@ project: title: "PyRenew Tutorials" pre-render: uv run ../../docs_scripts/cleanup_generated_md.py . # remove generated .md files - post-render: uv run ../../docs_scripts/add_markdown_to_divs.py . # see https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown/#markdown-in-html + post-render: uv run ../../docs_scripts/postprocess_generated_markdown.py . # see https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown/#markdown-in-html format: gfm engine: jupyter diff --git a/docs_scripts/add_markdown_to_divs.py b/docs_scripts/add_markdown_to_divs.py deleted file mode 100644 index f13aee48..00000000 --- a/docs_scripts/add_markdown_to_divs.py +++ /dev/null @@ -1,32 +0,0 @@ -# numpydoc ignore=GL08 - -import sys -from pathlib import Path - -from bs4 import BeautifulSoup - - -def add_markdown_to_divs(html: str) -> str: # numpydoc ignore=GL08 - soup = BeautifulSoup(html, "html.parser") - for div in soup.find_all("div"): - if "markdown" not in div.attrs: - div["markdown"] = "1" - for img in soup.find_all("img"): - img.attrs.pop("width", None) - img.attrs.pop("height", None) - return soup.decode(formatter=None) - - -if __name__ == "__main__": - target = Path(sys.argv[1]) - if target.is_file(): - text = target.read_text(encoding="utf-8") - updated = add_markdown_to_divs(text) - target.write_text(updated, encoding="utf-8") - print(f"Processed {target}") - elif target.is_dir(): - for f in target.rglob("*.md"): - text = f.read_text(encoding="utf-8") - updated = add_markdown_to_divs(text) - f.write_text(updated, encoding="utf-8") - print(f"Processed {f}") diff --git a/docs_scripts/postprocess_generated_markdown.py b/docs_scripts/postprocess_generated_markdown.py new file mode 100644 index 00000000..5f4a90ef --- /dev/null +++ 
b/docs_scripts/postprocess_generated_markdown.py
@@ -0,0 +1,161 @@
+# numpydoc ignore=GL08
+
+import sys
+from pathlib import Path
+
+from bs4 import BeautifulSoup
+from bs4.element import Tag
+
+
+def _next_tag(element: Tag) -> Tag | None:
+    """
+    Return the tag that immediately follows an element.
+
+    Whitespace-only text nodes between the element and the next
+    tag are skipped. Any other non-tag sibling (for example,
+    visible text) means no tag immediately follows.
+
+    Parameters
+    ----------
+    element
+        Element whose following siblings are inspected.
+
+    Returns
+    -------
+    Tag | None
+        The first following sibling tag, or ``None`` if non-blank
+        content precedes it or no sibling tag exists.
+    """
+    for sibling in element.next_siblings:
+        if isinstance(sibling, Tag):
+            return sibling
+        if str(sibling).strip():
+            return None
+    return None
+
+
+def _add_alt_text_paragraphs(soup: BeautifulSoup) -> None:
+    """
+    Insert each image's alt text as a paragraph after the image.
+
+    Images with missing or blank alt text are left alone, as are
+    images already followed by a paragraph whose text equals the
+    alt text, so repeated runs do not stack duplicate captions.
+
+    Parameters
+    ----------
+    soup
+        Parsed document, modified in place.
+
+    Returns
+    -------
+    None
+    """
+    for img in soup.find_all("img"):
+        alt_text = img.get("alt", "").strip()
+        if not alt_text:
+            continue
+
+        next_tag = _next_tag(img)
+        if (
+            next_tag is not None
+            and next_tag.name == "p"
+            and next_tag.get_text(strip=True) == alt_text
+        ):
+            continue
+
+        caption = soup.new_tag("p")
+        caption.string = alt_text
+        img.insert_after(caption)
+
+
+def postprocess_generated_markdown(html: str) -> str:
+    """
+    Post-process generated Markdown that contains embedded HTML.
+
+    Adds ``markdown="1"`` to every ``div`` lacking a ``markdown``
+    attribute, removes ``width`` and ``height`` from every
+    ``img``, and inserts alt-text paragraphs after images.
+
+    Parameters
+    ----------
+    html
+        Contents of a generated Markdown file, which may contain
+        embedded HTML.
+
+    Returns
+    -------
+    str
+        The processed contents.
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    for div in soup.find_all("div"):
+        if "markdown" not in div.attrs:
+            div["markdown"] = "1"
+    for img in soup.find_all("img"):
+        img.attrs.pop("width", None)
+        img.attrs.pop("height", None)
+    _add_alt_text_paragraphs(soup)
+    # formatter=None writes text and attribute values back without
+    # entity substitution.
+    return soup.decode(formatter=None)
+
+
+def _process_file(path: Path) -> None:
+    """
+    Rewrite one file in place with its post-processed contents.
+
+    Parameters
+    ----------
+    path
+        File to read and overwrite, as UTF-8 text.
+
+    Returns
+    -------
+    None
+    """
+    text = path.read_text(encoding="utf-8")
+    updated = postprocess_generated_markdown(text)
+    path.write_text(updated, encoding="utf-8")
+    print(f"Processed {path}")
+
+
+def main(argv: list[str]) -> None:
+    """
+    Post-process a Markdown file, or every ``*.md`` file in a tree.
+
+    Parameters
+    ----------
+    argv
+        Command-line arguments; ``argv[1]`` is the target file
+        or directory.
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    SystemExit
+        If no target is given, or the target is neither a file
+        nor a directory.
+    """
+    if len(argv) < 2:
+        raise SystemExit(
+            "usage: postprocess_generated_markdown.py <file-or-directory>"
+        )
+
+    target = Path(argv[1])
+    if target.is_file():
+        _process_file(target)
+    elif target.is_dir():
+        # Collect the listing first so files are not rewritten
+        # while the directory tree is still being walked.
+        for md_file in sorted(target.rglob("*.md")):
+            _process_file(md_file)
+    else:
+        raise SystemExit(f"{target} is not a file or directory")
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/test/test_docs_postprocessing.py b/test/test_docs_postprocessing.py
new file mode 100644
index 00000000..417280bf
--- /dev/null
+++ b/test/test_docs_postprocessing.py
@@ -0,0 +1,39 @@
+# numpydoc ignore=GL08
+
+import importlib.util
+from pathlib import Path
+
+
+def _load_postprocessor(): # numpydoc
ignore=GL08 + script_path = ( + Path(__file__).parents[1] / "docs_scripts" / "postprocess_generated_markdown.py" + ) + spec = importlib.util.spec_from_file_location( + "postprocess_generated_markdown", script_path + ) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_postprocess_generated_markdown_adds_alt_text_paragraph(): # numpydoc ignore=GL08 + postprocessor = _load_postprocessor() + + result = postprocessor.postprocess_generated_markdown( + '

'
+ "Figure caption.

Figure caption.
' + ) + + assert result == '
Figure caption.
'