Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/wikitextprocessor/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,11 +924,33 @@ def pop_until_nth_list(ctx: "Wtp", list_token: str) -> None:
_parser_pop(ctx, True)


# Link-trail patterns. Each one splits a string into:
#   group 1 - the leading run (one or more) of letters from the character class
#   group 2 - everything after that run; "(?s)" lets "." span newlines
# When applied with .match() the letter run must begin at the start of the
# string, otherwise there is no match at all.
#
# Default pattern: ASCII lowercase letters only.
# https://github.com/wikimedia/mediawiki/blob/0f67e1045f35a8c854f3c6a3e2712c2d8b0b54d6/languages/messages/MessagesEn.php#L571-L575
LINKTRAIL_RE = re.compile(r"(?s)([a-z]+)(.*)")
# Greek pattern: ASCII lowercase plus Greek lowercase, uppercase and
# accented letters.
# https://github.com/wikimedia/mediawiki/blob/0f67e1045f35a8c854f3c6a3e2712c2d8b0b54d6/languages/messages/MessagesEl.php#L309
LINKTRAIL_RE_EL = re.compile(
r"(?s)([a-zαβγδεζηθικλμνξοπρστυφχψωςΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίόύώϊϋΐΰΆΈΉΊΌΎΏΪΫ]+)(.*)"
)
# Russian pattern: ASCII lowercase plus lowercase Cyrillic letters (including
# "ё"); uppercase Cyrillic is not part of the class.
# NOTE(review): no upstream source link is given here, unlike the two patterns
# above - presumably mirrors MediaWiki's MessagesRu.php; confirm and add link.
LINKTRAIL_RE_RU = re.compile(
r"(?s)([a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]+)(.*)"
)


def linktrail_re(lang: str) -> re.Pattern[str]:
    """Return the compiled link-trail pattern for language code *lang*.

    ``"el"`` selects ``LINKTRAIL_RE_EL`` and ``"ru"`` selects
    ``LINKTRAIL_RE_RU``; any other value falls back to ``LINKTRAIL_RE``.
    """
    if lang == "el":
        return LINKTRAIL_RE_EL
    if lang == "ru":
        return LINKTRAIL_RE_RU
    # No language-specific pattern: use the default one.
    return LINKTRAIL_RE


def text_fn(ctx: "Wtp", token: str) -> None:
"""Inserts the token as raw text into the parse tree."""
close_begline_lists(ctx)

node = ctx.parser_stack[-1]
lang = ctx.lang_code

# Convert certain characters from the token into HTML entities
# XXX this breaks tags inside templates, e.g. <math> in
Expand Down Expand Up @@ -1029,7 +1051,7 @@ def text_fn(ctx: "Wtp", token: str) -> None:
and not node.children[-1].children
and not ctx.suppress_special
):
m = re.match(r"(?s)([a-z]+)(.*)", token)
m = linktrail_re(lang).match(token)
if m:
node.children[-1].children.append(m.group(1))
token = m.group(2)
Expand Down
Loading