diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index bf190fd560..70e98e8cda 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -210,6 +210,18 @@ def __str__(self) -> str: ) +def _check_ascii_printable(url: str, key: str | None = None) -> None: + if any(char.isascii() and not char.isprintable() for char in url): + char = next(char for char in url if char.isascii() and not char.isprintable()) + idx = url.find(char) + component = f" {key} component" if key else "" + error = ( + f"Invalid non-printable ASCII character in URL{component}," + f" {char!r} at position {idx}." + ) + raise InvalidURL(error) + + def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: # Initial basic checks on allowable URLs. # --------------------------------------- @@ -220,13 +232,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: # If a URL includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. - if any(char.isascii() and not char.isprintable() for char in url): - char = next(char for char in url if char.isascii() and not char.isprintable()) - idx = url.find(char) - error = ( - f"Invalid non-printable ASCII character in URL, {char!r} at position {idx}." - ) - raise InvalidURL(error) + _check_ascii_printable(url) # Some keyword arguments require special handling. # ------------------------------------------------ @@ -270,16 +276,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: # If a component includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. - if any(char.isascii() and not char.isprintable() for char in value): - char = next( - char for char in value if char.isascii() and not char.isprintable() - ) - idx = value.find(char) - error = ( - f"Invalid non-printable ASCII character in URL {key} component, " - f"{char!r} at position {idx}." - ) - raise InvalidURL(error) + _check_ascii_printable(value, key) # Ensure that keyword arguments match as a valid regex. if not COMPONENT_REGEX[key].fullmatch(value):