Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions atcf_data_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""A Python parser for the a-deck data posted online by the Automated Tropical Cyclone Forecasting System."""

from __future__ import annotations

import gzip
Expand All @@ -10,26 +11,30 @@


@retry()
def get_gzipped_url(url: str) -> str:
"""Open a gzipped file from a URL and return its contents as a list of strings.
def get_gzipped_url(url: str, timeout: int | None = None) -> str:
"""Open a gzipped file from a URL and return its contents as a string.

Parameters
----------
url : str
URL of the gzipped file.
timeout : int or None
Maximum number of seconds to wait for the network request.
Defaults to None (no timeout).
Comment thread
palewire marked this conversation as resolved.

Returns
-------
str
List of strings representing the lines of the file.
The file contents as a string.

Examples
--------
>>> url = "https://ftp.nhc.noaa.gov/atcf/aid_public/aep182023.dat.gz"
>>> get_gzipped_url(url)
"""
# Read in the URL
r = requests.get(url)
r = requests.get(url, timeout=timeout)
Comment thread
palewire marked this conversation as resolved.
r.raise_for_status()

# Unzip the file
f = io.BytesIO(r.content)
Expand All @@ -46,25 +51,38 @@ def get_gzipped_url(url: str) -> str:
return content


def get_dataframe(url: str) -> pd.DataFrame:
def get_dataframe(url: str, timeout: int = 30) -> pd.DataFrame:
"""Parse a fixed-width file into a pandas DataFrame.

Parameters
----------
url : str
URL of the gzipped file.
timeout : int
Maximum number of seconds to wait for network requests.
Defaults to 30 seconds.
Comment thread
palewire marked this conversation as resolved.

Returns
-------
pandas.DataFrame
DataFrame containing the parsed data.

Raises
------
requests.HTTPError
If the URL does not exist or returns an HTTP error status.

Examples
--------
>>> url = "https://ftp.nhc.noaa.gov/atcf/aid_public/aep182023.dat.gz"
>>> get_dataframe(url)
"""
data = get_gzipped_url(url)
# Verify the URL exists before downloading
head = requests.head(url, allow_redirects=True, timeout=timeout)
head.raise_for_status()

data = get_gzipped_url(url, timeout=timeout)

return pd.read_fwf(
io.StringIO(data),
colspecs=[
Expand Down
Loading
Loading