2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
- id: end-of-file-fixer
- repo: https://github.com/charliermarsh/ruff-pre-commit
# keep the version here in sync with the version in uv.lock
rev: "v0.12.7"
rev: "v0.12.9"
hooks:
- id: ruff-check
args: [--fix, --exit-non-zero-on-fix]
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- `tilebox-storage`: Added `USGSLandsatStorageClient` to download Landsat data from the USGS Landsat S3 bucket.
- `tilebox-storage`: Storage clients now support concurrent downloads of multiple objects, controlled by the
`max_concurrent_downloads` parameter.
- `tilebox-storage`: Added `quicklook` and `download_quicklook` methods to the `CopernicusStorageClient` to download and
display preview images for Sentinel data.

## [0.41.0] - 2025-08-01

### Added
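A quick usage sketch tying the three changelog entries above together; the class and method names come from the changelog itself, while the import path and exact call signatures are assumptions rather than the verified released API:

```python
# Usage sketch only: class names come from the changelog above, but the
# import path, constructor arguments and method signatures are assumptions.
from tilebox.storage import CopernicusStorageClient, USGSLandsatStorageClient

landsat = USGSLandsatStorageClient()  # downloads from the USGS Landsat S3 bucket
copernicus = CopernicusStorageClient()

granule = ...  # a datapoint previously queried from a Tilebox dataset

# Concurrent downloads of multiple objects, bounded by max_concurrent_downloads:
landsat.download(granule, max_concurrent_downloads=8)

# Quicklook previews for Sentinel data: display inline or save to disk.
copernicus.quicklook(granule)
preview_path = copernicus.download_quicklook(granule)
```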
@@ -374,19 +374,16 @@ def _create_field_converter(field: FieldDescriptor) -> _FieldConverter:
"""
# special handling for enums:
if field.type == FieldDescriptor.TYPE_ENUM:
if field.label == FieldDescriptor.LABEL_REPEATED:
if field.is_repeated: # type: ignore[attr-defined]
raise NotImplementedError("Repeated enum fields are not supported")

return _EnumFieldConverter(field.name, enum_mapping_from_field_descriptor(field))

field_type = infer_field_type(field)
if field.label == FieldDescriptor.LABEL_OPTIONAL: # simple fields (in proto3 every simple field is optional)
return _SimpleFieldConverter(field.name, field_type)

if field.label == FieldDescriptor.LABEL_REPEATED:
if field.is_repeated: # type: ignore[attr-defined]
return _ArrayFieldConverter(field.name, field_type)

raise ValueError(f"Unsupported field type with label {field.label} and type {field.type}")
return _SimpleFieldConverter(field.name, field_type)


def _combine_dimension_names(array_dimensions: dict[str, int]) -> dict[str, tuple[str, int]]:
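The hunk above (and the matching one in the next file) moves from comparing `field.label` against `FieldDescriptor.LABEL_REPEATED` to the newer `is_repeated` descriptor property, which current type stubs don't yet know about (hence the `type: ignore`). A minimal compatibility shim, assuming both older and newer protobuf runtimes need to be supported, might look like this:

```python
# Hypothetical fallback, not part of this PR: the PR itself targets a
# protobuf runtime whose FieldDescriptor already exposes is_repeated.
from google.protobuf.descriptor import FieldDescriptor

def field_is_repeated(field: FieldDescriptor) -> bool:
    is_repeated = getattr(field, "is_repeated", None)
    if is_repeated is not None:  # newer protobuf runtimes
        return bool(is_repeated)
    return field.label == FieldDescriptor.LABEL_REPEATED  # older runtimes
```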
@@ -6,7 +6,6 @@
import numpy as np
import pandas as pd
import xarray as xr
from google.protobuf.descriptor import FieldDescriptor
from google.protobuf.message import Message

from tilebox.datasets.protobuf_conversion.field_types import (
@@ -80,7 +79,7 @@ def to_messages( # noqa: C901, PLR0912
descriptor = field_descriptors_by_name[field_name]
field_type = infer_field_type(descriptor)

if descriptor.label == FieldDescriptor.LABEL_REPEATED:
if descriptor.is_repeated:
values = convert_repeated_values_to_proto(values, field_type)
else:
values = convert_values_to_proto(values, field_type, filter_none=False)
4 changes: 1 addition & 3 deletions tilebox-storage/pyproject.toml
@@ -26,8 +26,7 @@ dependencies = [
"aiofile>=3.8",
"folium>=0.15",
"shapely>=2",
"boto3>=1.33",
"boto3-stubs[essential]>=1.33",
"obstore>=0.8.0",
]

[dependency-groups]
@@ -37,7 +36,6 @@ dev = [
"pytest-asyncio>=0.24.0",
"pytest-cov>=5.0.0",
"pytest>=8.3.2",
"moto>=5",
]

[project.urls]
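The dependency swap above replaces `boto3` (plus its type stubs, and the `moto` mock in the dev group) with `obstore`. For orientation, a rough sketch of the obstore access pattern this enables; the bucket, region and object key below are placeholders, not values taken from this PR:

```python
# Placeholder bucket/region/key; unsigned (anonymous) access to a public bucket.
import obstore
from obstore.store import S3Store

store = S3Store("example-public-bucket", region="us-west-2", skip_signature=True)
result = obstore.get(store, "path/to/object.jpg")
data = result.bytes()  # read the object body into memory
```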
33 changes: 28 additions & 5 deletions tilebox-storage/tests/storage_data.py
@@ -11,7 +11,12 @@

from hypothesis.strategies import DrawFn, booleans, composite, datetimes, integers, just, one_of, text, uuids

from tilebox.storage.granule import ASFStorageGranule, CopernicusStorageGranule, UmbraStorageGranule
from tilebox.storage.granule import (
ASFStorageGranule,
CopernicusStorageGranule,
UmbraStorageGranule,
USGSLandsatStorageGranule,
)
from tilebox.storage.providers import _ASF_URL, StorageURLs


@@ -46,15 +51,14 @@ def alphanumerical_text(draw: DrawFn, min_size: int = 1, max_size: int = 100) ->
@composite
def umbra_granules(draw: DrawFn) -> UmbraStorageGranule:
"""Generate a realistic-looking random Umbra granule."""
level = "L0"
time = draw(datetimes(min_value=datetime(1990, 1, 1), max_value=datetime(2025, 1, 1), timezones=just(None)))
number = draw(integers(min_value=1, max_value=2))
text_location = draw(alphanumerical_text(min_size=1, max_size=20))
granule_id = str(draw(uuids(version=4)))
granule_name = f"{time:%Y-%m-%d-%H-%M-%S}_UMBRA-{number:02d}"
location = str(Path(text_location) / granule_id / granule_name)

return UmbraStorageGranule(time, granule_name, level, location)
return UmbraStorageGranule(time, granule_name, location)


@composite
@@ -80,5 +84,24 @@ def s5p_granules(draw: DrawFn) -> CopernicusStorageGranule:
# /eodata/Sentinel-5P/TROPOMI/L2__AER_LH/2024/04/15/S5P_NRTI_L2__AER_LH_20240415T055540_20240415T060040_33707_03_020600_20240415T063447
location = f"/eodata/Sentinel-5P/{instrument}/{product_type}/{start:%Y}/{start:%m}/{start:%d}/{granule_name.removesuffix('.nc')}"

file_size = draw(integers(min_value=10_000, max_value=999_999_999))
return CopernicusStorageGranule(start, granule_name, location, file_size)
return CopernicusStorageGranule(start, granule_name, location)


@composite
def landsat_granules(draw: DrawFn) -> USGSLandsatStorageGranule:
"""Generate a realistic-looking random USGS Landsat granule."""
time = draw(datetimes(min_value=datetime(1990, 1, 1), max_value=datetime(2025, 1, 1), timezones=just(None)))
landsat_mission = draw(integers(min_value=1, max_value=9))

path = draw(integers(min_value=1, max_value=999))
row = draw(integers(min_value=1, max_value=999))

granule_name = f"LC{landsat_mission:02d}_L1GT_{path:03d}{row:03d}_{time:%Y%m%d}_{time:%Y%m%d}_02_T1"
location = f"s3://usgs-landsat/collection02/level-1/standard/oli-tirs/{time:%Y}/{path:03d}/{row:03d}/{granule_name}"
thumbnail = draw(one_of(just(f"{granule_name}_thumb_small.jpeg"), just(None)))
return USGSLandsatStorageGranule(
time,
granule_name,
location,
thumbnail,
)
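The `granule_name` built above follows the USGS Landsat Collection 2 product ID convention. As a reading aid, an illustrative decoder for that format (this helper is not part of the PR):

```python
# Illustrative decoder for Landsat Collection 2 product IDs such as
# "LC08_L1GT_123045_20200101_20200101_02_T1".
def parse_landsat_product_id(product_id: str) -> dict[str, str]:
    sensor_mission, level, path_row, acquired, processed, collection, tier = product_id.split("_")
    return {
        "sensor": sensor_mission[:2],   # e.g. "LC" (OLI/TIRS combined)
        "mission": sensor_mission[2:],  # e.g. "08"
        "processing_level": level,      # e.g. "L1GT"
        "wrs_path": path_row[:3],
        "wrs_row": path_row[3:],
        "acquisition_date": acquired,   # YYYYMMDD
        "processing_date": processed,   # YYYYMMDD
        "collection": collection,       # "02"
        "tier": tier,                   # e.g. "T1"
    }
```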
81 changes: 77 additions & 4 deletions tilebox-storage/tests/test_granule.py
@@ -4,8 +4,15 @@
from hypothesis import given
from hypothesis.strategies import lists

from tests.storage_data import ers_granules, s5p_granules, umbra_granules
from tilebox.storage.granule import ASFStorageGranule, CopernicusStorageGranule, UmbraStorageGranule, _asf_download_urls
from tests.storage_data import ers_granules, landsat_granules, s5p_granules, umbra_granules
from tilebox.storage.granule import (
ASFStorageGranule,
CopernicusStorageGranule,
UmbraStorageGranule,
USGSLandsatStorageGranule,
_asf_download_urls,
_thumbnail_relative_to_eodata_location,
)


def _asf_granule_to_datapoint(granule: ASFStorageGranule) -> xr.Dataset:
@@ -53,7 +60,6 @@ def _umbra_granule_to_datapoint(granule: UmbraStorageGranule) -> xr.Dataset:
datapoint = xr.Dataset()
datapoint.coords["time"] = np.array(granule.time).astype("datetime64[ns]")
datapoint["granule_name"] = granule.granule_name
datapoint["processing_level"] = granule.processing_level
datapoint["location"] = granule.location
return datapoint

@@ -76,12 +82,51 @@ def test_granule_from_umbra_datapoints(granules: list[UmbraStorageGranule]) -> N
assert UmbraStorageGranule.from_data(dataset.isel(time=i)) == granules[i]


@pytest.mark.parametrize(
("thumbnail_url", "location", "expected"),
[
(
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE/preview/thumbnail.png",
"/eodata/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE",
"preview/thumbnail.png",
),
(
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-2/MSI/L1C/2025/08/07/S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945.SAFE/S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945-ql.jpg",
"/eodata/Sentinel-2/MSI/L1C/2025/08/07/S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945.SAFE",
"S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945-ql.jpg",
),
(
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-3/OLCI/OL_2_LFR___/2025/08/07/S3A_OL_2_LFR____20250807T011653_20250807T011953_20250807T033036_0179_129_074_1620_PS1_O_NR_003.SEN3/quicklook.jpg",
"/eodata/Sentinel-3/OLCI/OL_2_LFR___/2025/08/07/S3A_OL_2_LFR____20250807T011653_20250807T011953_20250807T033036_0179_129_074_1620_PS1_O_NR_003.SEN3",
"quicklook.jpg",
),
(
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-3/SLSTR/SL_1_RBT___/2025/08/07/S3B_SL_1_RBT____20250807T002314_20250807T002614_20250807T025411_0179_109_316_0720_ESA_O_NR_004.SEN3/quicklook.jpg",
"/eodata/Sentinel-3/SLSTR/SL_1_RBT___/2025/08/07/S3B_SL_1_RBT____20250807T002314_20250807T002614_20250807T025411_0179_109_316_0720_ESA_O_NR_004.SEN3",
"quicklook.jpg",
),
(
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-3/SYNERGY/SY_2_VG1___/2025/08/04/S3A_SY_2_VG1____20250804T000000_20250804T235959_20250806T202029_AUSTRALASIA_______PS1_O_NT_002.SEN3/quicklook.jpg",
"/eodata/Sentinel-3/SYNERGY/SY_2_VG1___/2025/08/04/S3A_SY_2_VG1____20250804T000000_20250804T235959_20250806T202029_AUSTRALASIA_______PS1_O_NT_002.SEN3",
"quicklook.jpg",
),
],
)
def test_thumbnail_relative_to_eodata_location(thumbnail_url: str, location: str, expected: str) -> None:
assert (
_thumbnail_relative_to_eodata_location(
thumbnail_url,
location,
)
== expected
)

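Judging from the cases above, the helper strips the granule's `/eodata` location from the object path embedded in the catalogue URL's `path` query parameter. A plausible reimplementation inferred purely from these tests; the actual `_thumbnail_relative_to_eodata_location` may differ:

```python
from urllib.parse import parse_qs, urlparse

def thumbnail_relative_to_eodata_location(thumbnail_url: str, location: str) -> str:
    # The catalogue URL carries the object path in its ?path= query parameter.
    object_path = parse_qs(urlparse(thumbnail_url).query)["path"][0]
    # The granule location is that same path, prefixed with /eodata.
    prefix = location.removeprefix("/eodata")
    return object_path.removeprefix(prefix).lstrip("/")
```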

def _copernicus_granule_to_datapoint(granule: CopernicusStorageGranule) -> xr.Dataset:
datapoint = xr.Dataset()
datapoint.coords["time"] = np.array(granule.time).astype("datetime64[ns]")
datapoint["granule_name"] = granule.granule_name
datapoint["location"] = granule.location
datapoint["file_size"] = granule.file_size
return datapoint


@@ -101,3 +146,31 @@ def test_granule_from_copernicus_datapoints(granules: list[CopernicusStorageGran

for i in range(len(granules)): # converting a dataset with a time dimension of 1 should still work though
assert CopernicusStorageGranule.from_data(dataset.isel(time=i)) == granules[i]


def _landsat_granule_to_datapoint(granule: USGSLandsatStorageGranule) -> xr.Dataset:
datapoint = xr.Dataset()
datapoint.coords["time"] = np.array(granule.time).astype("datetime64[ns]")
datapoint["granule_name"] = granule.granule_name
datapoint["location"] = granule.location
if granule.thumbnail is not None:
datapoint["thumbnail"] = f"{granule.location}/{granule.thumbnail}"
return datapoint


@given(landsat_granules())
def test_granule_from_landsat_datapoint(granule: USGSLandsatStorageGranule) -> None:
datapoint = _landsat_granule_to_datapoint(granule)
assert USGSLandsatStorageGranule.from_data(datapoint) == granule
assert USGSLandsatStorageGranule.from_data(USGSLandsatStorageGranule.from_data(datapoint)) == granule


@given(lists(landsat_granules(), min_size=2, max_size=5))
def test_granule_from_landsat_datapoints(granules: list[USGSLandsatStorageGranule]) -> None:
datapoints = [_landsat_granule_to_datapoint(granule) for granule in granules]
dataset = xr.concat(datapoints, dim="time")
with pytest.raises(ValueError, match=".*more than one granule.*"):
USGSLandsatStorageGranule.from_data(dataset)

for i in range(len(granules)): # converting a dataset with a time dimension of 1 should still work though
assert USGSLandsatStorageGranule.from_data(dataset.isel(time=i)) == granules[i]
6 changes: 4 additions & 2 deletions tilebox-storage/tests/test_providers.py
@@ -5,7 +5,7 @@
from tilebox.storage.providers import _asf_login


@pytest.mark.anyio
@pytest.mark.asyncio
async def test_asf_login(httpx_mock: HTTPXMock) -> None:
httpx_mock.add_response(headers={"Set-Cookie": "logged_in=yes"})

@@ -15,8 +15,10 @@ async def test_asf_login(httpx_mock: HTTPXMock) -> None:
assert isinstance(client.auth, BasicAuth)
assert client.cookies["logged_in"] == "yes"

await client.aclose()

@pytest.mark.anyio

@pytest.mark.asyncio
async def test_asf_login_invalid_auth(httpx_mock: HTTPXMock) -> None:
httpx_mock.add_response(401)
with pytest.raises(ValueError, match="Invalid username or password."):