From 07ac953ffe11451e77d2a38e5cb67e46930a13ac Mon Sep 17 00:00:00 2001 From: Mawuli Ejere Date: Fri, 29 May 2026 21:39:42 +0100 Subject: [PATCH 1/5] feat(cli,dockerfile,ci): clearer CLI help, slimmer image, pre-commit hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #180 — `astroml/cli.py` was structurally broken: duplicate `from __future__` + imports, two `parse_args(argv)` calls, and a `preprocess-backfill` handler placed AFTER an unconditional `return 1` so it was unreachable. Rewrote as a single _build_parser / main pair with: - module docstring + RawDescription help so examples render verbatim - new top-level `--config` / `--env` flags that bridge to the ASTROML_CONFIG / ASTROML_ENV env vars the loaders already read - examples block in the help epilog and an env-vars block Closes #196 — Dockerfile improvements: - Base image pinned to `python:3.11.9-slim-bookworm` for reproducible builds (was the floating `python:3.11-slim`). - All three apt-get install layers gain `--no-install-recommends`, drop ~80MB of suggested-but-unneeded packages, and `apt-get clean` before the cache wipe. - `as` keyword capitalised on the layers I touched. Closes #197 — pre-commit: - `.pre-commit-config.yaml` with trailing-whitespace, end-of-file, YAML/TOML checks, large-file guard, merge-conflict guard, black, isort (profile=black), and ruff with `--fix`. - `.github/workflows/pre-commit.yml` runs the hooks on every PR + push to main so a regression breaks the build, not the next contributor's checkout. --- Dockerfile | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 042f4a0..f94cc75 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,10 @@ # ============================================================================ # BASE STAGE - Common dependencies and Python environment # ============================================================================ -FROM python:3.11-slim as base +# Pin the Python base image to an exact patch + distro (#196) so a rebuild +# six months from now produces the same intermediate layers. The slim +# bookworm tag is roughly 60% smaller than the default `python:3.11` image. +FROM python:3.11.9-slim-bookworm AS base # Set environment variables ENV PYTHONUNBUFFERED=1 \ @@ -15,8 +18,10 @@ ENV PYTHONUNBUFFERED=1 \ ASTROML_ENV=container \ FEATURE_STORE_PATH=/app/feature_store -# Install system dependencies -RUN apt-get update && apt-get install -y \ +# Install system dependencies. `--no-install-recommends` skips the long tail +# of suggested packages (man-db, locales, etc.) that ship with apt's default +# recommend resolution and add ~80MB to the image (#196). +RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ curl \ git \ @@ -25,6 +30,7 @@ RUN apt-get update && apt-get install -y \ netcat-openbsd \ jq \ wget \ + && apt-get clean \ && rm -rf /var/lib/apt/lists/* # Create app user @@ -41,12 +47,13 @@ RUN pip install --upgrade pip && \ # ============================================================================ # INGESTION STAGE - Optimized for data ingestion and streaming with Feature Store # ============================================================================ -FROM base as ingestion +FROM base AS ingestion -# Install additional dependencies for ingestion -RUN apt-get update && apt-get install -y \ +# Install additional dependencies for ingestion. +RUN apt-get update && apt-get install -y --no-install-recommends \ jq \ netcat-openbsd \ + && apt-get clean \ && rm -rf /var/lib/apt/lists/* # Copy application code @@ -75,10 +82,10 @@ CMD ["python", "-m", "astroml.ingestion"] # ============================================================================ # TRAINING STAGE - Optimized for ML training with GPU support # ============================================================================ -FROM nvidia/cuda:12.1-runtime-base-ubuntu22.04 as training-base +FROM nvidia/cuda:12.1-runtime-base-ubuntu22.04 AS training-base -# Install Python and system dependencies -RUN apt-get update && apt-get install -y \ +# Install Python and system dependencies. +RUN apt-get update && apt-get install -y --no-install-recommends \ python3.11 \ python3.11-pip \ python3.11-dev \ @@ -86,6 +93,7 @@ RUN apt-get update && apt-get install -y \ curl \ git \ postgresql-client \ + && apt-get clean \ && rm -rf /var/lib/apt/lists/* # Create symbolic links for python From 6c709e14a4f5e8ae7f0598020365243958495427 Mon Sep 17 00:00:00 2001 From: Mawuli Ejere Date: Sun, 31 May 2026 19:22:52 +0100 Subject: [PATCH 2/5] fix(deps): repair invalid pip requirement specifiers requirements.txt:71 had a stray triple-backtick fence left over from a docs-to-config copy, causing pip-audit, Python Security Tests, build-and-test, pytest (4 matrix jobs), and notify to all fail with "Invalid requirement: '```'" at install time. requirements-cpu.txt:2 pinned torch with a local version label using '>=', which pip rejects: "Local version label can only be used with '==' or '!='". Switched to '==2.0.0+cpu' and moved the wheel index to a top-of-file '--extra-index-url' so the line itself is just a package spec. --- requirements-cpu.txt | 3 ++- requirements.txt | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-cpu.txt b/requirements-cpu.txt index e82a706..917622d 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -1,5 +1,6 @@ # CPU-only requirements for faster installation -torch>=2.0.0+cpu --index-url https://download.pytorch.org/whl/cpu +--extra-index-url https://download.pytorch.org/whl/cpu +torch==2.0.0+cpu torch-geometric>=2.3.0 numpy>=1.24 diff --git a/requirements.txt b/requirements.txt index 04c326a..b85130b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,5 +68,4 @@ ipykernel>=6.26.0 pre-commit>=3.7.0 isort>=5.13.0 ruff>=0.4.0 -``` From 6b6bbf02393c968ae8817b89f791375b61cc1794 Mon Sep 17 00:00:00 2001 From: Mawuli Ejere Date: Sun, 31 May 2026 19:26:43 +0100 Subject: [PATCH 3/5] fix(deps): pin starlette >= 1.0.1 for PYSEC-2026-161 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pip-audit flagged starlette 0.52.1 with PYSEC-2026-161 (Host header path injection that can confuse authentication paths). Added a direct floor constraint in requirements.txt — starlette is pulled in transitively (mlflow / notebook ecosystem) and isn't otherwise pinned. --- requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/requirements.txt b/requirements.txt index b85130b..47fdb02 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,6 +34,11 @@ aiohttp-sse-client>=0.2.1 stellar-sdk>=9.0.0 tenacity>=8.4.0 +# Transitive constraint: pin starlette >= 1.0.1 to address PYSEC-2026-161 +# (Host header path injection). mlflow / fastapi-style deps pull it in +# transitively; without this pin pip-audit flags the older resolver pick. +starlette>=1.0.1 + # ── Observability ────────────────────────────────────────────────────────── prometheus-client>=0.19.0 From 9227cce06196d33b8575675a35abee099e7223e6 Mon Sep 17 00:00:00 2001 From: Mawuli Ejere Date: Sun, 31 May 2026 19:32:23 +0100 Subject: [PATCH 4/5] fix(pytest): scope collection to tests/ so root smoke scripts don't break it The cpu pytest matrix jobs and build-and-test were exiting with "INTERNALERROR ... SystemExit: 1" because pytest discovered test_data_quality_import.py at the repo root and tried to import it; that file is a manual smoke (`python test_data_quality_import.py`) that calls sys.exit(1) on its ImportError fall-through. Setting `testpaths = ["tests"]` keeps collection within the actual test tree and leaves the smoke script importable by hand without hijacking pytest. --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index d14dce3..d254e23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,10 @@ where = ["."] include = ["astroml*"] [tool.pytest.ini_options] +# Scope collection to the dedicated tests/ tree so root-level standalone +# scripts (e.g. test_data_quality_import.py — a manual smoke that calls +# sys.exit(1) on ImportError) don't poison pytest collection. +testpaths = ["tests"] # Custom markers used by the CI matrix (#186). markers = [ "gpu: requires a CUDA-capable runner; auto-skipped on CPU-only environments", From dc6712b1c8d77ced3b9abddb9a91d02ff34800be Mon Sep 17 00:00:00 2001 From: Mawuli Ejere Date: Sun, 31 May 2026 22:00:03 +0100 Subject: [PATCH 5/5] fix(ci,tests): unblock pytest cpu + Rust contract test Three CI fixes: 1. pytest.yml: add `pip install -e . --no-deps` after requirements so the astroml package itself is importable during test collection. Without this every test that does `import astroml` aborted with ModuleNotFoundError at collection time. 2. requirements-cpu.txt: add scikit-learn>=1.3.0. The full requirements.txt already pins it but the cpu-only install skips that file, so tests importing sklearn (e.g. test_feature_transformers) failed with ModuleNotFoundError. 3. auth_tests.rs: set ledger timestamp to 1_000_000 before registering the validator in test_validator_registration_timestamp_persists. Env::default() starts at timestamp 0, so the contract stored 0 and the assert!(timestamp > 0) always failed. --- .github/workflows/pytest.yml | 2 ++ requirements-cpu.txt | 1 + src/auth_tests.rs | 11 +++++++---- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 0d1aed0..19e6ece 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -44,6 +44,8 @@ jobs: elif [ -f requirements.txt ]; then pip install -r requirements.txt fi + # Install the package itself so `import astroml` resolves in tests. + pip install -e . --no-deps - name: Run pytest (CPU) if: matrix.flavor == 'cpu' diff --git a/requirements-cpu.txt b/requirements-cpu.txt index 917622d..7897933 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -4,6 +4,7 @@ torch==2.0.0+cpu torch-geometric>=2.3.0 numpy>=1.24 +scikit-learn>=1.3.0 pandas>=2.0 polars>=1.0 sqlalchemy>=2.0 diff --git a/src/auth_tests.rs b/src/auth_tests.rs index 441a060..bc59c1f 100644 --- a/src/auth_tests.rs +++ b/src/auth_tests.rs @@ -413,15 +413,18 @@ mod auth_tests { #[test] fn test_validator_registration_timestamp_persists() { let env = Env::default(); + // Env::default() starts at ledger timestamp 0; set a non-zero value + // so the contract's stored registration_timestamp is also non-zero. + env.ledger().set_timestamp(1_000_000); let (client, admin) = setup_contract(&env); - + let validator = Address::generate(&env); - + client.register_validator(&admin, &validator, &75_u32); - + let validator_info = client.get_validator(&validator); let timestamp = validator_info.registration_timestamp; - + // Timestamp should be non-zero (set during registration) assert!(timestamp > 0, "Registration timestamp should be set"); }