diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f2ff11e..9e2679a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,11 +60,19 @@ jobs: GH_TOKEN: ${{ github.token }} integration: - name: End-to-end SQL on ${{ matrix.os }} + name: SQL E2E (${{ matrix.transport }}) on ${{ matrix.os }} needs: resolve-haybarn strategy: fail-fast: false matrix: + # Run the SAME sqllogictest suite over every VGI transport. The vgi + # extension picks the transport from the ATTACH LOCATION string that + # run-integration.sh builds per $TRANSPORT: + # subprocess : `.venv/bin/vgi-explain` (stdio; extension spawns it) + # http : `http://127.0.0.1:` (worker booted with --http) + # unix : `unix://` (worker booted with --unix) + os: [ubuntu-latest, macos-latest] + transport: [subprocess, http, unix] include: - { os: ubuntu-latest, asset: haybarn_unittest-linux-amd64.zip } - { os: macos-latest, asset: haybarn_unittest-osx-arm64.zip } @@ -88,6 +96,10 @@ jobs: run: brew install libomp - name: Install the worker (from the lockfile) + # waitress (needed to serve `--http`) is pulled in transitively by the + # main `vgi-python[http]` dependency — vgi-explain has no separate `http` + # extra — so a plain `--extra dev` sync already installs it. The `dev` + # extra brings pyarrow (the worker imports it). run: uv sync --frozen --extra dev --python 3.13 - name: Download haybarn-unittest @@ -106,13 +118,17 @@ jobs: # Absolute paths: run-integration.sh cd's into a staging dir before # invoking the runner, so relative paths would not resolve. The worker # is the installed `vgi-explain` console script in the project venv. + # VGI_EXPLAIN_WORKER is used directly as the stdio LOCATION for + # subprocess, and as the process the harness boots for --http / --unix. UNITTEST="$PWD/$(find hb -name 'haybarn-unittest' -type f | head -1)" chmod +x "$UNITTEST" echo "HAYBARN_UNITTEST=$UNITTEST" >> "$GITHUB_ENV" echo "VGI_EXPLAIN_WORKER=$PWD/.venv/bin/vgi-explain" >> "$GITHUB_ENV" - - name: Run extension integration suite + - name: Run extension integration suite (${{ matrix.transport }}) run: ci/run-integration.sh + env: + TRANSPORT: ${{ matrix.transport }} lint: name: Lint diff --git a/ci/README.md b/ci/README.md index e2c487d..91ea862 100644 --- a/ci/README.md +++ b/ci/README.md @@ -24,10 +24,44 @@ extension from the Haybarn community channel: `INSTALL vgi FROM community;` right before each bare `LOAD vgi;`. `require-env` and everything else pass through untouched. 4. **Run** — [`run-integration.sh`](run-integration.sh) stages the preprocessed - tree, points `VGI_EXPLAIN_WORKER` at `uv run .venv/bin/vgi-explain`, warms the - extension cache once, then runs the suite in a single `haybarn-unittest` + tree, resolves `VGI_EXPLAIN_WORKER` per the selected transport (below), warms + the extension cache once, then runs the suite in a single `haybarn-unittest` invocation. Any failed assertion exits non-zero and fails the job. +## Transport matrix (subprocess / http / unix) + +The same suite runs over all three VGI transports — the vgi extension picks the +transport from the ATTACH `LOCATION` string that `run-integration.sh` builds per +the `TRANSPORT` env var (`subprocess` | `http` | `unix`, default `subprocess`): + +- **subprocess** — `VGI_EXPLAIN_WORKER` = the stdio command (`.venv/bin/vgi-explain`). + The extension spawns the worker per query and talks Arrow IPC over stdin/stdout. +- **http** — the script boots `vgi-explain --http --port 0 --port-file ` (cwd = + the stage dir), polls the port-file for the auto-selected port, and sets + `VGI_EXPLAIN_WORKER=http://127.0.0.1:` (bare scheme://host:port, no path). + HTTP needs waitress (pulled in by the `vgi-python[http]` main dependency) and, + on the DuckDB side, the **httpfs** extension — without it `ATTACH 'http://…'` + fails with "VGI HTTP transport requires the httpfs extension" (which the runner + would then *silently skip*). The script injects `INSTALL httpfs FROM core; LOAD + httpfs;` after each `LOAD vgi;` for the http leg only. +- **unix** — the script boots `vgi-explain --unix ` (cwd = the stage dir), + polls for the socket file, and sets `VGI_EXPLAIN_WORKER=unix://`. + +The run step **guards against the silent-skip fake-pass**: the sqllogictest +runner SKIPS (exit 0) any test whose error contains "HTTP"/"Unable to connect", +so a broken http setup would report "All tests were skipped" and go green having +run nothing. The script fails the leg if it sees that, surfacing the skip reason. + +All three streaming/buffering functions (`shap_values` table-in-out, +`shap_base_value` source, `feature_importance` buffering) work over the stateless +HTTP transport unchanged: each drains its rows within a single `process`/ +`finalize` tick and `feature_importance`'s `DrainState` already extends +`ArrowSerializableDataclass`, so the SDK round-trips state through the +continuation token correctly. No tests are gated. + +The CI `integration` job is a matrix of `transport: [subprocess, http, unix]` × +`os: [ubuntu-latest, macos-latest]`. + ## Run it locally ```bash diff --git a/ci/run-integration.sh b/ci/run-integration.sh index a5a6530..ca3e0ee 100755 --- a/ci/run-integration.sh +++ b/ci/run-integration.sh @@ -5,21 +5,47 @@ # VGI worker, using a prebuilt standalone `haybarn-unittest` and the signed # community `vgi` extension — no C++ build from source. See ci/README.md. # +# The SAME suite is exercised over three VGI transports, selected by $TRANSPORT. +# The vgi extension picks the transport from the LOCATION string the .test files +# ATTACH (`${VGI_EXPLAIN_WORKER}`): +# +# subprocess : a bare stdio command (`.venv/bin/vgi-explain`) — the extension +# spawns the worker per query and talks Arrow IPC over +# stdin/stdout. Default; current behavior. +# http : the worker is started out-of-band in `--http` mode on an auto +# port; LOCATION becomes `http://127.0.0.1:`. +# unix : the worker is started out-of-band on an AF_UNIX socket; +# LOCATION becomes `unix://`. +# # Required environment: # HAYBARN_UNITTEST path to the haybarn-unittest binary -# VGI_EXPLAIN_WORKER worker LOCATION the .test files ATTACH (a stdio command -# such as `.venv/bin/vgi-explain`, or an http:// URL) +# VGI_EXPLAIN_WORKER the stdio command that runs the worker (the installed +# `vgi-explain` console script in the project venv). Used +# directly as the LOCATION for subprocess, and as the +# process this script boots for http/unix. # Optional: +# TRANSPORT subprocess | http | unix (default: subprocess) # STAGE scratch dir for the preprocessed test tree (default: mktemp) set -euo pipefail : "${HAYBARN_UNITTEST:?path to the haybarn-unittest binary}" -: "${VGI_EXPLAIN_WORKER:?worker LOCATION (stdio command or http:// URL)}" +: "${VGI_EXPLAIN_WORKER:?worker LOCATION (stdio command running the worker)}" + +TRANSPORT="${TRANSPORT:-subprocess}" +case "$TRANSPORT" in + subprocess|http|unix) ;; + *) echo "ERROR: unknown TRANSPORT='$TRANSPORT' (expected subprocess|http|unix)" >&2; exit 2 ;; +esac HERE="$(cd "$(dirname "$0")" && pwd)" REPO="$(cd "$HERE/.." && pwd)" STAGE="${STAGE:-$(mktemp -d)}" +# The stdio command the subprocess transport ATTACHes to is also the command we +# launch out-of-band for http/unix. Capture it before we possibly overwrite +# VGI_EXPLAIN_WORKER with a URL. +WORKER_CMD="$VGI_EXPLAIN_WORKER" + echo "Staging preprocessed tests into $STAGE ..." mkdir -p "$STAGE/test/sql" for f in "$REPO"/test/sql/*.test; do @@ -27,9 +53,132 @@ for f in "$REPO"/test/sql/*.test; do done # The .test files read committed parquet fixtures by relative path -# (test/fixtures/*.parquet). The runner cd's into the stage dir, so copy them in. +# (test/fixtures/*.parquet) via DuckDB's read_parquet, which resolves them +# relative to the runner's cwd. The runner cd's into the stage dir, so copy them +# in. (For http/unix we also boot the worker with cwd = the stage dir below, so +# any relative path the worker itself resolves matches.) cp -R "$REPO/test/fixtures" "$STAGE/test/fixtures" +# --------------------------------------------------------------------------- +# Per-transport: resolve VGI_EXPLAIN_WORKER (the LOCATION) and, for the +# out-of-band transports, boot the worker server + arrange trap-cleanup. +# --------------------------------------------------------------------------- +SERVER_PID="" +SOCK="" +PORT_FILE="" + +cleanup() { + # Preserve the script's exit status: this runs on EXIT, so its own last + # command must not clobber the real exit code (a bare `[ -n "$x" ]` that is + # false returns 1 and would turn a green run red under `set -e`). + local rc=$? + if [ -n "$SERVER_PID" ]; then kill "$SERVER_PID" 2>/dev/null || true; wait "$SERVER_PID" 2>/dev/null || true; fi + if [ -n "$SOCK" ]; then rm -f "$SOCK"; fi + if [ -n "$PORT_FILE" ]; then rm -f "$PORT_FILE"; fi + return "$rc" +} +trap cleanup EXIT + +case "$TRANSPORT" in + subprocess) + echo "Transport: subprocess/stdio — VGI_EXPLAIN_WORKER=$VGI_EXPLAIN_WORKER" + ;; + + http) + # The vgi extension's HTTP transport is implemented on top of DuckDB's + # httpfs extension, so an `http://` ATTACH binds with + # "Binder Error: VGI HTTP transport requires the httpfs extension." + # unless httpfs is loaded first. (The haybarn sqllogictest runner's default + # skip list swallows any error containing "HTTP", so without this the whole + # suite would silently SKIP rather than fail — a fake pass; see the run-step + # guard below.) The .test files are transport-agnostic; inject a signed + # `INSTALL httpfs FROM core; LOAD httpfs;` right after the `LOAD vgi;` in + # each staged file, so httpfs is present only when we run over HTTP. + echo "Transport http: injecting 'LOAD httpfs' into staged tests (required for the worker HTTP RPC) ..." + for sf in "$STAGE"/test/sql/*.test; do + awk ' + { print } + /^LOAD[ \t]+vgi[ \t]*;[ \t]*$/ { + print ""; + print "statement ok"; + print "INSTALL httpfs FROM core;"; + print ""; + print "statement ok"; + print "LOAD httpfs;"; + } + ' "$sf" > "$sf.tmp" && mv "$sf.tmp" "$sf" + done + + # Boot the worker in HTTP mode on an auto-selected port. The worker writes + # the chosen port to --port-file atomically, so we watch for the file to + # appear rather than parsing stdout. HTTP mode needs the `http` extra + # (waitress); the integration job installs it via `uv sync --extra http`. + # cwd = STAGE so any relative path the worker resolves matches the runner. + PORT_FILE="$(mktemp -u "${TMPDIR:-/tmp}/exp-port.XXXXXX")" + LOG_FILE="${TMPDIR:-/tmp}/exp-http-server.log" + echo "Starting HTTP worker: $WORKER_CMD --http --port 0 --port-file $PORT_FILE (cwd=$STAGE)" + # shellcheck disable=SC2086 + ( cd "$STAGE" && exec $WORKER_CMD --http --port 0 --port-file "$PORT_FILE" ) > "$LOG_FILE" 2>&1 & + SERVER_PID=$! + + PORT="" + for _ in $(seq 1 120); do + if ! kill -0 "$SERVER_PID" 2>/dev/null; then + echo "ERROR: HTTP worker exited before reporting a port. Log:" >&2 + cat "$LOG_FILE" >&2 + exit 1 + fi + if [ -s "$PORT_FILE" ]; then + PORT="$(tr -d '[:space:]' < "$PORT_FILE")" + [ -n "$PORT" ] && break + fi + sleep 0.5 + done + if [ -z "$PORT" ]; then + echo "ERROR: timed out waiting for HTTP worker port-file. Log:" >&2 + cat "$LOG_FILE" >&2 + exit 1 + fi + # The LOCATION must be the bare scheme://host:port with NO path suffix; the + # extension POSTs each RPC method at /. + export VGI_EXPLAIN_WORKER="http://127.0.0.1:$PORT" + echo "HTTP worker ready on $VGI_EXPLAIN_WORKER (pid $SERVER_PID)" + ;; + + unix) + # Boot the worker bound to an AF_UNIX socket; it prints `UNIX:` once + # bound. We poll for the socket file to appear. cwd = STAGE (see http note). + SOCK="${TMPDIR:-/tmp}/exp-$$.sock" + rm -f "$SOCK" + LOG_FILE="${TMPDIR:-/tmp}/exp-unix-server.log" + echo "Starting unix worker: $WORKER_CMD --unix $SOCK (cwd=$STAGE)" + # shellcheck disable=SC2086 + ( cd "$STAGE" && exec $WORKER_CMD --unix "$SOCK" ) > "$LOG_FILE" 2>&1 & + SERVER_PID=$! + + READY="" + for _ in $(seq 1 120); do + if ! kill -0 "$SERVER_PID" 2>/dev/null; then + echo "ERROR: unix worker exited before binding the socket. Log:" >&2 + cat "$LOG_FILE" >&2 + exit 1 + fi + if [ -S "$SOCK" ]; then + READY=1 + break + fi + sleep 0.5 + done + if [ -z "$READY" ]; then + echo "ERROR: timed out waiting for unix worker socket. Log:" >&2 + cat "$LOG_FILE" >&2 + exit 1 + fi + export VGI_EXPLAIN_WORKER="unix://$SOCK" + echo "unix worker ready on $VGI_EXPLAIN_WORKER (pid $SERVER_PID)" + ;; +esac + cd "$STAGE" # Warm the extension cache once: vgi from the signed community channel. A miss @@ -46,7 +195,34 @@ EOF "$HAYBARN_UNITTEST" "test/_warm.test" >/dev/null 2>&1 || echo "::warning::extension warm step did not fully succeed" rm -f "$STAGE/test/_warm.test" -# Run the whole suite in one invocation, streaming the runner's native -# sqllogictest report. Any failed assertion exits non-zero and fails the job. -echo "Running suite (worker: $VGI_EXPLAIN_WORKER) ..." -"$HAYBARN_UNITTEST" "test/sql/*" +# Run the whole suite in one invocation, capturing the runner's native +# sqllogictest report so we can both stream it AND guard against a silent skip. +# +# IMPORTANT: the DuckDB/Haybarn sqllogictest runner SKIPS (not fails, exit 0) a +# test whose error message matches a built-in network-error allowlist that +# includes the substring "HTTP". So a broken HTTP transport would otherwise show +# "All tests were skipped" and the job would go GREEN having run nothing — a +# fake pass. We detect that and fail explicitly. A real run prints +# "All tests passed (N assertions ...)". +echo "Running suite (transport: $TRANSPORT, worker: $VGI_EXPLAIN_WORKER) ..." +RUN_LOG="$STAGE/run.log" +set +e +"$HAYBARN_UNITTEST" "test/sql/*" 2>&1 | tee "$RUN_LOG" +RUN_RC="${PIPESTATUS[0]}" +set -e + +if [ "$RUN_RC" -ne 0 ]; then + echo "ERROR: suite failed (transport: $TRANSPORT, rc=$RUN_RC)" >&2 + exit "$RUN_RC" +fi + +# Guard against the silent-skip fake-pass (see comment above). If every test was +# skipped — and none ran — treat it as a failure for this transport, surfacing +# the skip reason the runner reported. +if grep -q 'All tests were skipped' "$RUN_LOG"; then + echo "ERROR: every test was SKIPPED on transport '$TRANSPORT' (the runner's" >&2 + echo " built-in network-error skip swallowed the real error). This is" >&2 + echo " NOT a pass. Skip reason reported by the runner:" >&2 + grep -A3 'Skipped tests for the following reasons' "$RUN_LOG" >&2 || true + exit 1 +fi