From c59e0b9350c52007e677e3b2febb7896c0964314 Mon Sep 17 00:00:00 2001
From: "Austin S. Lin"
Date: Mon, 25 May 2026 07:07:57 -0400
Subject: [PATCH] download_model.sh: prefer huggingface CLI when available

The plain curl path 400s on the ~430 GB PRO bundle because Hugging
Face's xet bridge rejects range-less GETs above a certain size. The
official 'hf' (or 'huggingface-cli') client is xet-bridge native and
already handles the chunked / ranged retrieval the bridge requires.
Pick it up when on PATH; otherwise fall through to the existing curl
flow unchanged.

Token is passed via HF_TOKEN env rather than --token so it does not
appear in 'ps' output of the child process.
---
 download_model.sh | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/download_model.sh b/download_model.sh
index d8114e9c..e5641573 100755
--- a/download_model.sh
+++ b/download_model.sh
@@ -165,6 +165,22 @@ download_one() {
     echo "from https://huggingface.co/$REPO"
     echo "If the download stops, run the same command again to resume it."
 
+    # Prefer the Hugging Face CLI when available: it is xet-bridge native
+    # and resumable. The plain curl path below 400s on some large files
+    # (e.g. the ~430 GB PRO bundle) because Hugging Face's xet bridge
+    # rejects range-less GETs above a certain size.
+    HF_BIN=$(command -v hf 2>/dev/null || command -v huggingface-cli 2>/dev/null || true)
+    if [ -n "$HF_BIN" ]; then
+        # Pass the token via HF_TOKEN env rather than --token so it does
+        # not appear in `ps` output of the child process.
+        if [ -n "$TOKEN" ]; then
+            HF_TOKEN="$TOKEN" "$HF_BIN" download "$REPO" "$file" --local-dir "$OUT_DIR"
+        else
+            "$HF_BIN" download "$REPO" "$file" --local-dir "$OUT_DIR"
+        fi
+        return
+    fi
+
     if [ -n "$TOKEN" ]; then
         curl -fL --progress-meter -C - -H "Authorization: Bearer $TOKEN" -o "$part" "$url"
     else