From e095851fb78aef20e56279e72ad3fb6b45b2a5b0 Mon Sep 17 00:00:00 2001 From: berkkirik Date: Fri, 24 Apr 2026 10:00:42 +0300 Subject: [PATCH] feat: allow pinning HuggingFace revision for default checkpoint download MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ensure_default_checkpoint called snapshot_download with no revision=, silently floating to whatever commit is at the tip of openai/privacy-filter at download time. Two users running the same version of opf at different times can get different weights; there is no escape hatch for users in regulated environments who need to pin. Plumb an optional revision through ensure_default_checkpoint: - Module constant DEFAULT_HF_MODEL_REVISION (currently None) that maintainers can set to a specific commit SHA to fully pin and silence bandit B615. - Environment variable OPF_HF_REVISION so downstream users can pin without forking; when set to a non-empty value it takes precedence over the module constant. - When a revision is resolved, it is included in the "Downloading from ..." stderr message so users can see what they are getting; with no revision the message is the same as before. Default behavior is unchanged (revision=None → floats on main). 
Verified with mocked snapshot_download: - OPF_HF_REVISION=abc123def456 → revision='abc123def456' - unset → revision=None (same as today) - empty string → revision=None (empty treated as unset) - OPF_HF_REVISION=main → revision='main' (user pin to branch) --- opf/_common/checkpoint_download.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/opf/_common/checkpoint_download.py b/opf/_common/checkpoint_download.py index 743f889..ace1968 100644 --- a/opf/_common/checkpoint_download.py +++ b/opf/_common/checkpoint_download.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from pathlib import Path import shutil import sys @@ -9,6 +10,8 @@ DEFAULT_HF_MODEL_REPO: Final[str] = "openai/privacy-filter" +DEFAULT_HF_MODEL_REVISION: Final[str | None] = None +HF_MODEL_REVISION_ENV_VAR: Final[str] = "OPF_HF_REVISION" def _checkpoint_override_message() -> str: @@ -104,17 +107,20 @@ def ensure_default_checkpoint() -> str: f"{_checkpoint_override_message()}." ) from exc + revision = os.environ.get(HF_MODEL_REVISION_ENV_VAR) or DEFAULT_HF_MODEL_REVISION try: + revision_suffix = f" (revision={revision!r})" if revision else "" print( "Default OPF checkpoint not found at " f"{target}. Downloading from HuggingFace repo " - f"{DEFAULT_HF_MODEL_REPO!r} to {target}.", + f"{DEFAULT_HF_MODEL_REPO!r}{revision_suffix} to {target}.", file=sys.stderr, flush=True, ) try: snapshot_download( repo_id=DEFAULT_HF_MODEL_REPO, + revision=revision, local_dir=str(target), tqdm_class=_build_download_progress_class(), allow_patterns=["original/*"],