From e4aedcb49b768154402f1010e15b17867fe5c185 Mon Sep 17 00:00:00 2001 From: Jason Robert Date: Tue, 5 May 2026 19:14:10 -0400 Subject: [PATCH 1/2] feat(registry): tag-based versioning with # ref syntax and CDN-bypass - New ref grammar: [@][#] (was @@version) - Auto-discover versions from git tags; semver-sort with v-prefix strip - Fall back to default branch HEAD when no tags exist - Any tag/branch/SHA can be pinned via #ref - Drop versions: field from index.yaml (silently ignored for back-compat) - Cache by commit SHA (sha[:12]); mutable refs re-resolve fresh - Atomic cache writes (tempfile.mkdtemp + os.replace) - Index loads always resolve ref->SHA, fetching at //index.yaml to bypass raw.githubusercontent.com Fastly CDN - registry list/show display 'Latest tags:' footer - Path registries hard-error on non-empty refs - Empty # and multiple @/# are hard errors Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 5 +- docs/cli-reference.md | 31 +- docs/design/registry.md | 108 +++-- src/conductor/cli/app.py | 12 +- src/conductor/cli/registry.py | 65 ++- src/conductor/registry/cache.py | 138 ++++--- src/conductor/registry/github.py | 91 ++++- src/conductor/registry/index.py | 78 ++-- src/conductor/registry/resolver.py | 66 +++- src/conductor/registry/version_resolver.py | 85 ++++ tests/test_cli/test_registry_commands.py | 31 +- tests/test_registry/test_cache.py | 394 ++++++++++++++----- tests/test_registry/test_github.py | 126 ++++++ tests/test_registry/test_index.py | 194 +++++---- tests/test_registry/test_integration.py | 86 ++-- tests/test_registry/test_resolver.py | 179 +++++++-- tests/test_registry/test_version_resolver.py | 139 +++++++ 17 files changed, 1377 insertions(+), 451 deletions(-) create mode 100644 src/conductor/registry/version_resolver.py create mode 100644 tests/test_registry/test_version_resolver.py diff --git a/README.md b/README.md index 8f089fc..5e3b659 100644 --- a/README.md +++ b/README.md @@ -231,8 +231,9 @@ conductor registry add official myorg/conductor-workflows --default conductor registry list official # Run a workflow from the registry -conductor run qa-bot # latest from default registry -conductor run qa-bot@official@1.2.3 # specific version +conductor run qa-bot # latest from default registry +conductor run 'qa-bot@official#v1.2.3' # specific tag (quote the #) +conductor run 'qa-bot@official#main' # branch HEAD (re-resolved on fetch) ``` See [docs/design/registry.md](docs/design/registry.md) for the full design. diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 7d67aab..5db4895 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -224,12 +224,12 @@ conductor registry [OPTIONS] | Subcommand | Description | |------------|-------------| -| `list [NAME]` | List registries, or list workflows in a specific registry | +| `list [NAME]` | List configured registries, or list workflows in a specific registry. For GitHub registries, the per-registry listing also prints a "Latest tags:" footer with up to 5 newest tags. | | `add ` | Add a new registry (GitHub `owner/repo` or local path) | | `remove ` | Remove a registry | | `set-default ` | Set the default registry | -| `update [NAME]` | Update cached registry index (all or specific) | -| `show ` | Show registry details and status | +| `update [NAME]` | Refresh the cached index for one or all registries. For GitHub registries, the index is re-fetched via a SHA-pinned raw URL that bypasses Fastly's CDN, so updates always reflect the current state of the registry repo. | +| `show ` | Show details for a single configured registry: type, source, default status, and (for GitHub registries) a "Latest tags:" footer listing up to 5 newest tags discovered on the registry repo. Use `list ` to inspect the workflows it contains. | ### Options @@ -267,19 +267,34 @@ conductor registry remove local ### Running Workflows from a Registry -Once a registry is configured, `conductor run` accepts short workflow names: +Once a registry is configured, `conductor run` accepts short workflow names +of the form `[@][#]`. `@` selects the registry; +`#` selects a git ref (tag, branch, or commit SHA). Quote the reference in +shell commands so `#` isn't treated as a comment. ```bash -# Run from default registry (latest version) +# Run from default registry (latest tag, or default-branch HEAD if no tags) conductor run qa-bot -# Run from a specific registry +# Run from a specific registry (latest) conductor run qa-bot@official -# Run a specific version -conductor run qa-bot@official@1.2.3 +# Pin a specific tag +conductor run 'qa-bot@official#v1.2.3' + +# Pin the default-branch HEAD or any other branch +conductor run 'qa-bot@official#main' + +# Pin a specific commit SHA +conductor run 'qa-bot@official#a1b2c3d' + +# Pin a tag in the default registry (empty registry segment) +conductor run 'qa-bot@#v1.2.3' ``` +Path-type registries do not support `#` and will reject any reference +that includes one. + See [design/registry.md](./design/registry.md) for the full design. ## Environment Variables diff --git a/docs/design/registry.md b/docs/design/registry.md index 614af58..b0b9744 100644 --- a/docs/design/registry.md +++ b/docs/design/registry.md @@ -92,16 +92,30 @@ The `type` flag is optional; `add` infers `github` if `` matches ### Reference syntax ``` -[@][@] +[@][#] ``` +`@` separates the workflow name from the registry name. `#` separates the +ref (a git tag, branch name, or commit SHA) from the rest. + Resolution rules, in order: 1. If the argument exists as a file on disk, treat it as a local path. 2. Otherwise parse as a registry reference. 3. Missing `@` → use the configured default registry. -4. Missing `@` → use `latest` (highest version listed in the - registry index). +4. An empty registry between `@` and `#` (e.g. `name@#ref`) is allowed and + means "use the default registry at this ref". +5. Missing `#` → use `latest`. `latest` resolves to the newest + semver-sorted git tag (with a leading `v` stripped for parsing). If the + registry repo has no tags, `latest` falls back to the default branch HEAD. +6. An empty ref after `#` (e.g. `name@reg#`) is a hard error. +7. Multiple `@` or multiple `#` in a single reference are hard errors. +8. Path-type registries do not support `#`. Passing + `name@local#anything` against a path registry is a hard error. + +Note: `#` is significant to most shells. Quote registry references in +shell commands (`conductor run 'qa-bot@team#v1.2.3'`) and avoid spaces +around the `#`. Examples: @@ -109,8 +123,10 @@ Examples: conductor run ./my-workflow.yaml # local file (unchanged) conductor run qa-bot # latest from default registry conductor run qa-bot@team # latest from `team` registry -conductor run qa-bot@team@1.2.3 # exact version from `team` -conductor run qa-bot@@1.2.3 # exact version from default registry +conductor run 'qa-bot@team#v1.2.3' # tag v1.2.3 from `team` +conductor run 'qa-bot@#v1.2.3' # tag v1.2.3 from default registry +conductor run 'qa-bot@team#main' # default-branch HEAD of `team` +conductor run 'qa-bot@team#a1b2c3d' # specific commit SHA ``` ### Registry index @@ -122,35 +138,63 @@ workflows: qa-bot: description: "Simple Q&A workflow" path: workflows/qa-bot.yaml # path relative to registry root - versions: ["1.0.0", "1.1.0", "2.0.0"] code-review: description: "Multi-agent code review" path: workflows/code-review.yaml - versions: ["0.3.0"] ``` -For GitHub registries, versions correspond to git tags on the registry repo. -For local registries, the maintainer maintains the version list directly. - -The index is the single source of truth for what workflows exist and what -versions are available. Conductor does not auto-discover YAML files in a -registry — the maintainer curates the index. +The index is the single source of truth for what workflows exist and where +they live in the repo. Available versions are **not** listed in the index — +for GitHub registries they are auto-discovered from the registry repo's git +tags; for path registries no versioning exists. Conductor does not +auto-discover YAML files in a registry — the maintainer curates the index. + +### Versioning + +Versioning is automatic and tag-driven for GitHub registries: + +- **Auto-discovery**: available versions are the registry repo's git tags, + fetched on demand via the GitHub API. Maintainers do not list versions in + `index.yaml`. +- **`latest` resolution**: `latest` resolves to the newest semver-sorted tag + (a leading `v` is stripped before parsing, so `v1.2.3` and `1.2.3` sort + identically). If the repo has no tags, `latest` falls back to the + default-branch HEAD. +- **Flexible refs**: any tag, branch, or commit SHA can be pinned via + `#`. Branch refs are re-resolved to their current commit SHA at + fetch time, so a branch ref always refers to the latest commit on that + branch when a fresh fetch is performed. +- **SHA-based caching**: workflows are cached by the resolved commit SHA + (`////`). When a branch advances, the + cache key changes automatically — no manual invalidation needed for the + next fresh fetch. +- **CDN bypass**: index fetches resolve the ref to a commit SHA via the + GitHub API, then download from + `raw.githubusercontent.com////index.yaml`. The unique + per-SHA URL bypasses Fastly's CDN cache, so you always see the current + index for a given ref without needing a `--force` flag. +- **Path registries**: do not support refs at all. Local registries are + always read directly from disk. ### Caching Fetched workflows are cached at: ``` -~/.conductor/cache/registries//// +~/.conductor/cache/registries//// ``` -- Cache is keyed by `(registry, workflow, version)`. -- Explicit versions are immutable: once cached, never re-fetched. -- `latest` is re-resolved on `conductor registry update`. Each resolved - version is cached in its own directory. +- Cache is keyed by `(registry, workflow, resolved-commit-sha)`. +- A given commit SHA is immutable, so cached entries are never re-fetched + for the same SHA. Branch refs re-resolve to a (possibly new) SHA on each + fresh fetch, which transparently invalidates the cache. +- Workflow files are first downloaded into a temp directory and then renamed + atomically into the final cache path, so a partial fetch never leaves a + half-populated entry visible to other commands. - Index files are cached separately at `~/.conductor/cache/registries//index.` and refreshed - on `update`. + on `update`. Index fetches always go through a SHA-pinned raw URL to + bypass the CDN. This produces a stable on-disk path for every registry-fetched workflow, which is required by: @@ -168,17 +212,17 @@ operation. Registry maintainers should keep a workflow and its assets in the same directory. For GitHub registries, sibling fetch uses the Git Trees API to enumerate the -directory and `raw.githubusercontent.com` to download files at the pinned -ref (tag). +directory and SHA-pinned `raw.githubusercontent.com` URLs to download files +at the resolved commit SHA. ### Run / resume / validate These commands accept either a local path or a registry reference: ``` -conductor run qa-bot@team@1.2.3 --input question="What is X?" -conductor resume qa-bot@team@1.2.3 -conductor validate qa-bot@team@1.2.3 +conductor run 'qa-bot@team#v1.2.3' --input question="What is X?" +conductor resume 'qa-bot@team#v1.2.3' +conductor validate 'qa-bot@team#v1.2.3' ``` The resolver runs first, returns a concrete `Path` to the cached file, and @@ -203,10 +247,10 @@ recent, undocumented in many places, and supersede-able by a 5-line | Module | Responsibility | | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `config.py` | Pydantic models for `registries.toml`. Atomic load/save. Handles missing-file case (returns empty config). | -| `resolver.py` | Parses `name[@registry][@version]`. Decides file-vs-ref. Returns a `ResolvedRef` with registry name, workflow name, version, and the registry config. | -| `index.py` | Loads and parses `index.yaml`/`index.json`. Validates structure. Resolves `latest` to a concrete version. Backed by either the local FS or `github.py`. | -| `cache.py` | Manages `~/.conductor/cache/registries/`. `get_or_fetch(ref) -> Path`. Idempotent. Fetches sibling files. Knows when to refetch (`latest`) vs. reuse (explicit version). | -| `github.py` | Public-only GitHub helpers: fetch a file at a ref via raw URL, list tags via the REST API for `latest`, list directory contents via Git Trees API for sibling enumeration. Uses `httpx`, no auth. | +| `resolver.py` | Parses `name[@registry][#ref]`. Decides file-vs-ref. Returns a `ResolvedRef` with registry name, workflow name, ref, and the registry config. Rejects multiple `@`/`#`, empty `#`, and `#ref` against path registries. | +| `index.py` | Loads and parses `index.yaml`/`index.json`. Validates structure. Resolves `latest` to a concrete tag (or default-branch HEAD if no tags). Backed by either the local FS or `github.py`. | +| `cache.py` | Manages `~/.conductor/cache/registries/`. `get_or_fetch(ref) -> Path`. Idempotent. Fetches sibling files. Cache is keyed by resolved commit SHA; writes are staged in a temp dir and renamed atomically. | +| `github.py` | Public-only GitHub helpers: resolve a ref to a commit SHA via the GitHub API, fetch files at a SHA via SHA-pinned raw URLs (bypassing the CDN), list tags via the REST API for `latest`, list directory contents via Git Trees API for sibling enumeration. Uses `httpx`, no auth. | ### CLI: `src/conductor/cli/registry.py` @@ -244,10 +288,10 @@ file of the same name. | Decision | Choice | Why | | ----------------------------------------- | ------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | | Named registries vs always-inline source | Named, configured once | Mirrors npm/cargo. Short refs in `run` commands. Default registry makes the common case zero-config. | -| Versioning | Explicit, npm-style | Reproducibility. Lockfile-friendly later. Matches existing GitHub tag conventions. | -| Local-registry layout | Directory + `index.yaml` | Consistent with GitHub registries. Maintainer controls what's exposed and at what versions. | -| Caching strategy | Local cache, refresh on `registry update` or new version | Avoids per-run network. Stable on-disk paths needed for `!file` and checkpoints. | -| Reference syntax | `name@registry@version` | Visually unambiguous. `@` parses cleanly. Supports either-or-both omissions. | +| Versioning | Auto-discovered from git tags; pin any tag/branch/SHA via `#ref` | Reproducibility without forcing maintainers to maintain a parallel version list. `latest` follows newest semver tag, falling back to default-branch HEAD. Branches and SHAs are first-class refs. | +| Local-registry layout | Directory + `index.yaml` | Consistent with GitHub registries. Maintainer controls what's exposed. Local registries do not support refs. | +| Caching strategy | Local cache keyed by resolved commit SHA, atomic writes | Avoids per-run network. SHA-based keys make branch refs self-invalidate on a fresh fetch. SHA-pinned raw URLs bypass the CDN, so no `--force` flag is needed. | +| Reference syntax | `name@registry#ref` | Visually unambiguous: `@` selects the registry, `#` selects a git ref (tag, branch, or SHA). Both segments are independently optional. | | Publish / publish validation | Dropped | Distribution is `git push` + tag. Validation belongs in user CI, not the CLI. | | Authenticated/private registries | Out of scope v1 | Public raw URLs cover the common case. Token support can come later via a registry config field. | | SemVer ranges | Out of scope v1 | Adds resolver complexity for marginal benefit until ecosystems exist. | diff --git a/src/conductor/cli/app.py b/src/conductor/cli/app.py index fd80ef1..73f3f03 100644 --- a/src/conductor/cli/app.py +++ b/src/conductor/cli/app.py @@ -370,7 +370,7 @@ def run( registry_name=ref.registry_name, registry_entry=ref.registry_entry, workflow_name=ref.workflow, - version=ref.version, + ref=ref.ref, ) except RegistryError as e: print_error(e) @@ -518,7 +518,7 @@ def validate( registry_name=ref.registry_name, registry_entry=ref.registry_entry, workflow_name=ref.workflow, - version=ref.version, + ref=ref.ref, ) except RegistryError as e: print_error(e) @@ -577,7 +577,7 @@ def show( registry_name=ref.registry_name, registry_entry=ref.registry_entry, workflow_name=ref.workflow, - version=ref.version, + ref=ref.ref, ) except RegistryError as e: print_error(e) @@ -600,8 +600,8 @@ def show( if ref.kind == "registry": output_console.print(f"[bold]Registry:[/bold] {ref.registry_name}") - if ref.version: - output_console.print(f"[bold]Version:[/bold] {ref.version}") + if ref.ref: + output_console.print(f"[bold]Version:[/bold] {ref.ref}") from rich.table import Table @@ -773,7 +773,7 @@ def resume( registry_name=ref.registry_name, registry_entry=ref.registry_entry, workflow_name=ref.workflow, - version=ref.version, + ref=ref.ref, ) except RegistryError as e: print_error(e) diff --git a/src/conductor/cli/registry.py b/src/conductor/cli/registry.py index d95bbd5..3eae683 100644 --- a/src/conductor/cli/registry.py +++ b/src/conductor/cli/registry.py @@ -19,7 +19,11 @@ save_config, ) from conductor.registry.errors import RegistryError +from conductor.registry.github import list_tags, parse_github_source from conductor.registry.index import load_index +from conductor.registry.version_resolver import _sort_tags + +_MAX_DISPLAY_TAGS = 5 registry_app = typer.Typer( name="registry", @@ -83,16 +87,44 @@ def _list_registry_workflows(name: str) -> None: table = Table(title=f"Workflows in '{name}'") table.add_column("Name", style="cyan") table.add_column("Description") - table.add_column("Versions", style="green") for wf_name, info in index.workflows.items(): - versions = ", ".join(info.versions) if info.versions else "-" - table.add_row(wf_name, info.description or "-", versions) + table.add_row(wf_name, info.description or "-") output_console.print(table) + + tags_line = _format_latest_tags(entry) + if tags_line is not None: + output_console.print(f"\nLatest tags: {tags_line}") + output_console.print("\n[dim]Use 'conductor show ' to see inputs and details.[/dim]") +def _format_latest_tags(entry: RegistryEntry) -> str | None: + """Return a formatted "Latest tags" string for a registry, or None for path registries. + + For github registries, fetches tags via the API and returns up to + ``_MAX_DISPLAY_TAGS`` newest tags (semver-sorted). Returns "(unavailable)" + on any fetch failure and "(no tags)" if the repo has none. + """ + if entry.type != RegistryType.github: + return None + + try: + owner, repo = parse_github_source(entry.source) + tags = list_tags(owner, repo) + except Exception: + return "(unavailable)" + + if not tags: + return "(no tags)" + + sorted_tags = _sort_tags(tags) + display = sorted_tags[:_MAX_DISPLAY_TAGS] + suffix = ", ..." if len(sorted_tags) > _MAX_DISPLAY_TAGS else "" + return ", ".join(display) + suffix + + @registry_app.command() def add( name: Annotated[str, typer.Argument(help="Name for the registry.")], @@ -154,14 +186,28 @@ def set_default( raise typer.Exit(code=1) from None -@registry_app.command() +@registry_app.command( + help=( + "Refresh registry index and clear cached workflows.\n\n" + "Re-fetches the latest index from each configured registry and clears " + "locally-cached workflow files. Index fetches always bypass GitHub's CDN " + "cache (via SHA-based URLs), so this primarily clears cached workflow " + "contents pinned to mutable refs (branches)." + ), +) def update( name: Annotated[ str | None, typer.Argument(help="Registry to update (all if omitted)."), ] = None, ) -> None: - """Refresh registry index and clear cached workflows.""" + """Refresh registry index and clear cached workflows. + + Re-fetches the latest index from each configured registry and clears + locally-cached workflow files. Index fetches always bypass GitHub's CDN + cache (via SHA-based URLs), so this primarily clears cached workflow + contents pinned to mutable refs (branches). + """ try: config = load_config() @@ -218,11 +264,14 @@ def _show_registry(name: str, entry: RegistryEntry) -> None: table = Table(title="Workflows") table.add_column("Name", style="cyan") table.add_column("Description") - table.add_column("Versions", style="green") for wf_name, info in index.workflows.items(): - versions = ", ".join(info.versions) if info.versions else "-" - table.add_row(wf_name, info.description or "-", versions) + table.add_row(wf_name, info.description or "-") output_console.print(table) + + tags_line = _format_latest_tags(entry) + if tags_line is not None: + output_console.print(f"\nLatest tags: {tags_line}") + output_console.print("\n[dim]Use 'conductor show ' to see inputs and details.[/dim]") diff --git a/src/conductor/registry/cache.py b/src/conductor/registry/cache.py index 5803a36..4ba2420 100644 --- a/src/conductor/registry/cache.py +++ b/src/conductor/registry/cache.py @@ -10,19 +10,21 @@ Cache layout:: - /cache/registries//// + /cache/registries//// """ from __future__ import annotations import os import shutil +import tempfile from pathlib import Path from conductor.registry.config import RegistryEntry, RegistryType from conductor.registry.errors import RegistryError from conductor.registry.github import fetch_file, list_directory, parse_github_source -from conductor.registry.index import load_index, resolve_latest +from conductor.registry.index import load_index +from conductor.registry.version_resolver import materialize_to_sha, resolve_ref # fetch_file returns bytes; list_directory returns filenames (not full paths) @@ -45,32 +47,33 @@ def get_cache_base() -> Path: def get_cached_workflow_path( registry_name: str, workflow_name: str, - version: str, + sha: str, ) -> Path | None: """Return the cached workflow YAML path if it exists, else ``None``. Looks for the workflow at:: - //// + //// The filename is derived from a glob of ``*.yaml`` / ``*.yml`` files in the - version directory. Returns the first YAML file found (there should be - exactly one workflow file per cached version directory). + SHA directory. Returns the first YAML file found (there should be + exactly one workflow file per cached SHA directory). Args: registry_name: Name of the registry. workflow_name: Name of the workflow. - version: Resolved version string. + sha: Full immutable commit SHA. The first 12 chars are used as the + on-disk directory name. Returns: ``Path`` to the cached workflow YAML, or ``None`` when not cached. """ - version_dir = get_cache_base() / registry_name / workflow_name / version - if not version_dir.is_dir(): + sha_dir = get_cache_base() / registry_name / workflow_name / sha[:12] + if not sha_dir.is_dir(): return None for ext in ("*.yaml", "*.yml"): - matches = list(version_dir.glob(ext)) + matches = list(sha_dir.glob(ext)) if matches: return matches[0] return None @@ -85,31 +88,30 @@ def fetch_workflow( registry_name: str, registry_entry: RegistryEntry, workflow_name: str, - version: str | None = None, + ref: str | None = None, ) -> Path: """Fetch a workflow from a registry and cache it locally. - Steps: + For **path** registries, reads directly from the source directory (no + caching). The ``ref`` argument must be ``None`` — :func:`resolve_ref` + raises if a ref is provided for a path registry. - 1. Load the registry index. - 2. Resolve version (if ``None``, resolve ``latest``). - 3. Check cache — return cached path if explicit version already present. - 4. Fetch the workflow file **and** sibling files in the same directory. - 5. Write everything to the cache directory. - 6. Return the ``Path`` to the cached workflow YAML. + For **github** registries: - For **GitHub** registries files are fetched at the git tag matching the - version via :func:`~conductor.registry.github.fetch_file` and siblings - are enumerated with :func:`~conductor.registry.github.list_directory`. - - For **path** registries files are copied from the source directory to - guarantee a stable snapshot even when the source changes. + 1. Resolve ``ref`` (or "latest") to a concrete git ref name. + 2. Materialize that ref to an immutable commit SHA. + 3. Return the cached workflow path if already present. + 4. Otherwise, load the registry index **at that SHA** (so the index and + workflow file are guaranteed to come from the same commit), look up + the workflow path, fetch the workflow + sibling files into a temp + directory, and atomically rename it into the cache. Args: registry_name: Configured registry name. registry_entry: The registry definition (type + source). workflow_name: Workflow key as listed in the registry index. - version: Explicit version string, or ``None`` for ``latest``. + ref: Explicit git ref (tag, branch, or SHA), or ``None`` for the + registry's default (latest tag, falling back to default branch). Returns: Path to the cached workflow YAML file. @@ -117,19 +119,19 @@ def fetch_workflow( Raises: RegistryError: On fetch failure, missing workflow, or I/O errors. """ - # 1. Load the index - index = load_index(registry_entry) - - # 2. Look up workflow metadata from the index - if workflow_name not in index.workflows: - raise RegistryError( - f"Workflow '{workflow_name}' not found in registry '{registry_name}'", - suggestion=f"Run 'conductor registry list {registry_name}' to see available workflows.", - ) - workflow_info = index.workflows[workflow_name] - - # 3. For path registries, read directly from source (no caching, no versioning) + # Path registries: read directly from source. resolve_ref raises if a + # ref was supplied, propagating a clear error to the caller. if registry_entry.type == RegistryType.path: + resolve_ref(registry_entry, ref) + index = load_index(registry_entry) + if workflow_name not in index.workflows: + raise RegistryError( + f"Workflow '{workflow_name}' not found in registry '{registry_name}'", + suggestion=( + f"Run 'conductor registry list {registry_name}' to see available workflows." + ), + ) + workflow_info = index.workflows[workflow_name] source_path = Path(registry_entry.source) / workflow_info.path if not source_path.exists(): raise RegistryError( @@ -139,33 +141,49 @@ def fetch_workflow( ) return source_path - # 4. For GitHub registries, resolve version - if version is None: - version = resolve_latest(index, workflow_name) + # GitHub registry: resolve ref → SHA, then check cache. + resolved_ref = resolve_ref(registry_entry, ref) + sha = materialize_to_sha(registry_entry, resolved_ref) - # 5. Check cache (explicit versions are immutable) - cached = get_cached_workflow_path(registry_name, workflow_name, version) + cached = get_cached_workflow_path(registry_name, workflow_name, sha) if cached is not None: return cached - # 6. Prepare cache directory - version_dir = get_cache_base() / registry_name / workflow_name / version - version_dir.mkdir(parents=True, exist_ok=True) + # Load the index pinned to the SHA so the workflow path comes from the + # exact commit we're about to fetch. + index = load_index(registry_entry, ref=sha) + if workflow_name not in index.workflows: + raise RegistryError( + f"Workflow '{workflow_name}' not found in registry '{registry_name}'", + suggestion=f"Run 'conductor registry list {registry_name}' to see available workflows.", + ) + workflow_info = index.workflows[workflow_name] + + # Atomically write to cache: fetch into a tmp dir under the workflow + # parent (so the rename is intra-filesystem), then os.replace(). + parent = get_cache_base() / registry_name / workflow_name + parent.mkdir(parents=True, exist_ok=True) + final_dir = parent / sha[:12] - # 7. Fetch from GitHub + tmp_dir = Path(tempfile.mkdtemp(prefix=".tmp-", dir=parent)) try: - _fetch_github(registry_entry, workflow_info.path, version, version_dir) - except RegistryError: + _fetch_github(registry_entry, workflow_info.path, sha, tmp_dir) + try: + os.replace(tmp_dir, final_dir) + except OSError: + # Likely a race: another process populated final_dir first. If it + # exists, drop our tmp work and use the cached entry. Otherwise + # re-raise. + if final_dir.exists(): + shutil.rmtree(tmp_dir, ignore_errors=True) + else: + raise + except Exception: + shutil.rmtree(tmp_dir, ignore_errors=True) raise - except Exception as exc: - raise RegistryError( - f"Failed to fetch workflow '{workflow_name}' from registry '{registry_name}': {exc}", - suggestion="Check your network connection and registry configuration.", - ) from exc - # 8. Return the cached workflow path workflow_filename = Path(workflow_info.path).name - result = version_dir / workflow_filename + result = final_dir / workflow_filename if not result.exists(): raise RegistryError( f"Workflow file '{workflow_filename}' not found in cache after fetch", @@ -182,7 +200,7 @@ def fetch_workflow( def _fetch_github( registry_entry: RegistryEntry, workflow_path: str, - version: str, + sha: str, dest_dir: Path, ) -> None: """Fetch a workflow and its sibling files from a GitHub registry. @@ -190,7 +208,7 @@ def _fetch_github( Args: registry_entry: Registry entry with ``source`` as ``owner/repo``. workflow_path: Relative path to the workflow YAML in the repo. - version: Git ref (tag) to fetch at. + sha: Immutable commit SHA to fetch at. dest_dir: Local directory to write files into. """ owner, repo = parse_github_source(registry_entry.source) @@ -199,12 +217,12 @@ def _fetch_github( workflow_filename = workflow_p.name # Fetch the workflow file itself (returns bytes) - content = fetch_file(owner, repo, workflow_path, ref=version) + content = fetch_file(owner, repo, workflow_path, ref=sha) (dest_dir / workflow_filename).write_bytes(content) # Fetch sibling files — list_directory returns filenames (not full paths) try: - sibling_names = list_directory(owner, repo, parent_dir, ref=version) + sibling_names = list_directory(owner, repo, parent_dir, ref=sha) except Exception: # If listing fails, we already have the workflow file return @@ -214,7 +232,7 @@ def _fetch_github( continue # already fetched sibling_repo_path = f"{parent_dir}/{name}" if parent_dir != "." else name try: - sibling_content = fetch_file(owner, repo, sibling_repo_path, ref=version) + sibling_content = fetch_file(owner, repo, sibling_repo_path, ref=sha) (dest_dir / name).write_bytes(sibling_content) except Exception: # Best-effort for siblings — don't fail the whole fetch diff --git a/src/conductor/registry/github.py b/src/conductor/registry/github.py index 9c56237..5d8b8c4 100644 --- a/src/conductor/registry/github.py +++ b/src/conductor/registry/github.py @@ -119,12 +119,15 @@ def fetch_file_text(owner: str, repo: str, path: str, ref: str = "main") -> str: return fetch_file(owner, repo, path, ref).decode("utf-8") +_MAX_TAGS = 1000 + + def list_tags(owner: str, repo: str) -> list[str]: """List all git tags for a repository, newest first. Uses GET /repos/{owner}/{repo}/tags from the GitHub REST API. - Returns just the tag names as strings. - No pagination in v1 — returns first page (up to 100). + Follows ``Link: <...>; rel="next"`` headers to paginate, capping at + ``_MAX_TAGS`` (1000) tags total to prevent runaway loops. Args: owner: Repository owner. @@ -136,18 +139,92 @@ def list_tags(owner: str, repo: str) -> list[str]: Raises: RegistryError: If the API request fails. """ - url = f"{GITHUB_API_BASE}/repos/{owner}/{repo}/tags" + url: str | None = f"{GITHUB_API_BASE}/repos/{owner}/{repo}/tags?per_page=100" + tags: list[str] = [] + while url is not None and len(tags) < _MAX_TAGS: + try: + response = httpx.get( + url, + headers=_build_headers(api=True), + timeout=DEFAULT_TIMEOUT, + follow_redirects=True, + ) + except httpx.TimeoutException as exc: + raise RegistryError(f"Timeout listing tags for {owner}/{repo}") from exc + except httpx.HTTPError as exc: + raise RegistryError(f"HTTP error listing tags for {owner}/{repo}: {exc}") from exc + + _raise_for_status(response, context=f"Listing tags for {owner}/{repo}") + tags.extend(tag["name"] for tag in response.json()) + + next_link = response.links.get("next") + url = next_link["url"] if next_link else None + + return tags[:_MAX_TAGS] + + +def get_default_branch(owner: str, repo: str) -> str: + """Get the default branch name for a GitHub repository. + + Uses GET /repos/{owner}/{repo} from the GitHub REST API and returns + the ``default_branch`` field. + + Args: + owner: Repository owner. + repo: Repository name. + + Returns: + Default branch name (e.g. "main" or "master"). + + Raises: + RegistryError: If the request fails or the repo is not found. + """ + url = f"{GITHUB_API_BASE}/repos/{owner}/{repo}" + try: + response = httpx.get( + url, headers=_build_headers(api=True), timeout=DEFAULT_TIMEOUT, follow_redirects=True + ) + except httpx.TimeoutException as exc: + raise RegistryError(f"Timeout fetching default branch for {owner}/{repo}") from exc + except httpx.HTTPError as exc: + raise RegistryError( + f"HTTP error fetching default branch for {owner}/{repo}: {exc}" + ) from exc + + _raise_for_status(response, context=f"Fetching default branch for {owner}/{repo}") + return response.json()["default_branch"] + + +def resolve_ref_to_sha(owner: str, repo: str, ref: str) -> str: + """Resolve a git ref (branch, tag, or short SHA) to a full commit SHA. + + Uses GET /repos/{owner}/{repo}/commits/{ref} which accepts any kind of + ref — branch names, tag names, or full/short SHAs — and returns the + commit metadata. We return the full ``sha`` field. + + Args: + owner: Repository owner. + repo: Repository name. + ref: A branch name, tag name, or commit SHA (full or short). + + Returns: + The full 40-character commit SHA. + + Raises: + RegistryError: If the ref cannot be resolved or request fails. + """ + url = f"{GITHUB_API_BASE}/repos/{owner}/{repo}/commits/{ref}" try: response = httpx.get( url, headers=_build_headers(api=True), timeout=DEFAULT_TIMEOUT, follow_redirects=True ) except httpx.TimeoutException as exc: - raise RegistryError(f"Timeout listing tags for {owner}/{repo}") from exc + raise RegistryError(f"Timeout resolving ref {ref} for {owner}/{repo}") from exc except httpx.HTTPError as exc: - raise RegistryError(f"HTTP error listing tags for {owner}/{repo}: {exc}") from exc + raise RegistryError(f"HTTP error resolving ref {ref} for {owner}/{repo}: {exc}") from exc - _raise_for_status(response, context=f"Listing tags for {owner}/{repo}") - return [tag["name"] for tag in response.json()] + _raise_for_status(response, context=f"Resolving ref {ref} for {owner}/{repo}") + return response.json()["sha"] def list_directory(owner: str, repo: str, path: str, ref: str = "main") -> list[str]: diff --git a/src/conductor/registry/index.py b/src/conductor/registry/index.py index 2539cae..e4a3553 100644 --- a/src/conductor/registry/index.py +++ b/src/conductor/registry/index.py @@ -15,7 +15,6 @@ from conductor.registry.config import RegistryEntry, RegistryType from conductor.registry.errors import RegistryError -_GITHUB_BRANCHES = ("main", "master") _INDEX_FILENAMES = ("index.yaml", "index.json") @@ -30,7 +29,6 @@ class WorkflowInfo(BaseModel): description: str = "" path: str """Relative path from registry root to the workflow YAML.""" - versions: list[str] = [] class RegistryIndex(BaseModel): @@ -44,17 +42,23 @@ class RegistryIndex(BaseModel): # --------------------------------------------------------------------------- -def load_index(entry: RegistryEntry) -> RegistryIndex: +def load_index(entry: RegistryEntry, ref: str | None = None) -> RegistryIndex: """Load and parse the index for a registry. For path registries: reads ``index.yaml`` or ``index.json`` from the - source directory. + source directory. The ``ref`` argument is ignored for path registries + since they have no concept of refs. - For GitHub registries: fetches from ``raw.githubusercontent.com`` on the - default branch (``main``, falling back to ``master``). + For GitHub registries: resolves ``ref`` to an immutable commit SHA and + fetches the index file at that SHA. When ``ref`` is ``None`` or + ``"latest"``, the repository's default branch is queried first and then + resolved to a SHA. Pinning to a SHA bypasses Fastly's CDN cache for + ``raw.githubusercontent.com`` because each commit produces a unique URL. Args: entry: The registry entry describing the backend type and source. + ref: Optional git ref (branch, tag, or SHA) for GitHub registries. + Defaults to the repository's default branch. Returns: A parsed ``RegistryIndex``. @@ -64,29 +68,7 @@ def load_index(entry: RegistryEntry) -> RegistryIndex: """ if entry.type == RegistryType.path: return _load_path_index(entry.source) - return _load_github_index(entry.source) - - -def resolve_latest(index: RegistryIndex, workflow_name: str) -> str: - """Resolve 'latest' to the last version in the versions list. - - Args: - index: The registry index to look up. - workflow_name: Name of the workflow. - - Returns: - The last element of the workflow's versions list. - - Raises: - RegistryError: If the workflow is not found or has no versions listed. - """ - info = get_workflow_info(index, workflow_name) - if not info.versions: - raise RegistryError( - f"Workflow '{workflow_name}' has no versions listed", - suggestion="Add at least one version to the workflow entry in the registry index.", - ) - return info.versions[-1] + return _load_github_index(entry.source, ref) def get_workflow_info(index: RegistryIndex, workflow_name: str) -> WorkflowInfo: @@ -194,25 +176,41 @@ def _parse_json_file(path: Path) -> RegistryIndex: return _parse_index_data(data, str(path)) -def _load_github_index(source: str) -> RegistryIndex: - """Fetch index from a GitHub repository using the shared GitHub helpers.""" - from conductor.registry.github import fetch_file_text, parse_github_source +def _load_github_index(source: str, ref: str | None) -> RegistryIndex: + """Fetch index from a GitHub repository, pinned to an immutable SHA. + + Resolves ``ref`` to a commit SHA via the GitHub API before fetching, so + the resulting raw.githubusercontent.com URL is unique per commit and + bypasses Fastly's CDN cache. + """ + from conductor.registry.github import ( + fetch_file_text, + get_default_branch, + parse_github_source, + resolve_ref_to_sha, + ) owner, repo = parse_github_source(source) + if ref is None or ref == "latest": + branch = get_default_branch(owner, repo) + sha = resolve_ref_to_sha(owner, repo, branch) + else: + sha = resolve_ref_to_sha(owner, repo, ref) + for filename in _INDEX_FILENAMES: - for branch in _GITHUB_BRANCHES: - try: - text = fetch_file_text(owner, repo, filename, ref=branch) - except RegistryError: - continue + try: + text = fetch_file_text(owner, repo, filename, ref=sha) + except RegistryError: + continue - return _parse_github_response(text, filename, f"{source}/{branch}/{filename}") + return _parse_github_response(text, filename, f"{source}/{sha}/{filename}") + tried_label = ref if ref is not None else "default branch" raise RegistryError( f"No index.yaml or index.json found in GitHub repo '{source}' " - f"(tried branches: {', '.join(_GITHUB_BRANCHES)})", - suggestion="Ensure the repository contains an index.yaml or index.json on main or master.", + f"at ref '{tried_label}' (resolved to {sha})", + suggestion="Ensure the repository contains an index.yaml or index.json at this ref.", ) diff --git a/src/conductor/registry/resolver.py b/src/conductor/registry/resolver.py index 52f4d0c..4dcf407 100644 --- a/src/conductor/registry/resolver.py +++ b/src/conductor/registry/resolver.py @@ -1,14 +1,14 @@ """Workflow reference resolution. -Parses user-supplied workflow references (e.g. ``qa-bot@team@1.2.3``) and +Parses user-supplied workflow references (e.g. ``qa-bot@team#v1.2.3``) and determines whether an argument is a local file path or a registry reference. Resolution rules (in order): 1. If the argument exists as a file on disk, treat it as a local path. 2. If it looks like a file path (has path separators or YAML extension), treat it as a local path — even if the file doesn't exist yet. -3. Otherwise parse as a registry reference using ``name[@registry][@version]`` - syntax. +3. Otherwise parse as a registry reference using ``[@][#]`` + syntax, where ```` is a git tag, branch, or commit SHA. """ from __future__ import annotations @@ -35,7 +35,7 @@ class ResolvedRef: # For registry refs workflow: str | None = None registry_name: str | None = None - version: str | None = None # None means "latest" + ref: str | None = None # Git tag / branch / SHA. None means "latest" registry_entry: RegistryEntry | None = None @@ -45,7 +45,10 @@ def resolve_ref(ref: str) -> ResolvedRef: If *ref* is an existing file path or looks like a file path (contains path separators or has a ``.yaml``/``.yml`` extension), a **file** ref is returned. Otherwise *ref* is parsed as a registry reference using - ``name[@registry][@version]`` syntax. + ``[@][#]`` syntax, where ``@`` introduces an + optional registry name and ``#`` introduces an optional git ref (tag, + branch, or commit SHA). An empty registry segment (``name@#ref``) selects + the configured default registry. Args: ref: The raw reference string from the CLI. @@ -54,8 +57,8 @@ def resolve_ref(ref: str) -> ResolvedRef: A :class:`ResolvedRef` describing the resolved target. Raises: - RegistryError: If the reference requires a default registry but none is - configured, or if the named registry does not exist. + RegistryError: If the reference is malformed, requires a default + registry but none is configured, or names an unknown registry. """ if _looks_like_file_path(ref): return ResolvedRef(kind="file", path=Path(ref)) @@ -82,17 +85,50 @@ def _looks_like_file_path(ref: str) -> bool: return path.exists() and path.is_file() -def _parse_registry_ref(ref: str) -> ResolvedRef: - """Parse *ref* as ``name[@registry][@version]`` and resolve against config. +def _parse_registry_ref(raw: str) -> ResolvedRef: + """Parse *raw* as ``[@][#]`` and resolve against config. Raises: - RegistryError: On missing default registry or unknown registry name. + RegistryError: On malformed syntax, missing default registry, or + unknown registry name. """ - parts = ref.split("@", maxsplit=2) + # Split on '#' first — the right side (if any) is the git ref. + hash_parts = raw.split("#") + if len(hash_parts) > 2: + raise RegistryError( + "Workflow ref may contain at most one '#'", + suggestion="Use '[@][#]' (e.g. qa-bot@team#v1.0.0).", + ) + + left = hash_parts[0] + git_ref: str | None + if len(hash_parts) == 2: + git_ref = hash_parts[1] + if git_ref == "": + raise RegistryError( + "Ref cannot be empty after '#'", + suggestion="Provide a tag, branch, or commit SHA after '#' (e.g. qa-bot#v1.0.0).", + ) + else: + git_ref = None + + # Split the left side on '@' — at most one '@' is allowed. + at_parts = left.split("@") + if len(at_parts) > 2: + raise RegistryError( + "Workflow ref may contain at most one '@' " + "(use '#' for refs, e.g. name@registry#v1.0.0)", + suggestion="Use '[@][#]' syntax.", + ) + + workflow = at_parts[0] + if workflow == "": + raise RegistryError( + "Workflow name is required", + suggestion="Provide a workflow name (e.g. qa-bot, qa-bot@team#v1.0.0).", + ) - workflow = parts[0] - raw_registry: str | None = parts[1] if len(parts) >= 2 else None - version: str | None = parts[2] if len(parts) >= 3 else None + raw_registry: str | None = at_parts[1] if len(at_parts) == 2 else None config = load_config() @@ -124,6 +160,6 @@ def _parse_registry_ref(ref: str) -> ResolvedRef: kind="registry", workflow=workflow, registry_name=registry_name, - version=version, + ref=git_ref, registry_entry=config.registries[registry_name], ) diff --git a/src/conductor/registry/version_resolver.py b/src/conductor/registry/version_resolver.py new file mode 100644 index 0000000..8a3da3d --- /dev/null +++ b/src/conductor/registry/version_resolver.py @@ -0,0 +1,85 @@ +"""Resolve workflow refs to git refs and immutable SHAs.""" + +from __future__ import annotations + +from conductor.registry.config import RegistryEntry, RegistryType +from conductor.registry.errors import RegistryError +from conductor.registry.github import ( + get_default_branch, + list_tags, + parse_github_source, + resolve_ref_to_sha, +) + + +def resolve_ref(entry: RegistryEntry, requested: str | None) -> str: + """Resolve a requested ref (or "latest") to a concrete git ref name. + + For path registries, raises RegistryError if ``requested`` is non-empty + (path registries do not support refs). + + For github registries: + * If ``requested`` is None or "latest", returns the newest tag + (semver-sorted when possible). If no tags exist, returns the default + branch name. + * Otherwise returns ``requested`` verbatim — this allows pinning to any + tag, branch, or commit SHA. + """ + if entry.type == RegistryType.path: + if requested is not None and requested != "": + raise RegistryError( + "Path registries do not support refs", + suggestion=( + "Path registries always read from the source directory; " + "remove '#' from your reference." + ), + ) + return "" + + if requested is None or requested.lower() == "latest": + owner, repo = parse_github_source(entry.source) + tags = list_tags(owner, repo) + if tags: + return _sort_tags(tags)[0] + return get_default_branch(owner, repo) + + return requested + + +def materialize_to_sha(entry: RegistryEntry, ref: str) -> str: + """Resolve a git ref to a full immutable commit SHA. + + Used as the cache key so mutable branch refs are always re-resolved to + the current commit before fetching. For path registries, raises (callers + should not invoke this for path). + """ + if entry.type == RegistryType.path: + raise RegistryError("materialize_to_sha not applicable to path registries") + owner, repo = parse_github_source(entry.source) + return resolve_ref_to_sha(owner, repo, ref) + + +def _sort_tags(tags: list[str]) -> list[str]: + """Sort tags newest-first, preferring semver order for parseable tags. + + Tags that parse as PEP 440 / semver (after stripping a leading ``v``) are + placed first in descending version order. Unparseable tags follow in + their original input order (which is GitHub's newest-commit-first). + """ + try: + from packaging.version import InvalidVersion, Version + except ImportError: + # packaging not available — fall back to input order. + return list(tags) + + parseable: list[tuple[Version, str]] = [] + unparseable: list[str] = [] + for tag in tags: + candidate = tag[1:] if tag.startswith(("v", "V")) else tag + try: + parseable.append((Version(candidate), tag)) + except InvalidVersion: + unparseable.append(tag) + + parseable.sort(key=lambda pair: pair[0], reverse=True) + return [tag for _, tag in parseable] + unparseable diff --git a/tests/test_cli/test_registry_commands.py b/tests/test_cli/test_registry_commands.py index 712cf67..2defa33 100644 --- a/tests/test_cli/test_registry_commands.py +++ b/tests/test_cli/test_registry_commands.py @@ -97,22 +97,26 @@ def test_list_workflows(self) -> None: mock_index = RegistryIndex( workflows={ - "qa-bot": WorkflowInfo( - description="QA helper", path="qa/bot.yaml", versions=["1.0", "1.1"] - ), - "summarizer": WorkflowInfo( - description="Summarize docs", path="summarizer.yaml", versions=["2.0"] - ), + "qa-bot": WorkflowInfo(description="QA helper", path="qa/bot.yaml"), + "summarizer": WorkflowInfo(description="Summarize docs", path="summarizer.yaml"), } ) - with patch("conductor.cli.registry.load_index", return_value=mock_index): + with ( + patch("conductor.cli.registry.load_index", return_value=mock_index), + patch( + "conductor.cli.registry.list_tags", + return_value=["v2.0.0", "v1.5.0", "v1.4.0", "v1.3.0", "v1.2.0", "v1.1.0"], + ), + ): result = runner.invoke(app, ["registry", "list", "team"]) assert result.exit_code == 0 assert "qa-bot" in result.output assert "summarizer" in result.output - assert "1.0" in result.output + assert "Latest tags:" in result.output + assert "v2.0.0" in result.output + assert "..." in result.output # >5 tags → truncation indicator def test_list_workflows_unknown_registry(self) -> None: result = runner.invoke(app, ["registry", "list", "nope"]) @@ -182,13 +186,14 @@ def test_show_registry(self) -> None: mock_index = RegistryIndex( workflows={ - "qa-bot": WorkflowInfo( - description="QA helper", path="qa/bot.yaml", versions=["1.0", "1.1"] - ), + "qa-bot": WorkflowInfo(description="QA helper", path="qa/bot.yaml"), } ) - with patch("conductor.cli.registry.load_index", return_value=mock_index): + with ( + patch("conductor.cli.registry.load_index", return_value=mock_index), + patch("conductor.cli.registry.list_tags", return_value=["v1.0.0"]), + ): result = runner.invoke(app, ["registry", "show", "team"]) assert result.exit_code == 0 @@ -196,6 +201,8 @@ def test_show_registry(self) -> None: assert "acme/workflows" in result.output assert "qa-bot" in result.output assert "QA helper" in result.output + assert "Latest tags:" in result.output + assert "v1.0.0" in result.output assert "conductor show" in result.output def test_show_unknown_registry(self) -> None: diff --git a/tests/test_registry/test_cache.py b/tests/test_registry/test_cache.py index bca491c..5889e3f 100644 --- a/tests/test_registry/test_cache.py +++ b/tests/test_registry/test_cache.py @@ -17,6 +17,13 @@ from conductor.registry.config import RegistryEntry, RegistryType from conductor.registry.errors import RegistryError +# A canned 40-char hex SHA used throughout these tests. +_FAKE_SHA = "a" * 40 +_FAKE_SHA2 = "b" * 40 +_SHA_DIR = _FAKE_SHA[:12] +_SHA_DIR2 = _FAKE_SHA2[:12] + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -51,7 +58,6 @@ def _create_path_registry(tmp_path: Path) -> Path: qa-bot: description: "Simple Q&A" path: workflows/qa-bot.yaml - versions: ["1.0.0", "2.0.0"] """), encoding="utf-8", ) @@ -63,10 +69,9 @@ def _create_path_registry(tmp_path: Path) -> Path: class _FakeWorkflowInfo: """Minimal stand-in for WorkflowInfo returned by the index module.""" - def __init__(self, *, description: str, path: str, versions: list[str]) -> None: + def __init__(self, *, description: str, path: str) -> None: self.description = description self.path = path - self.versions = versions class _FakeIndex: @@ -76,6 +81,22 @@ def __init__(self, workflows: dict[str, _FakeWorkflowInfo]) -> None: self.workflows = workflows +def _make_index() -> _FakeIndex: + return _FakeIndex( + workflows={ + "qa-bot": _FakeWorkflowInfo( + description="Simple Q&A", + path="workflows/qa-bot.yaml", + ), + } + ) + + +def _write_workflow_file(dest_dir: Path) -> None: + """Helper: simulate _fetch_github writing the workflow file.""" + (dest_dir / "qa-bot.yaml").write_bytes(b"name: qa-bot\nagents: []\n") + + # --------------------------------------------------------------------------- # get_cache_base # --------------------------------------------------------------------------- @@ -104,30 +125,44 @@ def test_returns_none_when_not_cached( self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: _setup_conductor_home(tmp_path, monkeypatch) - result = get_cached_workflow_path("myregistry", "qa-bot", "1.0.0") + result = get_cached_workflow_path("myregistry", "qa-bot", _FAKE_SHA) assert result is None def test_returns_path_when_cached( self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: home = _setup_conductor_home(tmp_path, monkeypatch) - version_dir = home / "cache" / "registries" / "myregistry" / "qa-bot" / "1.0.0" - version_dir.mkdir(parents=True) - wf_file = version_dir / "qa-bot.yaml" + sha_dir = home / "cache" / "registries" / "myregistry" / "qa-bot" / _SHA_DIR + sha_dir.mkdir(parents=True) + wf_file = sha_dir / "qa-bot.yaml" wf_file.write_text("name: qa-bot\n", encoding="utf-8") - result = get_cached_workflow_path("myregistry", "qa-bot", "1.0.0") + result = get_cached_workflow_path("myregistry", "qa-bot", _FAKE_SHA) assert result is not None assert result == wf_file + def test_uses_first_12_chars_of_sha( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Cache directory name is sha[:12]; full SHA is accepted for lookup.""" + home = _setup_conductor_home(tmp_path, monkeypatch) + sha = "0123456789abcdef" * 2 + "01234567" # 40 chars + sha_dir = home / "cache" / "registries" / "myregistry" / "qa-bot" / sha[:12] + sha_dir.mkdir(parents=True) + (sha_dir / "qa-bot.yaml").write_text("name: qa-bot\n", encoding="utf-8") + + result = get_cached_workflow_path("myregistry", "qa-bot", sha) + assert result is not None + assert sha[:12] in str(result) + def test_returns_none_for_empty_dir( self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: home = _setup_conductor_home(tmp_path, monkeypatch) - version_dir = home / "cache" / "registries" / "myregistry" / "qa-bot" / "1.0.0" - version_dir.mkdir(parents=True) + sha_dir = home / "cache" / "registries" / "myregistry" / "qa-bot" / _SHA_DIR + sha_dir.mkdir(parents=True) # directory exists but has no YAML files - result = get_cached_workflow_path("myregistry", "qa-bot", "1.0.0") + result = get_cached_workflow_path("myregistry", "qa-bot", _FAKE_SHA) assert result is None @@ -137,17 +172,6 @@ def test_returns_none_for_empty_dir( class TestFetchWorkflowPath: - def _make_index(self) -> _FakeIndex: - return _FakeIndex( - workflows={ - "qa-bot": _FakeWorkflowInfo( - description="Simple Q&A", - path="workflows/qa-bot.yaml", - versions=["1.0.0", "2.0.0"], - ), - } - ) - @patch("conductor.registry.cache.load_index") def test_returns_source_path_directly( self, mock_load_index: object, tmp_path: Path, monkeypatch: pytest.MonkeyPatch @@ -155,10 +179,10 @@ def test_returns_source_path_directly( """Path registries return the source file directly (no caching).""" _setup_conductor_home(tmp_path, monkeypatch) registry_dir = _create_path_registry(tmp_path) - mock_load_index.return_value = self._make_index() # type: ignore[union-attr] + mock_load_index.return_value = _make_index() # type: ignore[union-attr] entry = RegistryEntry(type=RegistryType.path, source=str(registry_dir)) - result = fetch_workflow("local", entry, "qa-bot", version="1.0.0") + result = fetch_workflow("local", entry, "qa-bot") assert result.exists() assert result.name == "qa-bot.yaml" @@ -166,22 +190,30 @@ def test_returns_source_path_directly( assert str(result).startswith(str(registry_dir)) @patch("conductor.registry.cache.load_index") - def test_version_is_ignored( + def test_no_ref_returns_source( self, mock_load_index: object, tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: - """Path registries ignore the version — same file regardless.""" + """Path registries with ref=None succeed and return the source file.""" _setup_conductor_home(tmp_path, monkeypatch) registry_dir = _create_path_registry(tmp_path) - mock_load_index.return_value = self._make_index() # type: ignore[union-attr] + mock_load_index.return_value = _make_index() # type: ignore[union-attr] entry = RegistryEntry(type=RegistryType.path, source=str(registry_dir)) + result = fetch_workflow("local", entry, "qa-bot", ref=None) + assert result.exists() - v1 = fetch_workflow("local", entry, "qa-bot", version="1.0.0") - v2 = fetch_workflow("local", entry, "qa-bot", version="2.0.0") - latest = fetch_workflow("local", entry, "qa-bot", version=None) + @patch("conductor.registry.cache.load_index") + def test_path_registry_with_ref_raises( + self, mock_load_index: object, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Path registries reject any non-empty ref.""" + _setup_conductor_home(tmp_path, monkeypatch) + registry_dir = _create_path_registry(tmp_path) + mock_load_index.return_value = _make_index() # type: ignore[union-attr] - # All return the same source file - assert v1 == v2 == latest + entry = RegistryEntry(type=RegistryType.path, source=str(registry_dir)) + with pytest.raises(RegistryError, match="Path registries do not support refs"): + fetch_workflow("local", entry, "qa-bot", ref="v1.0.0") @patch("conductor.registry.cache.load_index") def test_edits_reflected_immediately( @@ -190,16 +222,15 @@ def test_edits_reflected_immediately( """Changes to the source file are visible without cache refresh.""" _setup_conductor_home(tmp_path, monkeypatch) registry_dir = _create_path_registry(tmp_path) - mock_load_index.return_value = self._make_index() # type: ignore[union-attr] + mock_load_index.return_value = _make_index() # type: ignore[union-attr] entry = RegistryEntry(type=RegistryType.path, source=str(registry_dir)) - result = fetch_workflow("local", entry, "qa-bot", version="1.0.0") + result = fetch_workflow("local", entry, "qa-bot") original = result.read_text() result.write_text(original + "\n# edited") - # Re-fetch returns the same path with the edit visible - result2 = fetch_workflow("local", entry, "qa-bot", version="1.0.0") + result2 = fetch_workflow("local", entry, "qa-bot") assert "# edited" in result2.read_text() @patch("conductor.registry.cache.load_index") @@ -211,21 +242,21 @@ def test_missing_workflow_raises( entry = RegistryEntry(type=RegistryType.path, source=str(tmp_path)) with pytest.raises(RegistryError, match="not found"): - fetch_workflow("local", entry, "nonexistent", version="1.0.0") + fetch_workflow("local", entry, "nonexistent") @patch("conductor.registry.cache.load_index") def test_missing_source_file_raises( self, mock_load_index: object, tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: _setup_conductor_home(tmp_path, monkeypatch) - mock_load_index.return_value = self._make_index() # type: ignore[union-attr] + mock_load_index.return_value = _make_index() # type: ignore[union-attr] empty_registry = tmp_path / "empty-registry" empty_registry.mkdir() entry = RegistryEntry(type=RegistryType.path, source=str(empty_registry)) with pytest.raises(RegistryError, match="not found"): - fetch_workflow("local", entry, "qa-bot", version="1.0.0") + fetch_workflow("local", entry, "qa-bot") # --------------------------------------------------------------------------- @@ -234,85 +265,259 @@ def test_missing_source_file_raises( class TestFetchWorkflowGitHub: - def _make_index(self) -> _FakeIndex: - return _FakeIndex( - workflows={ - "qa-bot": _FakeWorkflowInfo( - description="Simple Q&A", - path="workflows/qa-bot.yaml", - versions=["1.0.0"], - ), - } - ) - - @patch("conductor.registry.cache.fetch_file") - @patch("conductor.registry.cache.list_directory") - @patch("conductor.registry.cache.parse_github_source", return_value=("myorg", "workflows")) + @patch("conductor.registry.cache._fetch_github") @patch("conductor.registry.cache.load_index") + @patch("conductor.registry.cache.materialize_to_sha") + @patch("conductor.registry.cache.resolve_ref") def test_fetches_from_github( self, + mock_resolve_ref: object, + mock_materialize: object, mock_load_index: object, - mock_parse: object, - mock_list_dir: object, - mock_fetch_file: object, + mock_fetch_github: object, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - _setup_conductor_home(tmp_path, monkeypatch) - mock_load_index.return_value = self._make_index() # type: ignore[union-attr] - mock_list_dir.return_value = [ # type: ignore[union-attr] - "qa-bot.yaml", - "prompt.txt", - ] + """Happy path: ref → SHA → cache miss → fetch → file present.""" + home = _setup_conductor_home(tmp_path, monkeypatch) + mock_resolve_ref.return_value = "v1.0.0" # type: ignore[union-attr] + mock_materialize.return_value = _FAKE_SHA # type: ignore[union-attr] + mock_load_index.return_value = _make_index() # type: ignore[union-attr] + mock_fetch_github.side_effect = ( # type: ignore[union-attr] + lambda entry, path, sha, dest_dir: _write_workflow_file(dest_dir) + ) - def fake_fetch(owner: str, repo: str, path: str, *, ref: str) -> bytes: - if path.endswith("qa-bot.yaml"): - return b"name: qa-bot\nagents: []\n" - return b"You are a helpful assistant.\n" + entry = RegistryEntry(type=RegistryType.github, source="myorg/workflows") + result = fetch_workflow("official", entry, "qa-bot", ref="v1.0.0") - mock_fetch_file.side_effect = fake_fetch # type: ignore[union-attr] + assert result.exists() + assert result.name == "qa-bot.yaml" + # Cache directory uses sha[:12] + assert _SHA_DIR in str(result) + expected_dir = home / "cache" / "registries" / "official" / "qa-bot" / _SHA_DIR + assert result.parent == expected_dir + + # load_index should be pinned to the SHA + mock_load_index.assert_called_once() # type: ignore[union-attr] + call_kwargs = mock_load_index.call_args.kwargs # type: ignore[union-attr] + assert call_kwargs.get("ref") == _FAKE_SHA + + # _fetch_github called with the SHA (not the ref name) + mock_fetch_github.assert_called_once() # type: ignore[union-attr] + args = mock_fetch_github.call_args.args # type: ignore[union-attr] + assert args[2] == _FAKE_SHA # sha positional arg + + @patch("conductor.registry.cache._fetch_github") + @patch("conductor.registry.cache.load_index") + @patch("conductor.registry.cache.materialize_to_sha") + @patch("conductor.registry.cache.resolve_ref") + def test_cache_hit_skips_fetch( + self, + mock_resolve_ref: object, + mock_materialize: object, + mock_load_index: object, + mock_fetch_github: object, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """When cache directory already exists for the SHA, skip the fetch.""" + home = _setup_conductor_home(tmp_path, monkeypatch) + mock_resolve_ref.return_value = "v1.0.0" # type: ignore[union-attr] + mock_materialize.return_value = _FAKE_SHA # type: ignore[union-attr] + + # Pre-populate the cache at the resolved SHA dir. + sha_dir = home / "cache" / "registries" / "official" / "qa-bot" / _SHA_DIR + sha_dir.mkdir(parents=True) + (sha_dir / "qa-bot.yaml").write_text("name: qa-bot\n", encoding="utf-8") entry = RegistryEntry(type=RegistryType.github, source="myorg/workflows") - result = fetch_workflow("official", entry, "qa-bot", version="1.0.0") + result = fetch_workflow("official", entry, "qa-bot", ref="v1.0.0") assert result.exists() - assert result.name == "qa-bot.yaml" - assert "1.0.0" in str(result) + assert result.parent == sha_dir + # Fetch and index load were not invoked — pure cache hit. + mock_fetch_github.assert_not_called() # type: ignore[union-attr] + mock_load_index.assert_not_called() # type: ignore[union-attr] + + @patch("conductor.registry.cache._fetch_github") + @patch("conductor.registry.cache.load_index") + @patch("conductor.registry.cache.materialize_to_sha") + @patch("conductor.registry.cache.resolve_ref") + def test_atomic_write_on_failure( + self, + mock_resolve_ref: object, + mock_materialize: object, + mock_load_index: object, + mock_fetch_github: object, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """If fetch fails mid-write, final dir is not created and tmp dir is cleaned up.""" + home = _setup_conductor_home(tmp_path, monkeypatch) + mock_resolve_ref.return_value = "v1.0.0" # type: ignore[union-attr] + mock_materialize.return_value = _FAKE_SHA # type: ignore[union-attr] + mock_load_index.return_value = _make_index() # type: ignore[union-attr] - # Sibling should be cached - sibling = result.parent / "prompt.txt" - assert sibling.exists() + def boom(entry: object, path: str, sha: str, dest_dir: Path) -> None: + # Simulate partial write before failure + (dest_dir / "partial.yaml").write_bytes(b"oops") + raise RuntimeError("network blew up") - mock_parse.assert_called_once_with("myorg/workflows") # type: ignore[union-attr] + mock_fetch_github.side_effect = boom # type: ignore[union-attr] - @patch("conductor.registry.cache.fetch_file") - @patch("conductor.registry.cache.list_directory") - @patch("conductor.registry.cache.parse_github_source", return_value=("myorg", "workflows")) + entry = RegistryEntry(type=RegistryType.github, source="myorg/workflows") + with pytest.raises(RuntimeError, match="network blew up"): + fetch_workflow("official", entry, "qa-bot", ref="v1.0.0") + + # Final dir was never created. + final_dir = home / "cache" / "registries" / "official" / "qa-bot" / _SHA_DIR + assert not final_dir.exists() + + # Workflow parent exists (mkdir runs before fetch) but contains no + # leftover .tmp-* directories. + parent = home / "cache" / "registries" / "official" / "qa-bot" + assert parent.exists() + leftovers = [p for p in parent.iterdir() if p.name.startswith(".tmp-")] + assert leftovers == [] + + @patch("conductor.registry.cache._fetch_github") + @patch("conductor.registry.cache.load_index") + @patch("conductor.registry.cache.materialize_to_sha") + @patch("conductor.registry.cache.resolve_ref") + def test_branch_ref_re_resolution( + self, + mock_resolve_ref: object, + mock_materialize: object, + mock_load_index: object, + mock_fetch_github: object, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Calling fetch twice with the same branch ref re-resolves the SHA each time. + + When the underlying branch advances between calls (materialize_to_sha + returns different SHAs), the second fetch must populate a *new* cache + dir rather than reuse the old one. + """ + home = _setup_conductor_home(tmp_path, monkeypatch) + mock_resolve_ref.return_value = "main" # type: ignore[union-attr] + mock_load_index.return_value = _make_index() # type: ignore[union-attr] + mock_fetch_github.side_effect = ( # type: ignore[union-attr] + lambda entry, path, sha, dest_dir: _write_workflow_file(dest_dir) + ) + + # Branch advances between calls. + mock_materialize.side_effect = [_FAKE_SHA, _FAKE_SHA2] # type: ignore[union-attr] + + entry = RegistryEntry(type=RegistryType.github, source="myorg/workflows") + + first = fetch_workflow("official", entry, "qa-bot", ref="main") + second = fetch_workflow("official", entry, "qa-bot", ref="main") + + # Each call resolved to a different SHA → different cache dirs. + assert first != second + assert _SHA_DIR in str(first) + assert _SHA_DIR2 in str(second) + + first_dir = home / "cache" / "registries" / "official" / "qa-bot" / _SHA_DIR + second_dir = home / "cache" / "registries" / "official" / "qa-bot" / _SHA_DIR2 + assert first_dir.exists() + assert second_dir.exists() + + # Both fetches actually executed (no spurious cache reuse). + assert mock_fetch_github.call_count == 2 # type: ignore[union-attr] + assert mock_materialize.call_count == 2 # type: ignore[union-attr] + + @patch("conductor.registry.cache._fetch_github") @patch("conductor.registry.cache.load_index") - def test_cached_github_not_refetched( + @patch("conductor.registry.cache.materialize_to_sha") + @patch("conductor.registry.cache.resolve_ref") + def test_race_on_rename_uses_existing_cache( self, + mock_resolve_ref: object, + mock_materialize: object, mock_load_index: object, - mock_parse: object, - mock_list_dir: object, - mock_fetch_file: object, + mock_fetch_github: object, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: + """If another process wins the race, our tmp dir is cleaned up and the + cached path is returned successfully.""" home = _setup_conductor_home(tmp_path, monkeypatch) - mock_load_index.return_value = self._make_index() # type: ignore[union-attr] + mock_resolve_ref.return_value = "v1.0.0" # type: ignore[union-attr] + mock_materialize.return_value = _FAKE_SHA # type: ignore[union-attr] + mock_load_index.return_value = _make_index() # type: ignore[union-attr] - # Pre-populate the cache - version_dir = home / "cache" / "registries" / "official" / "qa-bot" / "1.0.0" - version_dir.mkdir(parents=True) - (version_dir / "qa-bot.yaml").write_text("name: qa-bot\n", encoding="utf-8") + parent = home / "cache" / "registries" / "official" / "qa-bot" + final_dir = parent / _SHA_DIR + + def racing_fetch(entry: object, path: str, sha: str, dest_dir: Path) -> None: + # Write our own workflow file into the temp dir. + _write_workflow_file(dest_dir) + # Simulate another process creating the final dir before we rename. + final_dir.mkdir(parents=True, exist_ok=True) + (final_dir / "qa-bot.yaml").write_bytes(b"name: qa-bot\n# from racer\n") + + mock_fetch_github.side_effect = racing_fetch # type: ignore[union-attr] entry = RegistryEntry(type=RegistryType.github, source="myorg/workflows") - result = fetch_workflow("official", entry, "qa-bot", version="1.0.0") + result = fetch_workflow("official", entry, "qa-bot", ref="v1.0.0") + # The cached path (populated by the racer) is returned. assert result.exists() - # fetch_file should never have been called — cache hit - mock_fetch_file.assert_not_called() # type: ignore[union-attr] - mock_list_dir.assert_not_called() # type: ignore[union-attr] + assert result.parent == final_dir + assert b"from racer" in result.read_bytes() + + # Our temp dir was cleaned up — no .tmp-* residue. + leftovers = [p for p in parent.iterdir() if p.name.startswith(".tmp-")] + assert leftovers == [] + + @patch("conductor.registry.cache._fetch_github") + @patch("conductor.registry.cache.load_index") + @patch("conductor.registry.cache.materialize_to_sha") + @patch("conductor.registry.cache.resolve_ref") + def test_missing_workflow_after_fetch_raises( + self, + mock_resolve_ref: object, + mock_materialize: object, + mock_load_index: object, + mock_fetch_github: object, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """If the index points to a file that wasn't written, raise RegistryError.""" + _setup_conductor_home(tmp_path, monkeypatch) + mock_resolve_ref.return_value = "v1.0.0" # type: ignore[union-attr] + mock_materialize.return_value = _FAKE_SHA # type: ignore[union-attr] + mock_load_index.return_value = _make_index() # type: ignore[union-attr] + # Fetch that does not write the expected workflow file. + mock_fetch_github.side_effect = ( # type: ignore[union-attr] + lambda entry, path, sha, dest_dir: None + ) + + entry = RegistryEntry(type=RegistryType.github, source="myorg/workflows") + with pytest.raises(RegistryError, match="not found in cache after fetch"): + fetch_workflow("official", entry, "qa-bot", ref="v1.0.0") + + @patch("conductor.registry.cache.load_index") + @patch("conductor.registry.cache.materialize_to_sha") + @patch("conductor.registry.cache.resolve_ref") + def test_unknown_workflow_raises( + self, + mock_resolve_ref: object, + mock_materialize: object, + mock_load_index: object, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + _setup_conductor_home(tmp_path, monkeypatch) + mock_resolve_ref.return_value = "v1.0.0" # type: ignore[union-attr] + mock_materialize.return_value = _FAKE_SHA # type: ignore[union-attr] + mock_load_index.return_value = _FakeIndex(workflows={}) # type: ignore[union-attr] + + entry = RegistryEntry(type=RegistryType.github, source="myorg/workflows") + with pytest.raises(RegistryError, match="not found"): + fetch_workflow("official", entry, "nope", ref="v1.0.0") # --------------------------------------------------------------------------- @@ -325,9 +530,8 @@ def test_clear_all(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Non home = _setup_conductor_home(tmp_path, monkeypatch) cache_base = home / "cache" / "registries" - # Create two registries in the cache - (cache_base / "reg-a" / "wf" / "1.0").mkdir(parents=True) - (cache_base / "reg-b" / "wf" / "2.0").mkdir(parents=True) + (cache_base / "reg-a" / "wf" / _SHA_DIR).mkdir(parents=True) + (cache_base / "reg-b" / "wf" / _SHA_DIR2).mkdir(parents=True) clear_cache() @@ -337,8 +541,8 @@ def test_clear_specific_registry(self, tmp_path: Path, monkeypatch: pytest.Monke home = _setup_conductor_home(tmp_path, monkeypatch) cache_base = home / "cache" / "registries" - (cache_base / "reg-a" / "wf" / "1.0").mkdir(parents=True) - (cache_base / "reg-b" / "wf" / "2.0").mkdir(parents=True) + (cache_base / "reg-a" / "wf" / _SHA_DIR).mkdir(parents=True) + (cache_base / "reg-b" / "wf" / _SHA_DIR2).mkdir(parents=True) clear_cache(registry_name="reg-a") diff --git a/tests/test_registry/test_github.py b/tests/test_registry/test_github.py index b89d211..a0fd43e 100644 --- a/tests/test_registry/test_github.py +++ b/tests/test_registry/test_github.py @@ -11,9 +11,11 @@ from conductor.registry.github import ( fetch_file, fetch_file_text, + get_default_branch, list_directory, list_tags, parse_github_source, + resolve_ref_to_sha, ) @@ -21,6 +23,7 @@ def _mock_response( status_code: int = 200, content: bytes = b"", json_data: object = None, + links: dict[str, dict[str, str]] | None = None, ) -> MagicMock: """Build a mock httpx.Response.""" resp = MagicMock(spec=httpx.Response) @@ -29,6 +32,7 @@ def _mock_response( resp.content = content if json_data is not None: resp.json.return_value = json_data + resp.links = links or {} return resp @@ -110,6 +114,128 @@ def test_http_error(self, mock_get: MagicMock) -> None: with pytest.raises(RegistryError, match="HTTP error"): list_tags("owner", "repo") + @patch("conductor.registry.github.httpx.get") + def test_pagination_follows_link_header(self, mock_get: MagicMock) -> None: + page1 = _mock_response( + json_data=[{"name": "v3.0"}, {"name": "v2.0"}], + links={"next": {"url": "https://api.github.com/repos/owner/repo/tags?page=2"}}, + ) + page2 = _mock_response(json_data=[{"name": "v1.0"}]) + mock_get.side_effect = [page1, page2] + + result = list_tags("owner", "repo") + + assert result == ["v3.0", "v2.0", "v1.0"] + assert mock_get.call_count == 2 + # Second call should use the next URL from the Link header + assert mock_get.call_args_list[1][0][0] == ( + "https://api.github.com/repos/owner/repo/tags?page=2" + ) + + @patch("conductor.registry.github.httpx.get") + def test_pagination_three_pages(self, mock_get: MagicMock) -> None: + page1 = _mock_response( + json_data=[{"name": "a"}], + links={"next": {"url": "https://api.github.com/p2"}}, + ) + page2 = _mock_response( + json_data=[{"name": "b"}], + links={"next": {"url": "https://api.github.com/p3"}}, + ) + page3 = _mock_response(json_data=[{"name": "c"}]) + mock_get.side_effect = [page1, page2, page3] + + result = list_tags("owner", "repo") + + assert result == ["a", "b", "c"] + assert mock_get.call_count == 3 + + +# --- get_default_branch --- + + +class TestGetDefaultBranch: + @patch("conductor.registry.github.httpx.get") + def test_success(self, mock_get: MagicMock) -> None: + mock_get.return_value = _mock_response(json_data={"default_branch": "main"}) + + result = get_default_branch("owner", "repo") + + assert result == "main" + call_args = mock_get.call_args + assert "api.github.com/repos/owner/repo" in call_args[0][0] + + @patch("conductor.registry.github.httpx.get") + def test_returns_master(self, mock_get: MagicMock) -> None: + mock_get.return_value = _mock_response(json_data={"default_branch": "master"}) + assert get_default_branch("owner", "repo") == "master" + + @patch("conductor.registry.github.httpx.get") + def test_404_raises_registry_error(self, mock_get: MagicMock) -> None: + mock_get.return_value = _mock_response(status_code=404) + + with pytest.raises(RegistryError, match="not found"): + get_default_branch("owner", "missing-repo") + + @patch("conductor.registry.github.httpx.get") + def test_timeout_raises_registry_error(self, mock_get: MagicMock) -> None: + mock_get.side_effect = httpx.TimeoutException("timed out") + + with pytest.raises(RegistryError, match="Timeout"): + get_default_branch("owner", "repo") + + +# --- resolve_ref_to_sha --- + + +class TestResolveRefToSha: + FULL_SHA = "abc1234567890abcdef1234567890abcdef12345" + + @patch("conductor.registry.github.httpx.get") + def test_resolves_branch(self, mock_get: MagicMock) -> None: + mock_get.return_value = _mock_response(json_data={"sha": self.FULL_SHA}) + + result = resolve_ref_to_sha("owner", "repo", "main") + + assert result == self.FULL_SHA + call_args = mock_get.call_args + assert "api.github.com/repos/owner/repo/commits/main" in call_args[0][0] + + @patch("conductor.registry.github.httpx.get") + def test_resolves_tag(self, mock_get: MagicMock) -> None: + mock_get.return_value = _mock_response(json_data={"sha": self.FULL_SHA}) + + result = resolve_ref_to_sha("owner", "repo", "v1.0.0") + + assert result == self.FULL_SHA + assert "commits/v1.0.0" in mock_get.call_args[0][0] + + @patch("conductor.registry.github.httpx.get") + def test_resolves_short_sha(self, mock_get: MagicMock) -> None: + mock_get.return_value = _mock_response(json_data={"sha": self.FULL_SHA}) + + result = resolve_ref_to_sha("owner", "repo", "abc1234") + + assert result == self.FULL_SHA + assert "commits/abc1234" in mock_get.call_args[0][0] + + @patch("conductor.registry.github.httpx.get") + def test_404_raises_registry_error_with_suggestion(self, mock_get: MagicMock) -> None: + mock_get.return_value = _mock_response(status_code=404) + + with pytest.raises(RegistryError, match="not found") as exc_info: + resolve_ref_to_sha("owner", "repo", "nonexistent-branch") + + assert exc_info.value.suggestion is not None + assert "gh auth login" in exc_info.value.suggestion + + @patch("conductor.registry.github.httpx.get") + def test_http_error(self, mock_get: MagicMock) -> None: + mock_get.side_effect = httpx.HTTPError("connection failed") + + with pytest.raises(RegistryError, match="HTTP error"): + resolve_ref_to_sha("owner", "repo", "main") + # --- list_directory --- diff --git a/tests/test_registry/test_index.py b/tests/test_registry/test_index.py index db235a3..a08e295 100644 --- a/tests/test_registry/test_index.py +++ b/tests/test_registry/test_index.py @@ -16,7 +16,6 @@ WorkflowInfo, get_workflow_info, load_index, - resolve_latest, ) # --------------------------------------------------------------------------- @@ -28,12 +27,10 @@ "qa-bot": { "description": "Simple Q&A workflow", "path": "workflows/qa-bot.yaml", - "versions": ["1.0.0", "1.1.0", "2.0.0"], }, "code-review": { "description": "Multi-agent code review", "path": "workflows/code-review.yaml", - "versions": ["0.3.0"], }, } } @@ -81,7 +78,6 @@ def test_load_yaml(self, tmp_path: Path) -> None: assert "code-review" in idx.workflows assert idx.workflows["qa-bot"].description == "Simple Q&A workflow" assert idx.workflows["qa-bot"].path == "workflows/qa-bot.yaml" - assert idx.workflows["qa-bot"].versions == ["1.0.0", "1.1.0", "2.0.0"] def test_load_json_fallback(self, tmp_path: Path) -> None: """index.json is loaded when index.yaml does not exist.""" @@ -89,7 +85,7 @@ def test_load_json_fallback(self, tmp_path: Path) -> None: idx = load_index(_path_entry(str(tmp_path))) assert "qa-bot" in idx.workflows - assert idx.workflows["code-review"].versions == ["0.3.0"] + assert idx.workflows["code-review"].path == "workflows/code-review.yaml" def test_yaml_preferred_over_json(self, tmp_path: Path) -> None: """index.yaml takes priority when both files exist.""" @@ -168,9 +164,17 @@ class TestLoadGitHubIndex: """Tests for loading index from GitHub registries.""" @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="abc123def456") + @patch("conductor.registry.github.get_default_branch", return_value="main") @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) - def test_fetch_yaml_main(self, _mock_parse: MagicMock, mock_fetch: MagicMock) -> None: - """Fetches index.yaml from main branch.""" + def test_fetch_yaml_default_branch( + self, + _mock_parse: MagicMock, + mock_default_branch: MagicMock, + mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: + """Without a ref, queries the default branch and resolves to a SHA.""" yaml = YAML(typ="safe") from io import StringIO @@ -183,109 +187,158 @@ def test_fetch_yaml_main(self, _mock_parse: MagicMock, mock_fetch: MagicMock) -> idx = load_index(_github_entry("myorg/myrepo")) assert "qa-bot" in idx.workflows - mock_fetch.assert_called_once_with("myorg", "myrepo", "index.yaml", ref="main") + mock_default_branch.assert_called_once_with("myorg", "myrepo") + mock_resolve.assert_called_once_with("myorg", "myrepo", "main") + mock_fetch.assert_called_once_with("myorg", "myrepo", "index.yaml", ref="abc123def456") @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="deadbeef00000") + @patch("conductor.registry.github.get_default_branch", return_value="trunk") @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) - def test_fallback_to_master(self, _mock_parse: MagicMock, mock_fetch: MagicMock) -> None: - """Falls back to master branch when main raises RegistryError.""" + def test_latest_ref_uses_default_branch( + self, + _mock_parse: MagicMock, + mock_default_branch: MagicMock, + mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: + """Passing ref='latest' is equivalent to no ref — uses default branch.""" + json_text = json.dumps(_SAMPLE_INDEX) + # yaml fails, json succeeds + mock_fetch.side_effect = [RegistryError("not found"), json_text] + + idx = load_index(_github_entry("myorg/myrepo"), ref="latest") + assert "qa-bot" in idx.workflows + + mock_default_branch.assert_called_once_with("myorg", "myrepo") + mock_resolve.assert_called_once_with("myorg", "myrepo", "trunk") + + @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="tagsha9876543") + @patch("conductor.registry.github.get_default_branch") + @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) + def test_explicit_ref_passes_through( + self, + _mock_parse: MagicMock, + mock_default_branch: MagicMock, + mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: + """Explicit ref is resolved directly without consulting default branch.""" yaml = YAML(typ="safe") from io import StringIO stream = StringIO() yaml.dump(_SAMPLE_INDEX, stream) - yaml_text = stream.getvalue() - - # main raises error, master succeeds - mock_fetch.side_effect = [ - RegistryError("not found"), - yaml_text, - ] + mock_fetch.return_value = stream.getvalue() - idx = load_index(_github_entry("myorg/myrepo")) + idx = load_index(_github_entry("myorg/myrepo"), ref="v1.2.3") assert "qa-bot" in idx.workflows - assert mock_fetch.call_count == 2 + + # Default branch should NOT be queried for an explicit ref + mock_default_branch.assert_not_called() + mock_resolve.assert_called_once_with("myorg", "myrepo", "v1.2.3") + # Resolved SHA — not the original ref — must be passed to fetch_file_text + mock_fetch.assert_called_once_with("myorg", "myrepo", "index.yaml", ref="tagsha9876543") @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="sha1234567890") + @patch("conductor.registry.github.get_default_branch", return_value="main") @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) - def test_fallback_to_json(self, _mock_parse: MagicMock, mock_fetch: MagicMock) -> None: - """Falls back to index.json when index.yaml not found on any branch.""" + def test_fallback_to_json( + self, + _mock_parse: MagicMock, + _mock_default_branch: MagicMock, + _mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: + """Falls back to index.json when index.yaml is not found at the SHA.""" json_text = json.dumps(_SAMPLE_INDEX) - - # yaml on main → error, yaml on master → error, json on main → success - mock_fetch.side_effect = [ - RegistryError("not found"), - RegistryError("not found"), - json_text, - ] + mock_fetch.side_effect = [RegistryError("not found"), json_text] idx = load_index(_github_entry("myorg/myrepo")) assert "qa-bot" in idx.workflows - assert mock_fetch.call_count == 3 + assert mock_fetch.call_count == 2 + # Both attempts use the resolved SHA + for call in mock_fetch.call_args_list: + assert call.kwargs["ref"] == "sha1234567890" @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="sha1234567890") + @patch("conductor.registry.github.get_default_branch", return_value="main") @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) - def test_all_404_raises(self, _mock_parse: MagicMock, mock_fetch: MagicMock) -> None: - """RegistryError when all fetch attempts fail.""" + def test_all_404_raises( + self, + _mock_parse: MagicMock, + _mock_default_branch: MagicMock, + _mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: + """RegistryError when neither index file is found at the SHA.""" mock_fetch.side_effect = RegistryError("not found") with pytest.raises(RegistryError, match="No index.yaml or index.json"): load_index(_github_entry("myorg/myrepo")) - # yaml(main, master) + json(main, master) = 4 attempts - assert mock_fetch.call_count == 4 + # Only one attempt per filename — no branch fallback any more + assert mock_fetch.call_count == 2 @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="sha1234567890") + @patch("conductor.registry.github.get_default_branch", return_value="main") @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) - def test_network_error_raises(self, _mock_parse: MagicMock, mock_fetch: MagicMock) -> None: + def test_network_error_raises( + self, + _mock_parse: MagicMock, + _mock_default_branch: MagicMock, + _mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: """RegistryError on network failure propagates.""" mock_fetch.side_effect = RegistryError("Failed to fetch: connection refused") with pytest.raises(RegistryError, match="No index.yaml or index.json"): load_index(_github_entry("myorg/myrepo")) + @patch("conductor.registry.github.resolve_ref_to_sha") + @patch("conductor.registry.github.get_default_branch") + @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) + def test_resolve_ref_failure_propagates( + self, + _mock_parse: MagicMock, + _mock_default_branch: MagicMock, + mock_resolve: MagicMock, + ) -> None: + """If ref cannot be resolved to a SHA, the error propagates.""" + mock_resolve.side_effect = RegistryError("ref not found") -# --------------------------------------------------------------------------- -# Tests: resolve_latest -# --------------------------------------------------------------------------- + with pytest.raises(RegistryError, match="ref not found"): + load_index(_github_entry("myorg/myrepo"), ref="nonexistent") -class TestResolveLatest: - """Tests for resolve_latest.""" +# --------------------------------------------------------------------------- +# Tests: legacy schema compatibility +# --------------------------------------------------------------------------- - def test_returns_last_version(self) -> None: - """Returns the last version in the list.""" - idx = RegistryIndex( - workflows={ - "wf": WorkflowInfo(path="w.yaml", versions=["1.0.0", "1.1.0", "2.0.0"]), - } - ) - assert resolve_latest(idx, "wf") == "2.0.0" - def test_single_version(self) -> None: - """Works with a single version.""" - idx = RegistryIndex( - workflows={ - "wf": WorkflowInfo(path="w.yaml", versions=["0.1.0"]), - } - ) - assert resolve_latest(idx, "wf") == "0.1.0" +class TestLegacySchema: + """Tests for back-compat with older index schemas.""" - def test_no_versions_raises(self) -> None: - """RegistryError when the workflow has no versions.""" - idx = RegistryIndex( - workflows={ - "wf": WorkflowInfo(path="w.yaml", versions=[]), + def test_index_with_legacy_versions_field_is_ignored(self) -> None: + """Indexes that still include a legacy 'versions' field parse without error.""" + legacy = { + "workflows": { + "wf": { + "description": "legacy entry", + "path": "workflows/wf.yaml", + "versions": ["1.0.0"], + } } - ) - with pytest.raises(RegistryError, match="no versions"): - resolve_latest(idx, "wf") - - def test_workflow_not_found_raises(self) -> None: - """RegistryError when the workflow doesn't exist.""" - idx = RegistryIndex(workflows={}) - with pytest.raises(RegistryError, match="not found"): - resolve_latest(idx, "nope") + } + idx = RegistryIndex.model_validate(legacy) + assert "wf" in idx.workflows + assert idx.workflows["wf"].path == "workflows/wf.yaml" + assert not hasattr(idx.workflows["wf"], "versions") # --------------------------------------------------------------------------- @@ -298,13 +351,12 @@ class TestGetWorkflowInfo: def test_found(self) -> None: """Returns WorkflowInfo for an existing workflow.""" - info = WorkflowInfo(description="My workflow", path="workflows/my.yaml", versions=["1.0.0"]) + info = WorkflowInfo(description="My workflow", path="workflows/my.yaml") idx = RegistryIndex(workflows={"my-wf": info}) result = get_workflow_info(idx, "my-wf") assert result.description == "My workflow" assert result.path == "workflows/my.yaml" - assert result.versions == ["1.0.0"] def test_not_found_raises(self) -> None: """RegistryError when the workflow is not in the index.""" diff --git a/tests/test_registry/test_integration.py b/tests/test_registry/test_integration.py index 6a3af50..6c3108f 100644 --- a/tests/test_registry/test_integration.py +++ b/tests/test_registry/test_integration.py @@ -19,6 +19,7 @@ load_config, remove_registry, ) +from conductor.registry.errors import RegistryError from conductor.registry.resolver import resolve_ref runner = CliRunner() @@ -48,8 +49,8 @@ def _create_local_registry( Args: root: Parent directory (e.g. tmp_path). workflows: Mapping of workflow-name → dict with keys - ``description``, ``path``, ``versions``, and ``content`` - (the YAML text of the workflow file). + ``description``, ``path``, and ``content`` (the YAML text of the + workflow file). sibling_files: Optional mapping of workflow-name → dict of filename → content for extra files alongside the workflow. @@ -61,21 +62,17 @@ def _create_local_registry( registry_dir = root / "registry" registry_dir.mkdir(parents=True, exist_ok=True) - # Build index.yaml index_data: dict = {"workflows": {}} for name, info in workflows.items(): index_data["workflows"][name] = { "description": info.get("description", ""), "path": info["path"], - "versions": info.get("versions", []), } - # Write the workflow file wf_path = registry_dir / info["path"] wf_path.parent.mkdir(parents=True, exist_ok=True) wf_path.write_text(info["content"]) - # Write sibling files if sibling_files and name in sibling_files: for fname, fcontent in sibling_files[name].items(): (wf_path.parent / fname).write_text(fcontent) @@ -117,25 +114,22 @@ def test_local_registry_end_to_end( "hello": { "description": "A greeting workflow", "path": "hello/workflow.yaml", - "versions": ["1.0.0"], "content": _SIMPLE_WORKFLOW, }, }, ) - # Add registry add_registry("my-reg", str(reg_dir), registry_type=RegistryType.path, set_default=True) - # Resolve with explicit registry and version - ref = resolve_ref("hello@my-reg@1.0.0") + # Path registries don't accept refs — use the bare name with explicit registry. + ref = resolve_ref("hello@my-reg") assert ref.kind == "registry" assert ref.workflow == "hello" assert ref.registry_name == "my-reg" - assert ref.version == "1.0.0" + assert ref.ref is None assert ref.registry_entry is not None - # Fetch - cached_path = fetch_workflow("my-reg", ref.registry_entry, "hello", version="1.0.0") + cached_path = fetch_workflow("my-reg", ref.registry_entry, "hello") assert cached_path.exists() assert cached_path.name == "workflow.yaml" assert "test-workflow" in cached_path.read_text() @@ -158,7 +152,6 @@ def test_resolve_via_default(self, tmp_path: Path, monkeypatch: pytest.MonkeyPat "greeter": { "description": "Greet someone", "path": "greeter.yaml", - "versions": ["0.1.0"], "content": _SIMPLE_WORKFLOW, }, }, @@ -166,52 +159,44 @@ def test_resolve_via_default(self, tmp_path: Path, monkeypatch: pytest.MonkeyPat add_registry("default-reg", str(reg_dir), registry_type=RegistryType.path, set_default=True) - # Resolve without @registry — should use default ref = resolve_ref("greeter") assert ref.kind == "registry" assert ref.registry_name == "default-reg" assert ref.workflow == "greeter" - # Fetch and verify - cached = fetch_workflow("default-reg", ref.registry_entry, "greeter", version="0.1.0") + cached = fetch_workflow("default-reg", ref.registry_entry, "greeter") assert cached.exists() assert "test-workflow" in cached.read_text() # --------------------------------------------------------------------------- -# Latest version resolution +# Path registries reject refs # --------------------------------------------------------------------------- -class TestLatestVersionResolution: - """When no version is specified, the last version in the list is used.""" +class TestPathRegistryRefs: + """Path registries do not support refs and raise on non-empty refs.""" - def test_resolves_latest_version(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + def test_fetch_with_ref_raises(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: _setup_home(tmp_path, monkeypatch) reg_dir = _create_local_registry( tmp_path, { - "multi-ver": { - "description": "Has multiple versions", - "path": "multi-ver.yaml", - "versions": ["1.0.0", "1.1.0", "2.0.0"], + "wf": { + "description": "", + "path": "wf.yaml", "content": _SIMPLE_WORKFLOW, }, }, ) - add_registry("ver-reg", str(reg_dir), registry_type=RegistryType.path, set_default=True) - - # Fetch without version — should resolve to 2.0.0 (last in list) - ref = resolve_ref("multi-ver") + add_registry("p-reg", str(reg_dir), registry_type=RegistryType.path, set_default=True) + ref = resolve_ref("wf") assert ref.registry_entry is not None - cached = fetch_workflow("ver-reg", ref.registry_entry, "multi-ver") - assert cached.exists() - - # Path registries return source path directly (version is ignored) - assert str(cached).startswith(str(reg_dir)) + with pytest.raises(RegistryError, match="Path registries do not support refs"): + fetch_workflow("p-reg", ref.registry_entry, "wf", ref="v1.0.0") # --------------------------------------------------------------------------- @@ -233,7 +218,6 @@ def test_second_fetch_returns_cached( "cached-wf": { "description": "Cached workflow", "path": "cached-wf.yaml", - "versions": ["1.0.0"], "content": _SIMPLE_WORKFLOW, }, }, @@ -243,15 +227,12 @@ def test_second_fetch_returns_cached( ref = resolve_ref("cached-wf") assert ref.registry_entry is not None - # First fetch - path1 = fetch_workflow("cache-reg", ref.registry_entry, "cached-wf", version="1.0.0") - - # Second fetch — should return the same source path - path2 = fetch_workflow("cache-reg", ref.registry_entry, "cached-wf", version="1.0.0") + path1 = fetch_workflow("cache-reg", ref.registry_entry, "cached-wf") + path2 = fetch_workflow("cache-reg", ref.registry_entry, "cached-wf") assert path1 == path2 assert path1.exists() - # Path registries don't use cache — returns source directly + # Path registries don't use cache — returns source directly. assert str(path1).startswith(str(reg_dir)) @@ -261,9 +242,9 @@ def test_second_fetch_returns_cached( class TestSiblingFiles: - """Sibling files in the workflow directory are also cached.""" + """Sibling files in the workflow directory are present alongside the workflow.""" - def test_siblings_copied_to_cache( + def test_siblings_alongside_workflow( self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: _setup_home(tmp_path, monkeypatch) @@ -274,7 +255,6 @@ def test_siblings_copied_to_cache( "with-siblings": { "description": "Has extra files", "path": "with-siblings/workflow.yaml", - "versions": ["1.0.0"], "content": _SIMPLE_WORKFLOW, }, }, @@ -290,10 +270,9 @@ def test_siblings_copied_to_cache( ref = resolve_ref("with-siblings") assert ref.registry_entry is not None - cached = fetch_workflow("sib-reg", ref.registry_entry, "with-siblings", version="1.0.0") + cached = fetch_workflow("sib-reg", ref.registry_entry, "with-siblings") cache_dir = cached.parent - # Siblings should be in the same directory as the workflow assert (cache_dir / "prompt.txt").exists() assert (cache_dir / "prompt.txt").read_text() == "You are a helpful assistant." assert (cache_dir / "schema.json").exists() @@ -314,44 +293,37 @@ def _isolate(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: self._tmp_path = tmp_path def test_full_cli_lifecycle(self) -> None: - # Create a local registry on disk reg_dir = _create_local_registry( self._tmp_path, { "demo": { "description": "Demo workflow", "path": "demo.yaml", - "versions": ["1.0"], "content": _SIMPLE_WORKFLOW, }, }, ) - # Add registry with --default result = runner.invoke( app, ["registry", "add", "test-reg", str(reg_dir), "--type", "path", "--default"] ) assert result.exit_code == 0, result.output assert "added" in result.output - # List registries — should show test-reg result = runner.invoke(app, ["registry", "list"]) assert result.exit_code == 0, result.output assert "test-reg" in result.output - assert "✓" in result.output # default marker + assert "✓" in result.output - # List workflows in test-reg result = runner.invoke(app, ["registry", "list", "test-reg"]) assert result.exit_code == 0, result.output assert "demo" in result.output assert "Demo workflow" in result.output - # Remove registry result = runner.invoke(app, ["registry", "remove", "test-reg"]) assert result.exit_code == 0, result.output assert "removed" in result.output - # List again — should be empty result = runner.invoke(app, ["registry", "list"]) assert result.exit_code == 0, result.output assert "No registries configured" in result.output @@ -364,7 +336,6 @@ def test_add_list_remove_multiple(self) -> None: "wf-a": { "description": "A", "path": "a.yaml", - "versions": ["1.0"], "content": _SIMPLE_WORKFLOW, }, }, @@ -375,7 +346,6 @@ def test_add_list_remove_multiple(self) -> None: "wf-b": { "description": "B", "path": "b.yaml", - "versions": ["2.0"], "content": _SIMPLE_WORKFLOW, }, }, @@ -402,7 +372,6 @@ def test_set_default_and_resolve(self) -> None: "auto": { "description": "Auto-resolved", "path": "auto.yaml", - "versions": ["1.0"], "content": _SIMPLE_WORKFLOW, }, }, @@ -411,11 +380,9 @@ def test_set_default_and_resolve(self) -> None: runner.invoke(app, ["registry", "add", "def-reg", str(reg_dir), "--type", "path"]) runner.invoke(app, ["registry", "set-default", "def-reg"]) - # Verify config config = load_config() assert config.default == "def-reg" - # Resolve bare name (no @) ref = resolve_ref("auto") assert ref.kind == "registry" assert ref.registry_name == "def-reg" @@ -441,7 +408,6 @@ def test_remove_default_clears_it( "wf": { "description": "", "path": "wf.yaml", - "versions": ["1.0"], "content": _SIMPLE_WORKFLOW, }, }, diff --git a/tests/test_registry/test_resolver.py b/tests/test_registry/test_resolver.py index b028868..f176490 100644 --- a/tests/test_registry/test_resolver.py +++ b/tests/test_registry/test_resolver.py @@ -50,7 +50,7 @@ def test_file_ref_fields(self) -> None: assert ref.path == Path("my.yaml") assert ref.workflow is None assert ref.registry_name is None - assert ref.version is None + assert ref.ref is None assert ref.registry_entry is None def test_registry_ref_fields(self) -> None: @@ -59,13 +59,13 @@ def test_registry_ref_fields(self) -> None: kind="registry", workflow="qa-bot", registry_name="team", - version="1.2.3", + ref="v1.2.3", registry_entry=entry, ) assert ref.kind == "registry" assert ref.workflow == "qa-bot" assert ref.registry_name == "team" - assert ref.version == "1.2.3" + assert ref.ref == "v1.2.3" assert ref.registry_entry is entry def test_frozen(self) -> None: @@ -107,7 +107,7 @@ def test_plain_name_not_file(self) -> None: class TestResolveRefFile: - def test_existing_file_on_disk(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + def test_existing_file_on_disk(self, tmp_path: Path) -> None: f = tmp_path / "workflow" f.write_text("content") ref = resolve_ref(str(f)) @@ -115,19 +115,24 @@ def test_existing_file_on_disk(self, tmp_path: Path, monkeypatch: pytest.MonkeyP assert ref.path == Path(str(f)) def test_yaml_extension_nonexistent(self) -> None: - ref = resolve_ref("nonexistent.yaml") + ref = resolve_ref("foo.yaml") assert ref.kind == "file" - assert ref.path == Path("nonexistent.yaml") + assert ref.path == Path("foo.yaml") def test_yml_extension_nonexistent(self) -> None: - ref = resolve_ref("nonexistent.yml") + ref = resolve_ref("foo.yml") assert ref.kind == "file" - assert ref.path == Path("nonexistent.yml") + assert ref.path == Path("foo.yml") - def test_path_with_slash(self) -> None: - ref = resolve_ref("./my-workflow.yaml") + def test_relative_path_with_slash(self) -> None: + ref = resolve_ref("./foo.yml") assert ref.kind == "file" - assert ref.path == Path("./my-workflow.yaml") + assert ref.path == Path("./foo.yml") + + def test_absolute_path(self) -> None: + ref = resolve_ref("/abs/path.yaml") + assert ref.kind == "file" + assert ref.path == Path("/abs/path.yaml") def test_path_with_backslash(self) -> None: ref = resolve_ref("dir\\workflow") @@ -136,12 +141,12 @@ def test_path_with_backslash(self) -> None: # --------------------------------------------------------------------------- -# resolve_ref — registry references +# resolve_ref — registry references (positive cases) # --------------------------------------------------------------------------- class TestResolveRefRegistry: - def test_name_only_uses_default(self, monkeypatch: pytest.MonkeyPatch) -> None: + def test_bare_name_uses_default(self, monkeypatch: pytest.MonkeyPatch) -> None: config = _make_config(default="team") _patch_config(monkeypatch, config) @@ -149,55 +154,161 @@ def test_name_only_uses_default(self, monkeypatch: pytest.MonkeyPatch) -> None: assert ref.kind == "registry" assert ref.workflow == "qa-bot" assert ref.registry_name == "team" - assert ref.version is None + assert ref.ref is None assert ref.registry_entry == config.registries["team"] - def test_name_at_registry(self, monkeypatch: pytest.MonkeyPatch) -> None: + def test_name_with_tag_ref(self, monkeypatch: pytest.MonkeyPatch) -> None: config = _make_config(default="team") _patch_config(monkeypatch, config) - ref = resolve_ref("qa-bot@local") + ref = resolve_ref("qa-bot#v1.2.3") assert ref.kind == "registry" assert ref.workflow == "qa-bot" - assert ref.registry_name == "local" - assert ref.version is None - assert ref.registry_entry == config.registries["local"] + assert ref.registry_name == "team" + assert ref.ref == "v1.2.3" - def test_name_at_registry_at_version(self, monkeypatch: pytest.MonkeyPatch) -> None: + def test_name_with_branch_ref(self, monkeypatch: pytest.MonkeyPatch) -> None: config = _make_config(default="team") _patch_config(monkeypatch, config) - ref = resolve_ref("qa-bot@team@1.2.3") + ref = resolve_ref("qa-bot#main") assert ref.kind == "registry" assert ref.workflow == "qa-bot" assert ref.registry_name == "team" - assert ref.version == "1.2.3" + assert ref.ref == "main" + + def test_name_with_commit_sha_ref(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + ref = resolve_ref("qa-bot#abc1234") + assert ref.ref == "abc1234" - def test_name_at_empty_at_version_uses_default(self, monkeypatch: pytest.MonkeyPatch) -> None: + def test_name_at_registry(self, monkeypatch: pytest.MonkeyPatch) -> None: config = _make_config(default="team") _patch_config(monkeypatch, config) - ref = resolve_ref("qa-bot@@1.2.3") + ref = resolve_ref("qa-bot@team") + assert ref.kind == "registry" + assert ref.workflow == "qa-bot" + assert ref.registry_name == "team" + assert ref.ref is None + assert ref.registry_entry == config.registries["team"] + + def test_name_at_registry_with_ref(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="local") + _patch_config(monkeypatch, config) + + ref = resolve_ref("qa-bot@team#v1.2.3") assert ref.kind == "registry" assert ref.workflow == "qa-bot" assert ref.registry_name == "team" - assert ref.version == "1.2.3" + assert ref.ref == "v1.2.3" + assert ref.registry_entry == config.registries["team"] + + def test_empty_registry_with_ref_uses_default(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + ref = resolve_ref("qa-bot@#v1") + assert ref.kind == "registry" + assert ref.workflow == "qa-bot" + assert ref.registry_name == "team" + assert ref.ref == "v1" + + def test_registry_entry_populated(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + ref = resolve_ref("qa-bot@team#v2.0.0") + assert ref.registry_entry is not None + assert ref.registry_entry.type == RegistryType.github + assert ref.registry_entry.source == "acme/workflows" + + +# --------------------------------------------------------------------------- +# resolve_ref — registry references (negative cases) +# --------------------------------------------------------------------------- + + +class TestResolveRefRegistryErrors: + def test_empty_ref_after_hash(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + with pytest.raises(RegistryError, match="Ref cannot be empty"): + resolve_ref("qa-bot#") + + def test_double_hash(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + with pytest.raises(RegistryError, match="at most one '#'"): + resolve_ref("qa-bot##v1") + + def test_multiple_hashes(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + with pytest.raises(RegistryError, match="at most one '#'"): + resolve_ref("qa-bot#v1#v2") - def test_missing_default_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + def test_double_at(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + with pytest.raises(RegistryError, match="at most one '@'"): + resolve_ref("qa-bot@@v1") + + def test_multiple_ats(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + with pytest.raises(RegistryError, match="at most one '@'"): + resolve_ref("qa-bot@a@b") + + def test_empty_workflow_with_registry_and_ref(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + with pytest.raises(RegistryError, match="Workflow name is required"): + resolve_ref("@team#v1") + + def test_empty_workflow_with_ref_only(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + with pytest.raises(RegistryError, match="Workflow name is required"): + resolve_ref("#v1") + + def test_empty_workflow_with_registry_only(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + with pytest.raises(RegistryError, match="Workflow name is required"): + resolve_ref("@team") + + def test_just_hash(self, monkeypatch: pytest.MonkeyPatch) -> None: + config = _make_config(default="team") + _patch_config(monkeypatch, config) + + # "#" splits into ["", ""], so empty-ref check fires first. + with pytest.raises(RegistryError, match="Ref cannot be empty"): + resolve_ref("#") + + def test_no_default_registry_configured(self, monkeypatch: pytest.MonkeyPatch) -> None: config = _make_config(default=None) _patch_config(monkeypatch, config) with pytest.raises(RegistryError, match="No default registry configured"): resolve_ref("qa-bot") - def test_missing_default_with_empty_registry_raises( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_no_default_with_empty_registry_segment(self, monkeypatch: pytest.MonkeyPatch) -> None: config = _make_config(default=None) _patch_config(monkeypatch, config) with pytest.raises(RegistryError, match="No default registry configured"): - resolve_ref("qa-bot@@1.0.0") + resolve_ref("qa-bot@#v1.0.0") def test_unknown_registry_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: config = _make_config(default="team") @@ -206,11 +317,9 @@ def test_unknown_registry_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: with pytest.raises(RegistryError, match="Registry 'nope' not found"): resolve_ref("qa-bot@nope") - def test_registry_entry_populated(self, monkeypatch: pytest.MonkeyPatch) -> None: + def test_unknown_registry_with_ref_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: config = _make_config(default="team") _patch_config(monkeypatch, config) - ref = resolve_ref("qa-bot@team@2.0.0") - assert ref.registry_entry is not None - assert ref.registry_entry.type == RegistryType.github - assert ref.registry_entry.source == "acme/workflows" + with pytest.raises(RegistryError, match="Registry 'nope' not found"): + resolve_ref("qa-bot@nope#v1.0.0") diff --git a/tests/test_registry/test_version_resolver.py b/tests/test_registry/test_version_resolver.py new file mode 100644 index 0000000..cb93427 --- /dev/null +++ b/tests/test_registry/test_version_resolver.py @@ -0,0 +1,139 @@ +"""Tests for conductor.registry.version_resolver.""" + +from __future__ import annotations + +import pytest + +from conductor.registry import version_resolver +from conductor.registry.config import RegistryEntry, RegistryType +from conductor.registry.errors import RegistryError +from conductor.registry.version_resolver import ( + _sort_tags, + materialize_to_sha, + resolve_ref, +) + + +def _path_entry() -> RegistryEntry: + return RegistryEntry(type=RegistryType.path, source="/tmp/workflows") + + +def _gh_entry() -> RegistryEntry: + return RegistryEntry(type=RegistryType.github, source="acme/widgets") + + +# --------------------------------------------------------------------------- +# resolve_ref — path registries +# --------------------------------------------------------------------------- + + +def test_resolve_ref_path_none_returns_empty() -> None: + assert resolve_ref(_path_entry(), None) == "" + + +def test_resolve_ref_path_empty_string_returns_empty() -> None: + assert resolve_ref(_path_entry(), "") == "" + + +def test_resolve_ref_path_with_ref_raises() -> None: + with pytest.raises(RegistryError) as exc_info: + resolve_ref(_path_entry(), "v1") + assert "Path registries do not support refs" in str(exc_info.value) + assert exc_info.value.suggestion is not None + assert "remove '#'" in exc_info.value.suggestion + + +# --------------------------------------------------------------------------- +# resolve_ref — github registries +# --------------------------------------------------------------------------- + + +def test_resolve_ref_github_none_picks_newest_tag(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + version_resolver, "list_tags", lambda owner, repo: ["v1.0.0", "v2.0.0", "v1.1.0"] + ) + monkeypatch.setattr( + version_resolver, + "get_default_branch", + lambda owner, repo: pytest.fail("should not be called"), + ) + assert resolve_ref(_gh_entry(), None) == "v2.0.0" + + +def test_resolve_ref_github_latest_same_as_none(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + version_resolver, "list_tags", lambda owner, repo: ["v1.0.0", "v2.0.0", "v1.1.0"] + ) + assert resolve_ref(_gh_entry(), "latest") == "v2.0.0" + assert resolve_ref(_gh_entry(), "LATEST") == "v2.0.0" + + +def test_resolve_ref_github_no_tags_returns_default_branch( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(version_resolver, "list_tags", lambda owner, repo: []) + monkeypatch.setattr(version_resolver, "get_default_branch", lambda owner, repo: "main") + assert resolve_ref(_gh_entry(), None) == "main" + + +def test_resolve_ref_github_branch_returned_verbatim(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + version_resolver, + "list_tags", + lambda owner, repo: pytest.fail("should not be called"), + ) + assert resolve_ref(_gh_entry(), "main") == "main" + + +def test_resolve_ref_github_tag_returned_verbatim(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + version_resolver, + "list_tags", + lambda owner, repo: pytest.fail("should not be called"), + ) + assert resolve_ref(_gh_entry(), "v1.0.0") == "v1.0.0" + + +# --------------------------------------------------------------------------- +# _sort_tags +# --------------------------------------------------------------------------- + + +def test_sort_tags_v_prefix() -> None: + assert _sort_tags(["v1.0.0", "v2.0.0", "v1.1.0"]) == ["v2.0.0", "v1.1.0", "v1.0.0"] + + +def test_sort_tags_prereleases() -> None: + assert _sort_tags(["1.0.0", "1.0.0-rc1", "0.9.0"]) == ["1.0.0", "1.0.0-rc1", "0.9.0"] + + +def test_sort_tags_mixed_parseable_and_not() -> None: + result = _sort_tags(["v1.0.0", "release-2024", "v2.0.0", "experimental"]) + assert result == ["v2.0.0", "v1.0.0", "release-2024", "experimental"] + + +def test_sort_tags_empty() -> None: + assert _sort_tags([]) == [] + + +# --------------------------------------------------------------------------- +# materialize_to_sha +# --------------------------------------------------------------------------- + + +def test_materialize_to_sha_happy_path(monkeypatch: pytest.MonkeyPatch) -> None: + captured: dict[str, object] = {} + + def fake_resolve(owner: str, repo: str, ref: str) -> str: + captured["args"] = (owner, repo, ref) + return "abc123" * 6 + "abcd" # 40 chars + + monkeypatch.setattr(version_resolver, "resolve_ref_to_sha", fake_resolve) + sha = materialize_to_sha(_gh_entry(), "main") + assert sha == "abc123" * 6 + "abcd" + assert captured["args"] == ("acme", "widgets", "main") + + +def test_materialize_to_sha_path_raises() -> None: + with pytest.raises(RegistryError, match="not applicable to path registries"): + materialize_to_sha(_path_entry(), "main") From 2defc4bf879ff32eaad6fba99ed001076a6a9701 Mon Sep 17 00:00:00 2001 From: Jason Robert Date: Tue, 5 May 2026 20:31:23 -0400 Subject: [PATCH 2/2] fix(registry): apply PR review feedback Address quality issues from comprehensive PR review: - Declare packaging>=21.0 explicitly in pyproject.toml; remove the silent ImportError fallback in version_resolver._sort_tags that could return commit-date-ordered tags as 'latest'. - Promote _sort_tags -> sort_tags (was imported across module boundaries from cli/registry.py). - Add RegistryNotFoundError(RegistryError) subclass; _raise_for_status raises it on HTTP 404. _load_github_index now catches only RegistryNotFoundError in the per-filename loop, so auth, rate-limit, and network errors propagate clearly instead of being collapsed into a misleading 'no index file found' message. - _format_latest_tags now catches only RegistryError and httpx.HTTPError (was bare Exception); surfaces the actual error message in the '(unavailable: ...)' display. - New prune_temp_dirs() helper cleans orphaned .tmp-* directories (created by the atomic-write pattern) on registry update. - Tighten misleading docstrings: list_tags clarifies 'commit-date order' vs semver; fetch_workflow Raises documents silent sibling-fetch failures. 10 new tests covering the new error propagation, tmp-dir cleanup, and surfaced suggestions. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyproject.toml | 1 + src/conductor/cli/registry.py | 31 ++++-- src/conductor/registry/cache.py | 43 ++++++++ src/conductor/registry/errors.py | 11 ++ src/conductor/registry/github.py | 10 +- src/conductor/registry/index.py | 6 +- src/conductor/registry/version_resolver.py | 12 +-- tests/test_cli/test_registry_commands.py | 98 ++++++++++++++++++ tests/test_registry/test_cache.py | 68 ++++++++++++ tests/test_registry/test_github.py | 10 +- tests/test_registry/test_index.py | 103 +++++++++++++++++-- tests/test_registry/test_version_resolver.py | 12 +-- uv.lock | 2 + 13 files changed, 368 insertions(+), 39 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2163ea3..3e85e84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "uvicorn>=0.30.0", "websockets>=12.0", "httpx>=0.27.0", + "packaging>=21.0", ] [project.urls] diff --git a/src/conductor/cli/registry.py b/src/conductor/cli/registry.py index 3eae683..4066c04 100644 --- a/src/conductor/cli/registry.py +++ b/src/conductor/cli/registry.py @@ -4,11 +4,12 @@ from typing import Annotated +import httpx import typer from rich.console import Console from rich.table import Table -from conductor.registry.cache import clear_cache +from conductor.registry.cache import clear_cache, prune_temp_dirs from conductor.registry.config import ( RegistryEntry, RegistryType, @@ -21,7 +22,7 @@ from conductor.registry.errors import RegistryError from conductor.registry.github import list_tags, parse_github_source from conductor.registry.index import load_index -from conductor.registry.version_resolver import _sort_tags +from conductor.registry.version_resolver import sort_tags _MAX_DISPLAY_TAGS = 5 @@ -104,8 +105,10 @@ def _format_latest_tags(entry: RegistryEntry) -> str | None: """Return a formatted "Latest tags" string for a registry, or None for path registries. For github registries, fetches tags via the API and returns up to - ``_MAX_DISPLAY_TAGS`` newest tags (semver-sorted). Returns "(unavailable)" - on any fetch failure and "(no tags)" if the repo has none. + ``_MAX_DISPLAY_TAGS`` newest tags (semver-sorted). Returns + ``"(unavailable: )"`` on a fetch failure (surfacing the + underlying ``RegistryError`` message or the HTTP exception class name) + and ``"(no tags)"`` if the repo has none. """ if entry.type != RegistryType.github: return None @@ -113,13 +116,15 @@ def _format_latest_tags(entry: RegistryEntry) -> str | None: try: owner, repo = parse_github_source(entry.source) tags = list_tags(owner, repo) - except Exception: - return "(unavailable)" + except RegistryError as error: + return f"(unavailable: {error})" + except httpx.HTTPError as error: + return f"(unavailable: {type(error).__name__})" if not tags: return "(no tags)" - sorted_tags = _sort_tags(tags) + sorted_tags = sort_tags(tags) display = sorted_tags[:_MAX_DISPLAY_TAGS] suffix = ", ..." if len(sorted_tags) > _MAX_DISPLAY_TAGS else "" return ", ".join(display) + suffix @@ -192,7 +197,8 @@ def set_default( "Re-fetches the latest index from each configured registry and clears " "locally-cached workflow files. Index fetches always bypass GitHub's CDN " "cache (via SHA-based URLs), so this primarily clears cached workflow " - "contents pinned to mutable refs (branches)." + "contents pinned to mutable refs (branches). Also prunes orphaned " + "'.tmp-*' directories left behind by interrupted fetches." ), ) def update( @@ -206,7 +212,8 @@ def update( Re-fetches the latest index from each configured registry and clears locally-cached workflow files. Index fetches always bypass GitHub's CDN cache (via SHA-based URLs), so this primarily clears cached workflow - contents pinned to mutable refs (branches). + contents pinned to mutable refs (branches). Additionally prunes any + orphaned ``.tmp-*`` directories left behind by interrupted fetches. """ try: config = load_config() @@ -218,6 +225,9 @@ def update( suggestion="Run 'conductor registry list' to see available registries.", ) clear_cache(name) + pruned = prune_temp_dirs(name) + if pruned > 0: + output_console.print(f"Pruned {pruned} stale .tmp-* directories.") load_index(config.registries[name]) output_console.print(f"Registry '{name}' updated.") else: @@ -225,6 +235,9 @@ def update( output_console.print("No registries configured.") return clear_cache() + pruned = prune_temp_dirs() + if pruned > 0: + output_console.print(f"Pruned {pruned} stale .tmp-* directories.") for reg_name, entry in config.registries.items(): load_index(entry) output_console.print(f"Registry '{reg_name}' updated.") diff --git a/src/conductor/registry/cache.py b/src/conductor/registry/cache.py index 4ba2420..964c137 100644 --- a/src/conductor/registry/cache.py +++ b/src/conductor/registry/cache.py @@ -118,6 +118,9 @@ def fetch_workflow( Raises: RegistryError: On fetch failure, missing workflow, or I/O errors. + Failures fetching sibling files in the same directory are + silently swallowed (best-effort) — only the workflow file + itself must succeed. """ # Path registries: read directly from source. resolve_ref raises if a # ref was supplied, propagating a clear error to the caller. @@ -262,3 +265,43 @@ def clear_cache(registry_name: str | None = None) -> None: else: if base.exists(): shutil.rmtree(base) + + +def prune_temp_dirs(registry_name: str | None = None) -> int: + """Remove orphaned ``.tmp-*`` directories under the cache. + + The atomic write pattern in :func:`fetch_workflow` creates ``.tmp-XXXX`` + directories alongside each workflow's SHA directory. If a process is + killed mid-write, these orphans never get cleaned up. This helper walks + the cache and removes any directory whose name starts with ``.tmp-``. + + Args: + registry_name: If provided, only that registry's cache is scanned. + Otherwise all registries under the cache base are scanned. + + Returns: + Count of directories successfully removed. + """ + base = get_cache_base() + if not base.is_dir(): + return 0 + + if registry_name is not None: + registry_roots = [base / registry_name] + else: + registry_roots = [p for p in base.iterdir() if p.is_dir()] + + removed = 0 + for reg_root in registry_roots: + if not reg_root.is_dir(): + continue + # Layout: /// + for workflow_dir in reg_root.iterdir(): + if not workflow_dir.is_dir(): + continue + for child in workflow_dir.iterdir(): + if child.is_dir() and child.name.startswith(".tmp-"): + shutil.rmtree(child, ignore_errors=True) + if not child.exists(): + removed += 1 + return removed diff --git a/src/conductor/registry/errors.py b/src/conductor/registry/errors.py index ed3919b..cae9b7c 100644 --- a/src/conductor/registry/errors.py +++ b/src/conductor/registry/errors.py @@ -11,3 +11,14 @@ class RegistryError(ConductorError): Raised for registry not found, config parse errors, and duplicate registry names. """ + + +class RegistryNotFoundError(RegistryError): + """A registry resource (file, ref, etc.) was not found (HTTP 404 or equivalent). + + Subclass of ``RegistryError`` so existing ``except RegistryError`` handlers + keep working, but callers that need to distinguish "not found" from other + failures (auth, rate-limit, network) can catch this specifically. + """ + + pass diff --git a/src/conductor/registry/github.py b/src/conductor/registry/github.py index 5d8b8c4..a682424 100644 --- a/src/conductor/registry/github.py +++ b/src/conductor/registry/github.py @@ -10,7 +10,7 @@ import httpx -from conductor.registry.errors import RegistryError +from conductor.registry.errors import RegistryError, RegistryNotFoundError GITHUB_RAW_BASE = "https://raw.githubusercontent.com" GITHUB_API_BASE = "https://api.github.com" @@ -59,7 +59,7 @@ def _raise_for_status(response: httpx.Response, *, context: str) -> None: return status = response.status_code if status == 404: - raise RegistryError( + raise RegistryNotFoundError( f"{context}: not found (404). Check that the repository exists and the ref is valid.", suggestion="If this is a private repo, ensure 'gh auth login' has been run.", ) @@ -123,7 +123,7 @@ def fetch_file_text(owner: str, repo: str, path: str, ref: str = "main") -> str: def list_tags(owner: str, repo: str) -> list[str]: - """List all git tags for a repository, newest first. + """List all git tags for a repository in GitHub's commit-date order. Uses GET /repos/{owner}/{repo}/tags from the GitHub REST API. Follows ``Link: <...>; rel="next"`` headers to paginate, capping at @@ -134,7 +134,9 @@ def list_tags(owner: str, repo: str) -> list[str]: repo: Repository name. Returns: - List of tag name strings, newest first. + List of tag name strings in GitHub's commit-date order (newest commit + first). For semver ordering of these tags, use + :func:`conductor.registry.version_resolver.sort_tags`. Raises: RegistryError: If the API request fails. diff --git a/src/conductor/registry/index.py b/src/conductor/registry/index.py index e4a3553..20be278 100644 --- a/src/conductor/registry/index.py +++ b/src/conductor/registry/index.py @@ -13,7 +13,7 @@ from ruamel.yaml import YAML, YAMLError from conductor.registry.config import RegistryEntry, RegistryType -from conductor.registry.errors import RegistryError +from conductor.registry.errors import RegistryError, RegistryNotFoundError _INDEX_FILENAMES = ("index.yaml", "index.json") @@ -201,13 +201,13 @@ def _load_github_index(source: str, ref: str | None) -> RegistryIndex: for filename in _INDEX_FILENAMES: try: text = fetch_file_text(owner, repo, filename, ref=sha) - except RegistryError: + except RegistryNotFoundError: continue return _parse_github_response(text, filename, f"{source}/{sha}/{filename}") tried_label = ref if ref is not None else "default branch" - raise RegistryError( + raise RegistryNotFoundError( f"No index.yaml or index.json found in GitHub repo '{source}' " f"at ref '{tried_label}' (resolved to {sha})", suggestion="Ensure the repository contains an index.yaml or index.json at this ref.", diff --git a/src/conductor/registry/version_resolver.py b/src/conductor/registry/version_resolver.py index 8a3da3d..90194c4 100644 --- a/src/conductor/registry/version_resolver.py +++ b/src/conductor/registry/version_resolver.py @@ -2,6 +2,8 @@ from __future__ import annotations +from packaging.version import InvalidVersion, Version + from conductor.registry.config import RegistryEntry, RegistryType from conductor.registry.errors import RegistryError from conductor.registry.github import ( @@ -40,7 +42,7 @@ def resolve_ref(entry: RegistryEntry, requested: str | None) -> str: owner, repo = parse_github_source(entry.source) tags = list_tags(owner, repo) if tags: - return _sort_tags(tags)[0] + return sort_tags(tags)[0] return get_default_branch(owner, repo) return requested @@ -59,19 +61,13 @@ def materialize_to_sha(entry: RegistryEntry, ref: str) -> str: return resolve_ref_to_sha(owner, repo, ref) -def _sort_tags(tags: list[str]) -> list[str]: +def sort_tags(tags: list[str]) -> list[str]: """Sort tags newest-first, preferring semver order for parseable tags. Tags that parse as PEP 440 / semver (after stripping a leading ``v``) are placed first in descending version order. Unparseable tags follow in their original input order (which is GitHub's newest-commit-first). """ - try: - from packaging.version import InvalidVersion, Version - except ImportError: - # packaging not available — fall back to input order. - return list(tags) - parseable: list[tuple[Version, str]] = [] unparseable: list[str] = [] for tag in tags: diff --git a/tests/test_cli/test_registry_commands.py b/tests/test_cli/test_registry_commands.py index 2defa33..aca73f4 100644 --- a/tests/test_cli/test_registry_commands.py +++ b/tests/test_cli/test_registry_commands.py @@ -259,3 +259,101 @@ def test_update_no_registries(self) -> None: result = runner.invoke(app, ["registry", "update"]) assert result.exit_code == 0 assert "No registries configured" in result.output + + +# --------------------------------------------------------------------------- +# update: tmp dir pruning +# --------------------------------------------------------------------------- + + +class TestUpdatePrunesTempDirs: + def test_update_prunes_temp_dirs( + self, tmp_path: pytest.TempPathFactory, monkeypatch: pytest.MonkeyPatch + ) -> None: + from pathlib import Path + + runner.invoke(app, ["registry", "add", "team", "acme/workflows"]) + + # Drop a .tmp-* dir under the cache layout for this registry. + cache_base = Path(str(tmp_path)) / "cache" / "registries" + orphan = cache_base / "team" / "wf" / ".tmp-orphan" + orphan.mkdir(parents=True) + + from conductor.registry.index import RegistryIndex + + # Mock clear_cache as a no-op so the orphan survives until prune_temp_dirs runs. + with ( + patch("conductor.cli.registry.clear_cache"), + patch("conductor.cli.registry.load_index", return_value=RegistryIndex(workflows={})), + ): + result = runner.invoke(app, ["registry", "update", "team"]) + + assert result.exit_code == 0 + assert not orphan.exists() + assert "Pruned 1 stale .tmp-* directories." in result.output + + def test_update_no_prune_message_when_zero( + self, tmp_path: pytest.TempPathFactory, monkeypatch: pytest.MonkeyPatch + ) -> None: + runner.invoke(app, ["registry", "add", "team", "acme/workflows"]) + + from conductor.registry.index import RegistryIndex + + with patch("conductor.cli.registry.load_index", return_value=RegistryIndex(workflows={})): + result = runner.invoke(app, ["registry", "update", "team"]) + + assert result.exit_code == 0 + assert "Pruned" not in result.output + + +# --------------------------------------------------------------------------- +# _format_latest_tags: error surfacing +# --------------------------------------------------------------------------- + + +class TestFormatLatestTagsErrors: + def test_format_latest_tags_surfaces_registry_error_message(self) -> None: + from conductor.registry.errors import RegistryError + from conductor.registry.index import RegistryIndex, WorkflowInfo + + runner.invoke(app, ["registry", "add", "team", "acme/workflows"]) + + mock_index = RegistryIndex( + workflows={"qa-bot": WorkflowInfo(description="QA helper", path="qa/bot.yaml")}, + ) + + with ( + patch("conductor.cli.registry.load_index", return_value=mock_index), + patch( + "conductor.cli.registry.list_tags", + side_effect=RegistryError("rate limit"), + ), + ): + result = runner.invoke(app, ["registry", "list", "team"]) + + assert result.exit_code == 0 + assert "rate limit" in result.output + assert "unavailable" in result.output + + def test_format_latest_tags_surfaces_http_error_class(self) -> None: + import httpx + + from conductor.registry.index import RegistryIndex, WorkflowInfo + + runner.invoke(app, ["registry", "add", "team", "acme/workflows"]) + + mock_index = RegistryIndex( + workflows={"qa-bot": WorkflowInfo(description="QA helper", path="qa/bot.yaml")}, + ) + + with ( + patch("conductor.cli.registry.load_index", return_value=mock_index), + patch( + "conductor.cli.registry.list_tags", + side_effect=httpx.ConnectError("boom"), + ), + ): + result = runner.invoke(app, ["registry", "list", "team"]) + + assert result.exit_code == 0 + assert "ConnectError" in result.output diff --git a/tests/test_registry/test_cache.py b/tests/test_registry/test_cache.py index 5889e3f..68ba955 100644 --- a/tests/test_registry/test_cache.py +++ b/tests/test_registry/test_cache.py @@ -13,6 +13,7 @@ fetch_workflow, get_cache_base, get_cached_workflow_path, + prune_temp_dirs, ) from conductor.registry.config import RegistryEntry, RegistryType from conductor.registry.errors import RegistryError @@ -556,3 +557,70 @@ def test_clear_nonexistent_is_noop( # Should not raise clear_cache(registry_name="does-not-exist") clear_cache() + + +# --------------------------------------------------------------------------- +# prune_temp_dirs +# --------------------------------------------------------------------------- + + +class TestPruneTempDirs: + def test_prune_temp_dirs_removes_orphans( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + home = _setup_conductor_home(tmp_path, monkeypatch) + cache_base = home / "cache" / "registries" + + # Real SHA dir alongside an orphan .tmp-* dir. + real = cache_base / "reg-a" / "wf" / _SHA_DIR + orphan = cache_base / "reg-a" / "wf" / ".tmp-abc" + real.mkdir(parents=True) + orphan.mkdir(parents=True) + (orphan / "junk.yaml").write_text("x", encoding="utf-8") + + removed = prune_temp_dirs() + + assert removed == 1 + assert not orphan.exists() + assert real.exists() + + def test_prune_temp_dirs_scoped_to_registry( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + home = _setup_conductor_home(tmp_path, monkeypatch) + cache_base = home / "cache" / "registries" + + orphan_a = cache_base / "reg-a" / "wf" / ".tmp-aaa" + orphan_b = cache_base / "reg-b" / "wf" / ".tmp-bbb" + orphan_a.mkdir(parents=True) + orphan_b.mkdir(parents=True) + + removed = prune_temp_dirs("reg-a") + + assert removed == 1 + assert not orphan_a.exists() + assert orphan_b.exists() + + def test_prune_temp_dirs_returns_count( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + home = _setup_conductor_home(tmp_path, monkeypatch) + cache_base = home / "cache" / "registries" + + for n in range(3): + (cache_base / "reg-a" / "wf" / f".tmp-{n}").mkdir(parents=True) + (cache_base / "reg-b" / "other-wf" / ".tmp-xyz").mkdir(parents=True) + # Real dirs - should not be counted. + (cache_base / "reg-a" / "wf" / _SHA_DIR).mkdir(parents=True) + + removed = prune_temp_dirs() + + assert removed == 4 + + def test_prune_temp_dirs_missing_base_returns_zero( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + _setup_conductor_home(tmp_path, monkeypatch) + # No cache dir at all + assert prune_temp_dirs() == 0 + assert prune_temp_dirs("reg-a") == 0 diff --git a/tests/test_registry/test_github.py b/tests/test_registry/test_github.py index a0fd43e..bdd3195 100644 --- a/tests/test_registry/test_github.py +++ b/tests/test_registry/test_github.py @@ -7,7 +7,7 @@ import httpx import pytest -from conductor.registry.errors import RegistryError +from conductor.registry.errors import RegistryError, RegistryNotFoundError from conductor.registry.github import ( fetch_file, fetch_file_text, @@ -57,6 +57,14 @@ def test_404_raises_registry_error(self, mock_get: MagicMock) -> None: with pytest.raises(RegistryError, match="not found"): fetch_file("owner", "repo", "missing.txt") + @patch("conductor.registry.github.httpx.get") + def test_404_raises_registry_not_found_error(self, mock_get: MagicMock) -> None: + """404 specifically raises RegistryNotFoundError (subclass of RegistryError).""" + mock_get.return_value = _mock_response(status_code=404) + + with pytest.raises(RegistryNotFoundError, match="not found"): + fetch_file("owner", "repo", "missing.txt") + @patch("conductor.registry.github.httpx.get") def test_timeout_raises_registry_error(self, mock_get: MagicMock) -> None: mock_get.side_effect = httpx.TimeoutException("timed out") diff --git a/tests/test_registry/test_index.py b/tests/test_registry/test_index.py index a08e295..4bb9326 100644 --- a/tests/test_registry/test_index.py +++ b/tests/test_registry/test_index.py @@ -10,7 +10,7 @@ from ruamel.yaml import YAML from conductor.registry.config import RegistryEntry, RegistryType -from conductor.registry.errors import RegistryError +from conductor.registry.errors import RegistryError, RegistryNotFoundError from conductor.registry.index import ( RegistryIndex, WorkflowInfo, @@ -205,7 +205,7 @@ def test_latest_ref_uses_default_branch( """Passing ref='latest' is equivalent to no ref — uses default branch.""" json_text = json.dumps(_SAMPLE_INDEX) # yaml fails, json succeeds - mock_fetch.side_effect = [RegistryError("not found"), json_text] + mock_fetch.side_effect = [RegistryNotFoundError("not found"), json_text] idx = load_index(_github_entry("myorg/myrepo"), ref="latest") assert "qa-bot" in idx.workflows @@ -254,7 +254,7 @@ def test_fallback_to_json( ) -> None: """Falls back to index.json when index.yaml is not found at the SHA.""" json_text = json.dumps(_SAMPLE_INDEX) - mock_fetch.side_effect = [RegistryError("not found"), json_text] + mock_fetch.side_effect = [RegistryNotFoundError("not found"), json_text] idx = load_index(_github_entry("myorg/myrepo")) assert "qa-bot" in idx.workflows @@ -274,10 +274,10 @@ def test_all_404_raises( _mock_resolve: MagicMock, mock_fetch: MagicMock, ) -> None: - """RegistryError when neither index file is found at the SHA.""" - mock_fetch.side_effect = RegistryError("not found") + """RegistryNotFoundError when neither index file is found at the SHA.""" + mock_fetch.side_effect = RegistryNotFoundError("not found") - with pytest.raises(RegistryError, match="No index.yaml or index.json"): + with pytest.raises(RegistryNotFoundError, match="No index.yaml or index.json"): load_index(_github_entry("myorg/myrepo")) # Only one attempt per filename — no branch fallback any more @@ -294,10 +294,10 @@ def test_network_error_raises( _mock_resolve: MagicMock, mock_fetch: MagicMock, ) -> None: - """RegistryError on network failure propagates.""" + """RegistryError on network failure propagates immediately, not collapsed to 'no index'.""" mock_fetch.side_effect = RegistryError("Failed to fetch: connection refused") - with pytest.raises(RegistryError, match="No index.yaml or index.json"): + with pytest.raises(RegistryError, match="connection refused"): load_index(_github_entry("myorg/myrepo")) @patch("conductor.registry.github.resolve_ref_to_sha") @@ -315,6 +315,93 @@ def test_resolve_ref_failure_propagates( with pytest.raises(RegistryError, match="ref not found"): load_index(_github_entry("myorg/myrepo"), ref="nonexistent") + @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="sha1234567890") + @patch("conductor.registry.github.get_default_branch", return_value="main") + @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) + def test_load_index_propagates_auth_error( + self, + _mock_parse: MagicMock, + _mock_default_branch: MagicMock, + _mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: + """A non-404 RegistryError (e.g. HTTP 403 auth/rate-limit) on the first + filename propagates immediately rather than being swallowed and reported + as 'no index file found'.""" + mock_fetch.side_effect = RegistryError( + "Fetching myorg/myrepo/index.yaml at ref sha1234567890: HTTP 403. " + "GitHub API rate limit may be exceeded. Try again later." + ) + + with pytest.raises(RegistryError, match="HTTP 403") as exc_info: + load_index(_github_entry("myorg/myrepo")) + + assert "No index.yaml or index.json" not in str(exc_info.value) + # Only the first filename should be tried — the loop must NOT fall through + assert mock_fetch.call_count == 1 + + @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="sha1234567890") + @patch("conductor.registry.github.get_default_branch", return_value="main") + @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) + def test_load_index_propagates_rate_limit( + self, + _mock_parse: MagicMock, + _mock_default_branch: MagicMock, + _mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: + """An HTTP 429 rate-limit error propagates immediately.""" + mock_fetch.side_effect = RegistryError( + "Fetching myorg/myrepo/index.yaml at ref sha1234567890: HTTP 429. " + "GitHub API rate limit may be exceeded. Try again later." + ) + + with pytest.raises(RegistryError, match="HTTP 429") as exc_info: + load_index(_github_entry("myorg/myrepo")) + + assert "No index.yaml or index.json" not in str(exc_info.value) + assert mock_fetch.call_count == 1 + + @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="sha1234567890") + @patch("conductor.registry.github.get_default_branch", return_value="main") + @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) + def test_load_index_falls_back_yaml_to_json_on_404( + self, + _mock_parse: MagicMock, + _mock_default_branch: MagicMock, + _mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: + """A 404 (RegistryNotFoundError) on index.yaml falls through to index.json.""" + json_text = json.dumps(_SAMPLE_INDEX) + mock_fetch.side_effect = [RegistryNotFoundError("not found"), json_text] + + idx = load_index(_github_entry("myorg/myrepo")) + assert "qa-bot" in idx.workflows + assert mock_fetch.call_count == 2 + + @patch("conductor.registry.github.fetch_file_text") + @patch("conductor.registry.github.resolve_ref_to_sha", return_value="sha1234567890") + @patch("conductor.registry.github.get_default_branch", return_value="main") + @patch("conductor.registry.github.parse_github_source", return_value=("myorg", "myrepo")) + def test_load_index_404_for_both_raises_not_found( + self, + _mock_parse: MagicMock, + _mock_default_branch: MagicMock, + _mock_resolve: MagicMock, + mock_fetch: MagicMock, + ) -> None: + """When both yaml and json 404, the final error is a RegistryNotFoundError.""" + mock_fetch.side_effect = RegistryNotFoundError("not found") + + with pytest.raises(RegistryNotFoundError, match="No index.yaml or index.json"): + load_index(_github_entry("myorg/myrepo")) + + assert mock_fetch.call_count == 2 + # --------------------------------------------------------------------------- # Tests: legacy schema compatibility diff --git a/tests/test_registry/test_version_resolver.py b/tests/test_registry/test_version_resolver.py index cb93427..5525e15 100644 --- a/tests/test_registry/test_version_resolver.py +++ b/tests/test_registry/test_version_resolver.py @@ -8,9 +8,9 @@ from conductor.registry.config import RegistryEntry, RegistryType from conductor.registry.errors import RegistryError from conductor.registry.version_resolver import ( - _sort_tags, materialize_to_sha, resolve_ref, + sort_tags, ) @@ -95,25 +95,25 @@ def test_resolve_ref_github_tag_returned_verbatim(monkeypatch: pytest.MonkeyPatc # --------------------------------------------------------------------------- -# _sort_tags +# sort_tags # --------------------------------------------------------------------------- def test_sort_tags_v_prefix() -> None: - assert _sort_tags(["v1.0.0", "v2.0.0", "v1.1.0"]) == ["v2.0.0", "v1.1.0", "v1.0.0"] + assert sort_tags(["v1.0.0", "v2.0.0", "v1.1.0"]) == ["v2.0.0", "v1.1.0", "v1.0.0"] def test_sort_tags_prereleases() -> None: - assert _sort_tags(["1.0.0", "1.0.0-rc1", "0.9.0"]) == ["1.0.0", "1.0.0-rc1", "0.9.0"] + assert sort_tags(["1.0.0", "1.0.0-rc1", "0.9.0"]) == ["1.0.0", "1.0.0-rc1", "0.9.0"] def test_sort_tags_mixed_parseable_and_not() -> None: - result = _sort_tags(["v1.0.0", "release-2024", "v2.0.0", "experimental"]) + result = sort_tags(["v1.0.0", "release-2024", "v2.0.0", "experimental"]) assert result == ["v2.0.0", "v1.0.0", "release-2024", "experimental"] def test_sort_tags_empty() -> None: - assert _sort_tags([]) == [] + assert sort_tags([]) == [] # --------------------------------------------------------------------------- diff --git a/uv.lock b/uv.lock index 7e785a3..d005d4d 100644 --- a/uv.lock +++ b/uv.lock @@ -159,6 +159,7 @@ dependencies = [ { name = "httpx" }, { name = "jinja2" }, { name = "mcp" }, + { name = "packaging" }, { name = "pydantic" }, { name = "rich" }, { name = "ruamel-yaml" }, @@ -185,6 +186,7 @@ requires-dist = [ { name = "httpx", specifier = ">=0.27.0" }, { name = "jinja2", specifier = ">=3.1.0" }, { name = "mcp", specifier = ">=1.0.0" }, + { name = "packaging", specifier = ">=21.0" }, { name = "pydantic", specifier = ">=2.0.0" }, { name = "rich", specifier = ">=13.0.0" }, { name = "ruamel-yaml", specifier = ">=0.18.0" },