From 87e6f19447368d7e65c17f0d7d831d146ade741e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 13 Feb 2026 13:21:25 -0600 Subject: [PATCH 01/14] check-nightly-success: limit to 1 branch, other improvements --- .gitignore | 1 + .pre-commit-config.yaml | 20 +- check_nightly_success/README.md | 67 ++++ .../check-nightly-success/action.yaml | 32 +- .../check-nightly-success/check.py | 299 ++++++++++++------ check_nightly_success/dispatch/action.yml | 12 +- pyproject.toml | 18 +- telemetry-impls/summarize/bump_time.py | 6 +- telemetry-impls/summarize/send_trace.py | 4 +- 9 files changed, 325 insertions(+), 134 deletions(-) create mode 100644 check_nightly_success/README.md diff --git a/.gitignore b/.gitignore index 1377554e..9c92e4ec 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.swp +.venv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 66c32140..23d91c8a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,12 +18,24 @@ repos: hooks: - id: actionlint-docker - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.10 + rev: v0.15.1 hooks: - - id: ruff - args: ["--fix"] + - id: ruff-check + args: ["--fix", "--config=pyproject.toml"] - id: ruff-format + args: ["--config=pyproject.toml"] - repo: https://github.com/rapidsai/pre-commit-hooks - rev: v1.2.1 + rev: v1.4.3 hooks: - id: verify-copyright + - repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v1.13.0' + hooks: + - id: mypy + additional_dependencies: + - "requests>=2.32.4" + - "types-requests>=2.32.4" + args: + - "--config-file=pyproject.toml" + - "check_nightly_success/" + pass_filenames: false diff --git a/check_nightly_success/README.md b/check_nightly_success/README.md new file mode 100644 index 00000000..56b1ff6c --- /dev/null +++ b/check_nightly_success/README.md @@ -0,0 +1,67 @@ +# check_nightly_success + +Action that can be used to fail CI if a given GitHub Actions workflow hasn't had at least 1 recent succcessful run. 
+ +Add it to any GitHub Actions workflow configuration like this: + +```yaml + check-nightly-ci: + runs-on: ubuntu-latest + permissions: + actions: read + id-token: write + env: + GH_TOKEN: ${{ github.token }} + steps: + - name: Check if nightly CI is passing + uses: rapidsai/shared-actions/check_nightly_success/dispatch@main + with: + repo: ${{ github.repository }} + target_branch: ${{ github.base_ref }} + workflow_id: 'test.yaml' + max_days_without_success: 7 +``` + +## Testing + +The code for the actions is implemented in Python. +Try the following locally to test it. + +```shell +python -m venv .venv/ +source .venv/bin/activate +python -m pip install requests + +GH_TOKEN=$(gh auth token) \ +python ./check-nightly-success/check.py \ + --repo 'rapidsai/cudf' \ + --branch 'main' \ + --workflow-id 'test.yaml' \ + --max-days-without-success 7 +``` + +If this succeeds, you'll see a `0` exit code and output text similar to the following: + +> Found 4 successful runs of workflow 'test.yaml' on branch 'main' in the previous 7 days. +The most recent successful run of workflow 'test.yaml' on branch 'main' was '2026-02-13 13:40:18+00:00', which is within the last 7 days. View logs: + - https://github.com/rapidsai/cudf/actions/runs/21978265026 + + To see it fail, try on a repo that doesn't have that workflow. 
+ +```shell +GH_TOKEN=$(gh auth token) \ +python ./check-nightly-success/check.py \ + --repo 'rapidsai/build-planniing' \ + --branch 'main' \ + --workflow-id 'test.yaml' \ + --max-days-without-success 7 +``` + +That'll return exit code `1` and output similar to this: + +> RuntimeError: Failed to fetch https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs after 5 attempts with the following errors: + 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 + 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 + 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 + 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 + 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 diff --git a/check_nightly_success/check-nightly-success/action.yaml b/check_nightly_success/check-nightly-success/action.yaml index 2ad52971..803ac5dd 100644 --- a/check_nightly_success/check-nightly-success/action.yaml +++ b/check_nightly_success/check-nightly-success/action.yaml @@ -1,24 +1,26 @@ name: check-nightly-success description: Check if the nightlies have succeeded recently. + +# these inputs should all be 'required: true' without defaults... 
this action should only +# ever be invoked by check_nightly_success/dispatch inputs: repo: - description: "The repository to check" + description: "Repository name with owner (e.g. 'rapidsai/cudf' not 'cudf')" required: true type: string - repo_owner: - description: "The org that owns the repo (default: rapidsai)" - required: false - default: "rapidsai" + target_branch: + description: | + Branch the pull request this is running on targets. + Only statuses of nightly runs on that branch will be considered. + required: true type: string workflow_id: description: "The workflow whose runs to check" - required: false - default: "test.yaml" + required: true type: string max_days_without_success: description: "The number of consecutive days that may go by without a successful CI run" - required: false - default: 7 + required: true type: integer runs: @@ -28,9 +30,15 @@ runs: shell: bash env: REPO: ${{ inputs.repo }} - REPO_OWNER: ${{ inputs.repo_owner }} + TARGET_BRANCH: ${{ inputs.target_branch }} WORKFLOW_ID: ${{ inputs.workflow_id }} MAX_DAYS_WITHOUT_SUCCESS: ${{ inputs.max_days_without_success }} run: | - python -m pip install requests - python shared-actions/check_nightly_success/check-nightly-success/check.py ${REPO} --repo-owner ${REPO_OWNER} --workflow-id ${WORKFLOW_ID} --max-days-without-success ${MAX_DAYS_WITHOUT_SUCCESS} + python -m pip install \ + --prefer-binary \ + 'requests>=2.32.4' + python shared-actions/check_nightly_success/check-nightly-success/check.py \ + --repo ${REPO} \ + --branch ${TARGET_BRANCH} \ + --workflow-id ${WORKFLOW_ID} \ + --max-days-without-success ${MAX_DAYS_WITHOUT_SUCCESS} diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index e36c2298..4d375d80 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -1,148 +1,237 @@ -# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
+# Copyright (c) 2024-2026, NVIDIA CORPORATION. """Check whether a GHA workflow has run successfully in the last N days.""" -# ruff: noqa: INP001 import argparse import os -import re import sys -from collections import defaultdict -from datetime import datetime +import time +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone import requests # Constants -GITHUB_TOKEN = os.environ["RAPIDS_GH_TOKEN"] -GOOD_STATUSES = {"success"} +GITHUB_TOKEN = os.environ["GH_TOKEN"] + + +@dataclass +class _WorkflowRun: + """GitHub workflow run data, filtered to only the fields this action cares about.""" + + html_url: str + run_started_at: datetime + + +@dataclass +class _ResponseData: + data: list[_WorkflowRun] + next_url: str | None + + +# We are producing Unix return codes so success/failure is inverted from the +# expected Python boolean values. +@dataclass +class ExitCode: + FAILURE = 1 + SUCCESS = 0 + + +class GitHubClient: + def __init__( + self, + *, + max_retries: int, + retry_backoff_seconds: float, + request_timeout_seconds: float, + ) -> None: + self.max_retries = max_retries + self.request_timeout_seconds = request_timeout_seconds + self.retry_backoff_seconds = retry_backoff_seconds + + def __get_next_page( + self, + *, + url: str, + headers: dict[str, str], + params: dict[str, int | str], + ) -> _ResponseData: + """Get one page of results""" + exceptions = [] + for _ in range(self.max_retries): + try: + response = requests.get( + url, + headers=headers, + params=params, + timeout=self.request_timeout_seconds, + ) + response.raise_for_status() + break + except requests.RequestException as e: + exceptions.append(str(e)) + # simple backoff, without jitter, exponential backoff, etc., should be fine for this + time.sleep(self.retry_backoff_seconds) + else: + # this needs to be done outside the f-string to avoid: + # "Cannot use an escape sequence (backslash) in f-strings on Python 3.10 (syntax was added in Python 3.12)" + exception_text = 
"\n\t".join(exceptions) + msg = ( + f"Failed to fetch {url} after {self.max_retries} attempts with the following " + f"errors: \n\t{exception_text}" + ) + raise RuntimeError(msg) + + # if we get here, the request succeeded...return its data, in the format we want + return _ResponseData( + data=[ + _WorkflowRun( + html_url=workflow_run["html_url"], + run_started_at=datetime.fromisoformat(workflow_run["run_started_at"]), + ) + for workflow_run in response.json()["workflow_runs"] + ], + next_url=response.links.get("next", None), + ) + + def get_all_runs( + self, + *, + url: str, + headers: dict[str, str], + params: dict[str, int | str], + ) -> list[_WorkflowRun]: + """ + Paginate over requests to api.github.com/repos/{repo_owner}/{repo}/actions/workflows/{workflow_id}/runs + and return all the results. + """ + data = [] + while True: + page = self.__get_next_page( + url=url, + headers=headers, + params=params, + ) + data.extend(page.data) + if page.next_url is None: + break + # just use the pagination URL, not the original query one + url = page.next_url + params = None # type: ignore[assignment] + return data def main( + *, repo: str, - repo_owner: str, + target_branch: str, workflow_id: str, max_days_without_success: int, - num_attempts: int = 5, -) -> bool: + num_attempts: int, + request_timeout_seconds: float, + retry_backoff_seconds: float, +) -> int: """Check whether a GHA workflow has run successfully in the last N days. Returns True if the workflow has not run successfully in the last N days, False otherwise (values are inverted for use as a return code). 
""" - headers = {"Authorization": f"token {GITHUB_TOKEN}"} - url = f"https://api.github.com/repos/{repo_owner}/{repo}/actions/workflows/{workflow_id}/runs" - exceptions = [] - for _ in range(num_attempts): - try: - response = requests.get(url, headers=headers, timeout=10) - response.raise_for_status() - break - except requests.RequestException as e: - exceptions.append(e) - else: - sep = "\n\t" - msg = ( - f"Failed to fetch {url} after {num_attempts} attempts with the following " - f"errors: {sep}{'{sep}'.join(exceptions)}" - ) - raise RuntimeError(msg) - - runs = response.json()["workflow_runs"] - tz = datetime.fromisoformat(runs[0]["run_started_at"]).tzinfo - now = datetime.now(tz=tz) - - latest_success = {} - workflow_active_for_max_days = {} - # Rather frustratingly, the workflow runs returned from the GitHub API can - # have alternating ordering of `head_branch` - # e.g. - # run[0]['head_branch'] == "release/25.02" - # run[1]['head_branch'] == "release/25.04" - # run[2]['head_branch'] == "release/25.02" + # Timezones in GitHub API responses are guaranteed to be in UTC time. # - # In this situation, the behavior of `itertools.groupby` (previously used - # here) is to only group _consecutive_ runs, so the results of the - # subsequent branch match (i.e. the second group of `release/25.02` runs) - # will overwrite the results of the first one, potentially overwriting a - # previous success. The snippet below unifies the groups so it's more like a - # SQL groupby and there is no chance of overwriting. - branch_dict = defaultdict(list) - for run in runs: - branch_dict[run["head_branch"]].append(run) - - for branch, branch_runs in branch_dict.items(): - # Only consider 'main' and RAPIDS release branches, which have versions like - # '25.10' (RAPIDS) or '0.46' (ucxx). 
- if not re.match(r"(main|release/[0-9]{1,2}\.[0-9]{2})", branch): - continue - - latest_success[branch] = None - runs = sorted(branch_runs, key=lambda r: r["run_started_at"], reverse=True) - for run in runs: - days_since_run = (now - datetime.fromisoformat(run["run_started_at"])).days - if days_since_run > max_days_without_success: - break - if run["conclusion"] in GOOD_STATUSES: - latest_success[branch] = run - break + # ref: https://docs.github.com/en/rest/using-the-rest-api/timezones-and-the-rest-api?apiVersion=2022-11-28 + # + # This code is a little imprecise (doing the math in 'days' means that moving from 11:59p to 12:01a buys you + # another 23 hours and 58 minutes of time), but that difference shouldn't be important for this action. + # + # Dealing with day-precision date-times makes filtering in the GitHub API simpler, see + # https://docs.github.com/en/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax#query-for-dates + # + oldest_date_to_pull = datetime.now(timezone.utc) - timedelta(days=max_days_without_success) - workflow_active_for_max_days[branch] = False - if len(runs) > 0: - run = runs[-1] - days_since_run = (now - datetime.fromisoformat(run["run_started_at"])).days - if days_since_run > max_days_without_success: - workflow_active_for_max_days[branch] = True - - latest_branch = max(latest_success) - has_latest_success = latest_success[latest_branch] is not None - - # We are producing Unix return codes so success/failure is inverted from the - # expected Python boolean values. - if has_latest_success: - print( # noqa: T201 - f"The most recent successful run of the {workflow_id} workflow on " - f"{latest_branch} was " - f"{datetime.fromisoformat(latest_success[latest_branch]['run_started_at'])}, " - f"which is within the last {max_days_without_success} days. 
View logs:" - f"\n - {latest_success[latest_branch]['html_url']}" - ) - return 0 - elif not workflow_active_for_max_days[latest_branch]: - print( # noqa: T201 - f"The oldest run of the {workflow_id} workflow on {latest_branch} was less " - f"than {max_days_without_success} days ago. This exempts the workflow from " - "check-nightly-success because the workflow has not been running for very long." + # get all the matching runs + client = GitHubClient( + max_retries=num_attempts, + request_timeout_seconds=request_timeout_seconds, + retry_backoff_seconds=retry_backoff_seconds, + ) + all_runs = client.get_all_runs( + url=f"https://api.github.com/repos/{repo}/actions/workflows/{workflow_id}/runs", + headers={"Authorization": f"token {GITHUB_TOKEN}"}, + params={ + # only care about runs from one branch (usually, the PR target branch) + "branch": target_branch, + # only care about successful runs + "status": "success", + # pull as many results per page as possible + "per_page": 100, + # filter to recent-enough runs + "created": f">={oldest_date_to_pull.strftime('%Y-%m-%d')}", + }, + ) + + # if there were 0 successful runs, immediately exit with 1... by definition that means there + # hasn't been a success in the last `max_days_without_success` days + if not all_runs: + print( + f"There were 0 successful runs of workflow '{workflow_id}' on branch '{target_branch}' in the last " + f"{max_days_without_success} days." ) - return 0 + return ExitCode.FAILURE + + # If we get here, then there must have been at least 1 successful run in the allowed time. We need to find its + # precise time for a log message, but otherwise the code an exit with success. + print( + f"Found {len(all_runs)} successful runs of workflow '{workflow_id}' on branch '{target_branch}' " + f"in the previous {max_days_without_success} days." 
+ ) - print( # noqa: T201 - f"{latest_branch} has no successful runs of {workflow_id} in the last {max_days_without_success} days" + # sort runs by "run_started_at" + most_recent_successful_run = max(all_runs, key=lambda r: r.run_started_at) + print( + f"The most recent successful run of workflow '{workflow_id}' on branch '{target_branch}' " + f"was '{most_recent_successful_run.run_started_at}', which is within the " + f"last {max_days_without_success} days. " + f"View logs:\n - {most_recent_successful_run.html_url}" ) - return 1 + return ExitCode.SUCCESS if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("repo", type=str, help="Repository name") parser.add_argument( - "--repo-owner", - default="rapidsai", - help="Repository organization/owner", + "--repo", + type=str, + required=True, + help="Repository name with owner (e.g. 'rapidsai/cudf' not 'cudf')", + ) + parser.add_argument( + "--branch", + type=str, + required=True, + help="Branch to check for recent workflow runs.", + ) + parser.add_argument( + "--workflow-id", + type=str, + required=True, + help="Workflow ID (e.g. 
'test.yaml')", ) - parser.add_argument("--workflow-id", default="test.yaml", help="Workflow ID") parser.add_argument( "--max-days-without-success", type=int, - default=7, + required=True, help="Maximum number of days without a successful run", ) args = parser.parse_args() sys.exit( main( - args.repo, - args.repo_owner, - args.workflow_id, - args.max_days_without_success, + repo=args.repo, + target_branch=args.branch, + workflow_id=args.workflow_id, + max_days_without_success=args.max_days_without_success, + num_attempts=5, + request_timeout_seconds=10, + retry_backoff_seconds=0.5, ), ) diff --git a/check_nightly_success/dispatch/action.yml b/check_nightly_success/dispatch/action.yml index 5a4223ac..4656b345 100644 --- a/check_nightly_success/dispatch/action.yml +++ b/check_nightly_success/dispatch/action.yml @@ -2,13 +2,15 @@ name: dispatch-check-nightly-success description: Clone shared-actions and dispatch to the check-nightly-success action. inputs: repo: - description: "The repository to check" + description: "Repository name with owner (e.g. 'rapidsai/cudf' not 'cudf')" required: true type: string - repo_owner: - description: "The org that owns the repo (default: rapidsai)" + target_branch: + description: | + Branch the pull request this is running on targets. + Only statuses of nightly runs on that branch will be considered. required: false - default: "rapidsai" + default: "main" type: string workflow_id: description: "The workflow whose runs to check" @@ -34,6 +36,6 @@ runs: uses: ./shared-actions/check_nightly_success/check-nightly-success with: repo: ${{ inputs.repo }} - repo_owner: ${{ inputs.repo_owner }} + target_branch: ${{ inputs.target_branch }} workflow_id: ${{ inputs.workflow_id }} max_days_without_success: ${{ inputs.max_days_without_success }} diff --git a/pyproject.toml b/pyproject.toml index 5304ac2f..23c81e9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
+# Copyright (c) 2024-2026, NVIDIA CORPORATION. [tool.ruff] line-length = 120 @@ -6,6 +6,10 @@ target-version = "py310" [tool.ruff.lint] select = [ + # flake8-bugbear + "B", + # flake8-datetimez + "DTZ", # pycodestyle "E", # pyflakes @@ -14,10 +18,18 @@ select = [ "I", # numpy "NPY", + # perflint + "PERF", + # flake8-pie + "PIE", + # flake8-return + "RET", + # ruff-exclusive checks + "RUF", # pyupgrade "UP", - # flake8-bugbear - "B" + # flake8-bandit + "S", ] ignore = [ # Incompatible with D211 diff --git a/telemetry-impls/summarize/bump_time.py b/telemetry-impls/summarize/bump_time.py index 906e686f..2d58c371 100644 --- a/telemetry-impls/summarize/bump_time.py +++ b/telemetry-impls/summarize/bump_time.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024-2025, NVIDIA CORPORATION. +# Copyright (c) 2024-2026, NVIDIA CORPORATION. # This script is meant to act on an 'all_jobs.json' file that comes from # the summarize job when debug info is enabled. Bumping the time makes @@ -14,12 +14,12 @@ def _parse_time(x: str) -> int: - return int(datetime.datetime.strptime(x, "%Y-%m-%dT%H:%M:%SZ").timestamp() * 1e9) + return int(datetime.datetime.strptime(x, "%Y-%m-%dT%H:%M:%SZ").timestamp() * 1e9) # noqa: DTZ007 start_time = _parse_time(jobs[0]["created_at"]) needed_time = _parse_time(jobs[-3]["completed_at"]) - _parse_time(jobs[0]["created_at"]) -new_start_time = datetime.datetime.utcnow() - datetime.timedelta(minutes=60) +new_start_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=60) for idx, job in enumerate(jobs): if job["created_at"]: diff --git a/telemetry-impls/summarize/send_trace.py b/telemetry-impls/summarize/send_trace.py index df2fcc61..4d597e7c 100644 --- a/telemetry-impls/summarize/send_trace.py +++ b/telemetry-impls/summarize/send_trace.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. +# Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -247,7 +247,7 @@ def get_sccache_stats(artifact_folder: Path) -> dict[str, str]: return parsed_stats -def process_job_blob( # noqa: PLR0913 +def process_job_blob( trace_id: int, job: Mapping[str, Any], env_vars: Mapping[str, str], From c0a20e29c878339a3bdac7f2ab2b0f1077088df1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 13 Feb 2026 13:59:00 -0600 Subject: [PATCH 02/14] make page size configurable --- check_nightly_success/README.md | 12 ++++++++++++ .../check-nightly-success/check.py | 16 ++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/check_nightly_success/README.md b/check_nightly_success/README.md index 56b1ff6c..ba992237 100644 --- a/check_nightly_success/README.md +++ b/check_nightly_success/README.md @@ -65,3 +65,15 @@ That'll return exit code `1` and output similar to this: 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 + +Set `--request-page-size` to `1` to test that pagination is working. 
+ +```shell +GH_TOKEN=$(gh auth token) \ +python ./check-nightly-success/check.py \ + --repo 'rapidsai/cudf' \ + --branch 'main' \ + --workflow-id 'test.yaml' \ + --max-days-without-success 30 \ + --request-page-size 5 +``` diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index 4d375d80..8ddb5fa6 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -91,7 +91,7 @@ def __get_next_page( ) for workflow_run in response.json()["workflow_runs"] ], - next_url=response.links.get("next", None), + next_url=response.links.get("next", dict()).get("url", None), ) def get_all_runs( @@ -106,7 +106,9 @@ def get_all_runs( and return all the results. """ data = [] + page_num = 1 while True: + print(f"requesting page {page_num} of results") page = self.__get_next_page( url=url, headers=headers, @@ -118,6 +120,7 @@ def get_all_runs( # just use the pagination URL, not the original query one url = page.next_url params = None # type: ignore[assignment] + page_num += 1 return data @@ -128,6 +131,7 @@ def main( workflow_id: str, max_days_without_success: int, num_attempts: int, + request_page_size: int, request_timeout_seconds: float, retry_backoff_seconds: float, ) -> int: @@ -163,7 +167,7 @@ def main( # only care about successful runs "status": "success", # pull as many results per page as possible - "per_page": 100, + "per_page": request_page_size, # filter to recent-enough runs "created": f">={oldest_date_to_pull.strftime('%Y-%m-%d')}", }, @@ -222,6 +226,13 @@ def main( required=True, help="Maximum number of days without a successful run", ) + parser.add_argument( + "--request-page-size", + type=int, + default=100, + required=False, + help="Number of responses per page of data. 
Decrease this to reduce memory usage.", + ) args = parser.parse_args() sys.exit( @@ -231,6 +242,7 @@ def main( workflow_id=args.workflow_id, max_days_without_success=args.max_days_without_success, num_attempts=5, + request_page_size=args.request_page_size, request_timeout_seconds=10, retry_backoff_seconds=0.5, ), From e6fb6cd0f3ad4c2d7f9bd08919507bc370d2235b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 13 Feb 2026 16:02:45 -0600 Subject: [PATCH 03/14] use kebob-case everywhere --- check_nightly_success/README.md | 6 +++--- .../check-nightly-success/action.yaml | 12 ++++++------ check_nightly_success/dispatch/action.yml | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/check_nightly_success/README.md b/check_nightly_success/README.md index ba992237..fd559965 100644 --- a/check_nightly_success/README.md +++ b/check_nightly_success/README.md @@ -17,9 +17,9 @@ Add it to any GitHub Actions workflow configuration like this: uses: rapidsai/shared-actions/check_nightly_success/dispatch@main with: repo: ${{ github.repository }} - target_branch: ${{ github.base_ref }} - workflow_id: 'test.yaml' - max_days_without_success: 7 + target-branch: ${{ github.base_ref }} + workflow-id: 'test.yaml' + max-days-without-success: 7 ``` ## Testing diff --git a/check_nightly_success/check-nightly-success/action.yaml b/check_nightly_success/check-nightly-success/action.yaml index 803ac5dd..4c149b7a 100644 --- a/check_nightly_success/check-nightly-success/action.yaml +++ b/check_nightly_success/check-nightly-success/action.yaml @@ -8,17 +8,17 @@ inputs: description: "Repository name with owner (e.g. 'rapidsai/cudf' not 'cudf')" required: true type: string - target_branch: + target-branch: description: | Branch the pull request this is running on targets. Only statuses of nightly runs on that branch will be considered. 
required: true type: string - workflow_id: + workflow-id: description: "The workflow whose runs to check" required: true type: string - max_days_without_success: + max-days-without-success: description: "The number of consecutive days that may go by without a successful CI run" required: true type: integer @@ -30,9 +30,9 @@ runs: shell: bash env: REPO: ${{ inputs.repo }} - TARGET_BRANCH: ${{ inputs.target_branch }} - WORKFLOW_ID: ${{ inputs.workflow_id }} - MAX_DAYS_WITHOUT_SUCCESS: ${{ inputs.max_days_without_success }} + TARGET_BRANCH: ${{ inputs.target-branch }} + WORKFLOW_ID: ${{ inputs.workflow-id }} + MAX_DAYS_WITHOUT_SUCCESS: ${{ inputs.max-days-without-success }} run: | python -m pip install \ --prefer-binary \ diff --git a/check_nightly_success/dispatch/action.yml b/check_nightly_success/dispatch/action.yml index 4656b345..872dc96f 100644 --- a/check_nightly_success/dispatch/action.yml +++ b/check_nightly_success/dispatch/action.yml @@ -12,12 +12,12 @@ inputs: required: false default: "main" type: string - workflow_id: + workflow-id: description: "The workflow whose runs to check" required: false default: "test.yaml" type: string - max_days_without_success: + max-days-without-success: description: "The number of consecutive days that may go by without a successful CI run" required: false default: 7 @@ -36,6 +36,6 @@ runs: uses: ./shared-actions/check_nightly_success/check-nightly-success with: repo: ${{ inputs.repo }} - target_branch: ${{ inputs.target_branch }} - workflow_id: ${{ inputs.workflow_id }} - max_days_without_success: ${{ inputs.max_days_without_success }} + target_branch: ${{ inputs.target-branch }} + workflow_id: ${{ inputs.workflow-id }} + max_days_without_success: ${{ inputs.max-days-without-success }} From d2ea94ed872532781633b6f380167e30abe0dda0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 13 Feb 2026 16:12:40 -0600 Subject: [PATCH 04/14] more kebob-case --- check_nightly_success/dispatch/action.yml | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/check_nightly_success/dispatch/action.yml b/check_nightly_success/dispatch/action.yml index 872dc96f..eceea33a 100644 --- a/check_nightly_success/dispatch/action.yml +++ b/check_nightly_success/dispatch/action.yml @@ -5,7 +5,7 @@ inputs: description: "Repository name with owner (e.g. 'rapidsai/cudf' not 'cudf')" required: true type: string - target_branch: + target-branch: description: | Branch the pull request this is running on targets. Only statuses of nightly runs on that branch will be considered. @@ -36,6 +36,6 @@ runs: uses: ./shared-actions/check_nightly_success/check-nightly-success with: repo: ${{ inputs.repo }} - target_branch: ${{ inputs.target-branch }} - workflow_id: ${{ inputs.workflow-id }} - max_days_without_success: ${{ inputs.max-days-without-success }} + target-branch: ${{ inputs.target-branch }} + workflow-id: ${{ inputs.workflow-id }} + max-days-without-success: ${{ inputs.max-days-without-success }} From 8d049331f75409afab70c8e4f671de1618c8c5d4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Feb 2026 10:15:34 -0600 Subject: [PATCH 05/14] start trying to handle exemption --- check_nightly_success/README.md | 3 + .../check-nightly-success/check.py | 81 ++++++++++++++----- check_nightly_success/dispatch/action.yml | 3 +- 3 files changed, 66 insertions(+), 21 deletions(-) diff --git a/check_nightly_success/README.md b/check_nightly_success/README.md index fd559965..8c0a6abf 100644 --- a/check_nightly_success/README.md +++ b/check_nightly_success/README.md @@ -13,6 +13,9 @@ Add it to any GitHub Actions workflow configuration like this: env: GH_TOKEN: ${{ github.token }} steps: + - name: Get PR Info + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main - name: Check if nightly CI is passing uses: rapidsai/shared-actions/check_nightly_success/dispatch@main with: diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index 
8ddb5fa6..194da05a 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -158,7 +158,7 @@ def main( request_timeout_seconds=request_timeout_seconds, retry_backoff_seconds=retry_backoff_seconds, ) - all_runs = client.get_all_runs( + successful_runs = client.get_all_runs( url=f"https://api.github.com/repos/{repo}/actions/workflows/{workflow_id}/runs", headers={"Authorization": f"token {GITHUB_TOKEN}"}, params={ @@ -173,32 +173,75 @@ def main( }, ) + if successful_runs: + most_recent_successful_run = max(successful_runs, key=lambda r: r.run_started_at) + print( + f"Found {len(successful_runs)} successful runs of workflow '{workflow_id}' on branch '{target_branch}' " + f"in the previous {max_days_without_success} days (most recent: '{most_recent_successful_run.run_started_at}'). " + f"View logs:\n - {most_recent_successful_run.html_url}" + ) + return ExitCode.SUCCESS + + # It's ok for there to be 0 successful runs if the branch is fairly new or the workflow hasn't been running on it + # very long. + # + # When new release branches are cut, we want to give a couple of days of space for teams to get their nightly tests working + all_runs = client.get_all_runs( + url=f"https://api.github.com/repos/{repo}/actions/workflows/{workflow_id}/runs", + headers={"Authorization": f"token {GITHUB_TOKEN}"}, + params={ + # only care about runs from one branch (usually, the PR target branch) + "branch": target_branch, + # pull as many results per page as possible + "per_page": request_page_size, + # filter to recent-enough runs + "created": f">={oldest_date_to_pull.strftime('%Y-%m-%d')}", + }, + ) + + # + if not all_runs: + print( + f"There were 0 runs (successful or unsuccessful) of workflow '{workflow_id}' on branch " + f"'{target_branch}' in the last {max_days_without_success} days." 
+ ) + return ExitCode.FAILURE + + if len(all_runs) < max_days_without_success: + print( + "There have only been" + + f"The oldest run of the {workflow_id} workflow on {latest_branch} was less " + f"than {max_days_without_success} days ago. This exempts the workflow from " + "check-nightly-success because the workflow has not been running for very long." + else: + + # if there were 0 successful runs, immediately exit with 1... by definition that means there # hasn't been a success in the last `max_days_without_success` days - if not all_runs: + if not successful_runs: + + successful_runs = client.get_all_runs( + url=f"https://api.github.com/repos/{repo}/actions/workflows/{workflow_id}/runs", + headers={"Authorization": f"token {GITHUB_TOKEN}"}, + params={ + # only care about runs from one branch (usually, the PR target branch) + "branch": target_branch, + # only care about successful runs + "status": "success", + # pull as many results per page as possible + "per_page": request_page_size, + # filter to recent-enough runs + "created": f">={oldest_date_to_pull.strftime('%Y-%m-%d')}", + }, + ) + print( f"There were 0 successful runs of workflow '{workflow_id}' on branch '{target_branch}' in the last " f"{max_days_without_success} days." ) return ExitCode.FAILURE - # If we get here, then there must have been at least 1 successful run in the allowed time. We need to find its - # precise time for a log message, but otherwise the code an exit with success. - print( - f"Found {len(all_runs)} successful runs of workflow '{workflow_id}' on branch '{target_branch}' " - f"in the previous {max_days_without_success} days." - ) - - # sort runs by "run_started_at" - most_recent_successful_run = max(all_runs, key=lambda r: r.run_started_at) - print( - f"The most recent successful run of workflow '{workflow_id}' on branch '{target_branch}' " - f"was '{most_recent_successful_run.run_started_at}', which is within the " - f"last {max_days_without_success} days. 
" - f"View logs:\n - {most_recent_successful_run.html_url}" - ) - return ExitCode.SUCCESS - if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/check_nightly_success/dispatch/action.yml b/check_nightly_success/dispatch/action.yml index eceea33a..02409d59 100644 --- a/check_nightly_success/dispatch/action.yml +++ b/check_nightly_success/dispatch/action.yml @@ -9,8 +9,7 @@ inputs: description: | Branch the pull request this is running on targets. Only statuses of nightly runs on that branch will be considered. - required: false - default: "main" + required: true type: string workflow-id: description: "The workflow whose runs to check" From 5692b2e79e75fed218b4b4dc9506a6e8049493d2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Feb 2026 16:15:46 +0000 Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- check_nightly_success/check-nightly-success/check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index 194da05a..00bb0ff2 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -199,7 +199,7 @@ def main( }, ) - # + # if not all_runs: print( f"There were 0 runs (successful or unsuccessful) of workflow '{workflow_id}' on branch " @@ -215,12 +215,12 @@ def main( f"than {max_days_without_success} days ago. This exempts the workflow from " "check-nightly-success because the workflow has not been running for very long." else: - + # if there were 0 successful runs, immediately exit with 1... 
by definition that means there # hasn't been a success in the last `max_days_without_success` days if not successful_runs: - + successful_runs = client.get_all_runs( url=f"https://api.github.com/repos/{repo}/actions/workflows/{workflow_id}/runs", headers={"Authorization": f"token {GITHUB_TOKEN}"}, From c13b9c4205991fb1865439f19aa2bf03d40ea11d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Feb 2026 11:10:29 -0600 Subject: [PATCH 07/14] handle exempting new workflows --- .../check-nightly-success/check.py | 61 ++++++++----------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index 00bb0ff2..f972e877 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -173,11 +173,12 @@ def main( }, ) + # recent-enough, successful run = exit 0 if successful_runs: most_recent_successful_run = max(successful_runs, key=lambda r: r.run_started_at) print( f"Found {len(successful_runs)} successful runs of workflow '{workflow_id}' on branch '{target_branch}' " - f"in the previous {max_days_without_success} days (most recent: '{most_recent_successful_run.run_started_at}'). " + f"in the previous {max_days_without_success} days (most recent: '{most_recent_successful_run.run_started_at}'). " # noqa: E501 f"View logs:\n - {most_recent_successful_run.html_url}" ) return ExitCode.SUCCESS @@ -185,7 +186,11 @@ def main( # It's ok for there to be 0 successful runs if the branch is fairly new or the workflow hasn't been running on it # very long. 
# - # When new release branches are cut, we want to give a couple of days of space for teams to get their nightly tests working + # Code below looks for runs in the last `max_days_without_success * 2` days, to get an + # approximation of the entire history without having an unbounded "list all runs from all time" type of query + # (which could get expensive for very-active branches). + lookback_days = max_days_without_success * 2 + oldest_date_to_pull = datetime.now(timezone.utc) - timedelta(days=lookback_days) all_runs = client.get_all_runs( url=f"https://api.github.com/repos/{repo}/actions/workflows/{workflow_id}/runs", headers={"Authorization": f"token {GITHUB_TOKEN}"}, @@ -199,48 +204,34 @@ def main( }, ) - # + # Fail if there have not been any runs at all (to avoid silently skipping this check). if not all_runs: print( f"There were 0 runs (successful or unsuccessful) of workflow '{workflow_id}' on branch " - f"'{target_branch}' in the last {max_days_without_success} days." + f"'{target_branch}' in the last {lookback_days} days. " + "To resolve this, run the workflow at least once or increase 'max-days-without-success'." ) return ExitCode.FAILURE - if len(all_runs) < max_days_without_success: + # If the oldest run on the branch was less than {max_days_without_success} ago, warn but allow the check to pass. + oldest_run = min(all_runs, key=lambda r: r.run_started_at) + days_since_oldest_run = (datetime.now(tz=timezone.utc) - oldest_run.run_started_at).days + if days_since_oldest_run < max_days_without_success: print( - "There have only been" - - f"The oldest run of the {workflow_id} workflow on {latest_branch} was less " - f"than {max_days_without_success} days ago. This exempts the workflow from " - "check-nightly-success because the workflow has not been running for very long." - else: - - - # if there were 0 successful runs, immediately exit with 1... 
by definition that means there - # hasn't been a success in the last `max_days_without_success` days - if not successful_runs: - - successful_runs = client.get_all_runs( - url=f"https://api.github.com/repos/{repo}/actions/workflows/{workflow_id}/runs", - headers={"Authorization": f"token {GITHUB_TOKEN}"}, - params={ - # only care about runs from one branch (usually, the PR target branch) - "branch": target_branch, - # only care about successful runs - "status": "success", - # pull as many results per page as possible - "per_page": request_page_size, - # filter to recent-enough runs - "created": f">={oldest_date_to_pull.strftime('%Y-%m-%d')}", - }, + f"The oldest run of workflow '{workflow_id}' on branch '{target_branch}' was " + f"{days_since_oldest_run} days ago ({oldest_run.run_started_at}). Because that is less than " + f"'max-days-without-success = {max_days_without_success}' days, this workflow is exempted from " + "check-nightly-success. The check will start failing if there is not a successful run in " + "the next few days." ) + return ExitCode.SUCCESS - print( - f"There were 0 successful runs of workflow '{workflow_id}' on branch '{target_branch}' in the last " - f"{max_days_without_success} days." - ) - return ExitCode.FAILURE + # There isn't a recent-enough success and the branch isn't exempted... fail. + print( + f"There were 0 successful runs of workflow '{workflow_id}' on branch '{target_branch}' in the last " + f"{max_days_without_success} days." 
+ ) + return ExitCode.FAILURE if __name__ == "__main__": From 82b50ba1636a69c124c5c3392f12c2a0db6677b7 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Feb 2026 11:15:22 -0600 Subject: [PATCH 08/14] update docs --- check_nightly_success/README.md | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/check_nightly_success/README.md b/check_nightly_success/README.md index 8c0a6abf..ba3ee3a8 100644 --- a/check_nightly_success/README.md +++ b/check_nightly_success/README.md @@ -20,7 +20,7 @@ Add it to any GitHub Actions workflow configuration like this: uses: rapidsai/shared-actions/check_nightly_success/dispatch@main with: repo: ${{ github.repository }} - target-branch: ${{ github.base_ref }} + target-branch: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }} workflow-id: 'test.yaml' max-days-without-success: 7 ``` @@ -45,16 +45,15 @@ python ./check-nightly-success/check.py \ If this succeeds, you'll see a `0` exit code and output text similar to the following: -> Found 4 successful runs of workflow 'test.yaml' on branch 'main' in the previous 7 days. -The most recent successful run of workflow 'test.yaml' on branch 'main' was '2026-02-13 13:40:18+00:00', which is within the last 7 days. View logs: - - https://github.com/rapidsai/cudf/actions/runs/21978265026 +> Found 4 successful runs of workflow 'test.yaml' on branch 'main' in the previous 7 days (most recent: '2026-02-16 06:26:04+00:00'). View logs: + - https://github.com/rapidsai/cudf/actions/runs/22052428055 - To see it fail, try on a repo that doesn't have that workflow. +To see it fail, try on a repo that doesn't have that workflow. 
```shell GH_TOKEN=$(gh auth token) \ python ./check-nightly-success/check.py \ - --repo 'rapidsai/build-planniing' \ + --repo 'rapidsai/build-planning' \ --branch 'main' \ --workflow-id 'test.yaml' \ --max-days-without-success 7 @@ -62,12 +61,12 @@ python ./check-nightly-success/check.py \ That'll return exit code `1` and output similar to this: -> RuntimeError: Failed to fetch https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs after 5 attempts with the following errors: - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planniing/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-05 +> RuntimeError: Failed to fetch https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs after 5 attempts with the following errors: + 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 + 404 Client Error: Not Found for url: 
https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 + 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 + 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 + 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 Set `--request-page-size` to `1` to test that pagination is working. From 552b206dd92779ef466a522b666988604fb3cd73 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Feb 2026 11:33:31 -0600 Subject: [PATCH 09/14] more logging --- check_nightly_success/check-nightly-success/check.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index f972e877..e36b4036 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -216,13 +216,15 @@ def main( # If the oldest run on the branch was less than {max_days_without_success} ago, warn but allow the check to pass. oldest_run = min(all_runs, key=lambda r: r.run_started_at) days_since_oldest_run = (datetime.now(tz=timezone.utc) - oldest_run.run_started_at).days + print( + f"The oldest run of workflow '{workflow_id}' on branch '{target_branch}' was " + f"{days_since_oldest_run} days ago ({oldest_run.run_started_at})." 
+    )
     if days_since_oldest_run < max_days_without_success:
         print(
-            f"The oldest run of workflow '{workflow_id}' on branch '{target_branch}' was "
-            f"{days_since_oldest_run} days ago ({oldest_run.run_started_at}). Because that is less than "
-            f"'max-days-without-success = {max_days_without_success}' days, this workflow is exempted from "
-            "check-nightly-success. The check will start failing if there is not a successful run in "
-            "the next few days."
+            f"Because the oldest run was less than 'max-days-without-success = {max_days_without_success}' days ago, "
+            "this workflow is exempted from check-nightly-success. The check will start failing if there is not a "
+            "successful run in the next few days."
         )
         return ExitCode.SUCCESS
 
From 0167e97b8f25e36b6b9da76c6d3957f0949a2a12 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Tue, 17 Feb 2026 11:56:56 -0600
Subject: [PATCH 10/14] expand testing docs

---
 .gitignore                      |  1 +
 check_nightly_success/README.md | 46 ++++++++++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 9c92e4ec..cfb60695 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 *.swp
+ucxx/
 .venv
diff --git a/check_nightly_success/README.md b/check_nightly_success/README.md
index ba3ee3a8..70bea189 100644
--- a/check_nightly_success/README.md
+++ b/check_nightly_success/README.md
@@ -28,6 +28,9 @@ Add it to any GitHub Actions workflow configuration like this:
 ## Testing
 
 The code for the actions is implemented in Python.
+
+### Case 1: Succeed on recent nightly test successes
+
 Try the following locally to test it.
 
 ```shell
@@ -48,7 +51,9 @@ If this succeeds, you'll see a `0` exit code and output text similar to the foll
 > Found 4 successful runs of workflow 'test.yaml' on branch 'main' in the previous 7 days (most recent: '2026-02-16 06:26:04+00:00'). View logs:
  - https://github.com/rapidsai/cudf/actions/runs/22052428055
 
-To see it fail, try on a repo that doesn't have that workflow.
+### Case 2: Fail when branch has 0 runs (of any status) + +The check should fail on a repo without any runs of this workflow: ```shell GH_TOKEN=$(gh auth token) \ @@ -68,6 +73,45 @@ That'll return exit code `1` and output similar to this: 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 +### Case 3: Success on new branches with only very-recent runs + +Branches with only very-recent runs should be exempted from the check. + +```shell +# NOTE: this example requires write access to 'rapidsai/ucxx' +git clone -o upstream https://github.com/rapidsai/ucxx +pushd./ucxx +git checkout -b delete-me +git push upstream delete-me +popd + +gh workflow run \ + --repo rapidsai/ucxx \ + --ref delete-me \ + test.yaml \ + -f branch="delete-me" \ + -f date="$(date +%Y-%m-%d)" \ + -f sha="$(git rev-parse HEAD)" \ + -f build_type=nightly + +# (MANUAL - go to https://github.com/rapidsai/ucxx/actions/runs/22109183034 and manuall cancel that run) + +# run the check +GH_TOKEN=$(gh auth token) \ +python ./check-nightly-success/check.py \ + --repo 'rapidsai/ucxx' \ + --branch 'delete-me' \ + --workflow-id 'test.yaml' \ + --max-days-without-success 7 +``` + +That'll exit with code `0` and print something like this: + +> The oldest run of workflow 'test.yaml' on branch 'delete-me' was 0 days ago (2026-02-17 17:42:05+00:00). +Because the latest run was less than 'max-days-without-success = 7' days ago, this workflow is exempted from check-nightly-success. The check will start failing if there is not a successful run in the next few days. + +### Other testing: pagination + Set `--request-page-size` to `1` to test that pagination is working. 
```shell From dd8e1c48370615caaa66f86b1a900bcd2ef78213 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Feb 2026 13:50:17 -0600 Subject: [PATCH 11/14] Apply suggestions from code review Co-authored-by: Bradley Dice --- check_nightly_success/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/check_nightly_success/README.md b/check_nightly_success/README.md index 70bea189..2c9820db 100644 --- a/check_nightly_success/README.md +++ b/check_nightly_success/README.md @@ -80,7 +80,7 @@ Branches with only very-recent runs should be exempted from the check. ```shell # NOTE: this example requires write access to 'rapidsai/ucxx' git clone -o upstream https://github.com/rapidsai/ucxx -pushd./ucxx +pushd ./ucxx git checkout -b delete-me git push upstream delete-me popd @@ -94,7 +94,7 @@ gh workflow run \ -f sha="$(git rev-parse HEAD)" \ -f build_type=nightly -# (MANUAL - go to https://github.com/rapidsai/ucxx/actions/runs/22109183034 and manuall cancel that run) +# (MANUAL - go to https://github.com/rapidsai/ucxx/actions/runs/22109183034 and manually cancel that run) # run the check GH_TOKEN=$(gh auth token) \ From a018fa67daf01aefe0e3677e98bb1be6bf5aaac6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Feb 2026 14:49:11 -0600 Subject: [PATCH 12/14] switch to urllib3.util.Retry --- .../check-nightly-success/check.py | 48 ++++++++----------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index e36b4036..8558b341 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -5,11 +5,12 @@ import argparse import os import sys -import time from dataclasses import dataclass from datetime import datetime, timedelta, timezone import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry # Constants GITHUB_TOKEN = 
os.environ["GH_TOKEN"] @@ -45,44 +46,33 @@ def __init__( retry_backoff_seconds: float, request_timeout_seconds: float, ) -> None: - self.max_retries = max_retries self.request_timeout_seconds = request_timeout_seconds - self.retry_backoff_seconds = retry_backoff_seconds + retry = Retry( + total=max_retries - 1, # 1 initial attempt + (total) retries = max_retries attempts + backoff_factor=retry_backoff_seconds, + status_forcelist=(429, 500, 502, 503, 504), + ) + adapter = HTTPAdapter(max_retries=retry) + self._session = requests.Session() + self._session.mount("https://", adapter) + self._session.mount("http://", adapter) def __get_next_page( self, *, url: str, headers: dict[str, str], - params: dict[str, int | str], + params: dict[str, int | str] | None, ) -> _ResponseData: """Get one page of results""" - exceptions = [] - for _ in range(self.max_retries): - try: - response = requests.get( - url, - headers=headers, - params=params, - timeout=self.request_timeout_seconds, - ) - response.raise_for_status() - break - except requests.RequestException as e: - exceptions.append(str(e)) - # simple backoff, without jitter, exponential backoff, etc., should be fine for this - time.sleep(self.retry_backoff_seconds) - else: - # this needs to be done outside the f-string to avoid: - # "Cannot use an escape sequence (backslash) in f-strings on Python 3.10 (syntax was added in Python 3.12)" - exception_text = "\n\t".join(exceptions) - msg = ( - f"Failed to fetch {url} after {self.max_retries} attempts with the following " - f"errors: \n\t{exception_text}" - ) - raise RuntimeError(msg) + response = self._session.get( + url, + headers=headers, + params=params, + timeout=self.request_timeout_seconds, + ) + response.raise_for_status() - # if we get here, the request succeeded...return its data, in the format we want return _ResponseData( data=[ _WorkflowRun( From 22a87b1f9b2067dbde801002c1df86790510e1bc Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Feb 2026 14:54:24 -0600 
Subject: [PATCH 13/14] do not trust a 403 or 404 from GitHub is not retryable --- check_nightly_success/README.md | 7 +------ check_nightly_success/check-nightly-success/check.py | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/check_nightly_success/README.md b/check_nightly_success/README.md index 2c9820db..0a6a3d44 100644 --- a/check_nightly_success/README.md +++ b/check_nightly_success/README.md @@ -66,12 +66,7 @@ python ./check-nightly-success/check.py \ That'll return exit code `1` and output similar to this: -> RuntimeError: Failed to fetch https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs after 5 attempts with the following errors: - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 - 404 Client Error: Not Found for url: https://api.github.com/repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 +> requests.exceptions.RetryError: HTTPSConnectionPool(host='api.github.com', port=443): Max retries exceeded with url: /repos/rapidsai/build-planning/actions/workflows/test.yaml/runs?branch=main&status=success&per_page=100&created=%3E%3D2026-02-10 (Caused by ResponseError('too many 404 error responses')) ### Case 3: Success on new branches 
with only very-recent runs diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index 8558b341..d41b59b6 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -50,7 +50,7 @@ def __init__( retry = Retry( total=max_retries - 1, # 1 initial attempt + (total) retries = max_retries attempts backoff_factor=retry_backoff_seconds, - status_forcelist=(429, 500, 502, 503, 504), + status_forcelist=(403, 404, 429, 500, 502, 503, 504), ) adapter = HTTPAdapter(max_retries=retry) self._session = requests.Session() From e06b149c430bc840b0bf7c020a05e94cb700a2df Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 19 Feb 2026 09:39:35 -0600 Subject: [PATCH 14/14] update name, clone UCXX to a temporary directory --- .gitignore | 1 - check_nightly_success/README.md | 5 +++-- check_nightly_success/check-nightly-success/check.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index cfb60695..9c92e4ec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,2 @@ *.swp -ucxx/ .venv diff --git a/check_nightly_success/README.md b/check_nightly_success/README.md index 0a6a3d44..d81b61bd 100644 --- a/check_nightly_success/README.md +++ b/check_nightly_success/README.md @@ -74,8 +74,9 @@ Branches with only very-recent runs should be exempted from the check. 
```shell # NOTE: this example requires write access to 'rapidsai/ucxx' -git clone -o upstream https://github.com/rapidsai/ucxx -pushd ./ucxx +TMP_UCXX=$(mktemp -d) +git clone -o upstream https://github.com/rapidsai/ucxx "${TMP_UCXX}" +pushd "${TMP_UCXX}" git checkout -b delete-me git push upstream delete-me popd diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index d41b59b6..62351c5b 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -57,7 +57,7 @@ def __init__( self._session.mount("https://", adapter) self._session.mount("http://", adapter) - def __get_next_page( + def _get_next_page( self, *, url: str, @@ -99,7 +99,7 @@ def get_all_runs( page_num = 1 while True: print(f"requesting page {page_num} of results") - page = self.__get_next_page( + page = self._get_next_page( url=url, headers=headers, params=params,