Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 23 additions & 16 deletions gittensor/validator/pat_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

import asyncio
import json
from typing import TYPE_CHECKING, Optional, Tuple

import bittensor as bt
Expand Down Expand Up @@ -70,22 +71,28 @@ def _reject(reason: str) -> PatBroadcastSynapse:
if error:
return _reject(error)

# 3. Enforce GitHub identity pinning — same hotkey cannot switch GitHub accounts
identity_error = _github_identity_pin_error(uid, hotkey, github_id)
if identity_error:
return _reject(identity_error)

# 4. Test query against a known repo to catch org-restricted PATs
test_error = await asyncio.to_thread(_test_pat_against_repo, synapse.github_access_token)
if test_error:
return _reject(f'PAT test query failed: {test_error}')

identity_error = _github_identity_pin_error(uid, hotkey, github_id)
if identity_error:
return _reject(identity_error)

# 5. Store PAT (github_id guaranteed non-None after validate_github_credentials success)
pat_storage.save_pat(uid=uid, hotkey=hotkey, pat=synapse.github_access_token, github_id=github_id or '0')
# Steps 3-5 read/write the PAT store. If it is momentarily unreadable, fail
# closed: reject so the miner retries, rather than letting save_pat overwrite
# (and wipe) the store or surfacing a raw axon error.
try:
# 3. Enforce GitHub identity pinning — same hotkey cannot switch GitHub accounts
identity_error = _github_identity_pin_error(uid, hotkey, github_id)
if identity_error:
return _reject(identity_error)

# 4. Test query against a known repo to catch org-restricted PATs
test_error = await asyncio.to_thread(_test_pat_against_repo, synapse.github_access_token)
if test_error:
return _reject(f'PAT test query failed: {test_error}')

identity_error = _github_identity_pin_error(uid, hotkey, github_id)
if identity_error:
return _reject(identity_error)

# 5. Store PAT (github_id guaranteed non-None after validate_github_credentials success)
pat_storage.save_pat(uid=uid, hotkey=hotkey, pat=synapse.github_access_token, github_id=github_id or '0')
except (json.JSONDecodeError, OSError) as e:
return _reject(f'Validator PAT store temporarily unavailable; please retry. ({e})')

# Clear PAT from response so it isn't echoed back
synapse.github_access_token = ''
Expand Down
47 changes: 37 additions & 10 deletions gittensor/validator/pat_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from pathlib import Path
from typing import Optional

import bittensor as bt

PATS_FILE = Path(__file__).resolve().parents[2] / 'data' / 'miner_pats.json'

_lock = threading.Lock()
Expand All @@ -28,15 +30,33 @@ def ensure_pats_file() -> None:


def load_all_pats() -> list[dict]:
"""Read all stored PAT entries. Returns empty list if file is missing or corrupt."""
"""Snapshot all stored PAT entries for a scoring round.

Read-only and deliberately tolerant: an unreadable store here must not crash
the round (an unhandled error would stop the validator) nor wipe anything. It
logs loudly and returns [] so the round recovers on the next successful read.
The *write* path (save_pat) is the one that fails closed.
"""
with _lock:
return _read_file()
try:
return _read_file()
except (json.JSONDecodeError, OSError) as e:
bt.logging.error(
f'miner_pats.json unreadable this round; scoring with no stored PATs until it recovers: {e}'
)
return []


def save_pat(uid: int, hotkey: str, pat: str, github_id: str) -> None:
"""Upsert a PAT entry by UID. Creates the file if needed."""
"""Upsert a PAT entry by UID, failing closed on an unreadable store.

If the existing store cannot be read (corrupt file or a transient I/O error),
this raises *without writing*, so a single failed read can never erase every
other miner's stored PAT (the read-then-overwrite wipe). The broadcast handler
catches this and rejects the broadcast, so the miner retries and nothing is lost.
"""
with _lock:
entries = _read_file()
entries = _read_file() # fail closed: a failed read raises here, before any write

entry = {
'uid': uid,
Expand All @@ -57,7 +77,11 @@ def save_pat(uid: int, hotkey: str, pat: str, github_id: str) -> None:


def get_pat_by_uid(uid: int) -> Optional[dict]:
"""Look up a single PAT entry by UID. Returns None if not found."""
"""Look up a single PAT entry by UID. Returns None if not found.

Propagates (does not swallow) a read error, so callers never mistake an
unreadable store for 'no PAT stored for this miner'.
"""
with _lock:
for entry in _read_file():
if entry.get('uid') == uid:
Expand All @@ -66,13 +90,16 @@ def get_pat_by_uid(uid: int) -> Optional[dict]:


def _read_file() -> list[dict]:
"""Read and parse the JSON file. Must be called while holding _lock."""
"""Read and parse the JSON store. Must be called while holding _lock.

Returns [] only when the file genuinely does not exist. Raises
(json.JSONDecodeError / OSError) on a corrupt or unreadable file so the write
path never mistakes a failed read for an empty store and overwrites it. The
read paths that must tolerate a transient failure (load_all_pats) catch it.
"""
if not PATS_FILE.exists():
return []
try:
return json.loads(PATS_FILE.read_text())
except (json.JSONDecodeError, OSError):
return []
return json.loads(PATS_FILE.read_text())


def _write_file(entries: list[dict]) -> None:
Expand Down
18 changes: 18 additions & 0 deletions tests/validator/test_pat_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,24 @@ def test_unregistered_hotkey_rejected(self, mock_validator):
assert result.accepted is False
assert 'not registered' in (result.rejection_reason or '')

@patch('gittensor.validator.pat_handler._test_pat_against_repo', return_value=None)
@patch('gittensor.validator.pat_handler.validate_github_credentials', return_value=('github_42', None))
def test_unreadable_store_rejected_not_wiped(
self, mock_validate, mock_test_query, mock_validator, use_tmp_pats_file
):
"""A momentarily unreadable store fails closed: the broadcast is rejected (so
the miner retries) instead of overwriting the store or raising a raw error."""
pat_storage.save_pat(0, 'hotkey_0', 'ghp_existing', 'github_7')
use_tmp_pats_file.write_text('not json{{{')

synapse = _make_broadcast_synapse('hotkey_1', pat='ghp_valid')
result = _run(handle_pat_broadcast(mock_validator, synapse))

assert result.accepted is False
assert 'temporarily unavailable' in (result.rejection_reason or '')
# The store was not overwritten down to the single incoming entry.
assert use_tmp_pats_file.read_text() == 'not json{{{'

@patch('gittensor.validator.pat_handler.validate_github_credentials', return_value=(None, 'PAT invalid'))
def test_invalid_pat_rejected(self, mock_validate, mock_validator):
synapse = _make_broadcast_synapse('hotkey_1', pat='ghp_bad')
Expand Down
45 changes: 45 additions & 0 deletions tests/validator/test_pat_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import json
import threading
from pathlib import Path

import pytest

Expand Down Expand Up @@ -90,6 +91,50 @@ def test_load_handles_corrupt_file(self, use_tmp_pats_file):
assert entries == []


class TestSavePatFailsClosed:
"""The read-then-overwrite wipe (issue #1481, Proof 2): a single failed read of
miner_pats.json must never erase every other miner's stored PAT on the next save."""

def test_transient_read_error_does_not_wipe(self, use_tmp_pats_file, monkeypatch):
"""A momentary I/O error on read leaves the on-disk store fully intact."""
pat_storage.save_pat(10, 'h10', 'p10', 'u10')
pat_storage.save_pat(20, 'h20', 'p20', 'u20')
pat_storage.save_pat(30, 'h30', 'p30', 'u30')
good_bytes = use_tmp_pats_file.read_text()

real_read_text = Path.read_text
failing = {'on': False}

def flaky_read_text(self, *args, **kwargs):
if failing['on'] and self == use_tmp_pats_file:
raise OSError('transient I/O error')
return real_read_text(self, *args, **kwargs)

monkeypatch.setattr(Path, 'read_text', flaky_read_text)

# The save must fail closed (raise) instead of overwriting the store.
failing['on'] = True
with pytest.raises(OSError):
pat_storage.save_pat(99, 'h99', 'p99', 'u99')
failing['on'] = False

# Read recovers: every original entry survives; UID 99 was never written.
assert use_tmp_pats_file.read_text() == good_bytes
assert {e['uid'] for e in pat_storage.load_all_pats()} == {10, 20, 30}

def test_corrupt_store_save_does_not_shrink(self, use_tmp_pats_file):
"""A corrupt store is left as-is, not silently overwritten down to one entry."""
pat_storage.save_pat(10, 'h10', 'p10', 'u10')
pat_storage.save_pat(20, 'h20', 'p20', 'u20')

use_tmp_pats_file.write_text('not json{{{')

with pytest.raises(json.JSONDecodeError):
pat_storage.save_pat(99, 'h99', 'p99', 'u99')

assert use_tmp_pats_file.read_text() == 'not json{{{'


class TestGetPatByUid:
def test_get_existing(self):
pat_storage.save_pat(1, 'hotkey_1', 'ghp_abc', 'user_1')
Expand Down
Loading