Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .artifacts/ssot-spec-2009-structured-fields-strict-validation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
## Content

# Structured fields strict validation

This spec defines Tigrcorn's package-owned strict validation behavior for RFC 9651 structured fields.

## Scope

- `src/tigrcorn/http/structured_fields.py`
- `tests/test_structured_fields_validation.py`

## Required behavior

- The parser must reject malformed structured string escapes.
- The parser must reject control characters (including DEL, 0x7F) and non-ASCII characters inside structured strings; only characters in the range 0x20–0x7E are accepted.
- Structured keys must follow the RFC 9651 key grammar and reject uppercase characters.
- Structured tokens must keep RFC 9651-valid `:` and `/` characters when parsing and serializing.
- The serializer must reject invalid keys, invalid tokens, and invalid string values instead of emitting non-conformant wire output.

## Verification

- `tests/test_structured_fields_validation.py` covers malformed strings, invalid keys, valid escaped strings, valid `:` and `/` tokens, and serializer rejection paths.
- `tests/test_p8_sf.py` remains the broader RFC 9651 round-trip baseline for the structured-fields surface.
709 changes: 456 additions & 253 deletions .ssot/registry.json

Large diffs are not rendered by default.

30 changes: 12 additions & 18 deletions .ssot/reports/upgrade.report.json
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
{
"passed": true,
"registry_path": ".ssot/registry.json",
"from_schema_version": 10,
"registry_path": "C:/Users/bigman/.codex/worktrees/00ac/tigrcorn/.ssot/registry.json",
"from_schema_version": "0.1.0",
"to_schema_version": "0.1.0",
"from_version": "0.2.7",
"to_version": "0.2.8",
"migrations": [
"0.2.6->0.2.7 (schema 9->10)",
"0.2.7->0.2.7 (schema 10->0.1.0)"
],
"schema_migrations": [
"migrate_v9_to_v10",
"migrate_v10_to_v0_1_0"
],
"from_version": "0.2.10",
"to_version": "0.2.10",
"migrations": [],
"schema_migrations": [],
"renamed_specs": [],
"document_migration": null,
"sync": {
"adr": {
"created": [],
"updated": [
"updated": [],
"unchanged": [
"adr:0600",
"adr:0601",
"adr:0602",
Expand All @@ -33,12 +28,12 @@
"adr:0611",
"adr:0612",
"adr:0613"
],
"unchanged": []
]
},
"spec": {
"created": [],
"updated": [
"updated": [],
"unchanged": [
"spc:0600",
"spc:0601",
"spc:0602",
Expand All @@ -54,8 +49,7 @@
"spc:0612",
"spc:0613",
"spc:0614"
],
"unchanged": []
]
}
},
"changed": true
Expand Down
26 changes: 26 additions & 0 deletions .ssot/reports/validation.report.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"passed": true,
"registry_path": ".ssot/registry.json",
"failures": [],
"warnings": [],
"summary": {
"counts": {
"features": 93,
"profiles": 0,
"tests": 327,
"claims": 115,
"evidence": 297,
"issues": 10,
"risks": 4,
"boundaries": 1,
"releases": 1,
"adrs": 23,
"specs": 26
},
"profile_status": {
"passing": 0,
"failing": 0,
"draft": 0
}
}
}
40 changes: 40 additions & 0 deletions .ssot/specs/SPEC-2009-structured-fields-strict-validation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
schema_version: "0.1.0"
kind: "spec"
id: "spc:2009"
number: 2009
slug: "structured-fields-strict-validation"
title: "Structured fields strict validation"
status: "draft"
origin: "repo-local"
decision_date: null
tags: []
summary: "This spec defines Tigrcorn's package-owned strict validation behavior for RFC 9651 structured fields."
supersedes: []
superseded_by: []
status_notes: []
references: []
body: |-
## Content

# Structured fields strict validation

This spec defines Tigrcorn's package-owned strict validation behavior for RFC 9651 structured fields.

## Scope

- `src/tigrcorn/http/structured_fields.py`
- `tests/test_structured_fields_validation.py`

## Required behavior

- The parser must reject malformed structured string escapes.
- The parser must reject control characters (including DEL, 0x7F) and non-ASCII characters inside structured strings; only characters in the range 0x20–0x7E are accepted.
- Structured keys must follow the RFC 9651 key grammar and reject uppercase characters.
- Structured tokens must keep RFC 9651-valid `:` and `/` characters when parsing and serializing.
- The serializer must reject invalid keys, invalid tokens, and invalid string values instead of emitting non-conformant wire output.

## Verification

- `tests/test_structured_fields_validation.py` covers malformed strings, invalid keys, valid escaped strings, valid `:` and `/` tokens, and serializer rejection paths.
- `tests/test_p8_sf.py` remains the broader RFC 9651 round-trip baseline for the structured-fields surface.
spec_kind: "local-policy"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ full-featured = [
dev = [
"cryptography>=46.0.0",
"pytest>=8.0",
"ssot-registry>=0.1.0",
"ssot-registry>=0.2.10",
"aioquic>=1.3.0",
"h2>=4.1.0",
"websockets>=12.0",
Expand Down
55 changes: 44 additions & 11 deletions src/tigrcorn/http/structured_fields.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import base64
import string
from dataclasses import dataclass, field
from decimal import Decimal
from typing import Any
Expand Down Expand Up @@ -46,6 +47,30 @@ class StructuredFieldError(ValueError):
pass


# Character classes used by the key/token validators. They are built once at
# import time so each validation is a per-character set-membership test.

# A key may start with a lowercase ASCII letter or "*".
_KEY_START_CHARS = frozenset(string.ascii_lowercase + '*')
# Remaining key characters: lowercase ASCII letters, digits, "_", "-", ".", "*".
_KEY_CHARS = frozenset(string.ascii_lowercase + string.digits + '_-.*')
# A token may start with any ASCII letter or "*".
_TOKEN_START_CHARS = frozenset('*' + string.ascii_letters)
# Remaining token characters: ASCII letters, digits, the punctuation listed
# below, plus ":" and "/".
_TOKEN_CHARS = (
    frozenset(string.ascii_letters)
    | frozenset(string.digits)
    | frozenset("!#$%&'*+-.^_`|~:/")
)


def _validate_key(key: str) -> None:
    """Raise ``StructuredFieldError`` unless *key* is a well-formed structured key.

    A key is accepted when it is non-empty, its first character is in
    ``_KEY_START_CHARS``, and every following character is in ``_KEY_CHARS``.

    Raises:
        StructuredFieldError: if any of those conditions does not hold.
    """
    well_formed = (
        bool(key)
        and key[0] in _KEY_START_CHARS
        and all(char in _KEY_CHARS for char in key[1:])
    )
    if not well_formed:
        raise StructuredFieldError(f'invalid structured key {key!r}')


def _validate_token(token: str) -> None:
    """Raise ``StructuredFieldError`` unless *token* is a well-formed token.

    An empty token is reported as ``'expected token'``. Otherwise the first
    character must be in ``_TOKEN_START_CHARS`` and every later character in
    ``_TOKEN_CHARS``.

    Raises:
        StructuredFieldError: if the token is empty or contains a character
            outside the allowed sets.
    """
    if not token:
        raise StructuredFieldError('expected token')
    head, tail = token[0], token[1:]
    if head in _TOKEN_START_CHARS and all(char in _TOKEN_CHARS for char in tail):
        return
    raise StructuredFieldError(f'invalid structured token {token!r}')


def _validate_string_value(value: str) -> None:
    """Raise ``StructuredFieldError`` if *value* has a character outside 0x20-0x7E.

    Raises:
        StructuredFieldError: on the first character whose code point is
            below 0x20 or above 0x7E.
    """
    if any(not 0x20 <= ord(char) <= 0x7E for char in value):
        raise StructuredFieldError('invalid character in structured string')


class _Parser:
def __init__(self, text: str):
self.text = text
Expand Down Expand Up @@ -152,9 +177,14 @@ def _parse_string(self) -> str:
if char == '\\':
if self.index >= self.length:
raise StructuredFieldError('unterminated escape in structured string')
chunks.append(self.text[self.index])
escaped = self.text[self.index]
if escaped not in {'"', '\\'}:
raise StructuredFieldError('invalid escape in structured string')
chunks.append(escaped)
self.index += 1
continue
if ord(char) < 0x20 or ord(char) > 0x7E:
raise StructuredFieldError('invalid character in structured string')
chunks.append(char)
raise StructuredFieldError('unterminated structured string')

Expand Down Expand Up @@ -196,17 +226,15 @@ def _parse_number(self) -> int | Decimal:

def _parse_token(self) -> str:
start = self.index
while self.index < self.length and self.text[self.index] not in '()<>@,;:\\"/[]?={} \t':
while self.index < self.length and self.text[self.index] not in {'(', ')', '<', '>', '@', ',', ';', '\\', '"', '[', ']', '?', '=', '{', '}', ' ', '\t'}:
self.index += 1
token = self.text[start:self.index]
if not token:
raise StructuredFieldError('expected token')
_validate_token(token)
return token

def _parse_key(self) -> str:
key = self._parse_token()
if not key[0].islower() and key[0] != '*':
raise StructuredFieldError(f'invalid structured key {key!r}')
_validate_key(key)
return key

def _parse_digits(self, *, allow_sign: bool) -> str:
Expand Down Expand Up @@ -264,6 +292,7 @@ def serialize_bare_item(value: BareItem) -> str:
if isinstance(value, bool):
return '?1' if value else '?0'
if isinstance(value, Token):
_validate_token(value.value)
return value.value
if isinstance(value, ByteSequence):
return ':' + base64.b64encode(value.value).decode('ascii') + ':'
Expand All @@ -275,7 +304,9 @@ def serialize_bare_item(value: BareItem) -> str:
return text
if isinstance(value, int):
return str(value)
escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
text = str(value)
_validate_string_value(text)
escaped = text.replace('\\', '\\\\').replace('"', '\\"')
return f'"{escaped}"'


Expand All @@ -293,6 +324,7 @@ def serialize_list_member(member: ListMember) -> str:
def serialize_dictionary(value: dict[str, ListMember]) -> str:
parts: list[str] = []
for key, member in value.items():
_validate_key(key)
if isinstance(member, Item) and member.value is True:
parts.append(key + _serialize_params(member.params))
else:
Expand All @@ -313,10 +345,11 @@ def serialize_structured_value(value: StructuredValue) -> str:


def _serialize_params(params: dict[str, BareItem]) -> str:
return ''.join(
f';{key}' if raw is True else f';{key}={serialize_bare_item(raw)}'
for key, raw in params.items()
)
parts: list[str] = []
for key, raw in params.items():
_validate_key(key)
parts.append(f';{key}' if raw is True else f';{key}={serialize_bare_item(raw)}')
return ''.join(parts)


def normalize_for_json(value: Any) -> Any:
Expand Down
57 changes: 57 additions & 0 deletions tests/test_structured_fields_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from __future__ import annotations

import pytest

from tigrcorn.http.structured_fields import (
Item,
StructuredFieldError,
Token,
parse_dictionary,
parse_item,
serialize_dictionary,
serialize_item,
)


@pytest.mark.parametrize(
    ('wire_value', 'message_fragment'),
    [
        pytest.param('"bad\\q"', 'invalid escape'),
        pytest.param('"bad\n"', 'invalid character'),
    ],
)
def test_parse_item_rejects_malformed_strings(wire_value: str, message_fragment: str) -> None:
    """Parsing a malformed string item raises with the expected message fragment."""
    expectation = pytest.raises(StructuredFieldError, match=message_fragment)
    with expectation:
        parse_item(wire_value)


def test_parse_item_accepts_escaped_quote_and_backslash() -> None:
    """Escaped quote and backslash are unescaped on parse and re-escaped on serialize."""
    wire = r'"a\"b\\c"'
    item = parse_item(wire)

    assert item.value == 'a"b\\c'
    assert serialize_item(item) == wire


def test_parse_dictionary_rejects_uppercase_keys() -> None:
    """A dictionary key containing an uppercase letter is rejected by the parser."""
    expectation = pytest.raises(StructuredFieldError, match='invalid structured key')
    with expectation:
        parse_dictionary('fooBar=1')


def test_parse_item_accepts_tokens_with_colon_and_slash() -> None:
    """A token containing ``:`` and ``/`` parses to a ``Token`` and round-trips."""
    wire = 'Digest:sha-256/example'
    item = parse_item(wire)

    assert item.value == Token('Digest:sha-256/example')
    assert serialize_item(item) == wire


@pytest.mark.parametrize(
    'value',
    [
        pytest.param({'fooBar': Item(1)}),
        pytest.param({'foo': Item('m\u00fc')}),
        pytest.param({'foo': Item(Token('b\u00e9po'))}),
    ],
)
def test_serialize_dictionary_rejects_invalid_keys_and_item_values(value: dict[str, Item]) -> None:
    """Serializing an uppercase key, a non-ASCII string, or a non-ASCII token raises."""
    expectation = pytest.raises(StructuredFieldError)
    with expectation:
        serialize_dictionary(value)
Loading
Loading