-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidation_gate.py
More file actions
142 lines (109 loc) · 4.31 KB
/
validation_gate.py
File metadata and controls
142 lines (109 loc) · 4.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
"""
validation_gate.py - Hard Gate pattern example

Demonstrates validation that blocks workflow progression via exit codes.
The agent cannot decide whether validation passed; the script decides.

EXIT CODES:
    0 - Validation PASSED. Agent may proceed.
    1 - Validation FAILED. Agent must stop.

USAGE:
    python validation_gate.py [--quiet] source.txt translation.json
"""
import json
import sys
from pathlib import Path
from typing import Tuple, Dict, Any
def normalize_text(text: str) -> str:
    """Return *text* with whitespace normalized for comparison.

    Every run of whitespace (spaces, tabs, newlines) becomes a single space
    and leading/trailing whitespace is dropped. This is the hook for
    domain-specific cleanup of Latin formatting quirks; only whitespace
    collapsing is implemented here.
    """
    # split() with no arguments discards leading/trailing whitespace and
    # treats any whitespace run as one separator, so the joined result
    # needs no further stripping.
    words = text.split()
    return ' '.join(words)
def check_translation_coverage(
    source_text: str,
    last_translated_chunk: str
) -> Tuple[bool, str, Dict[str, Any]]:
    """
    Core validation: does the last translated content appear near the end
    of the source? If not, content was missed.

    Both inputs are passed through normalize_text(). The final 100 characters
    of the normalized chunk (the whole chunk when it is shorter) are located
    in the normalized source, and every source character after the end of
    that match counts as untranslated.

    Args:
        source_text: Full source text.
        last_translated_chunk: Source-language text of the last translated
            chunk.

    Returns: (passed, message, metrics)
        passed is True only when at most 100 normalized characters follow
        the match. metrics holds "source_length" and "chunk_length" (raw,
        un-normalized lengths) plus "remaining_chars" and "remaining_pct"
        (left at 0 / 0.0 when the chunk is empty or cannot be located).
    """
    metrics: Dict[str, Any] = {
        "source_length": len(source_text),
        "chunk_length": len(last_translated_chunk),
        "remaining_chars": 0,
        "remaining_pct": 0.0,
    }

    source_norm = normalize_text(source_text)
    chunk_norm = normalize_text(last_translated_chunk)

    # An empty needle "matches" at position 0 of any string, which would let
    # a short source pass the gate with nothing translated (and divide by
    # zero when the source is empty too). Fail it explicitly.
    if not chunk_norm:
        return (
            False,
            "FATAL: Last translated chunk is empty.",
            metrics
        )

    # Only the tail of the chunk is searched for; slicing a shorter string
    # with [-100:] yields the whole string.
    tail = chunk_norm[-100:]

    # First occurrence is used: if the tail text repeats in the source, the
    # earliest match leaves the most text "remaining", which errs toward
    # failing the gate rather than passing it.
    position = source_norm.find(tail)
    if position == -1:
        return (
            False,
            "FATAL: Last translated chunk not found in source text.",
            metrics
        )

    # The match covers len(tail) characters, not the full chunk. Adding the
    # full chunk length here would overshoot for chunks longer than 100
    # characters and under-report (even go negative on) the remainder.
    end_position = position + len(tail)
    remaining = len(source_norm) - end_position
    # source_norm is non-empty here because a non-empty tail was found in it.
    remaining_pct = (remaining / len(source_norm)) * 100
    metrics["remaining_chars"] = remaining
    metrics["remaining_pct"] = remaining_pct

    # Thresholds: >500 chars = incomplete, >100 = suspicious
    if remaining > 500:
        return (
            False,
            f"INCOMPLETE: {remaining_pct:.1f}% of source remains untranslated.",
            metrics
        )
    if remaining > 100:
        return (
            False,
            f"WARNING: {remaining} characters remain. Verify this is formatting only.",
            metrics
        )
    return (
        True,
        f"PASSED: Translation covers source ({remaining} chars remaining).",
        metrics
    )
def check_natural_ending(last_chunk: str) -> Tuple[bool, str]:
    """Check if translation ends naturally. Domain-specific: Latin text patterns.

    The ending counts as natural when the stripped text ends with '.', '?'
    or '!', or when its final word is "amen" (case-insensitive).

    Args:
        last_chunk: Text of the final chunk.

    Returns:
        (natural, message): natural is True for a natural ending; message
        describes the outcome.
    """
    text = last_chunk.strip()
    if text.endswith(('.', '?', '!')):
        return True, "Ending appears natural"

    # "amen" must be a whole word. A bare suffix test would also accept
    # ordinary Latin words such as "tamen" or "certamen", hiding a
    # mid-sentence cut-off.
    lowered = text.lower()
    if lowered.endswith('amen') and (len(lowered) == 4 or not lowered[-5].isalpha()):
        return True, "Ending appears natural"

    return False, "WARNING: Translation may end mid-sentence"
def main():
    """Command-line entry point: validate a translation and exit with the gate code.

    Reads the source text file and the translation JSON named on the command
    line, takes the last entry of the JSON's "chunks" list, and runs
    check_translation_coverage() on that entry's "latin" text (falling back
    to "original_latin" when "latin" is missing or empty).

    Never returns normally; always terminates via sys.exit():
        0 - validation passed
        1 - validation failed, or the inputs were missing, unreadable or
            malformed
    """
    import argparse

    parser = argparse.ArgumentParser(description="Validate translation completeness")
    parser.add_argument("source_file", help="Path to source text")
    parser.add_argument("translation_file", help="Path to translation JSON")
    parser.add_argument("--quiet", "-q", action="store_true")
    args = parser.parse_args()

    source_path = Path(args.source_file)
    translation_path = Path(args.translation_file)

    if not source_path.exists():
        print(f"ERROR: Source file not found: {source_path}")
        sys.exit(1)
    if not translation_path.exists():
        print(f"ERROR: Translation file not found: {translation_path}")
        sys.exit(1)

    # Load files. OSError covers unreadable paths (e.g. a directory or a
    # permission problem); ValueError covers both invalid UTF-8
    # (UnicodeDecodeError) and invalid JSON (json.JSONDecodeError).
    try:
        source_text = source_path.read_text(encoding='utf-8')
        translation = json.loads(translation_path.read_text(encoding='utf-8'))
    except (OSError, ValueError) as exc:
        print(f"ERROR: Could not load input files: {exc}")
        sys.exit(1)

    # The JSON root must be an object holding a non-empty "chunks" list.
    chunks = translation.get('chunks') if isinstance(translation, dict) else None
    if not isinstance(chunks, list) or not chunks:
        print("ERROR: Translation has no chunks")
        sys.exit(1)

    last_chunk = chunks[-1]
    if not isinstance(last_chunk, dict):
        print("ERROR: Last translation chunk is not a JSON object")
        sys.exit(1)

    # Fall back to "original_latin" when "latin" is absent, null or empty.
    last_latin = last_chunk.get('latin') or last_chunk.get('original_latin') or ''
    if not isinstance(last_latin, str) or not last_latin.strip():
        # Nothing to locate in the source: the gate must fail, not guess.
        print("ERROR: Last translation chunk has no Latin text")
        sys.exit(1)

    # Run validation
    passed, message, _metrics = check_translation_coverage(source_text, last_latin)
    if not args.quiet:
        print(message)

    # ==========================================================================
    # EXIT CODE IS THE GATE
    # The agent reads this result; it cannot override it.
    # ==========================================================================
    sys.exit(0 if passed else 1)
# Run the gate only when executed as a script, not when imported as a module.
# main() terminates the process through sys.exit() with the gate's exit code.
if __name__ == "__main__":
    main()