-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathproxy_server.py
More file actions
2117 lines (1856 loc) · 85.8 KB
/
proxy_server.py
File metadata and controls
2117 lines (1856 loc) · 85.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import annotations
import argparse
import copy
import gzip
import hashlib
import http.client
import io
import json
import os
import threading
import time
import urllib.parse
import uuid
from dataclasses import dataclass, field
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from typing import Any
# Host and port used to build CODEX_PROXY_BASE_URL below; both can be
# overridden through environment variables.
HOST = os.environ.get("HASH_CONTEXT_PROXY_HOST", "127.0.0.1")
PORT = int(os.environ.get("HASH_CONTEXT_PROXY_PORT", "8787"))
# Upstream base URL for OpenAI API traffic. The specific variable wins, then
# the generic HASH_CONTEXT_UPSTREAM_BASE_URL, then the public API default.
OPENAI_UPSTREAM_BASE_URL = os.environ.get(
"HASH_CONTEXT_OPENAI_UPSTREAM_BASE_URL",
os.environ.get("HASH_CONTEXT_UPSTREAM_BASE_URL", "https://api.openai.com/v1"),
)
# Upstream base URL selected when a ChatGPT account id header is in effect
# (see upstream_base_url_for_request).
CHATGPT_UPSTREAM_BASE_URL = os.environ.get(
"HASH_CONTEXT_CHATGPT_UPSTREAM_BASE_URL",
"https://chatgpt.com/backend-api/codex",
)
# On-disk locations; DATA_DIR defaults to a "data" directory next to this file.
DATA_DIR = Path(os.environ.get("HASH_CONTEXT_PROXY_DATA_DIR", Path(__file__).parent / "data"))
STATE_PATH = DATA_DIR / "proxy_state.json"
LOG_PATH = DATA_DIR / "proxy.log"
CODEX_PROXY_PROVIDER_ID = "codex-proxy"
CODEX_PROXY_BASE_URL = f"http://{HOST}:{PORT}/v1"
# Header name/value pair checked by is_internal_context_request; the header
# is also dropped before requests are forwarded upstream.
INTERNAL_CONTEXT_HEADER = "x-hash-context-internal"
INTERNAL_CONTEXT_VALUE = "context-workbench"
# NOTE(review): presumably used to recognise / build local compaction prompts
# and summaries; their consumers are not visible in this part of the file.
LOCAL_COMPACT_PROMPT_PREFIX = "You are performing a CONTEXT CHECKPOINT COMPACTION."
LOCAL_COMPACT_SUMMARY_PREFIX = (
"Another language model started to solve this problem and produced a summary of its thinking process. "
"You also have access to the state of the tools that were used by that language model. "
"Use this to build on the work that has already been done and avoid duplicating work. "
"Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:"
)
# Process-wide cache of upstream auth headers; read and written under the lock
# by remember_upstream_auth / cached_upstream_auth_headers.
_UPSTREAM_AUTH_LOCK = threading.Lock()
_UPSTREAM_AUTH_HEADERS: dict[str, str] = {}
# Codex auth file read by preload_codex_subscription_auth; honours CODEX_HOME.
CODEX_AUTH_PATH = Path(os.environ.get("CODEX_HOME", Path.home() / ".codex")) / "auth.json"
def utc_timestamp() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    now_utc = time.gmtime()
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", now_utc)
def proxy_log(message: str) -> None:
    """Append ``"<utc timestamp> <message>"`` as one line to ``LOG_PATH``.

    The data directory is created on demand. Logging is best-effort: any
    failure while creating the directory or writing the line is ignored, so
    this function never raises to its caller.
    """
    try:
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        # Opening in append mode creates the file when it is missing, so no
        # separate (and race-prone) existence check / pre-create is needed.
        with LOG_PATH.open("a", encoding="utf-8") as handle:
            handle.write(f"{utc_timestamp()} {message}\n")
    except Exception:
        # Intentionally ignored: a failed log write must not propagate.
        pass
# Keys probed, in priority order, when looking for text inside a mapping.
_TEXT_FIELD_KEYS = ("text", "input_text", "output_text", "summary_text", "reasoning_text")


def _first_text_field(mapping: dict[str, Any]) -> Any:
    """Return the first truthy value among the text-like keys, else ``None``."""
    for key in _TEXT_FIELD_KEYS:
        candidate = mapping.get(key)
        if candidate:
            return candidate
    return None


def compact_text(value: Any) -> str:
    """Flatten a loosely-typed content value into a plain string.

    * ``None`` becomes ``""`` and strings are returned unchanged.
    * A list is rendered one entry per line: dict entries contribute their
      first truthy text-like field (dicts without one are skipped), ``None``
      entries are skipped, anything else is passed through ``str``.
    * A dict yields its first truthy text-like field; without one it is
      serialized as JSON, falling back to ``str(value)`` when it cannot be
      serialized.
    * Every other value is passed through ``str``.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, list):
        parts: list[str] = []
        for item in value:
            if isinstance(item, dict):
                text = _first_text_field(item)
                if text:
                    parts.append(str(text))
            elif item is not None:
                parts.append(str(item))
        return "\n".join(parts)
    if isinstance(value, dict):
        text = _first_text_field(value)
        if text:
            return str(text)
        try:
            return json.dumps(value, ensure_ascii=False)
        except (TypeError, ValueError):
            # TypeError: a value is not JSON-serializable.
            # ValueError: json.dumps detected a circular reference.
            return str(value)
    return str(value)
def read_message_text(item: dict[str, Any]) -> str:
    """Return the flattened text of a message-like item.

    The ``content`` field is used whenever the key is present (even when its
    value is empty); otherwise the ``text`` field is used.
    """
    source_key = "content" if "content" in item else "text"
    return compact_text(item.get(source_key))
def role_message_text_from_items(items: list[dict[str, Any]], role: str) -> str:
    """Join the non-empty texts of all ``message`` items with the given role.

    Texts are separated by a blank line and kept in item order.
    """
    collected: list[str] = []
    for entry in items:
        if entry.get("type") != "message" or entry.get("role") != role:
            continue
        message_text = read_message_text(entry)
        if message_text:
            collected.append(message_text)
    return "\n\n".join(collected)
def provider_message(role: str, text: str) -> dict[str, Any]:
    """Build a provider ``message`` item carrying *text* as plain content."""
    message: dict[str, Any] = {"type": "message"}
    message["role"] = role
    message["content"] = text
    return message
# Item types that are grouped into the surrounding assistant turn when a
# request's input items are converted into transcript records.
ASSISTANT_ITEM_TYPES = {
    "reasoning",
    "local_shell_call",
    "function_call",
    "tool_search_call",
    "function_call_output",
    "mcp_tool_call_output",
    "local_shell_call_output",
    "custom_tool_call",
    "custom_tool_call_output",
    "tool_search_output",
    "web_search_call",
    "image_generation_call",
    "compaction",
    "compaction_summary",
}

# Item types that represent a tool invocation.
TOOL_CALL_ITEM_TYPES = {
    "function_call",
    "custom_tool_call",
    "local_shell_call",
    "tool_search_call",
    "web_search_call",
    "image_generation_call",
}

# Item types that carry the result of a tool invocation.
TOOL_OUTPUT_ITEM_TYPES = {
    "function_call_output",
    "custom_tool_call_output",
    "mcp_tool_call_output",
    "tool_search_output",
    "local_shell_call_output",
}

# For each call type that must be paired with an output: the output types
# accepted as its counterpart.
REQUIRED_TOOL_OUTPUT_TYPES_BY_CALL_TYPE = {
    "function_call": {"function_call_output", "mcp_tool_call_output"},
    "local_shell_call": {"function_call_output", "local_shell_call_output"},
    "custom_tool_call": {"custom_tool_call_output"},
    "tool_search_call": {"tool_search_output"},
}

# Reverse index of the table above: output type -> call types it can answer.
REQUIRED_TOOL_CALL_TYPES_BY_OUTPUT_TYPE: dict[str, set[str]] = {}
for _call_type, _output_types in REQUIRED_TOOL_OUTPUT_TYPES_BY_CALL_TYPE.items():
    for _output_type in _output_types:
        if _output_type not in REQUIRED_TOOL_CALL_TYPES_BY_OUTPUT_TYPE:
            REQUIRED_TOOL_CALL_TYPES_BY_OUTPUT_TYPE[_output_type] = set()
        REQUIRED_TOOL_CALL_TYPES_BY_OUTPUT_TYPE[_output_type].add(_call_type)
def transcript_record(role: str, text: str, provider_items: list[dict[str, Any]]) -> dict[str, Any]:
    """Assemble one transcript record for *role* from text and provider items.

    The provider items are first normalized so an assistant record's text is
    present in them; tool events and display blocks are derived from that
    normalized list. ``pending`` is true only for an assistant record with
    no text.
    """
    normalized_items = provider_items_with_record_text(role, text, provider_items)
    events = tool_events_from_provider_items(normalized_items)
    display_blocks = blocks_from_provider_items(role, text, normalized_items, events)
    record: dict[str, Any] = {
        "role": role,
        "text": text,
        "attachments": [],
        "toolEvents": events,
        "blocks": display_blocks,
        "providerItems": normalized_items,
        "pending": role == "assistant" and not text,
    }
    return record
def provider_items_with_record_text(role: str, text: str, provider_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return deep copies of the dict provider items, ensuring assistant text.

    Non-dict entries are dropped. For an assistant record with non-empty
    *text* whose items yield no assistant text, the text is written into the
    first assistant message item; when there is no such item, a new assistant
    message is placed in front of the copies.
    """
    copied = [copy.deepcopy(entry) for entry in provider_items if isinstance(entry, dict)]
    needs_text = bool(text) and role == "assistant"
    if not needs_text or assistant_text_from_items(copied):
        return copied

    text_content = [{"type": "output_text", "text": text}]
    first_assistant_message = next(
        (
            entry
            for entry in copied
            if entry.get("type") == "message" and entry.get("role") == "assistant"
        ),
        None,
    )
    if first_assistant_message is None:
        return [{"type": "message", "role": "assistant", "content": text_content}, *copied]
    first_assistant_message["content"] = text_content
    return copied
def blocks_from_provider_items(
    role: str,
    text: str,
    provider_items: list[dict[str, Any]],
    tool_events: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Build the ordered display blocks for a transcript record.

    Non-assistant roles get a single text block (or nothing when *text* is
    empty). An assistant record with neither text nor tool events gets one
    ``thinking`` block. Otherwise blocks follow provider-item order: text for
    assistant messages, ``tool`` for tool calls that have a matching event,
    ``reasoning`` for reasoning items with text. Tool events not placed that
    way are appended afterwards, and *text* is inserted first when no text
    block was produced.
    """
    if role != "assistant":
        return [{"kind": "text", "text": text}] if text else []
    if not (text or tool_events):
        return [{"kind": "thinking"}]

    # Later events with the same call id overwrite earlier ones.
    event_lookup: dict[str, dict[str, Any]] = {}
    for event in tool_events:
        if isinstance(event, dict) and event.get("call_id"):
            event_lookup[str(event.get("call_id") or "")] = event

    rendered: list[dict[str, Any]] = []
    placed_call_ids: set[str] = set()
    for entry in provider_items:
        entry_type = entry.get("type")
        if entry_type == "message" and entry.get("role") == "assistant":
            message_text = read_message_text(entry)
            if message_text:
                rendered.append({"kind": "text", "text": message_text})
        elif entry_type in TOOL_CALL_ITEM_TYPES:
            entry_call_id = str(entry.get("call_id") or entry.get("id") or "")
            matching_event = event_lookup.get(entry_call_id)
            if matching_event is not None:
                rendered.append({"kind": "tool", "tool_event": matching_event})
                placed_call_ids.add(entry_call_id)
        elif entry_type == "reasoning":
            thoughts = reasoning_text_from_item(entry)
            if thoughts:
                rendered.append({"kind": "reasoning", "text": thoughts, "status": "completed"})

    # Events without a call id, or whose call was never placed, go last.
    for event in tool_events:
        event_call_id = str(event.get("call_id") or "")
        if not event_call_id or event_call_id not in placed_call_ids:
            rendered.append({"kind": "tool", "tool_event": event})

    if text:
        has_text_block = any(block.get("kind") == "text" for block in rendered)
        if not has_text_block:
            rendered.insert(0, {"kind": "text", "text": text})
    return rendered
def tool_events_from_provider_items(provider_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Derive display tool events from a list of provider items.

    Each call whose type requires an output becomes one event, enriched with
    the first not-yet-claimed output that shares its call id and has an
    accepted output type. Web-search and image-generation calls become events
    without output pairing. A tool output that has not been claimed by the
    time it is reached becomes a standalone event.
    """

    def type_name(entry: dict[str, Any]) -> str:
        return str(entry.get("type") or "")

    # Positions of every tool output that carries a call id, in list order.
    output_positions: dict[str, list[int]] = {}
    for position, entry in enumerate(provider_items):
        if type_name(entry) not in TOOL_OUTPUT_ITEM_TYPES:
            continue
        output_call_id = str(entry.get("call_id") or "").strip()
        if output_call_id:
            output_positions.setdefault(output_call_id, []).append(position)

    collected: list[dict[str, Any]] = []
    claimed_positions: set[int] = set()
    for position, entry in enumerate(provider_items):
        entry_type = type_name(entry)
        if entry_type in REQUIRED_TOOL_OUTPUT_TYPES_BY_CALL_TYPE:
            call_id = str(entry.get("call_id") or entry.get("id") or "").strip()
            call_event = tool_call_event_from_item(entry)
            accepted_types = REQUIRED_TOOL_OUTPUT_TYPES_BY_CALL_TYPE.get(entry_type, set())
            matched_position = next(
                (
                    candidate
                    for candidate in output_positions.get(call_id, [])
                    if candidate not in claimed_positions
                    and type_name(provider_items[candidate]) in accepted_types
                ),
                None,
            )
            if matched_position is not None:
                output_text = output_text_from_item(provider_items[matched_position])
                call_event["raw_output"] = output_text
                call_event["output_preview"] = output_text[:500]
                call_event["display_result"] = output_text[:500]
                call_event["status"] = status_from_tool_output(
                    output_text, str(call_event.get("status") or "completed")
                )
                claimed_positions.add(matched_position)
            collected.append(call_event)
        elif entry_type in {"web_search_call", "image_generation_call"}:
            collected.append(tool_call_event_from_item(entry))
        elif entry_type in TOOL_OUTPUT_ITEM_TYPES and position not in claimed_positions:
            orphan_call_id = str(entry.get("call_id") or "").strip()
            orphan_text = output_text_from_item(entry)
            orphan_label = str(entry.get("name") or entry_type or "tool_output")
            collected.append(
                {
                    "name": orphan_label,
                    "arguments": "",
                    "call_id": orphan_call_id,
                    "output_preview": orphan_text[:500],
                    "raw_output": orphan_text,
                    "display_title": str(entry.get("name") or entry_type or "tool_output"),
                    "display_detail": "",
                    "display_result": orphan_text[:500],
                    "status": status_from_tool_output(orphan_text),
                }
            )
    return collected
def tool_call_event_from_item(item: dict[str, Any]) -> dict[str, Any]:
    """Build a display event dict for a single tool-call provider item.

    The event's arguments come from the first of ``arguments``, ``input`` or
    ``action`` that is not ``None`` (empty string when all are missing). Only
    ``image_generation_call`` items carry a result at this stage, taken from
    their ``result`` field; previews are limited to 500 characters.
    """
    item_type = str(item.get("type") or "")
    call_id = str(item.get("call_id") or item.get("id") or "")
    name = str(item.get("name") or item_type or "tool_call")

    # The former extra lookup of "arguments" for tool_search_call was dead
    # code: it could only run after "arguments" had already been read as None.
    arguments: Any = ""
    for field_name in ("arguments", "input", "action"):
        field_value = item.get(field_name)
        if field_value is not None:
            arguments = field_value
            break

    result = ""
    if item_type == "image_generation_call":
        result = compact_text(item.get("result"))
    return {
        "name": name,
        "arguments": compact_text(arguments),
        "output_preview": result[:500],
        "raw_output": result,
        "display_title": display_title_for_tool_item(item),
        "display_detail": display_detail_for_tool_item(item),
        "display_result": result[:500],
        "status": str(item.get("status") or "completed"),
        "call_id": call_id,
    }
def display_title_for_tool_item(item: dict[str, Any]) -> str:
    """Return the short title shown for a tool item.

    Four built-in call types map to fixed titles; every other item uses its
    ``name``, then its type, then the literal ``"tool_call"``.
    """
    item_type = str(item.get("type") or "")
    fixed_titles = {
        "web_search_call": "web_search",
        "image_generation_call": "image_generation",
        "tool_search_call": "tool_search",
        "local_shell_call": "local_shell",
    }
    fixed_title = fixed_titles.get(item_type)
    if fixed_title is not None:
        return fixed_title
    return str(item.get("name") or item_type or "tool_call")
def output_text_from_item(item: dict[str, Any]) -> str:
    """Return the flattened output text of a tool-output item.

    ``tool_search_output`` items keep their payload under ``tools``; every
    other output type (including ``mcp_tool_call_output``, which previously
    had a redundant branch of its own) keeps it under ``output``.
    """
    item_type = str(item.get("type") or "")
    payload_key = "tools" if item_type == "tool_search_output" else "output"
    return compact_text(item.get(payload_key))
def display_detail_for_tool_item(item: dict[str, Any]) -> str:
    """Return the detail line shown for a tool item.

    * ``shell_command`` / ``exec_command``: the ``command`` argument (a list
      is joined with spaces), when one is present.
    * ``write_stdin``: the ``stdin`` or ``input`` argument, else the raw
      arguments.
    * ``local_shell_call`` with a dict ``action``: its ``command``, else the
      whole action.
    * Otherwise the item type, then the name, then ``"tool call"``.

    String arguments are parsed as JSON first; unparseable strings are
    treated as having no structured arguments.
    """

    def render_command(command: Any) -> str | None:
        # None means "no command available"; the caller decides the fallback.
        if isinstance(command, list):
            return " ".join(str(part) for part in command)
        if command is not None:
            return compact_text(command)
        return None

    item_type = str(item.get("type") or "")
    name = str(item.get("name") or "")
    arguments = item.get("arguments")
    parsed_arguments = arguments
    if isinstance(arguments, str):
        try:
            parsed_arguments = json.loads(arguments)
        except json.JSONDecodeError:
            parsed_arguments = None

    if isinstance(parsed_arguments, dict):
        if name in {"shell_command", "exec_command"}:
            rendered = render_command(parsed_arguments.get("command"))
            if rendered is not None:
                return rendered
        if name == "write_stdin":
            return compact_text(parsed_arguments.get("stdin") or parsed_arguments.get("input") or arguments)

    if item_type == "local_shell_call":
        action = item.get("action")
        if isinstance(action, dict):
            rendered = render_command(action.get("command"))
            return compact_text(action) if rendered is None else rendered

    return item_type or name or "tool call"
def status_from_tool_output(output: str, fallback: str = "completed") -> str:
    """Infer a tool status from the first line of its output.

    When the first line starts with ``exit code:`` (case-insensitive), the
    first token after the colon decides: ``0`` means ``"completed"``, any
    other integer means ``"error"``. In every other case, including a missing
    or non-numeric code, *fallback* is returned.
    """
    lines = compact_text(output).splitlines()
    first_line = lines[0] if lines else ""
    if not first_line.lower().startswith("exit code:"):
        return fallback
    # split() on a blank remainder returns [], so guard before indexing; the
    # original indexed [0] unconditionally and raised IndexError on a bare
    # "exit code:" line.
    code_tokens = first_line.split(":", 1)[1].split(maxsplit=1)
    if not code_tokens:
        return fallback
    try:
        return "completed" if int(code_tokens[0]) == 0 else "error"
    except ValueError:
        return fallback
def reasoning_text_from_item(item: dict[str, Any]) -> str:
    """Return the readable text of a reasoning item, one fragment per line.

    Fragments come from ``summary`` first and ``content`` second. A list
    field contributes each non-``None`` entry; any other truthy field
    contributes itself. Empty fragments are dropped.
    """
    fragments: list[str] = []
    for field_name in ("summary", "content"):
        field_value = item.get(field_name)
        if isinstance(field_value, list):
            for entry in field_value:
                if entry is not None:
                    fragments.append(compact_text(entry))
        elif field_value:
            fragments.append(compact_text(field_value))
    return "\n".join(fragment for fragment in fragments if fragment)
def visible_text_from_compaction_item(item: dict[str, Any]) -> str:
    """Collect the displayable text of a compaction item, one piece per line.

    Strings found under ``summary``, ``content`` and ``text`` (searched in
    that order, descending through nested lists and through the ``text`` /
    ``summary`` / ``content`` keys of nested dicts) are gathered. When none
    are found, the ``encrypted_content`` field is searched the same way.
    """

    def visible_fragments(value: Any):
        # Yield every non-empty string reachable from value; values of any
        # other type contribute nothing.
        if isinstance(value, str):
            rendered = compact_text(value)
            if rendered:
                yield rendered
        elif isinstance(value, list):
            for entry in value:
                yield from visible_fragments(entry)
        elif isinstance(value, dict):
            for nested_key in ("text", "summary", "content"):
                if nested_key in value:
                    yield from visible_fragments(value.get(nested_key))

    fragments = [
        fragment
        for field_name in ("summary", "content", "text")
        for fragment in visible_fragments(item.get(field_name))
    ]
    if not fragments:
        fragments = list(visible_fragments(item.get("encrypted_content")))
    return "\n".join(fragment for fragment in fragments if fragment)
def visible_text_from_context_item(item: dict[str, Any]) -> str:
    """Return display text for an item, choosing a renderer by its type.

    Compaction, message and reasoning items use their dedicated renderers;
    any other item is flattened with ``compact_text``.
    """
    renderers = {
        "compaction": visible_text_from_compaction_item,
        "compaction_summary": visible_text_from_compaction_item,
        "message": read_message_text,
        "reasoning": reasoning_text_from_item,
    }
    item_type = str(item.get("type") or "")
    render = renderers.get(item_type, compact_text)
    return render(item)
def input_items_to_transcript(input_items: Any) -> list[dict[str, Any]]:
    """Convert request input items into a list of transcript records.

    A plain string becomes a single user record; anything that is neither a
    string nor a list yields no records. Within a list, non-dict entries are
    ignored and every item is deep-copied. System/developer/user messages
    each become their own record. Assistant messages and assistant-side
    items accumulate into one pending assistant record, which is emitted when
    a non-assistant item arrives or the input ends. A compaction item joins
    the pending assistant record if one is open, otherwise it becomes a
    ``compaction`` record. Any other item becomes a ``context`` record.
    """
    if isinstance(input_items, str):
        return [transcript_record("user", input_items, [provider_message("user", input_items)])]
    if not isinstance(input_items, list):
        return []

    compaction_types = {"compaction", "compaction_summary"}
    standalone_roles = {"system", "developer", "user"}
    records: list[dict[str, Any]] = []
    pending_items: list[dict[str, Any]] = []
    pending_texts: list[str] = []

    def flush_assistant() -> None:
        # Emit the accumulated assistant turn, if any, and reset the buffers.
        if not pending_items:
            return
        joined_text = "\n".join(part for part in pending_texts if part)
        records.append(transcript_record("assistant", joined_text, list(pending_items)))
        pending_items.clear()
        pending_texts.clear()

    for raw_item in input_items:
        if not isinstance(raw_item, dict):
            continue
        item = copy.deepcopy(raw_item)
        item_type = str(item.get("type") or "")
        role = str(item.get("role") or "")
        is_message = item_type == "message"

        if is_message and role in standalone_roles:
            flush_assistant()
            records.append(transcript_record(role, read_message_text(item), [item]))
        elif is_message and role == "assistant":
            message_text = read_message_text(item)
            pending_items.append(item)
            if message_text:
                pending_texts.append(message_text)
        elif item_type in compaction_types:
            if pending_items:
                pending_items.append(item)
            else:
                records.append(
                    transcript_record("compaction", visible_text_from_compaction_item(item), [item])
                )
        elif item_type in ASSISTANT_ITEM_TYPES:
            pending_items.append(item)
        else:
            flush_assistant()
            records.append(transcript_record("context", visible_text_from_context_item(item), [item]))

    flush_assistant()
    return records
def transcript_to_input_items(transcript: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Flatten transcript records back into provider input items.

    Non-dict records are skipped. Records whose role is one of
    system/developer/user/assistant are compiled from their provider items
    (or turned into a plain message when they have none). Records with any
    other role contribute deep copies of their dict provider items and
    nothing else.
    """
    known_roles = {"system", "developer", "user", "assistant"}
    compiled: list[dict[str, Any]] = []
    for record in transcript:
        if not isinstance(record, dict):
            continue
        stated_role = str(record.get("role") or "").strip()
        role = stated_role if stated_role in known_roles else ""
        text = compact_text(record.get("text"))
        provider_items = record.get("providerItems")
        has_provider_items = isinstance(provider_items, list) and bool(provider_items)

        if has_provider_items:
            if role:
                compiled.extend(compile_provider_items(role, text, provider_items))
            else:
                compiled.extend(
                    copy.deepcopy(entry) for entry in provider_items if isinstance(entry, dict)
                )
        elif role:
            compiled.append(provider_message(role, text))
    return compiled
def input_items_contain_tool_output(input_items: Any) -> bool:
    """Return True when *input_items* is a list holding any tool-output item."""
    if not isinstance(input_items, list):
        return False
    for entry in input_items:
        if isinstance(entry, dict) and str(entry.get("type") or "") in TOOL_OUTPUT_ITEM_TYPES:
            return True
    return False
def input_items_end_with_tool_output(input_items: Any) -> bool:
    """Return True when the last dict entry of the list is a tool output.

    Non-dict entries are ignored; a non-list input or a list without any dict
    entry yields False.
    """
    if not isinstance(input_items, list):
        return False
    last_dict_entry = next(
        (entry for entry in reversed(input_items) if isinstance(entry, dict)),
        None,
    )
    if last_dict_entry is None:
        return False
    return str(last_dict_entry.get("type") or "") in TOOL_OUTPUT_ITEM_TYPES
def drop_unpaired_tool_items(input_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Remove tool calls without an output and tool outputs without a call.

    Only call/output types listed in the pairing tables are checked; all
    other items pass through unchanged and relative order is preserved.
    Pairing is by call id (a call may fall back to its ``id`` field). Paired
    calls and outputs lacking an id are dropped. An output is kept only if a
    matching call was already emitted earlier in the list and still has an
    unanswered slot.
    """

    def type_name(entry: dict[str, Any]) -> str:
        return str(entry.get("type") or "")

    def call_reference(entry: dict[str, Any]) -> str:
        return str(entry.get("call_id") or entry.get("id") or "").strip()

    def output_reference(entry: dict[str, Any]) -> str:
        return str(entry.get("call_id") or "").strip()

    # Pass 1: how many outputs exist for each (call type, call id) pair. An
    # output type accepted by several call types counts towards each of them.
    outputs_available: dict[tuple[str, str], int] = {}
    for entry in input_items:
        candidate_call_types = REQUIRED_TOOL_CALL_TYPES_BY_OUTPUT_TYPE.get(type_name(entry))
        if not candidate_call_types:
            continue
        reference = output_reference(entry)
        if not reference:
            continue
        for call_type in candidate_call_types:
            pair = (call_type, reference)
            outputs_available[pair] = outputs_available.get(pair, 0) + 1

    # Pass 2: a call is keepable only while outputs remain for its pair.
    calls_to_keep: dict[tuple[str, str], int] = {}
    for entry in input_items:
        entry_type = type_name(entry)
        if entry_type not in REQUIRED_TOOL_OUTPUT_TYPES_BY_CALL_TYPE:
            continue
        reference = call_reference(entry)
        if not reference:
            continue
        pair = (entry_type, reference)
        already_kept = calls_to_keep.get(pair, 0)
        if outputs_available.get(pair, 0) > already_kept:
            calls_to_keep[pair] = already_kept + 1

    # Pass 3: emit in original order, matching each output to an emitted call.
    calls_emitted: dict[tuple[str, str], int] = {}
    outputs_used: dict[tuple[str, str], int] = {}
    kept_items: list[dict[str, Any]] = []
    for entry in input_items:
        entry_type = type_name(entry)

        if entry_type in REQUIRED_TOOL_OUTPUT_TYPES_BY_CALL_TYPE:
            reference = call_reference(entry)
            pair = (entry_type, reference)
            emitted_so_far = calls_emitted.get(pair, 0)
            if reference and calls_to_keep.get(pair, 0) > emitted_so_far:
                calls_emitted[pair] = emitted_so_far + 1
                kept_items.append(entry)
            continue

        candidate_call_types = REQUIRED_TOOL_CALL_TYPES_BY_OUTPUT_TYPE.get(entry_type)
        if not candidate_call_types:
            # Not a paired call or output: always kept.
            kept_items.append(entry)
            continue

        reference = output_reference(entry)
        if not reference:
            continue
        matched_pair = None
        for call_type in candidate_call_types:
            pair = (call_type, reference)
            if calls_emitted.get(pair, 0) > outputs_used.get(pair, 0):
                matched_pair = pair
                break
        if matched_pair is not None:
            outputs_used[matched_pair] = outputs_used.get(matched_pair, 0) + 1
            kept_items.append(entry)
    return kept_items
def compile_provider_items(role: str, text: str, provider_items: list[Any]) -> list[dict[str, Any]]:
    """Reconcile a record's provider items with its (possibly edited) text.

    Works on deep copies of the dict items. When the role's message text in
    the items already equals *text* (ignoring surrounding whitespace) the
    copies are returned as-is. When the items consist only of this role's
    messages, or there are no items, a single plain message is returned.
    Otherwise the first message of the role receives *text* and further
    messages of the role are removed; if the role has no message, one is
    placed in front when *text* is non-empty.
    """

    def is_role_message(entry: dict[str, Any]) -> bool:
        return entry.get("type") == "message" and entry.get("role") == role

    copies = [copy.deepcopy(entry) for entry in provider_items if isinstance(entry, dict)]
    if not copies:
        return [provider_message(role, text)]

    current_text = role_message_text_from_items(copies, role)
    if current_text.strip() == compact_text(text).strip():
        return copies

    role_message_count = sum(1 for entry in copies if is_role_message(entry))
    if role_message_count == len(copies):
        # Nothing but this role's messages: replace them with one message.
        return [provider_message(role, text)]

    if role_message_count:
        rebuilt: list[dict[str, Any]] = []
        text_written = False
        for entry in copies:
            if is_role_message(entry):
                if text_written:
                    continue
                entry["content"] = text
                text_written = True
            rebuilt.append(entry)
        return rebuilt

    return [provider_message(role, text), *copies] if text else copies
def session_id_for_request(body: dict[str, Any], headers: dict[str, str]) -> str:
    """Pick a session id for a request, trying sources in priority order.

    Order: explicit session/conversation headers, a session id found in the
    Codex metadata headers, the body's ``prompt_cache_key``, the first
    non-blank string value in ``client_metadata``, and finally a SHA-1 digest
    of the body's ``input``. All but the digest are passed through
    ``sanitize_id``.
    """
    explicit_headers = ("x-hash-context-session-id", "x-codex-conversation-id", "x-codex-session-id")
    for header_name in explicit_headers:
        header_value = headers.get(header_name)
        if header_value:
            return sanitize_id(header_value)

    from_metadata_headers = session_id_from_codex_metadata(headers)
    if from_metadata_headers:
        return sanitize_id(from_metadata_headers)

    cache_key = body.get("prompt_cache_key")
    if isinstance(cache_key, str) and cache_key.strip():
        return sanitize_id(cache_key)

    client_metadata = body.get("client_metadata")
    if isinstance(client_metadata, dict):
        first_text_value = next(
            (
                candidate
                for candidate in client_metadata.values()
                if isinstance(candidate, str) and candidate.strip()
            ),
            None,
        )
        if first_text_value is not None:
            return sanitize_id(first_text_value)

    serialized_input = json.dumps(body.get("input", []), sort_keys=True, default=str)
    digest = hashlib.sha1(serialized_input.encode("utf-8")).hexdigest()
    return f"session-{digest[:16]}"
def session_id_for_compact_request(body: dict[str, Any], headers: dict[str, str], active_session_id: str) -> str:
    """Pick a session id for a compaction request.

    Explicit session headers and Codex metadata headers win; otherwise a
    non-empty *active_session_id* is returned unchanged; otherwise the
    regular request lookup is used.
    """
    explicit_headers = ("x-hash-context-session-id", "x-codex-conversation-id", "x-codex-session-id")
    explicit_value = next(
        (headers.get(name) for name in explicit_headers if headers.get(name)),
        None,
    )
    if explicit_value:
        return sanitize_id(explicit_value)

    from_metadata_headers = session_id_from_codex_metadata(headers)
    if from_metadata_headers:
        return sanitize_id(from_metadata_headers)

    return active_session_id if active_session_id else session_id_for_request(body, headers)
def session_id_from_codex_metadata(headers: dict[str, str]) -> str:
    """Return a session id found in the JSON-valued Codex metadata headers.

    ``x-codex-turn-metadata`` is examined before ``x-codex-turn-state``.
    Missing, empty or non-JSON header values are skipped. Returns ``""`` when
    nothing is found.
    """
    for header_name in ("x-codex-turn-metadata", "x-codex-turn-state"):
        raw_value = headers.get(header_name)
        if raw_value:
            try:
                parsed = json.loads(raw_value)
            except (TypeError, json.JSONDecodeError):
                pass
            else:
                located = find_session_id_in_value(parsed)
                if located:
                    return located
    return ""
def find_session_id_in_value(value: Any) -> str:
    """Search a decoded JSON value for a session identifier.

    For a dict, a non-blank string under ``session_id``, ``conversation_id``
    or ``thread_id`` is returned (stripped) immediately. Otherwise nested
    values are searched depth-first: values whose key mentions "session",
    "conversation" or "thread" first, then the remaining values. Lists are
    searched in order. Returns ``""`` when nothing is found.

    Each nested value is searched exactly once per level. The previous
    two-loop form searched the prioritized values a second time, which gave
    the same result but took time exponential in the nesting depth.
    """
    if isinstance(value, list):
        for entry in value:
            found = find_session_id_in_value(entry)
            if found:
                return found
        return ""
    if not isinstance(value, dict):
        return ""

    for key in ("session_id", "conversation_id", "thread_id"):
        candidate = value.get(key)
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()

    prioritized: list[Any] = []
    remaining: list[Any] = []
    for key, nested_value in value.items():
        key_text = str(key).lower()
        if any(part in key_text for part in ("session", "conversation", "thread")):
            prioritized.append(nested_value)
        else:
            remaining.append(nested_value)

    for nested_value in (*prioritized, *remaining):
        found = find_session_id_in_value(nested_value)
        if found:
            return found
    return ""
def sanitize_id(value: str) -> str:
    """Return *value* reduced to a safe identifier of at most 120 characters.

    Surrounding whitespace is stripped; every character that is neither
    alphanumeric nor one of ``-``, ``_``, ``.`` is replaced by ``-``. When
    nothing remains, a random UUID hex string is returned instead.
    """
    allowed_punctuation = "-_."
    safe_chars = [
        ch if (ch.isalnum() or ch in allowed_punctuation) else "-"
        for ch in value.strip()
    ]
    truncated = "".join(safe_chars)[:120]
    if truncated:
        return truncated
    return uuid.uuid4().hex
def response_headers_for_upstream(headers: dict[str, str]) -> dict[str, str]:
    """Build the header dict sent upstream from the given headers.

    Connection-level headers and the internal context marker are dropped,
    known header names are rewritten to a fixed canonical spelling, and
    ``Content-Type`` / ``Accept`` are forced to JSON and event-stream. A
    default ``User-Agent`` is supplied when none was given.
    """
    dropped_names = {
        "host",
        "content-length",
        "content-encoding",
        "connection",
        "accept-encoding",
        "transfer-encoding",
        INTERNAL_CONTEXT_HEADER,
    }
    canonical_spelling = {
        "accept": "Accept",
        "authorization": "Authorization",
        "chatgpt-account-id": "ChatGPT-Account-ID",
        "content-type": "Content-Type",
        "cookie": "Cookie",
        "openai-beta": "OpenAI-Beta",
        "originator": "originator",
        "user-agent": "User-Agent",
        "x-client-request-id": "x-client-request-id",
        "x-codex-beta-features": "x-codex-beta-features",
        "x-codex-installation-id": "x-codex-installation-id",
        "x-codex-parent-thread-id": "x-codex-parent-thread-id",
        "x-codex-turn-metadata": "x-codex-turn-metadata",
        "x-codex-turn-state": "x-codex-turn-state",
        "x-codex-window-id": "x-codex-window-id",
        "x-openai-subagent": "x-openai-subagent",
        "x-responsesapi-include-timing-metrics": "x-responsesapi-include-timing-metrics",
    }
    forwarded: dict[str, str] = {
        canonical_spelling.get(name.lower(), name): value
        for name, value in headers.items()
        if name.lower() not in dropped_names
    }
    # Codex sends these through reqwest, but be explicit here because the
    # proxy reserializes the JSON body before forwarding it.
    forwarded["Content-Type"] = "application/json"
    forwarded["Accept"] = "text/event-stream"
    if "User-Agent" not in forwarded:
        forwarded["User-Agent"] = "codex_cli_rs"
    return forwarded
def _has_usable_auth(headers: dict[str, str]) -> bool:
authorization = str(headers.get("Authorization") or headers.get("authorization") or "").strip()
if authorization and authorization.lower() not in {"bearer not-needed", "bearer dummy", "bearer fake"}:
return True
return bool(
str(headers.get("ChatGPT-Account-ID") or headers.get("chatgpt-account-id") or "").strip()
or str(headers.get("Cookie") or headers.get("cookie") or "").strip()
)
def remember_upstream_auth(headers: dict[str, str]) -> None:
    """Replace the cached upstream auth headers when *headers* carry auth.

    The headers are first normalized for upstream use. Nothing happens when
    they hold no usable credentials. Otherwise the cache is replaced, under
    the auth lock, by the normalized headers minus content and connection
    level entries.
    """
    normalized = response_headers_for_upstream(headers)
    if not _has_usable_auth(normalized):
        return

    never_cached = {
        "accept",
        "content-type",
        "content-length",
        "host",
        "connection",
        "transfer-encoding",
        INTERNAL_CONTEXT_HEADER,
    }
    cacheable: dict[str, str] = {}
    for name, value in normalized.items():
        if name.lower() in never_cached:
            continue
        cacheable[name] = value

    with _UPSTREAM_AUTH_LOCK:
        _UPSTREAM_AUTH_HEADERS.clear()
        _UPSTREAM_AUTH_HEADERS.update(cacheable)
def preload_codex_subscription_auth() -> bool:
    """Seed the auth cache from the Codex auth file, if it is usable.

    Reads ``CODEX_AUTH_PATH`` as JSON and expects a ``tokens`` object with
    non-blank ``access_token`` and ``account_id``. Returns False when the
    file is missing, unreadable, not valid UTF-8/JSON, or lacks those fields;
    otherwise remembers the derived headers and returns whether the auth
    cache is non-empty afterwards.
    """
    try:
        raw = json.loads(CODEX_AUTH_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for a file that is not valid UTF-8; the
        # latter previously escaped this handler.
        return False
    tokens = raw.get("tokens") if isinstance(raw, dict) else None
    if not isinstance(tokens, dict):
        return False
    access_token = str(tokens.get("access_token") or "").strip()
    account_id = str(tokens.get("account_id") or "").strip()
    if not access_token or not account_id:
        return False
    headers = {
        "Authorization": f"Bearer {access_token}",
        "ChatGPT-Account-ID": account_id,
        "User-Agent": "codex_cli_rs",
        "originator": "codex-tui",
    }
    remember_upstream_auth(headers)
    return bool(cached_upstream_auth_headers())
def preload_openai_api_auth() -> bool:
    """Seed the auth cache from the ``OPENAI_API_KEY`` environment variable.

    Returns False when the variable is unset or blank; otherwise remembers a
    bearer header built from it and returns whether the auth cache is
    non-empty afterwards.
    """
    api_key = str(os.environ.get("OPENAI_API_KEY") or "").strip()
    if api_key:
        remember_upstream_auth(
            {
                "Authorization": f"Bearer {api_key}",
                "User-Agent": "codex_cli_rs",
            }
        )
        return bool(cached_upstream_auth_headers())
    return False
def cached_upstream_auth_headers() -> dict[str, str]:
    """Return a copy of the cached upstream auth headers, taken under the lock."""
    with _UPSTREAM_AUTH_LOCK:
        snapshot = dict(_UPSTREAM_AUTH_HEADERS)
    return snapshot
def apply_cached_upstream_auth(headers: dict[str, str]) -> dict[str, str]:
    """Return a copy of *headers*, filled in from the auth cache if needed.

    Cached headers (except ``Accept``) are merged over the copy only when a
    cache exists and the given headers carry no usable credentials of their
    own. The input dict is never modified.
    """
    merged = dict(headers)
    cached = cached_upstream_auth_headers()
    if cached and not _has_usable_auth(merged):
        merged.update(
            (name, value) for name, value in cached.items() if name.lower() != "accept"
        )
    return merged
def upstream_headers_for_request(headers: dict[str, str], *, accept: str) -> dict[str, str]:
    """Build the final upstream headers for a request.

    Any usable credentials in *headers* are remembered first; the headers are
    then normalized, completed from the auth cache, and given the requested
    ``Accept`` value and a JSON ``Content-Type``.
    """
    remember_upstream_auth(headers)
    normalized = response_headers_for_upstream(headers)
    outgoing = apply_cached_upstream_auth(normalized)
    outgoing.update({"Accept": accept, "Content-Type": "application/json"})
    return outgoing
def json_headers_for_upstream(headers: dict[str, str]) -> dict[str, str]:
    """Build upstream headers for a request that expects a JSON response."""
    json_accept = "application/json"
    return upstream_headers_for_request(headers, accept=json_accept)
def safe_headers_for_log(headers: dict[str, str]) -> dict[str, str]:
    """Return a copy of *headers* that is safe to write to the log.

    ``Host`` and ``Content-Length`` are omitted; the values of
    ``Authorization``, ``Cookie`` and ``Set-Cookie`` are replaced by
    ``<redacted>``. Name matching is case-insensitive.
    """
    secret_names = {"authorization", "cookie", "set-cookie"}
    omitted_names = {"host", "content-length"}
    loggable: dict[str, str] = {}
    for name, value in headers.items():
        lowered = name.lower()
        if lowered in omitted_names:
            continue
        loggable[name] = "<redacted>" if lowered in secret_names else value
    return loggable
def decode_request_body(raw_body: bytes, content_encoding: str | None) -> bytes:
encoding = (content_encoding or "").strip().lower()
if not encoding or encoding == "identity":
return raw_body
if encoding in {"gzip", "x-gzip"}:
return gzip.decompress(raw_body)
if encoding == "br":
import brotli
return brotli.decompress(raw_body)
if encoding in {"zstd", "zstandard"}:
import zstandard
with zstandard.ZstdDecompressor().stream_reader(io.BytesIO(raw_body)) as reader:
return reader.read()
raise ValueError(f"unsupported request content-encoding: {content_encoding}")
def parse_json_request_body(raw_body: bytes, content_encoding: str | None) -> Any:
    """Decompress a request body, decode it as UTF-8 and parse it as JSON."""
    body_text = decode_request_body(raw_body, content_encoding).decode("utf-8")
    return json.loads(body_text)
def upstream_base_url_for_request(headers: dict[str, str]) -> str:
    """Choose the upstream base URL for a request.

    The ChatGPT upstream is used when a ChatGPT account id is in effect for
    the request (either sent with it or supplied from the auth cache);
    otherwise the OpenAI upstream is used.

    The decision is delegated to ``has_effective_chatgpt_auth`` so both
    functions agree: a blank, whitespace-only account id no longer selects
    the ChatGPT upstream.
    """
    if has_effective_chatgpt_auth(headers):
        return CHATGPT_UPSTREAM_BASE_URL
    return OPENAI_UPSTREAM_BASE_URL
def has_effective_chatgpt_auth(headers: dict[str, str]) -> bool:
    """Return True when a non-blank ChatGPT account id is in effect.

    The headers are first completed from the auth cache; the account id is
    then looked up case-insensitively.
    """
    effective_headers = apply_cached_upstream_auth(headers)
    by_lowercase_name = {name.lower(): value for name, value in effective_headers.items()}
    account_id = str(by_lowercase_name.get("chatgpt-account-id") or "").strip()
    return bool(account_id)
def has_effective_upstream_auth(headers: dict[str, str]) -> bool:
    """Return True when the headers, completed from the cache, carry usable auth."""
    effective_headers = apply_cached_upstream_auth(headers)
    return _has_usable_auth(effective_headers)
def is_internal_context_request(headers: dict[str, str], body: dict[str, Any] | None = None) -> bool:
    """Return True when a request is marked as internal context traffic.

    The marker is either the internal context header carrying the expected
    value, or a ``hash_context_internal`` entry with that value inside the
    body's ``metadata`` or ``client_metadata`` dict.
    """
    header_marker = str(headers.get(INTERNAL_CONTEXT_HEADER) or "").strip()
    if header_marker == INTERNAL_CONTEXT_VALUE:
        return True
    if not isinstance(body, dict):
        return False
    for metadata_key in ("metadata", "client_metadata"):
        metadata = body.get(metadata_key)
        if not isinstance(metadata, dict):
            continue
        body_marker = str(metadata.get("hash_context_internal") or "").strip()
        if body_marker == INTERNAL_CONTEXT_VALUE:
            return True
    return False
def normalize_model_record(item: Any) -> dict[str, Any] | None:
if isinstance(item, str):
model_id = item.strip()
if not model_id:
return None
return {"id": model_id, "object": "model", "owned_by": "codex"}
if not isinstance(item, dict):
return None
model_id = str(item.get("id") or item.get("name") or item.get("slug") or "").strip()
if not model_id:
return None
normalized = copy.deepcopy(item)
normalized["id"] = model_id.removeprefix("models/")
normalized.setdefault("object", "model")
normalized.setdefault("owned_by", str(item.get("owned_by") or item.get("provider") or "codex"))
return normalized
def normalize_models_payload(payload: Any) -> dict[str, Any] | None:
    """Normalize a models listing into ``{"object": "list", "data": [...]}``.

    The model list is the payload itself when it is a list, or the first of
    its ``data``, ``models``, ``items`` or ``model_slugs`` entries that is a
    list when it is a dict. Entries are normalized individually; unusable
    entries and repeated ids are dropped, keeping first occurrences in order.
    Returns ``None`` when no model list can be located.
    """
    raw_models: Any = None
    if isinstance(payload, dict):
        for container_key in ("data", "models", "items", "model_slugs"):
            candidate = payload.get(container_key)
            if isinstance(candidate, list):
                raw_models = candidate
                break
    elif isinstance(payload, list):
        raw_models = payload
    if not isinstance(raw_models, list):
        return None

    # Insertion-ordered dict doubles as the "seen ids" set.
    records_by_id: dict[str, dict[str, Any]] = {}
    for raw_model in raw_models:
        record = normalize_model_record(raw_model)
        if record is None:
            continue
        model_id = str(record.get("id") or "").strip()
        if model_id and model_id not in records_by_id:
            records_by_id[model_id] = record
    return {"object": "list", "data": list(records_by_id.values())}
def normalize_models_response_body(response_body: bytes) -> bytes:
    """Return the response body rewritten as a normalized models list.

    The body is returned unchanged when it is not valid UTF-8 JSON or when no
    model list can be located in it.
    """
    try:
        decoded_payload = json.loads(response_body.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        return response_body
    normalized = normalize_models_payload(decoded_payload)
    if normalized is not None:
        return json.dumps(normalized, ensure_ascii=False).encode("utf-8")
    return response_body
def fallback_models_response_body() -> bytes:
configured = os.environ.get("HASH_CONTEXT_FALLBACK_MODELS", "")
model_ids = [
item.strip()