-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpatchX.py
More file actions
109 lines (101 loc) · 3.95 KB
/
patchX.py
File metadata and controls
109 lines (101 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import json
import re
from collections import defaultdict
from difflib import SequenceMatcher
def similar(a, b):
    """Return the difflib similarity ratio of ``a`` and ``b``.

    The result is the float produced by ``SequenceMatcher.ratio()``:
    1.0 for identical sequences, 0.0 when nothing matches.
    """
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
def parse_script_blocks(script_lines):
    """Group script lines into blocks, each one closed by a ``say`` line.

    A line counts as a ``say`` line when, after stripping surrounding
    whitespace, it starts with ``'say '``. That line is the last element of
    its block. Lines left over after the final ``say`` line form one
    trailing block.

    Args:
        script_lines: Iterable of script lines (strings).

    Returns:
        A list of blocks, each block being a list of the original lines.
    """
    grouped = []
    pending = []
    for text in script_lines:
        # Every line belongs to the block currently being collected.
        pending.append(text)
        if text.strip().startswith('say '):
            # The say line terminates the block; start a fresh one.
            grouped.append(pending)
            pending = []
    if pending:
        grouped.append(pending)
    return grouped
def extract_say_text(say_line):
    """Return the content of the last double-quoted string in ``say_line``.

    Every double-quoted argument is scanned (backslash escapes inside the
    quotes are skipped over, not decoded); the final one is treated as the
    original text / dialogue line. Returns ``""`` when the line contains no
    quoted string.
    """
    last_text = ""
    for quoted in re.finditer(r'"((?:[^"\\]|\\.)*?)"', say_line):
        # Keep overwriting so that only the last quoted argument survives.
        last_text = quoted.group(1)
    return last_text
def preprocess_json(raw_json):
    """Build the ``forbidden`` set and the ``allow_map`` from the JSON table.

    Each value of ``raw_json`` is expected to be a dict holding a ``"raw"``
    text plus any number of ``name -> replacement`` pairs (presumably sprite
    file-name patterns and the value substituted into them - confirm against
    the data file). Pairs are grouped by their raw text across all entries.

    Args:
        raw_json: Mapping of entry id -> entry dict.

    Returns:
        A ``(forbidden, allow_map)`` tuple:

        * ``forbidden`` - set of raw texts for which some name is mapped to
          more than one distinct replacement; these need manual handling.
        * ``allow_map`` - dict mapping every other raw text to its list of
          unique ``(name, replacement)`` tuples, in first-seen order.
    """
    pairs_by_raw = defaultdict(list)  # raw text => [(fname, rep), ...]
    for entry in raw_json.values():
        # Entries without a raw text cannot be matched against the script;
        # skip them instead of raising KeyError.
        if "raw" not in entry:
            continue
        raw_text = entry["raw"]
        for fname, rep in entry.items():
            if fname != "raw":
                pairs_by_raw[raw_text].append((fname, rep))

    forbidden = set()
    allow_map = {}
    for raw_text, pairs in pairs_by_raw.items():
        reps_by_name = defaultdict(set)
        for fname, rep in pairs:
            reps_by_name[fname].add(rep)
        if any(len(reps) > 1 for reps in reps_by_name.values()):
            # The same name maps to different replacements for one raw text.
            forbidden.add(raw_text)
        else:
            # Drop exact duplicates (several entries sharing one raw text)
            # so each replacement is attempted only once per block.
            allow_map[raw_text] = list(dict.fromkeys(pairs))
    return forbidden, allow_map
def apply_replacements_by_raw(block, raw_txt, allow_map, forbidden, log_list):
    """Apply the replacements registered for ``raw_txt`` to one script block.

    For every ``(fname, rep)`` pair in ``allow_map[raw_txt]`` each literal
    occurrence of ``fname`` in the block is replaced by ``fname`` with its
    ``"X"`` characters substituted by ``rep``.

    Args:
        block: List of script lines forming the block.
        raw_txt: Raw text used as the lookup key.
        allow_map: Mapping raw text -> list of ``(fname, rep)`` tuples.
        forbidden: Collection of raw texts that must be handled manually.
        log_list: List that receives log messages (mutated in place).

    Returns:
        ``block`` itself when ``raw_txt`` is forbidden or unknown; otherwise
        a new list of lines with the replacements applied.
    """
    if raw_txt in forbidden:
        log_list.append(f"【有raw和立绘文件名重复但值不同,需手工处理】: {raw_txt}")
        return block
    if raw_txt not in allow_map:
        return block

    block_str = "\n".join(block)
    for fname, rep in allow_map[raw_txt]:
        new_fname = fname.replace("X", rep)
        if fname in block_str:
            # Plain string replacement: the match is literal, and unlike
            # re.sub() the replacement text is not parsed as a template, so
            # backslashes in file names are preserved verbatim.
            block_str = block_str.replace(fname, new_fname)
        else:
            log_list.append(f"【未匹配到:{fname}】 在区块/原文: {raw_txt[:20]} ...")
    return block_str.split('\n')
def process_script(script_text, json_text, *, min_similarity=0.7):
    """Patch a script using a JSON replacement table.

    The script is split into blocks that each end at a ``say`` line. The
    dialogue text of that line is fuzzy-matched against the ``"raw"`` text of
    every JSON entry; when the best match scores above ``min_similarity``
    the replacements registered for that raw text are applied to the block.

    Args:
        script_text: Full script contents.
        json_text: JSON document mapping entry ids to entry dicts.
        min_similarity: Similarity ratio the best match must exceed for the
            block to be patched. Defaults to 0.7.

    Returns:
        ``(patched_text, log_list)`` - the processed lines joined with
        ``"\\n"`` and the list of log messages collected while patching.
    """
    repl_json = json.loads(json_text)
    forbidden, allow_map = preprocess_json(repl_json)
    blocks = parse_script_blocks(script_text.splitlines())

    # Candidate raw texts in JSON order, collected once instead of once per
    # block. Empty raws are dropped: they can never be applied.
    candidates = [
        entry.get("raw", "")
        for entry in repl_json.values()
        if entry.get("raw", "")
    ]

    log_list = []
    result_lines = []
    for block in blocks:
        # Find the say line (searching from the end) and extract its text.
        say_txt = ""
        for line in reversed(block):
            if line.strip().startswith('say '):
                say_txt = extract_say_text(line)
                break

        # Without dialogue text no non-empty raw can score above zero, so the
        # fuzzy scan is skipped and the block is passed through unchanged.
        if say_txt:
            # Fuzzy match: keep the first candidate with the highest ratio.
            best_sim = 0
            matched_raw = ""
            for raw in candidates:
                sim = similar(say_txt, raw)
                if sim > best_sim:
                    best_sim = sim
                    matched_raw = raw
            if best_sim > min_similarity and matched_raw:
                block = apply_replacements_by_raw(
                    block, matched_raw, allow_map, forbidden, log_list
                )
        result_lines.extend(block)
    return "\n".join(result_lines), log_list
if __name__ == "__main__":
    # Load the script to patch and the JSON replacement table.
    with open("AC3\\data01120.arc\\ac3_08harem.txt", encoding="utf-8") as script_file:
        script = script_file.read()
    with open("AC3\\后宫线.json", encoding="utf-8") as json_file:
        jsondata = json_file.read()

    new_script, logs = process_script(script, jsondata)

    # Write the patched script, then the log with one message per line.
    with open("AC3\\data01120.arc\\ac3_08harem_patched.txt", "w", encoding="utf-8") as patched_file:
        patched_file.write(new_script)
    with open("patch_log.txt", "w", encoding="utf-8") as log_file:
        log_file.writelines(message + '\n' for message in logs)