|
| 1 | +# frozen_string_literal: true |
| 2 | + |
| 3 | +require 'json' |
| 4 | +require 'net/http' |
| 5 | +require 'openssl' |
| 6 | +require 'uri' |
| 7 | + |
# Repairs a translated HTML fragment: sends the source HTML and the current
# target-language HTML to the OpenAI chat-completions API, asks for a cleaned
# (style-free) version in the target language, and hands the model's HTML to
# HtmlTranslationNormalizer.
#
# Every failure raised by this service itself (missing API key, blank locale,
# transport failure, non-2xx response, malformed model output) is a
# GptTranslationRepairService::Error. When the error wraps a lower-level
# exception, that exception is available through Exception#cause.
class GptTranslationRepairService
  class Error < StandardError; end

  ENDPOINT = 'https://api.openai.com/v1/chat/completions'
  MODEL = 'gpt-4o-mini'

  # Seconds to wait for the TCP/TLS connection and for the response body.
  OPEN_TIMEOUT = 10
  READ_TIMEOUT = 120

  # Transport-level exceptions that are re-raised as Error by #chat.
  NETWORK_ERRORS = [
    Timeout::Error,         # includes Net::OpenTimeout / Net::ReadTimeout
    SocketError,
    SystemCallError,        # Errno::ECONNREFUSED, Errno::ECONNRESET, ...
    IOError,                # includes EOFError
    OpenSSL::SSL::SSLError,
    Net::ProtocolError,
    Net::HTTPBadResponse
  ].freeze

  # @param api_key [String] OpenAI API key; defaults to Settings.gpt_key.
  def initialize(api_key: Settings.gpt_key)
    @api_key = api_key
  end

  # Asks the model for repaired HTML in the target language and returns the
  # result of HtmlTranslationNormalizer.call on it.
  #
  # @param base_html [#to_s] source-of-truth HTML
  # @param base_locale [#to_s] locale of base_html (must not be blank)
  # @param current_target_html [#to_s] existing translation, possibly wrong or styled
  # @param target_locale [#to_s] locale to produce (must not be blank)
  # @return whatever HtmlTranslationNormalizer.call returns for the model's HTML
  # @raise [Error] on missing key, blank locale, transport/HTTP failure, or
  #   a model response that is not a JSON object with a string "html" value
  def call(base_html:, base_locale:, current_target_html:, target_locale:)
    ensure_api_key!

    target_locale = normalize_locale!(target_locale, label: 'target')
    base_locale = normalize_locale!(base_locale, label: 'base')

    prompt = build_prompt(
      base_html: base_html.to_s,
      base_locale:,
      current_target_html: current_target_html.to_s,
      target_locale:
    )
    parsed = parse_json(chat(prompt))

    HtmlTranslationNormalizer.call(extract_html(parsed))
  end

  private

  def ensure_api_key!
    raise Error, 'Missing Settings.gpt_key' if @api_key.blank?
  end

  # Coerces the locale to a String and rejects blank values.
  def normalize_locale!(locale, label:)
    value = locale.to_s
    raise Error, "Invalid #{label} locale: #{value}" if value.blank?

    value
  end

  # Builds the user message. The HTML inputs are interpolated verbatim.
  def build_prompt(base_html:, base_locale:, current_target_html:, target_locale:)
    <<~PROMPT
      You are a professional translator and HTML cleaner.

      Task:
      - Target language: #{target_locale.upcase}
      - You will receive:
        (A) Source HTML in #{base_locale.upcase} (source of truth)
        (B) Current #{target_locale.upcase} HTML (may be wrong language, may contain inline styles/classes)
      - Output MUST be a JSON object with exactly one key: "html".
      - Value of "html" MUST be valid HTML and MUST contain ZERO inline styles and ZERO styling attributes:
        remove all style="", class="", id="" attributes.
      - Preserve structure and tags, but remove redundant wrapper spans if needed.
      - If (B) is already in the correct target language, keep its meaning but normalize/clean the HTML.
      - If (B) is NOT in the correct target language, translate from (A) to #{target_locale.upcase} and output cleaned HTML.
      - Do not output markdown. Do not include explanations.

      Source HTML (A):
      #{base_html}

      Current target HTML (B):
      #{current_target_html}
    PROMPT
  end

  # Performs the HTTP round trip and returns the assistant message content.
  # Transport errors and an unparsable response envelope are raised as Error.
  def chat(prompt)
    uri = URI.parse(ENDPOINT)
    response = build_http(uri).request(build_request(uri, prompt))
    parse_chat_response(response)
  rescue JSON::ParserError => e
    raise Error, "OpenAI JSON parse error: #{e.message}"
  rescue *NETWORK_ERRORS => e
    raise Error, "OpenAI request failed: #{e.class}: #{e.message}"
  end

  def build_http(uri)
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    # The request carries the API key, so the peer certificate MUST be verified;
    # VERIFY_NONE would let any on-path attacker read the bearer token.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    # A fresh store loaded from the system default CA paths. It has no
    # CRL-check flags set, so verification does not depend on CRLs being
    # available. If verification still fails on a host, fix its CA bundle
    # (e.g. SSL_CERT_FILE) instead of disabling verification.
    http.cert_store = OpenSSL::X509::Store.new.tap(&:set_default_paths)
    http.open_timeout = OPEN_TIMEOUT
    http.read_timeout = READ_TIMEOUT
    http
  end

  def build_request(uri, prompt)
    req = Net::HTTP::Post.new(uri.request_uri)
    req['Authorization'] = "Bearer #{@api_key}"
    req['Content-Type'] = 'application/json'
    req.body = JSON.dump(build_payload(prompt))
    req
  end

  def build_payload(prompt)
    {
      model: MODEL,
      temperature: 0.2,
      # JSON mode: asks the API to emit a single JSON object. The system
      # message mentions "JSON", which this mode requires.
      response_format: { type: 'json_object' },
      messages: [
        { role: 'system', content: 'Return JSON only.' },
        { role: 'user', content: prompt }
      ]
    }
  end

  # Validates the HTTP status and digs the first choice's message content.
  def parse_chat_response(res)
    raise Error, "OpenAI HTTP #{res.code}: #{res.body.to_s[0..300]}" unless res.is_a?(Net::HTTPSuccess)

    body = JSON.parse(res.body)
    content = body.dig('choices', 0, 'message', 'content')
    raise Error, "OpenAI response missing content: #{res.body.to_s[0..300]}" if content.blank?

    content
  end

  # Parses the model output as JSON. If the text as a whole is not JSON
  # (e.g. wrapped in a markdown fence), retries on the outermost {...} span.
  # Raises Error when neither attempt parses.
  def parse_json(text)
    raw = text.to_s
    candidates = [raw, raw[/\{[\s\S]*\}/]].compact.uniq

    candidates.each do |candidate|
      return JSON.parse(candidate)
    rescue JSON::ParserError
      next
    end

    raise Error, "Response is not valid JSON: #{raw[0..300]}"
  end

  # Pulls the "html" string out of the parsed model output.
  # Raises Error when the output is not an object or "html" is not a String.
  def extract_html(parsed)
    html = parsed['html'] if parsed.is_a?(Hash)
    return html if html.is_a?(String)

    raise Error, "Response JSON has no \"html\" string: #{parsed.inspect[0..300]}"
  end
end
0 commit comments