Skip to content

Commit 3f530ad

Browse files
intel352 and claude committed
feat: interactive PTY uses vt10x virtual terminal for screen reading
Replace raw PTY byte reading with the vt10x virtual terminal emulator, which properly handles the cursor positioning and escape sequences that rich TUIs (Claude Code, Copilot, etc.) use for rendering.

Key changes:
- startSession creates a vt10x.Terminal plus a background reader goroutine
- waitForPrompt polls the virtual screen (not raw bytes)
- readResponse diffs screen snapshots to extract response text
- extractResponse filters UI chrome (box-drawing, status bar, etc.)
- Trust prompts are handled automatically (by pressing Enter)
- The message is sent character by character, followed by CR (some TUIs need this)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent cacd532 commit 3f530ad

1 file changed

Lines changed: 153 additions & 58 deletions

File tree

genkit/pty_provider.go

Lines changed: 153 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"bytes"
66
"context"
77
"fmt"
8-
"io"
98
"os"
109
"os/exec"
1110
"strings"
@@ -14,6 +13,7 @@ import (
1413

1514
"github.com/GoCodeAlone/workflow-plugin-agent/provider"
1615
"github.com/creack/pty"
16+
"github.com/hinshun/vt10x"
1717
)
1818

1919
// CLIAdapter defines per-tool behavior for driving a CLI via PTY.
@@ -51,11 +51,12 @@ type ptyProvider struct {
5151
timeout time.Duration
5252

5353
// PTY session state (kept alive for multi-turn streaming)
54-
mu sync.Mutex // guards ptmx, cmd, output field pointers
55-
sessionMu sync.Mutex // serializes full turn lifecycle (prompt→send→read)
56-
ptmx *os.File // PTY master — nil when no active session
57-
cmd *exec.Cmd // running CLI process
58-
output bytes.Buffer
54+
mu sync.Mutex // guards ptmx, cmd, vt field pointers
55+
sessionMu sync.Mutex // serializes full turn lifecycle (prompt→send→read)
56+
ptmx *os.File // PTY master — nil when no active session
57+
cmd *exec.Cmd // running CLI process
58+
vt vt10x.Terminal // virtual terminal screen buffer
59+
output bytes.Buffer // raw output accumulator (for fallback parsing)
5960
}
6061

6162
// Name implements provider.Provider.
@@ -153,43 +154,66 @@ func (p *ptyProvider) streamInteractive(ctx context.Context, msg string, ch chan
153154
return fmt.Errorf("waiting for prompt: %w", err)
154155
}
155156

156-
// Send the message.
157-
if _, err := fmt.Fprintf(ptmx, "%s\n", msg); err != nil {
158-
return fmt.Errorf("writing to PTY: %w", err)
157+
// Send the message character by character (some TUIs need this).
158+
for _, ch := range msg {
159+
if _, err := ptmx.Write([]byte(string(ch))); err != nil {
160+
return fmt.Errorf("writing to PTY: %w", err)
161+
}
162+
time.Sleep(10 * time.Millisecond)
163+
}
164+
// Submit with carriage return (Enter key in terminal).
165+
if _, err := ptmx.Write([]byte{'\r'}); err != nil {
166+
return fmt.Errorf("sending enter: %w", err)
159167
}
160-
161-
// Reset output accumulator for this turn.
162-
p.mu.Lock()
163-
p.output.Reset()
164-
p.mu.Unlock()
165168

166169
// Read output and emit stream events until response ends.
167170
return p.readResponse(ctx, ptmx, deadline, ch)
168171
}
169172

170-
// startSession forks the CLI process under a PTY. Caller must hold p.mu.
173+
// startSession forks the CLI process under a PTY with a virtual terminal.
174+
// Caller must hold p.mu.
171175
func (p *ptyProvider) startSession() error {
172176
cmd := exec.Command(p.binPath)
177+
cmd.Env = append(os.Environ(), "TERM=xterm-256color")
173178
if p.workDir != "" {
174179
cmd.Dir = p.workDir
175180
}
176181

177-
ptmx, err := pty.StartWithSize(cmd, &pty.Winsize{Rows: 40, Cols: 120})
182+
ptmx, err := pty.StartWithSize(cmd, &pty.Winsize{Rows: 30, Cols: 100})
178183
if err != nil {
179184
return fmt.Errorf("pty.StartWithSize: %w", err)
180185
}
181186

187+
// Virtual terminal processes escape sequences and maintains screen buffer.
188+
vt := vt10x.New(vt10x.WithSize(100, 30))
189+
190+
// Background goroutine feeds PTY output to the virtual terminal.
191+
go func() {
192+
buf := make([]byte, 4096)
193+
for {
194+
n, readErr := ptmx.Read(buf)
195+
if n > 0 {
196+
vt.Write(buf[:n])
197+
p.mu.Lock()
198+
p.output.Write(buf[:n])
199+
p.mu.Unlock()
200+
}
201+
if readErr != nil {
202+
return
203+
}
204+
}
205+
}()
206+
182207
p.cmd = cmd
183208
p.ptmx = ptmx
209+
p.vt = vt
184210
p.output.Reset()
185211
return nil
186212
}
187213

188-
// waitForPrompt reads PTY output until the adapter's DetectPrompt returns true.
214+
// waitForPrompt polls the virtual terminal screen until the adapter's DetectPrompt returns true.
215+
// Also auto-handles trust prompts by pressing Enter.
189216
func (p *ptyProvider) waitForPrompt(ctx context.Context, ptmx *os.File, deadline time.Time) error {
190-
buf := make([]byte, 4096)
191-
var accumulated strings.Builder
192-
193217
for {
194218
if ctx.Err() != nil {
195219
return ctx.Err()
@@ -198,67 +222,138 @@ func (p *ptyProvider) waitForPrompt(ctx context.Context, ptmx *os.File, deadline
198222
return fmt.Errorf("timeout waiting for CLI prompt")
199223
}
200224

201-
_ = ptmx.SetReadDeadline(time.Now().Add(100 * time.Millisecond))
202-
n, err := ptmx.Read(buf)
203-
if n > 0 {
204-
chunk := string(buf[:n])
205-
accumulated.WriteString(chunk)
206-
if p.adapter.DetectPrompt(accumulated.String()) {
207-
return nil
208-
}
225+
screen := p.vt.String()
226+
227+
// Auto-handle trust prompts (e.g., "trust this folder" in Claude Code)
228+
if strings.Contains(screen, "trust") && strings.Contains(screen, "Yes") {
229+
ptmx.Write([]byte{'\r'})
230+
time.Sleep(1 * time.Second)
231+
continue
209232
}
210-
if err != nil && !isTimeout(err) {
211-
return fmt.Errorf("reading PTY: %w", err)
233+
234+
if p.adapter.DetectPrompt(screen) {
235+
return nil
212236
}
237+
238+
time.Sleep(300 * time.Millisecond)
213239
}
214240
}
215241

216-
// readResponse reads PTY output after sending a message, emitting stream events.
242+
// readResponse polls the virtual terminal screen after sending a message.
243+
// Emits text diffs as stream events until the adapter detects the response is done
244+
// (typically when a new prompt appears after the response text).
217245
func (p *ptyProvider) readResponse(ctx context.Context, ptmx *os.File, deadline time.Time, ch chan<- provider.StreamEvent) error {
218-
buf := make([]byte, 4096)
246+
// Snapshot the screen before the response to diff against.
247+
lastScreen := p.vt.String()
248+
var lastEmitted string
219249

220250
for {
221251
if ctx.Err() != nil {
222252
return ctx.Err()
223253
}
224254
if time.Now().After(deadline) {
225-
return fmt.Errorf("timeout waiting for response")
255+
// On timeout, emit whatever we have and return done.
256+
ch <- provider.StreamEvent{Type: "done"}
257+
return nil
226258
}
227259

228-
_ = ptmx.SetReadDeadline(time.Now().Add(100 * time.Millisecond))
229-
n, err := ptmx.Read(buf)
230-
if n > 0 {
231-
chunk := string(buf[:n])
260+
screen := p.vt.String()
261+
if screen != lastScreen {
262+
lastScreen = screen
232263

233-
p.mu.Lock()
234-
p.output.WriteString(chunk)
235-
accumulated := p.output.String()
236-
p.mu.Unlock()
264+
// Extract response text from screen (content between user message and next prompt).
265+
responseText := p.extractResponse(screen)
237266

238-
// Emit text chunk (tool approval prompts pass through as text).
239-
ch <- provider.StreamEvent{Type: "text", Text: chunk}
267+
// Only emit new text that hasn't been emitted yet.
268+
if responseText != lastEmitted && len(responseText) > len(lastEmitted) {
269+
newText := responseText[len(lastEmitted):]
270+
if newText != "" {
271+
ch <- provider.StreamEvent{Type: "text", Text: newText}
272+
lastEmitted = responseText
273+
}
274+
}
240275

241-
if p.adapter.DetectResponseEnd(accumulated) {
276+
// Check if the response is complete (new prompt appeared).
277+
if p.adapter.DetectResponseEnd(screen) {
242278
ch <- provider.StreamEvent{Type: "done"}
243279
return nil
244280
}
245281
}
246-
if err != nil && !isTimeout(err) {
247-
if err == io.EOF {
248-
// Process exited — reap it to avoid zombie, then clean up session.
249-
p.mu.Lock()
250-
cmd := p.cmd
251-
p.ptmx = nil
252-
p.cmd = nil
253-
p.mu.Unlock()
254-
if cmd != nil {
255-
_ = cmd.Wait()
256-
}
257-
ch <- provider.StreamEvent{Type: "done"}
258-
return nil
282+
time.Sleep(200 * time.Millisecond)
283+
}
284+
}
285+
286+
// extractResponse extracts the assistant's response text from the virtual terminal screen.
287+
// It looks for text between the user's message and the next prompt indicator.
288+
func (p *ptyProvider) extractResponse(screen string) string {
289+
lines := strings.Split(screen, "\n")
290+
var response []string
291+
inResponse := false
292+
293+
for _, line := range lines {
294+
trimmed := strings.TrimSpace(line)
295+
296+
// Skip empty lines and UI chrome
297+
if trimmed == "" {
298+
if inResponse {
299+
response = append(response, "")
259300
}
260-
return fmt.Errorf("reading PTY response: %w", err)
301+
continue
302+
}
303+
304+
// Skip horizontal rules (box-drawing chars)
305+
if len(trimmed) > 5 && strings.Count(trimmed, "─") > len(trimmed)/2 {
306+
if inResponse {
307+
// A horizontal rule after response content likely means end of response area
308+
continue
309+
}
310+
continue
311+
}
312+
313+
// Skip box-drawing and UI elements
314+
if strings.HasPrefix(trimmed, "╭") || strings.HasPrefix(trimmed, "│") ||
315+
strings.HasPrefix(trimmed, "╰") || strings.HasPrefix(trimmed, "?") ||
316+
strings.Contains(trimmed, "Update available") ||
317+
strings.Contains(trimmed, "shortcuts") ||
318+
strings.Contains(trimmed, "/effort") ||
319+
strings.Contains(trimmed, "MCP server") {
320+
continue
321+
}
322+
323+
// The greyed ❯ marks a prior user input — response starts after this line
324+
if strings.Contains(line, "❯") && !inResponse {
325+
inResponse = true
326+
continue
327+
}
328+
329+
// A new bright ❯ with empty or different content = new prompt = end
330+
if inResponse && strings.Contains(line, "❯") {
331+
break
261332
}
333+
334+
if inResponse {
335+
response = append(response, trimmed)
336+
}
337+
}
338+
339+
// Trim trailing empty lines
340+
for len(response) > 0 && response[len(response)-1] == "" {
341+
response = response[:len(response)-1]
342+
}
343+
344+
return strings.Join(response, "\n")
345+
}
346+
347+
// handleSessionEnd cleans up when the CLI process exits.
348+
func (p *ptyProvider) handleSessionEnd() {
349+
p.mu.Lock()
350+
defer p.mu.Unlock()
351+
cmd := p.cmd
352+
p.ptmx = nil
353+
p.cmd = nil
354+
p.vt = nil
355+
if cmd != nil {
356+
_ = cmd.Wait()
262357
}
263358
}
264359

0 commit comments

Comments
 (0)