Skip to content

Commit ea97a7f

Browse files
intel352claude
andcommitted
feat: Copilot interactive PTY + screen-diff extraction
- Enable SupportsInteractivePTY for the Copilot CLI adapter
- Implement screen-diff based response extraction (extractResponseDiff): captures a pre-message screen snapshot and extracts only new content
- Add thinking-guard: skip extraction while ◉/◎/Thinking/Queued indicators are visible
- Filter status bar, mode line, and loading indicators from extracted text
- Reorder Stream() resolution: JSON streaming → interactive PTY → fallback (JSON streaming is more reliable for Claude Code; interactive PTY is for Copilot, which has no JSON streaming support)
- Prevent fallback to non-interactive when an active PTY session exists (avoids capturing TUI garbage as response text)
- Add debug logging for prompt/response timeout diagnostics

Tests: 3 integration tests (Claude multi-turn, Claude tool-use, Copilot); 5 unit tests (screen extraction, diff extraction, screenLines)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d6572a0 commit ea97a7f

File tree

3 files changed

+441
-14
lines changed

3 files changed

+441
-14
lines changed

genkit/pty_adapters.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,12 @@ func (CopilotCLIAdapter) DetectPrompt(output string) bool {
149149
}
150150

151151
func (CopilotCLIAdapter) DetectResponseEnd(output string) bool {
152+
// Don't fire while Copilot is still thinking/loading.
153+
if strings.Contains(output, "Thinking") || strings.Contains(output, "Queued") ||
154+
strings.Contains(output, "◉") || strings.Contains(output, "◎") {
155+
return false
156+
}
157+
152158
// Response is complete when we see a ● response line AFTER the user's ❯ input line,
153159
// followed by a new ❯ prompt with "Type @". We look for the pattern:
154160
// ❯ <user message>
@@ -306,8 +312,8 @@ func (CursorCLIAdapter) ParseResponse(raw string) string {
306312
func (CopilotCLIAdapter) StreamingArgs(_ string) []string { return nil }
307313
func (CopilotCLIAdapter) ParseStreamEvent(_ string) (string, bool) { return "", false }
308314

309-
// SupportsInteractivePTY returns false — Copilot should use non-interactive exec.
310-
func (CopilotCLIAdapter) SupportsInteractivePTY() bool { return false }
315+
// SupportsInteractivePTY returns true — Copilot uses vt10x with screen-diff extraction.
316+
func (CopilotCLIAdapter) SupportsInteractivePTY() bool { return true }
311317

312318
func (CodexCLIAdapter) SupportsInteractivePTY() bool { return false }
313319
func (CodexCLIAdapter) StreamingArgs(_ string) []string { return nil }

genkit/pty_interactive_test.go

Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
package genkit
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"os/exec"
8+
"strings"
9+
"testing"
10+
"time"
11+
12+
"github.com/GoCodeAlone/workflow-plugin-agent/provider"
13+
)
14+
15+
// TestClaudeCodeInteractivePTY_MultiTurn tests Claude Code interactive PTY
16+
// with a 3-message multi-turn conversation including complex output.
17+
// Requires: claude binary in PATH, authenticated.
18+
func TestClaudeCodeInteractivePTY_MultiTurn(t *testing.T) {
19+
if _, err := exec.LookPath("claude"); err != nil {
20+
t.Skip("claude not in PATH")
21+
}
22+
if os.Getenv("PTY_INTEGRATION") == "" {
23+
t.Skip("set PTY_INTEGRATION=1 to run interactive PTY tests")
24+
}
25+
26+
p, err := NewClaudeCodeProvider("")
27+
if err != nil {
28+
t.Fatalf("NewClaudeCodeProvider: %v", err)
29+
}
30+
defer p.(interface{ Close() error }).Close()
31+
32+
ctx := context.Background()
33+
34+
// Message 1: Establish context
35+
t.Log("=== Message 1: Establish context ===")
36+
ch1, err := p.Stream(ctx, []provider.Message{
37+
{Role: provider.RoleUser, Content: "My name is PTYTestBot. Remember this. Reply with just 'Noted, PTYTestBot.' and nothing else."},
38+
}, nil)
39+
if err != nil {
40+
t.Fatalf("Stream msg1: %v", err)
41+
}
42+
text1 := collectStream(t, ch1, 2*time.Minute)
43+
t.Logf("Response 1:\n%s", text1)
44+
if text1 == "" {
45+
t.Error("Message 1: empty response")
46+
}
47+
48+
// Message 2: Complex code generation
49+
t.Log("=== Message 2: Complex code generation ===")
50+
ch2, err := p.Stream(ctx, []provider.Message{
51+
{Role: provider.RoleUser, Content: "Write a Python function called merge_sorted_lists that takes two sorted lists and returns a merged sorted list. Include type hints and a docstring. Only output the code block."},
52+
}, nil)
53+
if err != nil {
54+
t.Fatalf("Stream msg2: %v", err)
55+
}
56+
text2 := collectStream(t, ch2, 2*time.Minute)
57+
t.Logf("Response 2:\n%s", text2)
58+
if !strings.Contains(text2, "merge_sorted") && !strings.Contains(text2, "def ") {
59+
t.Error("Message 2: expected Python function, got:", text2[:min(100, len(text2))])
60+
}
61+
62+
// Message 3: Recall context (tests multi-turn)
63+
t.Log("=== Message 3: Context recall ===")
64+
ch3, err := p.Stream(ctx, []provider.Message{
65+
{Role: provider.RoleUser, Content: "What was my name? Reply with just the name."},
66+
}, nil)
67+
if err != nil {
68+
t.Fatalf("Stream msg3: %v", err)
69+
}
70+
text3 := collectStream(t, ch3, 2*time.Minute)
71+
t.Logf("Response 3:\n%s", text3)
72+
if !strings.Contains(strings.ToLower(text3), "ptytestbot") && !strings.Contains(strings.ToLower(text3), "pty") {
73+
t.Error("Message 3: expected name recall, got:", text3[:min(100, len(text3))])
74+
}
75+
}
76+
77+
// TestClaudeCodeInteractivePTY_MultiAgent tests that PTY output is readable
78+
// when Claude Code uses tools (file_read, bash, etc.) which create tool call
79+
// UI elements in the terminal.
80+
func TestClaudeCodeInteractivePTY_MultiAgent(t *testing.T) {
81+
if _, err := exec.LookPath("claude"); err != nil {
82+
t.Skip("claude not in PATH")
83+
}
84+
if os.Getenv("PTY_INTEGRATION") == "" {
85+
t.Skip("set PTY_INTEGRATION=1 to run interactive PTY tests")
86+
}
87+
88+
p, err := NewClaudeCodeProvider("")
89+
if err != nil {
90+
t.Fatalf("NewClaudeCodeProvider: %v", err)
91+
}
92+
defer p.(interface{ Close() error }).Close()
93+
94+
ctx := context.Background()
95+
96+
// This prompt triggers tool usage (listing files), which creates
97+
// tool call cards in Claude Code's TUI (Read, Bash, etc.)
98+
t.Log("=== Tool-using prompt (triggers tool call UI) ===")
99+
ch, err := p.Stream(ctx, []provider.Message{
100+
{Role: provider.RoleUser, Content: "List the files in the current directory and tell me how many there are. Be brief."},
101+
}, nil)
102+
if err != nil {
103+
t.Fatalf("Stream: %v", err)
104+
}
105+
text := collectStream(t, ch, 3*time.Minute)
106+
t.Logf("Response:\n%s", text)
107+
if text == "" {
108+
t.Error("empty response from tool-using prompt")
109+
}
110+
// Should contain some file count or listing
111+
if !strings.Contains(text, "file") && !strings.Contains(text, "director") {
112+
t.Error("expected response about files/directories, got:", text[:min(100, len(text))])
113+
}
114+
}
115+
116+
// TestCopilotInteractivePTY tests Copilot with interactive PTY enabled.
117+
func TestCopilotInteractivePTY(t *testing.T) {
118+
if _, err := exec.LookPath("copilot"); err != nil {
119+
t.Skip("copilot not in PATH")
120+
}
121+
if os.Getenv("PTY_INTEGRATION") == "" {
122+
t.Skip("set PTY_INTEGRATION=1 to run interactive PTY tests")
123+
}
124+
125+
p, err := NewCopilotCLIProvider("")
126+
if err != nil {
127+
t.Fatalf("NewCopilotCLIProvider: %v", err)
128+
}
129+
defer p.(interface{ Close() error }).Close()
130+
131+
ctx := context.Background()
132+
133+
// Simple query
134+
t.Log("=== Copilot: Simple query ===")
135+
ch1, err := p.Stream(ctx, []provider.Message{
136+
{Role: provider.RoleUser, Content: "What is the capital of Japan? Reply in one word."},
137+
}, nil)
138+
if err != nil {
139+
t.Fatalf("Stream msg1: %v", err)
140+
}
141+
text1 := collectStream(t, ch1, 2*time.Minute)
142+
t.Logf("Response 1:\n%s", text1)
143+
if !strings.Contains(strings.ToLower(text1), "tokyo") {
144+
t.Error("expected Tokyo, got:", text1[:min(100, len(text1))])
145+
}
146+
147+
// Code generation
148+
t.Log("=== Copilot: Code generation ===")
149+
ch2, err := p.Stream(ctx, []provider.Message{
150+
{Role: provider.RoleUser, Content: "Write a Python function is_palindrome(s) that checks if a string is a palindrome. Only code."},
151+
}, nil)
152+
if err != nil {
153+
t.Fatalf("Stream msg2: %v", err)
154+
}
155+
text2 := collectStream(t, ch2, 2*time.Minute)
156+
t.Logf("Response 2:\n%s", text2)
157+
if !strings.Contains(text2, "palindrome") && !strings.Contains(text2, "def ") {
158+
t.Error("expected Python function")
159+
}
160+
}
161+
162+
func collectStream(t *testing.T, ch <-chan provider.StreamEvent, timeout time.Duration) string {
163+
t.Helper()
164+
var sb strings.Builder
165+
timer := time.NewTimer(timeout)
166+
defer timer.Stop()
167+
for {
168+
select {
169+
case ev, ok := <-ch:
170+
if !ok {
171+
return sb.String()
172+
}
173+
switch ev.Type {
174+
case "text":
175+
sb.WriteString(ev.Text)
176+
case "done":
177+
return sb.String()
178+
case "error":
179+
t.Logf("stream error: %s", ev.Error)
180+
return sb.String()
181+
}
182+
case <-timer.C:
183+
t.Log("timeout waiting for stream")
184+
return sb.String()
185+
}
186+
}
187+
}
188+
189+
// min returns the smaller of a and b. The tests in this file use it to cap
// the length of response text quoted in failure messages.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
195+
196+
// TestExtractResponse_ClaudeCodeScreen tests extractResponse with realistic
197+
// Claude Code screen content including tool call cards.
// It builds a ptyProvider with a ClaudeCodeAdapter, passes it a hand-written
// screen (banner, user prompt, tool card, answer, trailing prompt) and checks
// that the extracted text contains "5 files".
198+
func TestExtractResponse_ClaudeCodeScreen(t *testing.T) {
199+
p := &ptyProvider{adapter: ClaudeCodeAdapter{}}
200+
201+
// Simulate a screen with tool call UI (a single Read tool card).
202+
screen := `╭─────────────────────────────────────╮
203+
│ ✻ Claude Code │
204+
╰─────────────────────────────────────╯
205+
206+
❯ List the files and count them
207+
208+
⎿ Read tool
209+
Listed 5 files in current directory
210+
211+
There are 5 files in the current directory:
212+
- main.go
213+
- go.mod
214+
- go.sum
215+
- README.md
216+
- Makefile
217+
218+
❯`
219+
220+
result := p.extractResponse(screen)
221+
t.Logf("Extracted: %q", result)
222+
// NOTE(review): "5 files" appears both in the tool-card line and in the
// answer line, so this check alone does not prove the answer text itself
// was extracted.
if !strings.Contains(result, "5 files") {
223+
t.Errorf("expected '5 files' in response, got: %s", result)
224+
}
225+
}
226+
227+
// TestExtractResponse_CopilotScreen tests extractResponse with Copilot screen.
// The screen holds a tip line, the user's ❯ question, a ● reply line and a
// trailing "❯ Type @" prompt; the extracted text must contain "4".
228+
func TestExtractResponse_CopilotScreen(t *testing.T) {
229+
p := &ptyProvider{adapter: CopilotCLIAdapter{}}
230+
231+
// NOTE(review): fmt.Sprintf is called with no arguments and the literal has
// no formatting verbs, so it returns the literal unchanged. It looks like the
// only use of fmt in the visible part of this file — confirm before
// simplifying, since removing it would leave the import unused.
screen := fmt.Sprintf(` 💡 Tip: Use @workspace to ask about your project
232+
233+
❯ What is 2+2?
234+
235+
● The answer is 4.
236+
237+
❯ Type @`)
238+
239+
result := p.extractResponse(screen)
240+
t.Logf("Extracted: %q", result)
241+
// "4" occurs only in the ● reply line of the screen above.
if !strings.Contains(result, "4") {
242+
t.Errorf("expected '4' in response, got: %s", result)
243+
}
244+
}
245+
246+
// TestExtractResponseDiff_CopilotWithSystemLines tests that screen-diff extraction
247+
// correctly ignores pre-existing system ● lines and only captures response content.
// The pre-message screen is converted with screenLines and passed to
// extractResponseDiff as the baseline alongside the post-message screen.
248+
func TestExtractResponseDiff_CopilotWithSystemLines(t *testing.T) {
249+
p := &ptyProvider{adapter: CopilotCLIAdapter{}}
250+
251+
// Pre-message screen: has system ● lines that should be ignored
252+
preScreen := ` ● Environment: macOS 14.5
253+
💡 Tip: Use @workspace to ask about your project
254+
❯ Type @`
255+
256+
// Snapshot of the lines that were on screen before the message was sent.
preLines := screenLines(preScreen)
257+
258+
// Post-message screen: user message + response + new prompt
259+
postScreen := ` ● Environment: macOS 14.5
260+
💡 Tip: Use @workspace to ask about your project
261+
262+
❯ What is 2+2?
263+
264+
● The answer is 4.
265+
266+
❯ Type @`
267+
268+
result := p.extractResponseDiff(postScreen, preLines)
269+
t.Logf("Extracted: %q", result)
270+
// NOTE(review): the pre-existing "macOS 14.5" line also contains "4", so
// this check could pass on that line; the Environment check below is what
// guards against the system line leaking into the result.
if !strings.Contains(result, "4") {
271+
t.Errorf("expected '4' in response, got: %s", result)
272+
}
273+
// Must NOT contain the system Environment line
274+
if strings.Contains(result, "Environment") {
275+
t.Errorf("should not contain pre-existing system lines, got: %s", result)
276+
}
277+
}
278+
279+
// TestExtractResponseDiff_ClaudeCodeWithToolCalls tests screen-diff extraction
280+
// when Claude Code shows tool call cards.
// The baseline is the banner plus an empty ❯ prompt; the post-message screen
// adds the user's prompt, a Bash(ls) tool card with its output, the answer
// and a new ❯ prompt. The extracted text must contain "3 files".
281+
func TestExtractResponseDiff_ClaudeCodeWithToolCalls(t *testing.T) {
282+
p := &ptyProvider{adapter: ClaudeCodeAdapter{}}
283+
284+
// Pre-message screen: banner and an empty prompt only.
preScreen := `╭─────────────────────────────────────╮
285+
│ ✻ Claude Code │
286+
╰─────────────────────────────────────╯
287+
❯`
288+
289+
// Snapshot of the lines that were on screen before the message was sent.
preLines := screenLines(preScreen)
290+
291+
// Post-message screen: same banner, then prompt, tool card, answer, prompt.
postScreen := `╭─────────────────────────────────────╮
292+
│ ✻ Claude Code │
293+
╰─────────────────────────────────────╯
294+
295+
❯ List files
296+
297+
⎿ Bash(ls)
298+
main.go go.mod README.md
299+
300+
There are 3 files in the directory.
301+
302+
❯`
303+
304+
result := p.extractResponseDiff(postScreen, preLines)
305+
t.Logf("Extracted: %q", result)
306+
// "3 files" occurs only in the answer line, not in the tool card output.
if !strings.Contains(result, "3 files") {
307+
t.Errorf("expected '3 files' in response, got: %s", result)
308+
}
309+
}
310+
311+
// TestScreenLines verifies the screen snapshot helper.
312+
func TestScreenLines(t *testing.T) {
313+
screen := " line1 \n line2\n\n line3 "
314+
lines := screenLines(screen)
315+
if !lines[" line1"] {
316+
t.Error("expected ' line1' in set")
317+
}
318+
if lines[""] {
319+
t.Error("empty strings should not be in set")
320+
}
321+
if len(lines) != 3 {
322+
t.Errorf("expected 3 lines, got %d", len(lines))
323+
}
324+
}

0 commit comments

Comments
 (0)