From d5f5038d5beb661edde9ca28e8028b0ffaf0917e Mon Sep 17 00:00:00 2001 From: Jo Voordeckers Date: Fri, 19 Jun 2026 14:10:52 -0700 Subject: [PATCH] Add window broadcast, fix audio lag, set reasonable volume, remove mouse cursor --- cmd/doubletake/main.go | 20 +++- internal/airplay/audio.go | 207 ++++++++++++++++++++++++++++++------ internal/airplay/capture.go | 83 ++++++++++++--- internal/airplay/mirror.go | 36 +++++-- 4 files changed, 287 insertions(+), 59 deletions(-) diff --git a/cmd/doubletake/main.go b/cmd/doubletake/main.go index a195789..87d2b3c 100644 --- a/cmd/doubletake/main.go +++ b/cmd/doubletake/main.go @@ -19,6 +19,14 @@ import ( "doubletake/internal/daemon" ) +func parseXID(s string) (uint64, error) { + s = strings.TrimSpace(s) + if s == "" { + return 0, nil + } + return strconv.ParseUint(s, 0, 64) // accepts decimal or 0xhex +} + // parsePortRange parses a "min-max" string into inclusive port bounds. // An empty string returns (0, 0, nil) meaning "let the OS pick". func parsePortRange(s string) (int, int, error) { @@ -66,6 +74,8 @@ func main() { debug := flag.Bool("debug", false, "Enable verbose debug logging") daemonize := flag.Bool("daemonize", false, "Run as background daemon with Unix socket control interface") socketPath := flag.String("socket", daemon.DefaultSocketPath(), "Unix socket path for daemon control interface") + x11WindowID := flag.String("x11-window-id", "", "X11 window id to capture, decimal or 0xhex") + x11WindowName := flag.String("x11-window-name", "", "X11 window name to capture; prefer -x11-window-id") flag.Parse() airplay.SetTargetLatency(time.Duration(*targetLatencyMs) * time.Millisecond) @@ -80,6 +90,12 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + xid, err := parseXID(*x11WindowID) + if err != nil { + log.Fatalf("invalid -x11-window-id: %v", err) + } + + sigCh := make(chan os.Signal, 1) signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) go func() { @@ -275,7 +291,7 @@ func main() { Bitrate: *bitrate, HWAccel: *hwaccel, }) - if err != nil { + if err != nil { log.Fatalf("test capture failed: %v", err) } } else { @@ -283,6 +299,8 @@ func main() { FPS: *fps, Bitrate: *bitrate, HWAccel: *hwaccel, + X11WindowID: xid, + X11WindowName: *x11WindowName, } var err error capture, err = airplay.StartCapture(ctx, captureCfg) diff --git a/internal/airplay/audio.go b/internal/airplay/audio.go index 1586b03..522303d 100644 --- a/internal/airplay/audio.go +++ b/internal/airplay/audio.go @@ -8,11 +8,12 @@ import ( "fmt" "io" "net" + "os" "os/exec" "strings" "sync" "time" - + "golang.org/x/sys/unix" aeadchacha20poly1305 "github.com/aead/chacha20poly1305" ) @@ -176,47 +177,190 @@ func (ac *AudioCapture) ReadFrame(buf []byte) (int, error) { return n, nil } -// DrainStale discards any PCM that buffered in the OS pipe between capture -// start and the first read. The capture pipeline starts producing audio -// immediately, but streaming does not begin until the first video frame is -// sent; during that gap the kernel pipe accumulates a FIFO backlog that would -// otherwise be read in order forever, leaving every frame permanently stale and -// audio lagging video. Draining once just before the read loop starts streaming -// from the freshest sample. It removes whatever backlog actually accumulated — -// no fixed latency value is assumed. -func (ac *AudioCapture) DrainStale() { - type deadlineReader interface { - SetReadDeadline(t time.Time) error +func bytesAvailable(f *os.File) int { + available, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCINQ) + if err != nil { + dbg("[AUDIO] TIOCINQ failed: %v", err) + return 0 } - dr, ok := ac.pcmPipe.(deadlineReader) - if !ok { + return available +} + +func (ac *AudioCapture) DropAudioBacklog(keepFrames int) { + f, ok := ac.pcmPipe.(*os.File) + if !ok || f == nil { return } - buf := make([]byte, 32*1024) - var discarded int - for { - // Re-arm a short idle timeout each read: while a backlog exists, reads - // return buffered data immediately; once the pipe is empty the read - // blocks and this deadline fires before the next live frame (~8ms) - // arrives, ending the drain. This is a poll timeout, not a latency. - if err := dr.SetReadDeadline(time.Now().Add(2 * time.Millisecond)); err != nil { + + const spf = 352 + const channels = 2 + const bytesPerSample = 2 + const frameBytes = spf * channels * bytesPerSample + + keep := keepFrames * frameBytes + + available := bytesAvailable(f) + if available <= 0 { + return + } + // var available int + // _, _, errno := syscall.Syscall( + // syscall.SYS_IOCTL, + // f.Fd(), + // uintptr(syscall.FIONREAD), + // uintptr(unsafe.Pointer(&available)), + // ) + // if errno != 0 || available <= keep { + // return + // } + + drop := available - keep + drop -= drop % frameBytes + if drop <= 0 { + return + } + + buf := make([]byte, frameBytes) + dropped := 0 + + for dropped < drop { + want := drop - dropped + if want > len(buf) { + want = len(buf) + } + + n, err := ac.pcmPipe.Read(buf[:want]) + dropped += n + if err != nil || n == 0 { break } - n, err := ac.pcmPipe.Read(buf) - discarded += n + } + + if dropped > 0 { + const bytesPerSecond = 44100 * 2 * 2 + dbg("[AUDIO] dropped %d stale bytes (~%.0fms), kept %d frames", + dropped, + float64(dropped)/bytesPerSecond*1000, + keepFrames, + ) + } +} + +func (ac *AudioCapture) DrainStale() { + f, ok := ac.pcmPipe.(*os.File) + if !ok || f == nil { + dbg("[AUDIO] DrainStale: pcmPipe is not *os.File") + return + } + + // var available int + // _, _, errno := syscall.Syscall( + // syscall.SYS_IOCTL, + // f.Fd(), + // uintptr(syscall.FIONREAD), + // uintptr(unsafe.Pointer(&available)), + // ) + // if errno != 0 { + // dbg("[AUDIO] DrainStale: FIONREAD failed: %v", errno) + // return + // } + + available := bytesAvailable(f) + + if available <= 0 { + dbg("[AUDIO] DrainStale: no startup backlog") + return + } + + // Keep only a tiny amount so ReadFrame() has something immediate. + const spf = 352 + const channels = 2 + const bytesPerSample = 2 + const frameBytes = spf * channels * bytesPerSample // 1408 bytes ~= 8ms + + keep := 2 * frameBytes // keep ~16ms + drop := available - keep + if drop <= 0 { + dbg("[AUDIO] DrainStale: backlog only %d bytes, keeping it", available) + return + } + + // Keep frame alignment. + drop -= drop % frameBytes + if drop <= 0 { + return + } + + buf := make([]byte, frameBytes) + dropped := 0 + + for dropped < drop { + want := drop - dropped + if want > len(buf) { + want = len(buf) + } + + n, err := ac.pcmPipe.Read(buf[:want]) + dropped += n if err != nil { + dbg("[AUDIO] DrainStale: read stopped after %d bytes: %v", dropped, err) + break + } + if n == 0 { break } } - // Restore blocking reads for steady-state streaming. - _ = dr.SetReadDeadline(time.Time{}) - if discarded > 0 { - const bytesPerSecond = 44100 * 2 * 2 // 44.1kHz, stereo, S16LE - dbg("[AUDIO] drained %d bytes (~%.0fms) of startup backlog before streaming", - discarded, float64(discarded)/bytesPerSecond*1000) - } + + const bytesPerSecond = 44100 * 2 * 2 + dbg("[AUDIO] drained %d/%d stale bytes (~%.0fms), kept ~%dms", + dropped, + available, + float64(dropped)/bytesPerSecond*1000, + keep*1000/bytesPerSecond, + ) } +// // DrainStale discards any PCM that buffered in the OS pipe between capture +// // start and the first read. The capture pipeline starts producing audio +// // immediately, but streaming does not begin until the first video frame is +// // sent; during that gap the kernel pipe accumulates a FIFO backlog that would +// // otherwise be read in order forever, leaving every frame permanently stale and +// // audio lagging video. Draining once just before the read loop starts streaming +// // from the freshest sample. It removes whatever backlog actually accumulated — +// // no fixed latency value is assumed. +// func (ac *AudioCapture) DrainStale() { +// type deadlineReader interface { +// SetReadDeadline(t time.Time) error +// } +// dr, ok := ac.pcmPipe.(deadlineReader) +// if !ok { +// return +// } +// buf := make([]byte, 32*1024) +// var discarded int +// for { +// // Re-arm a short idle timeout each read: while a backlog exists, reads +// // return buffered data immediately; once the pipe is empty the read +// // blocks and this deadline fires before the next live frame (~8ms) +// // arrives, ending the drain. This is a poll timeout, not a latency. +// if err := dr.SetReadDeadline(time.Now().Add(2 * time.Millisecond)); err != nil { +// break +// } +// n, err := ac.pcmPipe.Read(buf) +// discarded += n +// if err != nil { +// break +// } +// } +// // Restore blocking reads for steady-state streaming. +// _ = dr.SetReadDeadline(time.Time{}) +// if discarded > 0 { +// const bytesPerSecond = 44100 * 2 * 2 // 44.1kHz, stereo, S16LE +// dbg("[AUDIO] drained %d bytes (~%.0fms) of startup backlog before streaming", +// discarded, float64(discarded)/bytesPerSecond*1000) +// } +// } + func (ac *AudioCapture) Stop() { if ac.stopped { return @@ -803,6 +947,7 @@ func (s *MirrorSession) StreamAudio(ctx context.Context, capture *AudioCapture, default: } + // TODO: capture.DropAudioBacklog(2) n, err := capture.ReadFrame(frameBuf) if err != nil { if ctx.Err() != nil { diff --git a/internal/airplay/capture.go b/internal/airplay/capture.go index d7d944d..e89575f 100644 --- a/internal/airplay/capture.go +++ b/internal/airplay/capture.go @@ -21,8 +21,12 @@ type CaptureConfig struct { Bitrate int // Video bitrate in kbps (0 = auto) HWAccel string // "auto", "vaapi", "none" + X11WindowID uint64 + X11WindowName string + RestoreToken string SaveRestoreToken func(string) error + } const ( @@ -51,7 +55,12 @@ type ScreenCapture struct { // StartCapture detects the display server (Wayland or X11) and initiates screen // capture accordingly. On Wayland it uses xdg-desktop-portal + PipeWire for // capture; on X11 it uses ximagesrc. Both use GStreamer for H.264 encoding. + func StartCapture(ctx context.Context, cfg CaptureConfig) (*ScreenCapture, error) { + if (cfg.X11WindowID != 0 || cfg.X11WindowName != "") && os.Getenv("DISPLAY") != "" { + return startX11Capture(ctx, cfg) + } + if os.Getenv("WAYLAND_DISPLAY") != "" { return startWaylandCapture(ctx, cfg) } @@ -61,6 +70,16 @@ func StartCapture(ctx context.Context, cfg CaptureConfig) (*ScreenCapture, error return nil, fmt.Errorf("no display server detected (neither WAYLAND_DISPLAY nor DISPLAY is set)") } +// func StartCapture(ctx context.Context, cfg CaptureConfig) (*ScreenCapture, error) { +// if os.Getenv("WAYLAND_DISPLAY") != "" { +// return startWaylandCapture(ctx, cfg) +// } +// if os.Getenv("DISPLAY") != "" { +// return startX11Capture(ctx, cfg) +// } +// return nil, fmt.Errorf("no display server detected (neither WAYLAND_DISPLAY nor DISPLAY is set)") +// } + func startWaylandCapture(ctx context.Context, cfg CaptureConfig) (*ScreenCapture, error) { // Check dependencies if err := exec.Command("gst-inspect-1.0", "pipewiresrc").Run(); err != nil { @@ -167,28 +186,58 @@ func startX11Capture(ctx context.Context, cfg CaptureConfig) (*ScreenCapture, er } display := os.Getenv("DISPLAY") - encoder := detectGstEncoder(cfg) - // Detect primary monitor geometry — ximagesrc captures the full X screen - // (all monitors combined). On multi-monitor setups this wastes CPU on pixels - // we don't need, so crop to the primary monitor. The encoded resolution is - // then the primary monitor's native resolution (no rescaling). - startX, startY, endX, endY := detectPrimaryMonitor(display) - ximageSrcArgs := []string{ - "ximagesrc", fmt.Sprintf("display-name=%s", display), "use-damage=false", - } - if endX > startX && endY > startY { - ximageSrcArgs = append(ximageSrcArgs, - fmt.Sprintf("startx=%d", startX), - fmt.Sprintf("starty=%d", startY), - fmt.Sprintf("endx=%d", endX-1), - fmt.Sprintf("endy=%d", endY-1), - ) - dbg("[CAPTURE] cropping ximagesrc to x=%d..%d y=%d..%d", startX, endX-1, startY, endY-1) + "ximagesrc", + fmt.Sprintf("display-name=%s", display), + "use-damage=false", + "show-pointer=false", + } + + if cfg.X11WindowID != 0 { + ximageSrcArgs = append(ximageSrcArgs, fmt.Sprintf("xid=%d", cfg.X11WindowID)) + dbg("[CAPTURE] capturing X11 window xid=0x%x", cfg.X11WindowID) + } else if cfg.X11WindowName != "" { + ximageSrcArgs = append(ximageSrcArgs, fmt.Sprintf("xname=%s", cfg.X11WindowName)) + dbg("[CAPTURE] capturing X11 window name=%q", cfg.X11WindowName) + } else { + // existing behavior: crop to primary monitor + startX, startY, endX, endY := detectPrimaryMonitor(display) + if endX > startX && endY > startY { + ximageSrcArgs = append(ximageSrcArgs, + fmt.Sprintf("startx=%d", startX), + fmt.Sprintf("starty=%d", startY), + fmt.Sprintf("endx=%d", endX-1), + fmt.Sprintf("endy=%d", endY-1), + ) + dbg("[CAPTURE] cropping ximagesrc to x=%d..%d y=%d..%d", startX, endX-1, startY, endY-1) + } } + // display := os.Getenv("DISPLAY") + // + // encoder := detectGstEncoder(cfg) + // + // // Detect primary monitor geometry — ximagesrc captures the full X screen + // // (all monitors combined). On multi-monitor setups this wastes CPU on pixels + // // we don't need, so crop to the primary monitor. The encoded resolution is + // // then the primary monitor's native resolution (no rescaling). + // startX, startY, endX, endY := detectPrimaryMonitor(display) + // + // ximageSrcArgs := []string{ + // "ximagesrc", fmt.Sprintf("display-name=%s", display), "use-damage=false", + // } + // if endX > startX && endY > startY { + // ximageSrcArgs = append(ximageSrcArgs, + // fmt.Sprintf("startx=%d", startX), + // fmt.Sprintf("starty=%d", startY), + // fmt.Sprintf("endx=%d", endX-1), + // fmt.Sprintf("endy=%d", endY-1), + // ) + // dbg("[CAPTURE] cropping ximagesrc to x=%d..%d y=%d..%d", startX, endX-1, startY, endY-1) + // } + gstArgs := []string{"--quiet"} gstArgs = append(gstArgs, ximageSrcArgs...) gstArgs = append(gstArgs, diff --git a/internal/airplay/mirror.go b/internal/airplay/mirror.go index 4582d6f..dd825bb 100644 --- a/internal/airplay/mirror.go +++ b/internal/airplay/mirror.go @@ -136,9 +136,13 @@ func (c *AirPlayClient) setupMirrorSession(ctx context.Context, cfg StreamConfig // the receiver's playout buffer lead. Receivers without a robust jitter buffer // (non-FairPlay-SAP, e.g. Roku) need a conservative floor or they drop audio; // modern Apple receivers can run at the low target latency unchanged. + // if floor := c.info.playoutLatencyFloor(); sessionLatency < floor { + // dbg("[SETUP] raising session latency to receiver playout floor: %v", floor) + // sessionLatency = floor + // } if floor := c.info.playoutLatencyFloor(); sessionLatency < floor { - dbg("[SETUP] raising session latency to receiver playout floor: %v", floor) - sessionLatency = floor + dbg("[SETUP] receiver playout floor: %v; ignoring, using configured latency %v", + floor, sessionLatency) } // ---- Working SETUP sequence ---- @@ -464,22 +468,34 @@ func (c *AirPlayClient) setupMirrorSession(ctx context.Context, cfg StreamConfig if len(recordBody) > 0 { dbg("[SETUP] RECORD response body: %02x", recordBody) } + // if value, ok := recordRespHeaders["audio-latency"]; ok { + // parsed, parseErr := strconv.ParseUint(value, 10, 32) + // if parseErr != nil { + // dbg("[SETUP] invalid Audio-Latency header %q: %v", value, parseErr) + // } else if parsed > 0 { + // // The receiver reported its authoritative playout latency. Drive both + // // audio and video from it so they stay in sync (the anchor lead is the + // // shared playout time), overriding our conservative fallback floor. + // audioLatencySamples = uint32(parsed) + // sessionLatency = time.Duration(audioLatencySamples) * time.Second / 44100 + // dbg("[SETUP] receiver audio latency: %d samples (%v); using for audio+video", audioLatencySamples, sessionLatency) + // } + // } if value, ok := recordRespHeaders["audio-latency"]; ok { parsed, parseErr := strconv.ParseUint(value, 10, 32) if parseErr != nil { dbg("[SETUP] invalid Audio-Latency header %q: %v", value, parseErr) } else if parsed > 0 { - // The receiver reported its authoritative playout latency. Drive both - // audio and video from it so they stay in sync (the anchor lead is the - // shared playout time), overriding our conservative fallback floor. - audioLatencySamples = uint32(parsed) - sessionLatency = time.Duration(audioLatencySamples) * time.Second / 44100 - dbg("[SETUP] receiver audio latency: %d samples (%v); using for audio+video", audioLatencySamples, sessionLatency) + receiverLatency := time.Duration(parsed) * time.Second / 44100 + + dbg("[SETUP] receiver audio latency: %d samples (%v); ignoring, using configured latency %v", + parsed, receiverLatency, sessionLatency) + + audioLatencySamples = samplesFor44k1(sessionLatency) } } - // Set volume to maximum (0 dB) - volumeBody := []byte("volume: 0.000000\r\n") + volumeBody := []byte("volume: 20.000000\r\n") _, _, err = c.rtspRequest("SET_PARAMETER", audioURI, "text/parameters", volumeBody, nil) if err != nil { dbg("[SETUP] SET_PARAMETER volume failed (non-fatal): %v", err)