diff --git a/packages/app-expo/src/stores/tts-store.ts b/packages/app-expo/src/stores/tts-store.ts index e9047835..4448b398 100644 --- a/packages/app-expo/src/stores/tts-store.ts +++ b/packages/app-expo/src/stores/tts-store.ts @@ -2,7 +2,10 @@ import { DEFAULT_TTS_CONFIG, type ITTSPlayer, type TTSConfig, + VOICE_RESPEAK_DEBOUNCE_MS, + isActivePlay, normalizeTTSConfig, + shouldRespeakForSynthChange, splitNarrationText, } from "@readany/core/tts"; import TrackPlayer from "react-native-track-player"; @@ -59,6 +62,26 @@ function clearSleepTimerHandle(): void { } } +let _respeakTimer: ReturnType<typeof setTimeout> | null = null; + +function clearRespeakTimer(): void { + if (_respeakTimer) { + clearTimeout(_respeakTimer); + _respeakTimer = null; + } +} + +function scheduleRespeak(): void { + clearRespeakTimer(); + _respeakTimer = setTimeout(() => { + _respeakTimer = null; + const { playState, jumpToChunk } = useTTSStore.getState(); + if (isActivePlay(playState)) { + jumpToChunk(_sessionCurrentIndex); + } + }, VOICE_RESPEAK_DEBOUNCE_MS); +} + function detachAndStopPlayer(player: ITTSPlayer | null): void { if (!player) return; player.onStateChange = undefined; @@ -257,6 +280,7 @@ export const useTTSStore = create()( sleepTimerDurationMinutes: null, play: (text: string | string[]) => { + clearRespeakTimer(); const segments = normalizeSegments(text); const joinedText = segments.join(" ").trim(); if (!joinedText) { @@ -327,6 +351,7 @@ export const useTTSStore = create()( pause: () => { console.log("[TTSStore] pause called"); + clearRespeakTimer(); const { playState } = get(); if (playState !== "playing" && playState !== "loading") return; _activeTTS?.pause(); @@ -371,6 +396,7 @@ export const useTTSStore = create()( stop: () => { console.log("[TTSStore] stop called"); clearSleepTimerHandle(); + clearRespeakTimer(); _sessionGeneration += 1; detachAndStopAllPlayers(); _sessionSegments = []; @@ -405,10 +431,22 @@ export const useTTSStore = create()( } }, - updateConfig: (updates) => - set((state) 
=> ({ - config: normalizeTTSConfig({ ...state.config, ...updates }), - })), + updateConfig: (updates) => { + const previousConfig = normalizeTTSConfig(get().config); + const nextConfig = normalizeTTSConfig({ ...previousConfig, ...updates }); + set({ config: nextConfig }); + + if ( + shouldRespeakForSynthChange(previousConfig, nextConfig) && + isActivePlay(get().playState) + ) { + scheduleRespeak(); + } else { + // A non-respeak change (engine switch, or a field the current engine ignores) must cancel the respeak queued by an earlier synthesis change; + // otherwise the stale debounce timer would fire and force playback to restart. + clearRespeakTimer(); + } + }, setPlayState: (playState) => set({ playState }), @@ -448,6 +486,7 @@ export const useTTSStore = create()( }), jumpToChunk: (index: number) => { + clearRespeakTimer(); if (index < 0 || index >= _sessionSegments.length) return; const config = normalizeTTSConfig(get().config); diff --git a/packages/core/src/stores/tts-store.test.ts b/packages/core/src/stores/tts-store.test.ts new file mode 100644 index 00000000..fe594fd4 --- /dev/null +++ b/packages/core/src/stores/tts-store.test.ts @@ -0,0 +1,205 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import { DEFAULT_TTS_CONFIG, type ITTSPlayer, type TTSConfig } from "../tts/types"; + +vi.mock("./persist", () => ({ + withPersist: (_key: string, creator: unknown) => creator, +})); + +const { setTTSPlayerFactories, useTTSStore } = await import("./tts-store"); + +type MockTTSPlayer = ITTSPlayer & { + speak: ReturnType<typeof vi.fn>; + pause: ReturnType<typeof vi.fn>; + resume: ReturnType<typeof vi.fn>; + stop: ReturnType<typeof vi.fn>; +}; + +function createMockPlayer(): MockTTSPlayer { + const player = { paused: false } as MockTTSPlayer; + player.speak = vi.fn(() => { + player.onStateChange?.("playing"); + }); + player.pause = vi.fn(() => { + player.onStateChange?.("paused"); + }); + player.resume = vi.fn(() => { + player.onStateChange?.("playing"); + }); + player.stop = vi.fn(() => { + player.onStateChange?.("stopped"); + }); + return player; +} + +function resetStore(config: TTSConfig = DEFAULT_TTS_CONFIG) { + 
useTTSStore.setState({ + playState: "stopped", + currentText: "", + config, + onEnd: null, + currentChunkIndex: 0, + totalChunks: 0, + currentBookTitle: "", + currentChapterTitle: "", + currentBookId: "", + currentLocationCfi: "", + sleepTimerEndsAt: null, + sleepTimerDurationMinutes: null, + }); +} + +let systemPlayer: MockTTSPlayer; +let edgePlayer: MockTTSPlayer; +let dashscopePlayer: MockTTSPlayer; + +function startDashScope(voice = "Cherry") { + useTTSStore + .getState() + .updateConfig({ engine: "dashscope", dashscopeApiKey: "key", dashscopeVoice: voice }); + useTTSStore.getState().play(["s0", "s1", "s2"]); +} +function startEdge() { + useTTSStore + .getState() + .updateConfig({ engine: "edge", edgeVoice: "zh-CN-XiaoxiaoNeural", rate: 1.0, pitch: 1.0 }); + useTTSStore.getState().play(["s0", "s1"]); +} + +describe("useTTSStore — re-speak on synth change (#370)", () => { + beforeEach(() => { + vi.useFakeTimers(); + systemPlayer = createMockPlayer(); + edgePlayer = createMockPlayer(); + dashscopePlayer = createMockPlayer(); + setTTSPlayerFactories({ + createSystemTTS: () => systemPlayer, + createEdgeTTS: () => edgePlayer, + createDashScopeTTS: () => dashscopePlayer, + }); + resetStore(); + useTTSStore.getState().stop(); + vi.clearAllMocks(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it("dashscope: re-speaks from current sentence with new voice after debounce", () => { + startDashScope("Cherry"); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(1); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Ethan" }); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(1); + vi.advanceTimersByTime(250); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(2); + const [segments, config] = dashscopePlayer.speak.mock.calls[1]; + expect(segments).toEqual(["s0", "s1", "s2"]); + expect((config as TTSConfig).dashscopeVoice).toBe("Ethan"); + }); + + it("edge: re-speaks on edge voice change", () => { + startEdge(); + 
useTTSStore.getState().updateConfig({ edgeVoice: "zh-CN-YunxiNeural" }); + vi.advanceTimersByTime(250); + expect(edgePlayer.speak).toHaveBeenCalledTimes(2); + expect((edgePlayer.speak.mock.calls[1][1] as TTSConfig).edgeVoice).toBe("zh-CN-YunxiNeural"); + }); + + it("debounces rapid switches into one re-speak with the last voice", () => { + startDashScope("Cherry"); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Ethan" }); + vi.advanceTimersByTime(100); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Serena" }); + vi.advanceTimersByTime(100); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Dylan" }); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(1); + vi.advanceTimersByTime(250); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(2); + expect((dashscopePlayer.speak.mock.calls[1][1] as TTSConfig).dashscopeVoice).toBe("Dylan"); + }); + + it("[cleanup] new play() during a pending re-speak adds no spurious speak", () => { + startDashScope("Cherry"); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Ethan" }); + vi.clearAllMocks(); + useTTSStore.getState().play(["n0", "n1"]); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(1); + vi.advanceTimersByTime(250); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(1); + }); + + it("[cleanup] manual jumpToChunk during a pending re-speak does not double-fire", () => { + startDashScope("Cherry"); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Ethan" }); + vi.clearAllMocks(); + useTTSStore.getState().jumpToChunk(1); + const callsAfterJump = dashscopePlayer.speak.mock.calls.length; + vi.advanceTimersByTime(250); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(callsAfterJump); + }); + + it("[loading] triggers re-speak while in loading state", () => { + startDashScope("Cherry"); + useTTSStore.setState({ playState: "loading" }); + vi.clearAllMocks(); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Ethan" }); + vi.advanceTimersByTime(250); + 
expect(dashscopePlayer.speak).toHaveBeenCalledTimes(1); + expect((dashscopePlayer.speak.mock.calls[0][1] as TTSConfig).dashscopeVoice).toBe("Ethan"); + }); + + it("does not re-speak when stopped", () => { + useTTSStore + .getState() + .updateConfig({ engine: "dashscope", dashscopeApiKey: "key", dashscopeVoice: "Cherry" }); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Ethan" }); + vi.advanceTimersByTime(250); + expect(dashscopePlayer.speak).not.toHaveBeenCalled(); + }); + + it("does not re-speak when voice unchanged", () => { + startDashScope("Cherry"); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Cherry" }); + vi.advanceTimersByTime(250); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(1); + }); + + it("does not re-speak when dashscope api key missing", () => { + useTTSStore + .getState() + .updateConfig({ engine: "dashscope", dashscopeApiKey: "", dashscopeVoice: "Cherry" }); + useTTSStore.getState().play(["s0", "s1"]); + vi.clearAllMocks(); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Ethan" }); + vi.advanceTimersByTime(250); + expect(dashscopePlayer.speak).not.toHaveBeenCalled(); + }); + + it("cancels pending re-speak on stop", () => { + startDashScope("Cherry"); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Ethan" }); + useTTSStore.getState().stop(); + vi.advanceTimersByTime(250); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(1); + }); + + it("cancels pending re-speak on pause", () => { + startDashScope("Cherry"); + useTTSStore.getState().updateConfig({ dashscopeVoice: "Ethan" }); + useTTSStore.getState().pause(); + vi.advanceTimersByTime(250); + expect(dashscopePlayer.speak).toHaveBeenCalledTimes(1); + }); + + it("[cleanup] 非重读配置变更取消待执行的 respeak(不残留重启)", () => { + startEdge(); + useTTSStore.getState().updateConfig({ edgeVoice: "zh-CN-YunxiNeural" }); // queues the respeak timer + vi.advanceTimersByTime(100); // still inside the debounce window + useTTSStore.getState().updateConfig({ engine: "system" }); // non-respeak change → should cancel the timer + 
vi.clearAllMocks(); + vi.advanceTimersByTime(250); // give any leftover timer a chance to fire + expect(edgePlayer.speak).not.toHaveBeenCalled(); + expect(systemPlayer.speak).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/core/src/stores/tts-store.ts b/packages/core/src/stores/tts-store.ts index 760132d6..cb1907ae 100644 --- a/packages/core/src/stores/tts-store.ts +++ b/packages/core/src/stores/tts-store.ts @@ -11,6 +11,7 @@ * (e.g. React Native) can override via `setTTSPlayerFactories()`. */ import { create } from "zustand"; +import { VOICE_RESPEAK_DEBOUNCE_MS, isActivePlay, shouldRespeakForSynthChange } from "../tts/respeak"; import { BrowserTTSPlayer, DashScopeTTSPlayer, EdgeTTSPlayer } from "../tts/tts-players"; import type { ITTSPlayer, TTSConfig } from "../tts/types"; import { DEFAULT_TTS_CONFIG, normalizeTTSConfig } from "../tts/types"; @@ -90,6 +91,26 @@ function clearSleepTimerHandle(): void { } } +let _respeakTimer: ReturnType<typeof setTimeout> | null = null; + +function clearRespeakTimer(): void { + if (_respeakTimer) { + clearTimeout(_respeakTimer); + _respeakTimer = null; + } +} + +function scheduleRespeak(): void { + clearRespeakTimer(); + _respeakTimer = setTimeout(() => { + _respeakTimer = null; + const { playState, jumpToChunk } = useTTSStore.getState(); + if (isActivePlay(playState)) { + jumpToChunk(_sessionCurrentIndex); + } + }, VOICE_RESPEAK_DEBOUNCE_MS); +} + export interface TTSState { /** Current playback state */ playState: TTSPlayState; @@ -150,6 +171,7 @@ export const useTTSStore = create()( sleepTimerDurationMinutes: null, play: (text: string | string[]) => { + clearRespeakTimer(); const config = normalizeTTSConfig(get().config); _dashscopeActiveVoice = config.dashscopeVoice; const segments = Array.isArray(text) ? 
text.map((item) => item.trim()).filter(Boolean) : [text.trim()].filter(Boolean); @@ -204,6 +226,7 @@ export const useTTSStore = create()( }, pause: () => { + clearRespeakTimer(); const config = normalizeTTSConfig(get().config); const { playState } = get(); if (playState !== "playing") return; @@ -293,6 +316,7 @@ export const useTTSStore = create()( stop: () => { clearSleepTimerHandle(); + clearRespeakTimer(); const system = getSystemTTS(); const edge = getEdgeTTS(); const dashscope = getDashScopeTTS(); @@ -333,10 +357,22 @@ export const useTTSStore = create()( } }, - updateConfig: (updates) => - set((s) => ({ - config: normalizeTTSConfig({ ...s.config, ...updates }), - })), + updateConfig: (updates) => { + const previousConfig = normalizeTTSConfig(get().config); + const nextConfig = normalizeTTSConfig({ ...previousConfig, ...updates }); + set({ config: nextConfig }); + + // [Placeholder · #427/#349] engine change while playing → stop playback. Insert that branch here when merging #427, + // and call clearRespeakTimer() inside it. + + if (shouldRespeakForSynthChange(previousConfig, nextConfig) && isActivePlay(get().playState)) { + scheduleRespeak(); + } else { + // A non-respeak change (engine switch, or a field the current engine ignores) must cancel the respeak queued by an earlier synthesis change; + // otherwise the stale debounce timer would fire and force playback to restart. + clearRespeakTimer(); + } + }, setPlayState: (playState) => set({ playState }), @@ -350,6 +386,7 @@ export const useTTSStore = create()( setChunkProgress: (index, total) => set({ currentChunkIndex: index, totalChunks: total }), jumpToChunk: (index: number) => { + clearRespeakTimer(); if (index < 0 || index >= _sessionSegments.length) return; const config = normalizeTTSConfig(get().config); _dashscopeActiveVoice = config.dashscopeVoice; diff --git a/packages/core/src/tts/index.ts b/packages/core/src/tts/index.ts index 6e3c49ef..ab4d4a58 100644 --- a/packages/core/src/tts/index.ts +++ b/packages/core/src/tts/index.ts @@ -26,3 +26,10 @@ export type { EdgeTTSVoice, EdgeTTSPayload } from "./edge-tts"; // Players export { BrowserTTSPlayer, DashScopeTTSPlayer, EdgeTTSPlayer } from "./tts-players"; + 
+// Re-speak on synthesis-param change (#370) +export { + VOICE_RESPEAK_DEBOUNCE_MS, + isActivePlay, + shouldRespeakForSynthChange, +} from "./respeak"; diff --git a/packages/core/src/tts/respeak.test.ts b/packages/core/src/tts/respeak.test.ts new file mode 100644 index 00000000..0eda0980 --- /dev/null +++ b/packages/core/src/tts/respeak.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, it } from "vitest"; + +import { isActivePlay, shouldRespeakForSynthChange } from "./respeak"; +import { DEFAULT_TTS_CONFIG, type TTSConfig } from "./types"; + +function cfg(over: Partial<TTSConfig>): TTSConfig { + return { ...DEFAULT_TTS_CONFIG, ...over }; +} + +describe("shouldRespeakForSynthChange", () => { + it("dashscope: voice change with api key → true", () => { + expect( + shouldRespeakForSynthChange( + cfg({ engine: "dashscope", dashscopeApiKey: "k", dashscopeVoice: "Cherry" }), + cfg({ engine: "dashscope", dashscopeApiKey: "k", dashscopeVoice: "Ethan" }), + ), + ).toBe(true); + }); + + it("dashscope: voice change without api key → false", () => { + expect( + shouldRespeakForSynthChange( + cfg({ engine: "dashscope", dashscopeApiKey: "", dashscopeVoice: "Cherry" }), + cfg({ engine: "dashscope", dashscopeApiKey: "", dashscopeVoice: "Ethan" }), + ), + ).toBe(false); + }); + + it("dashscope: same voice → false", () => { + const base = { engine: "dashscope" as const, dashscopeApiKey: "k", dashscopeVoice: "Cherry" }; + expect(shouldRespeakForSynthChange(cfg(base), cfg(base))).toBe(false); + }); + + it("dashscope: rate change (voice unchanged) → false", () => { + expect( + shouldRespeakForSynthChange( + cfg({ engine: "dashscope", dashscopeApiKey: "k", dashscopeVoice: "Cherry", rate: 1.0 }), + cfg({ engine: "dashscope", dashscopeApiKey: "k", dashscopeVoice: "Cherry", rate: 1.5 }), + ), + ).toBe(false); + }); + + it("edge: voice change → true", () => { + expect( + shouldRespeakForSynthChange( + cfg({ engine: "edge", edgeVoice: "a" }), + cfg({ engine: "edge", edgeVoice: "b" }), + ), + 
).toBe(true); + }); + + it("edge: rate change → true", () => { + expect( + shouldRespeakForSynthChange( + cfg({ engine: "edge", rate: 1.0 }), + cfg({ engine: "edge", rate: 1.5 }), + ), + ).toBe(true); + }); + + it("edge: pitch change → true", () => { + expect( + shouldRespeakForSynthChange( + cfg({ engine: "edge", pitch: 1.0 }), + cfg({ engine: "edge", pitch: 1.5 }), + ), + ).toBe(true); + }); + + it("edge: no synth-param change → false", () => { + const base = { engine: "edge" as const, edgeVoice: "a", rate: 1.0, pitch: 1.0 }; + expect(shouldRespeakForSynthChange(cfg(base), cfg(base))).toBe(false); + }); + + it("system engine → false", () => { + expect( + shouldRespeakForSynthChange( + cfg({ engine: "system", voiceName: "a" }), + cfg({ engine: "system", voiceName: "b" }), + ), + ).toBe(false); + }); +}); + +describe("isActivePlay", () => { + it("playing/loading → true", () => { + expect(isActivePlay("playing")).toBe(true); + expect(isActivePlay("loading")).toBe(true); + }); + it("paused/stopped → false", () => { + expect(isActivePlay("paused")).toBe(false); + expect(isActivePlay("stopped")).toBe(false); + }); +}); diff --git a/packages/core/src/tts/respeak.ts b/packages/core/src/tts/respeak.ts new file mode 100644 index 00000000..fe23fb87 --- /dev/null +++ b/packages/core/src/tts/respeak.ts @@ -0,0 +1,24 @@ +import type { TTSConfig, TTSPlayState } from "./types"; + +/** Debounce window (ms) before re-speaking from the current sentence after a + * synthesis-affecting config change (voice/rate/pitch). */ +export const VOICE_RESPEAK_DEBOUNCE_MS = 250; + +export function isActivePlay(state: TTSPlayState): boolean { + return state === "playing" || state === "loading"; +} + +/** Whether a config change altered a synthesis-affecting parameter for the + * *current* engine, warranting a re-speak from the current sentence. + * DashScope only honors voice (it does not send rate/pitch). 
*/ +export function shouldRespeakForSynthChange(prev: TTSConfig, next: TTSConfig): boolean { + if (next.engine === "dashscope") { + return !!next.dashscopeApiKey && next.dashscopeVoice !== prev.dashscopeVoice; + } + if (next.engine === "edge") { + return ( + next.edgeVoice !== prev.edgeVoice || next.rate !== prev.rate || next.pitch !== prev.pitch + ); + } + return false; +} diff --git a/packages/core/src/tts/tts-players.test.ts b/packages/core/src/tts/tts-players.test.ts new file mode 100644 index 00000000..fb0fcac9 --- /dev/null +++ b/packages/core/src/tts/tts-players.test.ts @@ -0,0 +1,141 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import { DEFAULT_TTS_CONFIG, type TTSConfig } from "./types"; + +// fetchEdgeTTSAudio always resolves immediately so the consumer loop reaches decodeAndSchedule quickly; +// timing is gated by the decodeAudioData mock below instead. +vi.mock("./edge-tts", () => ({ + fetchEdgeTTSAudio: vi.fn(async () => new ArrayBuffer(8)), +})); + +const { EdgeTTSPlayer } = await import("./tts-players"); + +// Every decodeAudioData call stores its resolver so a test can decide when a given run's decode completes. +let decodeResolvers: Array<(buf: unknown) => void>; + +function installAudioMock() { + decodeResolvers = []; + class MockAudioContext { + state = "running"; + currentTime = 0; + destination = {}; + createGain() { + return { connect: vi.fn(), gain: { value: 1 } }; + } + createBufferSource() { + return { buffer: null, connect: vi.fn(), start: vi.fn() }; + } + decodeAudioData() { + return new Promise((resolve) => { + decodeResolvers.push(resolve); + }); + } + resume() { + return Promise.resolve(); + } + suspend() { + return Promise.resolve(); + } + close() { + return Promise.resolve(); + } + } + (globalThis as unknown as { AudioContext: unknown }).AudioContext = MockAudioContext; +} + +// Yield to the microtask queue repeatedly so the consumer loop suspended on an await advances to its next gate. +const flush = async () => { + for (let i = 0; i < 20; i++) await Promise.resolve(); +}; + +const cfg: TTSConfig = { ...DEFAULT_TTS_CONFIG, engine: "edge" }; + +describe("EdgeTTSPlayer — per-run 
runId isolation (#372 reentrancy slice)", () => { + beforeEach(() => { + vi.useFakeTimers(); + installAudioMock(); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.clearAllMocks(); + }); + + it("被取代的旧 run 解码完成后不再触发 onChunkChange", async () => { + const player = new EdgeTTSPlayer(); + const onChunk = vi.fn(); + player.onChunkChange = onChunk; + + // Run A: parks at decodeAudioData (resolver[0]). + player.speak(["a0"], cfg); + await flush(); + expect(decodeResolvers.length).toBe(1); + + // Run B supersedes run A on the same instance; its decode (resolver[1]) stays pending, so run B never fires onChunkChange. + player.speak(["b0"], cfg); + await flush(); + expect(decodeResolvers.length).toBe(2); + + // Only now, after run A has been superseded, let its decode complete. + decodeResolvers[0]({ duration: 1 }); + await flush(); + + // The old run must be fully dead: no progress callback may leak. + expect(onChunk).not.toHaveBeenCalled(); + }); + + it("stop() 后旧 run 的在途解码完成不再触发 onChunkChange 或排程音频", async () => { + const player = new EdgeTTSPlayer(); + const onChunk = vi.fn(); + player.onChunkChange = onChunk; + + // The run parks at decodeAudioData (resolver[0]). + player.speak(["a0"], cfg); + await flush(); + expect(decodeResolvers.length).toBe(1); + + // stop() does not bump runId; it only sets _playing=false. + player.stop(); + await flush(); + + // Let the decode complete only after stop(): the continuation must bail at its guard via _playing=false. + decodeResolvers[0]({ duration: 1 }); + await flush(); + + expect(onChunk).not.toHaveBeenCalled(); + }); + + it("resume() 在重入下 reject 不会从 speak() 抛出未处理拒绝", async () => { + class SuspendedRejectingCtx { + state = "suspended"; + currentTime = 0; + destination = {}; + createGain() { + return { connect: vi.fn(), gain: { value: 1 } }; + } + createBufferSource() { + return { buffer: null, connect: vi.fn(), start: vi.fn() }; + } + decodeAudioData() { + return new Promise(() => {}); // never resolves + } + resume() { + return Promise.reject(new Error("InvalidStateError")); + } + suspend() { + return Promise.resolve(); + } + close() { + return Promise.resolve(); + } + } + (globalThis as unknown as { AudioContext: unknown }).AudioContext = 
SuspendedRejectingCtx; + + const player = new EdgeTTSPlayer(); + const p1 = player.speak(["a0"], cfg); // parks at await resume() (which rejects) + player.speak(["b0"], cfg); // supersedes run A, bumping runId + await flush(); + await expect(p1).resolves.toBeUndefined(); // run A returns cleanly without rejecting + player.stop(); + }); +}); diff --git a/packages/core/src/tts/tts-players.ts b/packages/core/src/tts/tts-players.ts index bac33ea5..afd070e7 100644 --- a/packages/core/src/tts/tts-players.ts +++ b/packages/core/src/tts/tts-players.ts @@ -438,7 +438,6 @@ export class EdgeTTSPlayer implements ITTSPlayer { private chunks: string[] = []; private _playing = false; private _paused = false; - private aborted = false; private hasAudioData = false; private playingNotified = false; private checkEndTimer: ReturnType | null = null; @@ -448,6 +447,9 @@ export class EdgeTTSPlayer implements ITTSPlayer { private producerWake: (() => void) | null = null; private chunkStartTimers = new Set>(); private pausedAt = 0; // Date.now() when suspended (wall-clock ms) + /** Monotonic per-run token, bumped on every speak() to invalidate the previous + * run's in-flight async continuations (mirrors DashScopeTTSPlayer). */ + private runId = 0; private static readonly BUFFER_SIZE = 4; onStateChange?: (state: "playing" | "paused" | "stopped") => void; @@ -461,11 +463,21 @@ export class EdgeTTSPlayer implements ITTSPlayer { return this._paused; } + /** Clear the producer's wake resolver. Extracted into a method so callers' + * control-flow analysis keeps producerWake's declared (() => void) | null + * type — runProducer reassigns it across an un-awaited call TS can't track. */ + private resetProducerWake() { + this.producerWake = null; + } + async speak(text: string | string[], config: TTSConfig) { - this.aborted = true; + // Invalidate any in-flight run immediately: its captured myRun no longer + // equals this.runId, so every continuation/timer below bails on its guard. 
+ const myRun = ++this.runId; this.cleanupAudio(); this.fetchBuffer.clear(); this.producerWake?.(); + this.resetProducerWake(); if (this.checkEndTimer) { clearInterval(this.checkEndTimer); this.checkEndTimer = null; @@ -474,7 +486,6 @@ export class EdgeTTSPlayer implements ITTSPlayer { this.chunks = Array.isArray(text) ? text.filter(Boolean) : splitIntoChunks(text, 800); this._playing = true; this._paused = false; - this.aborted = false; this.allChunksDone = false; this.hasAudioData = false; this.playingNotified = false; @@ -486,10 +497,15 @@ export class EdgeTTSPlayer implements ITTSPlayer { this.scheduledEnd = 0; if (this.audioCtx.state === "suspended") { - await this.audioCtx.resume(); + // Under reentrancy (synchronous stop()+speak()) a successor run may close this ctx during the await, + // making resume() reject; swallow it, since the guard below discards this run anyway. + await this.audioCtx.resume().catch(() => {}); } + // A newer run may have superseded us, or stop() may have cleared _playing, during the resume() await. + if (myRun !== this.runId || !this._playing) return; this.checkEndTimer = setInterval(() => { + if (myRun !== this.runId) return; if (!this._playing || this._paused) return; // Also guard against the AudioContext being auto-suspended by the OS // (e.g. iOS background / lock-screen) without us explicitly pausing. 
@@ -513,12 +529,12 @@ export class EdgeTTSPlayer implements ITTSPlayer { this.producerIndex = 0; this.fetchBuffer.clear(); - this.runProducer(base); + this.runProducer(base, myRun); const prewarmCount = Math.min(EdgeTTSPlayer.BUFFER_SIZE, this.chunks.length); for (let p = 0; p < prewarmCount; p++) { if (this.fetchBuffer.has(p)) continue; - if (!this._playing || this.aborted) return; + if (!this._playing || myRun !== this.runId) return; const promise = fetchEdgeTTSAudio({ text: this.chunks[p], ...base }); promise.catch(() => {}); this.fetchBuffer.set(p, promise); @@ -526,12 +542,15 @@ export class EdgeTTSPlayer implements ITTSPlayer { } for (let i = 0; i < this.chunks.length; i++) { - if (!this._playing || this.aborted) return; + if (!this._playing || myRun !== this.runId) return; try { - const audioData = await this.waitForChunk(i); - if (!this._playing || this.aborted) return; - await this.decodeAndSchedule(audioData, i); + const audioData = await this.waitForChunk(i, myRun); + if (!this._playing || myRun !== this.runId) return; + await this.decodeAndSchedule(audioData, i, myRun); + // Old run resuming here must not delete/wake the new run's buffer. 
+ if (!this._playing || myRun !== this.runId) return; } catch (err) { + if (myRun !== this.runId) return; if ((err as Error)?.message === "aborted") return; console.error("[Edge TTS] chunk error:", err); } @@ -540,22 +559,28 @@ export class EdgeTTSPlayer implements ITTSPlayer { this.producerWake?.(); } + if (myRun !== this.runId) return; this.allChunksDone = true; } - private async runProducer(base: { voice: string; lang: string; rate: number; pitch: number }) { + private async runProducer( + base: { voice: string; lang: string; rate: number; pitch: number }, + myRun: number, + ) { while (this.producerIndex < this.chunks.length) { - if (!this._playing || this.aborted) return; + if (!this._playing || myRun !== this.runId) return; while (this.fetchBuffer.size >= EdgeTTSPlayer.BUFFER_SIZE) { - if (!this._playing || this.aborted) return; + if (!this._playing || myRun !== this.runId) return; await new Promise((resolve) => { this.producerWake = resolve; }); + // Old producer resuming here must not clobber the new run's producerWake. 
+ if (myRun !== this.runId) return; this.producerWake = null; } - if (!this._playing || this.aborted) return; + if (!this._playing || myRun !== this.runId) return; const idx = this.producerIndex++; const promise = fetchEdgeTTSAudio({ text: this.chunks[idx], ...base }); @@ -564,9 +589,9 @@ export class EdgeTTSPlayer implements ITTSPlayer { } } - private async waitForChunk(index: number): Promise { + private async waitForChunk(index: number, myRun: number): Promise { while (!this.fetchBuffer.has(index)) { - if (!this._playing || this.aborted) { + if (!this._playing || myRun !== this.runId) { throw new Error("aborted"); } await new Promise((r) => setTimeout(r, 50)); @@ -574,22 +599,30 @@ export class EdgeTTSPlayer implements ITTSPlayer { return this.fetchBuffer.get(index)!; } - private async decodeAndSchedule(mp3Data: ArrayBuffer, index: number): Promise { - if (!this.audioCtx || !this.gainNode || !this._playing || this.aborted) return; + private async decodeAndSchedule( + mp3Data: ArrayBuffer, + index: number, + myRun: number, + ): Promise { + const ctx = this.audioCtx; + const gain = this.gainNode; + if (!ctx || !gain || !this._playing || myRun !== this.runId) return; - const audioBuffer = await this.audioCtx.decodeAudioData(mp3Data.slice(0)); - if (!this._playing || this.aborted || !this.audioCtx || !this.gainNode) return; + const audioBuffer = await ctx.decodeAudioData(mp3Data.slice(0)); + // Bail if superseded, or if a new run swapped in a different AudioContext — + // never schedule into a ctx that isn't this run's. 
+ if (!this._playing || myRun !== this.runId || this.audioCtx !== ctx || !this.gainNode) return; - const source = this.audioCtx.createBufferSource(); + const source = ctx.createBufferSource(); source.buffer = audioBuffer; - source.connect(this.gainNode); + source.connect(gain); - const startAt = Math.max(this.audioCtx.currentTime, this.scheduledEnd); + const startAt = Math.max(ctx.currentTime, this.scheduledEnd); const notifyChunkStart = () => { - if (!this._playing || this.aborted) return; + if (!this._playing || myRun !== this.runId) return; this.onChunkChange?.(index, this.chunks.length); }; - const startDelayMs = Math.max(0, (startAt - this.audioCtx.currentTime) * 1000); + const startDelayMs = Math.max(0, (startAt - ctx.currentTime) * 1000); if (startDelayMs <= 16) { notifyChunkStart(); } else { @@ -650,7 +683,6 @@ export class EdgeTTSPlayer implements ITTSPlayer { } stop() { - this.aborted = true; if (this.checkEndTimer) { clearInterval(this.checkEndTimer); this.checkEndTimer = null; @@ -658,6 +690,7 @@ export class EdgeTTSPlayer implements ITTSPlayer { this.cleanupAudio(); this.fetchBuffer.clear(); this.producerWake?.(); + this.producerWake = null; this.chunks = []; this._playing = false; this._paused = false;