From 59cdb869cc236f2b75862ff1363b82b2ee237ddb Mon Sep 17 00:00:00 2001 From: Tomasz Mazur <47872060+AHGIJMKLKKZNPJKQR@users.noreply.github.com> Date: Fri, 12 Dec 2025 12:26:45 +0100 Subject: [PATCH 1/8] Add Gemini integration with optional dependencies --- fishjam/__init__.py | 4 +- fishjam/integrations/__init__.py | 0 fishjam/integrations/gemini.py | 77 ++++++++++++++++++++++++++++++++ fishjam/version.py | 7 +++ pyproject.toml | 12 +++-- uv.lock | 7 +++ 6 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 fishjam/integrations/__init__.py create mode 100644 fishjam/integrations/gemini.py create mode 100644 fishjam/version.py diff --git a/fishjam/__init__.py b/fishjam/__init__.py index 3778e85..3eb8add 100644 --- a/fishjam/__init__.py +++ b/fishjam/__init__.py @@ -8,7 +8,7 @@ # pylint: disable=locally-disabled, no-name-in-module, import-error # Exceptions and Server Messages -from fishjam import agent, errors, events, peer, room +from fishjam import agent, errors, events, peer, room, version from fishjam._openapi_client.models import PeerMetadata # API @@ -24,6 +24,8 @@ RoomOptions, ) +__version__ = version.__version__ + __all__ = [ "FishjamClient", "FishjamNotifier", diff --git a/fishjam/integrations/__init__.py b/fishjam/integrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fishjam/integrations/gemini.py b/fishjam/integrations/gemini.py new file mode 100644 index 0000000..987a354 --- /dev/null +++ b/fishjam/integrations/gemini.py @@ -0,0 +1,77 @@ +try: + from google import genai + from google.auth.credentials import Credentials + from google.genai import types + from google.genai.client import DebugConfig +except ImportError: + raise ImportError( + "To use the Fishjam Gemini integration, you need to import the `gemini` extra." + "Install it with `pip install 'fishjam-server-sdk[gemini]'`" + ) + +from functools import singledispatch +from typing import Optional, Union + +from fishjam import AgentOutputOptions +from fishjam.version import get_version + + +def _get_headers(): + return {"x-goog-api-client": f"fishjam-python-server-sdk/{get_version()}"} + + +@singledispatch +def _add_fishjam_header( + http_options: Optional[Union[types.HttpOptions, types.HttpOptionsDict]], +) -> Union[types.HttpOptions, types.HttpOptionsDict]: ... + + +@_add_fishjam_header.register +def _(http_options: types.HttpOptions) -> types.HttpOptions: + http_options.headers = (http_options.headers or {}) | _get_headers() + return http_options + + +@_add_fishjam_header.register +def _(http_options: types.HttpOptionsDict) -> types.HttpOptionsDict: + headers = (http_options.get("headers") or {}) | _get_headers() + return http_options | {"headers": headers} + + +@_add_fishjam_header.register +def _(_http_options: None) -> types.HttpOptionsDict: + return {"headers": _get_headers()} + + +class _GeminiIntegration: + def create_client( + self, + vertexai: Optional[bool] = None, + api_key: Optional[str] = None, + credentials: Optional[Credentials] = None, + project: Optional[str] = None, + location: Optional[str] = None, + debug_config: Optional[DebugConfig] = None, + http_options: Optional[Union[types.HttpOptions, types.HttpOptionsDict]] = None, + ): + full_http_options = _add_fishjam_header(http_options) + + return genai.Client( + vertexai=vertexai, + api_key=api_key, + credentials=credentials, + project=project, + location=location, + debug_config=debug_config, + http_options=full_http_options, + ) + + @property + def GeminiInputAudioSettings(self) -> AgentOutputOptions: + return AgentOutputOptions( + audio_format="pcm16", + audio_sample_rate=16_000, + ) + + +GeminiIntegration = _GeminiIntegration() diff --git a/fishjam/version.py b/fishjam/version.py new file mode 100644 index 0000000..1f076a2 --- /dev/null +++ b/fishjam/version.py @@ -0,0 +1,7 @@ +from importlib.metadata import version + +__version__ = version("fishjam-server-sdk") + + +def get_version(): + return __version__ diff --git a/pyproject.toml b/pyproject.toml index aa8c356..f1c1669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,9 @@ generate_docusaurus = "scripts:generate_docusaurus" update_client = "scripts:update_client" room_manager = "scripts:start_room_manager" +[project.optional-dependencies] +gemini = ["google-genai>=1.43.0"] + [dependency-groups] dev = [ "betterproto[compiler]== 2.0.0b6", @@ -55,7 +58,12 @@ test = [ default-groups = ["dev", "test"] [tool.uv.workspace] -members = ["examples/transcription", ".", "examples/poet_chat", "examples/selective_subscription"] +members = [ + "examples/transcription", + ".", + "examples/poet_chat", + "examples/selective_subscription", +] [tool.hatch.build.targets.sdist] include = ["fishjam"] @@ -86,8 +94,6 @@ convention = "google" "scripts.py" = ["D"] - - [tool.pytest.ini_options] markers = [ "file_component_sources: Tests requiring files uploaded for File Component", diff --git a/uv.lock b/uv.lock index 377ea3f..a946687 100644 --- a/uv.lock +++ b/uv.lock @@ -308,6 +308,11 @@ dependencies = [ { name = "websockets" }, ] +[package.optional-dependencies] +gemini = [ + { name = "google-genai" }, +] + [package.dev-dependencies] dev = [ { name = "betterproto", extra = ["compiler"] }, @@ -332,11 +337,13 @@ requires-dist = [ { name = "attrs", specifier = ">=21.3.0" }, { name = "betterproto", specifier = "==2.0.0b6" }, { name = "flask-cors", specifier = ">=6.0.1,<7" }, + { name = "google-genai", marker = "extra == 'gemini'", specifier = ">=1.43.0" }, { name = "httpx", specifier = ">=0.28.0,<0.29.0" }, { name = "python-dateutil", specifier = ">=2.8.2,<3" }, { name = "urllib3", specifier = ">=1.25.3,<2" }, { name = "websockets", specifier = "~=15.0" }, ] +provides-extras = ["gemini"] [package.metadata.requires-dev] dev = [ From d087b13d54d4f0534145ea8f02d88eb2be864c62 Mon Sep 17 00:00:00 2001 From: Tomasz Mazur <47872060+AHGIJMKLKKZNPJKQR@users.noreply.github.com> Date: Fri, 12 Dec 2025 14:08:13 +0100 Subject: [PATCH 2/8] Change exports --- fishjam/__init__.py | 4 +++- fishjam/integrations/gemini.py | 17 ++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fishjam/__init__.py b/fishjam/__init__.py index 3eb8add..ed2ca4b 100644 --- a/fishjam/__init__.py +++ b/fishjam/__init__.py @@ -8,7 +8,7 @@ # pylint: disable=locally-disabled, no-name-in-module, import-error # Exceptions and Server Messages -from fishjam import agent, errors, events, peer, room, version +from fishjam import agent, errors, events, integrations, peer, room, version from fishjam._openapi_client.models import PeerMetadata # API @@ -42,6 +42,8 @@ "room", "peer", "agent", + "integrations", ] + __docformat__ = "restructuredtext" diff --git a/fishjam/integrations/gemini.py b/fishjam/integrations/gemini.py index 987a354..1d21aaf 100644 --- a/fishjam/integrations/gemini.py +++ b/fishjam/integrations/gemini.py @@ -5,7 +5,7 @@ from google.genai.client import DebugConfig except ImportError: raise ImportError( - "To use the Fishjam Gemini integration, you need to import the `gemini` extra." + "To use the Fishjam Gemini integration, you need to import the `gemini` extra. " "Install it with `pip install 'fishjam-server-sdk[gemini]'`" ) @@ -54,6 +54,13 @@ def create_client( debug_config: Optional[DebugConfig] = None, http_options: Optional[Union[types.HttpOptions, types.HttpOptionsDict]] = None, ): + """Creates and configures a Fishjam-compatible Google GenAI Client. + + See `genai.Client` for configuration options. + + Returns: + genai.Client: An instantiated and configured Gemini client. + """ full_http_options = _add_fishjam_header(http_options) return genai.Client( @@ -68,6 +75,13 @@ def create_client( @property def GeminiInputAudioSettings(self) -> AgentOutputOptions: + """Audio configuration required for Gemini input. + + Gemini requires PCM16 audio at 16,000 Hz for correct processing. + + Returns: + AgentOutputOptions: Agent options compatible with the Gemini Live API. + """ return AgentOutputOptions( audio_format="pcm16", audio_sample_rate=16_000, @@ -75,3 +89,4 @@ def GeminiInputAudioSettings(self) -> AgentOutputOptions: GeminiIntegration = _GeminiIntegration() +"""Integration with the Gemini Live API.""" From 69ad3514d79779b6886fa4b1728a45e7d2b8bd14 Mon Sep 17 00:00:00 2001 From: Tomasz Mazur <47872060+AHGIJMKLKKZNPJKQR@users.noreply.github.com> Date: Fri, 12 Dec 2025 14:30:22 +0100 Subject: [PATCH 3/8] Add tests --- fishjam/integrations/gemini.py | 20 +++--- tests/test_gemini.py | 116 +++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 9 deletions(-) create mode 100644 tests/test_gemini.py diff --git a/fishjam/integrations/gemini.py b/fishjam/integrations/gemini.py index 1d21aaf..6d233d4 100644 --- a/fishjam/integrations/gemini.py +++ b/fishjam/integrations/gemini.py @@ -9,7 +9,6 @@ "Install it with `pip install 'fishjam-server-sdk[gemini]'`" ) -from functools import singledispatch from typing import Optional, Union from fishjam import AgentOutputOptions @@ -20,26 +19,29 @@ def _get_headers(): return {"x-goog-api-client": f"fishjam-python-server-sdk/{get_version()}"} -@singledispatch def _add_fishjam_header( http_options: Optional[Union[types.HttpOptions, types.HttpOptionsDict]], -) -> Union[types.HttpOptions, types.HttpOptionsDict]: ... +) -> Union[types.HttpOptions, types.HttpOptionsDict]: + if http_options is None: + return _add_fishjam_header_none() + if isinstance(http_options, types.HttpOptions): + return _add_fishjam_header_object(http_options) + return _add_fishjam_header_dict(http_options) -@_add_fishjam_header.register -def _(http_options: types.HttpOptions) -> types.HttpOptions: +def _add_fishjam_header_object(http_options: types.HttpOptions) -> types.HttpOptions: http_options.headers = (http_options.headers or {}) | _get_headers() return http_options -@_add_fishjam_header.register -def _(http_options: types.HttpOptionsDict) -> types.HttpOptionsDict: +def _add_fishjam_header_dict( + http_options: types.HttpOptionsDict, +) -> types.HttpOptionsDict: headers = (http_options.get("headers") or {}) | _get_headers() return http_options | {"headers": headers} -@_add_fishjam_header.register -def _(_http_options: None) -> types.HttpOptionsDict: +def _add_fishjam_header_none() -> types.HttpOptionsDict: return {"headers": _get_headers()} diff --git a/tests/test_gemini.py b/tests/test_gemini.py new file mode 100644 index 0000000..da55498 --- /dev/null +++ b/tests/test_gemini.py @@ -0,0 +1,116 @@ +from unittest.mock import MagicMock, patch + +import pytest +from google.genai import types + +from fishjam.integrations.gemini import GeminiIntegration +from fishjam.version import get_version + + +@pytest.fixture +def version(): + return get_version() + + +@patch("google.genai.Client") +def test_create_client_passes_all_args(mock_client_cls: MagicMock, version: str): + dummy_credentials = MagicMock() + dummy_debug_config = MagicMock() + + GeminiIntegration.create_client( + vertexai=True, + api_key="test-key", + credentials=dummy_credentials, + project="my-project", + location="us-central1", + debug_config=dummy_debug_config, + ) + + mock_client_cls.assert_called_once() + + kwargs = mock_client_cls.call_args.kwargs + + assert kwargs["vertexai"] is True + assert kwargs["api_key"] == "test-key" + assert kwargs["credentials"] is dummy_credentials + assert kwargs["project"] == "my-project" + assert kwargs["location"] == "us-central1" + assert kwargs["debug_config"] is dummy_debug_config + + assert kwargs["http_options"] == { + "headers": {"x-goog-api-client": f"fishjam-python-server-sdk/{version}"} + } + + +@patch("google.genai.Client") +def test_create_client_with_dict_options_no_headers( + mock_client_cls: MagicMock, version: str +): + GeminiIntegration.create_client(http_options={"timeout": 30}) + + mock_client_cls.assert_called_once() + + assert mock_client_cls.call_args.kwargs["http_options"] == { + "timeout": 30, + "headers": {"x-goog-api-client": f"fishjam-python-server-sdk/{version}"}, + } + + +@patch("google.genai.Client") +def test_create_client_with_dict_options_existing_headers( + mock_client_cls: MagicMock, version: str +): + GeminiIntegration.create_client( + http_options={ + "headers": { + "existing-header": "value", + "x-goog-api-client": "other", + } + } + ) + + mock_client_cls.assert_called_once() + + assert mock_client_cls.call_args.kwargs["http_options"] == { + "headers": { + "existing-header": "value", + "x-goog-api-client": f"fishjam-python-server-sdk/{version}", + }, + } + + +@patch("google.genai.Client") +def test_create_client_with_object_options(mock_client_cls: MagicMock, version: str): + http_options = types.HttpOptions() + + GeminiIntegration.create_client(http_options=http_options) + + mock_client_cls.assert_called_once() + + # Verify the object passed has the correct headers set + actual_options = mock_client_cls.call_args.kwargs["http_options"] + assert actual_options.headers == { + "x-goog-api-client": f"fishjam-python-server-sdk/{version}" + } + + +@patch("google.genai.Client") +def test_create_client_with_object_options_existing_headers( + mock_client_cls: MagicMock, version: str +): + http_options = types.HttpOptions( + headers={ + "user-header": "123", + "x-goog-api-client": "other", + } + ) + + GeminiIntegration.create_client(http_options=http_options) + + mock_client_cls.assert_called_once() + + actual_options = mock_client_cls.call_args.kwargs["http_options"] + assert actual_options.headers == { + "user-header": "123", + "x-goog-api-client": f"fishjam-python-server-sdk/{version}", + } From df45a06219f1c1ca137f73f7f2558aaee228dd40 Mon Sep 17 00:00:00 2001 From: Tomasz Mazur <47872060+AHGIJMKLKKZNPJKQR@users.noreply.github.com> Date: Fri, 12 Dec 2025 14:49:46 +0100 Subject: [PATCH 4/8] Update example --- examples/transcription/transcription/config.py | 4 ++-- examples/transcription/transcription/room.py | 8 ++++++-- examples/transcription/transcription/transcription.py | 5 +++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/examples/transcription/transcription/config.py b/examples/transcription/transcription/config.py index 827ce37..cb0c13d 100644 --- a/examples/transcription/transcription/config.py +++ b/examples/transcription/transcription/config.py @@ -4,8 +4,8 @@ FISHJAM_ID = os.getenv("FISHJAM_ID", "") FISHJAM_TOKEN = os.environ["FISHJAM_MANAGEMENT_TOKEN"] -TRANSCRIPTION_MODEL = "gemini-live-2.5-flash-preview" +TRANSCRIPTION_MODEL = "gemini-2.5-flash-native-audio-preview-09-2025" TRANSCRIPTION_CONFIG = LiveConnectConfig( - response_modalities=[Modality.TEXT], + response_modalities=[Modality.AUDIO], input_audio_transcription=AudioTranscriptionConfig(), ) diff --git a/examples/transcription/transcription/room.py b/examples/transcription/transcription/room.py index 4a02ae1..6ee6b9b 100644 --- a/examples/transcription/transcription/room.py +++ b/examples/transcription/transcription/room.py @@ -1,5 +1,6 @@ -from fishjam import FishjamClient, Room +from fishjam import AgentOptions, FishjamClient, Room from fishjam.errors import NotFoundError +from fishjam.integrations.gemini import GeminiIntegration from transcription.worker import BackgroundWorker from .agent import TranscriptionAgent @@ -27,7 +28,10 @@ def _create_room(self): def _create_agent(self): self.agent = TranscriptionAgent( self.room.id, - fishjam.create_agent(self.room.id), + fishjam.create_agent( + self.room.id, + AgentOptions(output=GeminiIntegration.GeminiInputAudioSettings) + ), self._worker, ) diff --git a/examples/transcription/transcription/transcription.py b/examples/transcription/transcription/transcription.py index ee8e72d..daf539f 100644 --- a/examples/transcription/transcription/transcription.py +++ b/examples/transcription/transcription/transcription.py @@ -1,16 +1,17 @@ from asyncio import Event, Queue, TaskGroup from typing import Callable -from google import genai from google.genai.live import AsyncSession from google.genai.types import Blob +from fishjam.integrations.gemini import GeminiIntegration + from .config import TRANSCRIPTION_CONFIG, TRANSCRIPTION_MODEL class TranscriptionSession: def __init__(self, on_text: Callable[[str], None]): - self._gemini = genai.Client() + self._gemini = GeminiIntegration.create_client() self._audio_queue = Queue[bytes]() self._end_event = Event() self._model = TRANSCRIPTION_MODEL From 5f34c37d91861cfccdb2add5613c522bec53ada0 Mon Sep 17 00:00:00 2001 From: Tomasz Mazur <47872060+AHGIJMKLKKZNPJKQR@users.noreply.github.com> Date: Fri, 12 Dec 2025 14:52:05 +0100 Subject: [PATCH 5/8] Formatting --- examples/transcription/transcription/room.py | 2 +- fishjam/integrations/gemini.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/transcription/transcription/room.py b/examples/transcription/transcription/room.py index 6ee6b9b..d5fd571 100644 --- a/examples/transcription/transcription/room.py +++ b/examples/transcription/transcription/room.py @@ -30,7 +30,7 @@ def _create_agent(self): self.room.id, fishjam.create_agent( self.room.id, - AgentOptions(output=GeminiIntegration.GeminiInputAudioSettings) + AgentOptions(output=GeminiIntegration.GEMINI_INPUT_AUDIO_SETTINGS), ), self._worker, ) diff --git a/fishjam/integrations/gemini.py b/fishjam/integrations/gemini.py index 6d233d4..bd9d24a 100644 --- a/fishjam/integrations/gemini.py +++ b/fishjam/integrations/gemini.py @@ -76,7 +76,7 @@ def create_client( ) @property - def GeminiInputAudioSettings(self) -> AgentOutputOptions: + def GEMINI_INPUT_AUDIO_SETTINGS(self) -> AgentOutputOptions: """Audio configuration required for Gemini input. Gemini requires PCM16 audio at 16,000 Hz for correct processing. From 00ba8a83caf6004763292ee29399297f213a3a83 Mon Sep 17 00:00:00 2001 From: Tomasz Mazur <47872060+AHGIJMKLKKZNPJKQR@users.noreply.github.com> Date: Fri, 12 Dec 2025 14:57:26 +0100 Subject: [PATCH 6/8] Install extras in tests --- tests/Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/Dockerfile b/tests/Dockerfile index 4a7c2a0..52c6349 100644 --- a/tests/Dockerfile +++ b/tests/Dockerfile @@ -17,8 +17,6 @@ COPY examples/selective_subscription/pyproject.toml ./examples/selective_subscri COPY uv.lock . -RUN uv sync --locked --no-install-project +RUN uv sync --locked --no-install-project --all-extras COPY . /app - -RUN uv sync --locked From a30b173867a08b915e6934c92567511f46c534b0 Mon Sep 17 00:00:00 2001 From: Tomasz Mazur <47872060+AHGIJMKLKKZNPJKQR@users.noreply.github.com> Date: Fri, 12 Dec 2025 15:05:29 +0100 Subject: [PATCH 7/8] Add mime type --- examples/transcription/transcription/transcription.py | 5 ++++- fishjam/integrations/gemini.py | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/transcription/transcription/transcription.py b/examples/transcription/transcription/transcription.py index daf539f..b71905d 100644 --- a/examples/transcription/transcription/transcription.py +++ b/examples/transcription/transcription/transcription.py @@ -44,7 +44,10 @@ async def _send_loop(self, session: AsyncSession): while True: audio_frame = await self._audio_queue.get() await session.send_realtime_input( - audio=Blob(data=audio_frame, mime_type="audio/pcm;rate=16000") + audio=Blob( + data=audio_frame, + mime_type=GeminiIntegration.GEMINI_AUDIO_MIME_TYPE, + ) ) async def _recv_loop(self, session: AsyncSession): diff --git a/fishjam/integrations/gemini.py b/fishjam/integrations/gemini.py index bd9d24a..d102972 100644 --- a/fishjam/integrations/gemini.py +++ b/fishjam/integrations/gemini.py @@ -89,6 +89,11 @@ def GEMINI_INPUT_AUDIO_SETTINGS(self) -> AgentOutputOptions: audio_sample_rate=16_000, ) + @property + def GEMINI_AUDIO_MIME_TYPE(self) -> str: + """The mime type for Gemini audio input.""" + return "audio/pcm;rate=16000" + GeminiIntegration = _GeminiIntegration() """Integration with the Gemini Live API.""" From a16ea8877cd42f571164d6b2fe76c77901514fe0 Mon Sep 17 00:00:00 2001 From: Tomasz Mazur <47872060+AHGIJMKLKKZNPJKQR@users.noreply.github.com> Date: Fri, 12 Dec 2025 15:13:44 +0100 Subject: [PATCH 8/8] Add output settings --- fishjam/integrations/gemini.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fishjam/integrations/gemini.py b/fishjam/integrations/gemini.py index d102972..7731452 100644 --- a/fishjam/integrations/gemini.py +++ b/fishjam/integrations/gemini.py @@ -12,6 +12,8 @@ from typing import Optional, Union from fishjam import AgentOutputOptions +from fishjam.agent import OutgoingAudioTrackOptions +from fishjam.events import TrackEncoding from fishjam.version import get_version @@ -79,7 +81,7 @@ def create_client( def GEMINI_INPUT_AUDIO_SETTINGS(self) -> AgentOutputOptions: """Audio configuration required for Gemini input. - Gemini requires PCM16 audio at 16,000 Hz for correct processing. + Gemini consumes PCM16 audio at 16,000 Hz. Returns: AgentOutputOptions: Agent options compatible with the Gemini Live API. @@ -89,6 +91,21 @@ def GEMINI_INPUT_AUDIO_SETTINGS(self) -> AgentOutputOptions: audio_sample_rate=16_000, ) + @property + def GEMINI_OUTPUT_AUDIO_SETTINGS(self) -> OutgoingAudioTrackOptions: + """Audio configuration for an agent's output track. + + Gemini produces PCM16 audio at 24,000 Hz. + + Returns: + OutgoingAudioTrackOptions: Track options compatible with the Gemini Live API + """ + return OutgoingAudioTrackOptions( + encoding=TrackEncoding.TRACK_ENCODING_PCM16, + sample_rate=24_000, + channels=1, + ) + @property def GEMINI_AUDIO_MIME_TYPE(self) -> str: """The mime type for Gemini audio input."""