diff --git a/py/packages/genkit/src/genkit/blocks/generate.py b/py/packages/genkit/src/genkit/blocks/generate.py
index 754af90d4e..e138ecd4a4 100644
--- a/py/packages/genkit/src/genkit/blocks/generate.py
+++ b/py/packages/genkit/src/genkit/blocks/generate.py
@@ -637,7 +637,11 @@ async def _resolve_tool_request(tool: Action, tool_request_part: ToolRequestPart
                 Part(
                     tool_request=tool_request_part.tool_request,
                     metadata={
-                        **(tool_request_part.metadata if tool_request_part.metadata else {}),
+                        **(
+                            tool_request_part.metadata.root
+                            if isinstance(tool_request_part.metadata, Metadata)
+                            else (tool_request_part.metadata or {})
+                        ),
                         'interrupt': (interrupt_error.metadata if interrupt_error.metadata else True),
                     },
                 ),
@@ -815,7 +819,7 @@ def _find_corresponding_tool_response(responses: list[ToolResponsePart], request
     """
     for p in responses:
         if p.tool_response.name == request.tool_request.name and p.tool_response.ref == request.tool_request.ref:
-            return p
+            return Part(root=p)
     return None
diff --git a/py/packages/genkit/src/genkit/blocks/tools.py b/py/packages/genkit/src/genkit/blocks/tools.py
index 8c8d0d12e7..d0e5613892 100644
--- a/py/packages/genkit/src/genkit/blocks/tools.py
+++ b/py/packages/genkit/src/genkit/blocks/tools.py
@@ -17,7 +17,7 @@
 from typing import Any
 
 from genkit.core.action import ActionRunContext
-from genkit.core.typing import Part, ToolRequestPart, ToolResponse
+from genkit.core.typing import Metadata, Part, ToolRequestPart, ToolResponse
 
 
 class ToolRunContext(ActionRunContext):
@@ -96,6 +96,13 @@ def tool_response(
     """
     # TODO: validate against tool schema
     tool_request = interrupt.root.tool_request if isinstance(interrupt, Part) else interrupt.tool_request
+
+    interrupt_metadata = True
+    if isinstance(metadata, Metadata):
+        interrupt_metadata = metadata.root
+    elif metadata:
+        interrupt_metadata = metadata
+
     return Part(
         tool_response=ToolResponse(
             name=tool_request.name,
@@ -103,6 +110,6 @@ def tool_response(
             output=response_data,
         ),
         metadata={
-            'interruptResponse': metadata if metadata else True,
+            'interruptResponse': interrupt_metadata,
         },
     )
diff --git a/py/plugins/google-genai/src/genkit/plugins/google_genai/__init__.py b/py/plugins/google-genai/src/genkit/plugins/google_genai/__init__.py
index 7aefb18cf2..1c67d97670 100644
--- a/py/plugins/google-genai/src/genkit/plugins/google_genai/__init__.py
+++ b/py/plugins/google-genai/src/genkit/plugins/google_genai/__init__.py
@@ -20,7 +20,12 @@
     GeminiEmbeddingModels,
     VertexEmbeddingModels,
 )
-from genkit.plugins.google_genai.models.gemini import GeminiConfigSchema, GoogleAIGeminiVersion, VertexAIGeminiVersion
+from genkit.plugins.google_genai.models.gemini import (
+    GeminiConfigSchema,
+    GeminiImageConfigSchema,
+    GoogleAIGeminiVersion,
+    VertexAIGeminiVersion,
+)
 from genkit.plugins.google_genai.models.imagen import ImagenVersion
 
 
@@ -45,5 +50,6 @@ def package_name() -> str:
     VertexAIGeminiVersion.__name__,
     EmbeddingTaskType.__name__,
     GeminiConfigSchema.__name__,
+    GeminiImageConfigSchema.__name__,
     ImagenVersion.__name__,
 ]
diff --git a/py/plugins/google-genai/src/genkit/plugins/google_genai/models/gemini.py b/py/plugins/google-genai/src/genkit/plugins/google_genai/models/gemini.py
index e1f4403f47..cb4fce0f4d 100644
--- a/py/plugins/google-genai/src/genkit/plugins/google_genai/models/gemini.py
+++ b/py/plugins/google-genai/src/genkit/plugins/google_genai/models/gemini.py
@@ -182,6 +182,10 @@ class GeminiConfigSchema(genai_types.GenerateContentConfig):
     code_execution: bool | None = None
     response_modalities: list[str] | None = None
+    thinking_config: dict[str, Any] | None = None
+    file_search: dict[str, Any] | None = None
+    url_context: dict[str, Any] | None = None
+    api_version: str | None = None
 
 
 class GeminiTtsConfigSchema(GeminiConfigSchema):
@@ -678,6 +682,11 @@ def _create_tool(self, tool: ToolDefinition) -> genai_types.Tool:
             Genai tool compatible with Gemini API.
         """
         params = self._convert_schema_property(tool.input_schema)
+        # No-arg tools: the Gemini API generally expects an OBJECT-typed parameters
+        # schema rather than None, so fall back to an empty object schema.
+        if not params:
+            params = genai_types.Schema(type=genai_types.Type.OBJECT, properties={})
+
         function = genai_types.FunctionDeclaration(
             name=tool.name,
             description=tool.description,
@@ -741,7 +750,7 @@ def _convert_schema_property(
         if schema_type == genai_types.Type.OBJECT:
             schema.properties = {}
-            properties = input_schema['properties']
+            properties = input_schema.get('properties', {})
             for key in properties:
                 nested_schema = self._convert_schema_property(properties[key], defs)
                 schema.properties[key] = nested_schema
@@ -844,13 +853,59 @@ async def generate(self, request: GenerateRequest, ctx: ActionRunContext) -> Gen
         if cached_content:
             request_cfg.cached_content = cached_content.name
 
+        client = self._client
+        # If the config specifies an api_version other than the default (e.g. 'v1alpha'),
+        # create a temporary client with that version, since api_version is a client-level setting.
+        api_version = None
+        if request.config:
+            api_version = getattr(request.config, 'api_version', None)
+            if not api_version and isinstance(request.config, dict):
+                api_version = request.config.get('api_version')
+
+        if api_version:
+            # TODO: Request a public API from the google-genai maintainers.
+            # There is currently no public way to read the configured api_key, project,
+            # or location from an existing Client instance, so we reach into the private
+            # _api_client to clone the configuration when overriding the api_version.
+            # This is brittle: if the library renames _api_client or _credentials,
+            # this code will break.
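+            # For example, a caller can opt a single request into the alpha API surface
+            # with `config={'api_version': 'v1alpha'}` (as the flows in
+            # py/samples/google-genai-hello do); only such requests pay the cost of the
+            # client clone below.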
+            api_client = self._client._api_client
+            kwargs = {
+                'vertexai': api_client.vertexai,
+                'http_options': {'api_version': api_version},
+            }
+            if api_client.vertexai:
+                # Vertex AI mode: requires project/location; api_key is not used here.
+                if api_client.project:
+                    kwargs['project'] = api_client.project
+                if api_client.location:
+                    kwargs['location'] = api_client.location
+                if api_client._credentials:
+                    kwargs['credentials'] = api_client._credentials
+                # Don't pass api_key in Vertex AI mode with credentials/project.
+            else:
+                # Google AI mode: authenticate with api_key and avoid passing
+                # project/location.
+                if api_client.api_key:
+                    kwargs['api_key'] = api_client.api_key
+                if api_client._credentials and not kwargs.get('api_key'):
+                    # Fall back to credentials when no api_key is configured
+                    # (rare for Google AI, but possible).
+                    kwargs['credentials'] = api_client._credentials
+
+            client = genai.Client(**kwargs)
+
         if ctx.is_streaming:
             response = await self._streaming_generate(
-                request_contents=request_contents, request_cfg=request_cfg, ctx=ctx, model_name=model_name
+                request_contents=request_contents,
+                request_cfg=request_cfg,
+                ctx=ctx,
+                model_name=model_name,
+                client=client,
             )
         else:
             response = await self._generate(
-                request_contents=request_contents, request_cfg=request_cfg, model_name=model_name
+                request_contents=request_contents, request_cfg=request_cfg, model_name=model_name, client=client
             )
 
         response.usage = self._create_usage_stats(request=request, response=response)
@@ -862,6 +917,7 @@ async def _generate(
         request_contents: list[genai_types.Content],
         request_cfg: genai_types.GenerateContentConfig,
         model_name: str,
+        client: genai.Client | None = None,
     ) -> GenerateResponse:
         """Call google-genai generate.
 
@@ -885,7 +941,8 @@
                     fallback=lambda _: '[!! failed to serialize !!]',
                 ),
             )
-            response = await self._client.aio.models.generate_content(
+            client = client or self._client
+            response = await client.aio.models.generate_content(
                 model=model_name, contents=request_contents, config=request_cfg
             )
             span.set_attribute('genkit:output', dump_json(response))
@@ -905,6 +962,7 @@ async def _streaming_generate(
         request_cfg: genai_types.GenerateContentConfig | None,
         ctx: ActionRunContext,
         model_name: str,
+        client: genai.Client | None = None,
     ) -> GenerateResponse:
         """Call google-genai generate for streaming.
@@ -926,7 +984,8 @@ async def _streaming_generate(
                     'model': model_name,
                 }),
             )
-            generator = self._client.aio.models.generate_content_stream(
+            client = client or self._client
+            generator = client.aio.models.generate_content_stream(
                 model=model_name, contents=request_contents, config=request_cfg
             )
             accumulated_content = []
@@ -989,7 +1048,11 @@ async def _build_messages(
                 continue
             content_parts: list[genai_types.Part] = []
             for p in msg.content:
-                content_parts.append(PartConverter.to_gemini(p))
+                converted = PartConverter.to_gemini(p)
+                if isinstance(converted, list):
+                    content_parts.extend(converted)
+                else:
+                    content_parts.append(converted)
             request_contents.append(genai_types.Content(parts=content_parts, role=msg.role))
 
             if msg.metadata and msg.metadata.get('cache'):
@@ -1050,7 +1113,19 @@ def _genkit_to_googleai_cfg(self, request: GenerateRequest) -> genai_types.Gener
             if request_config.code_execution:
                 tools.extend([genai_types.Tool(code_execution=genai_types.ToolCodeExecution())])
         elif isinstance(request_config, dict):
-            cfg = genai_types.GenerateContentConfig(**request_config)
+            if 'image_config' in request_config:
+                cfg = GeminiImageConfigSchema(**request_config)
+            elif 'speech_config' in request_config:
+                cfg = GeminiTtsConfigSchema(**request_config)
+            else:
+                cfg = GeminiConfigSchema(**request_config)
+
+            if isinstance(cfg, GeminiConfigSchema):
+                dumped_config = cfg.model_dump(exclude_none=True)
+                # Strip Genkit-only keys that genai_types.GenerateContentConfig does not accept.
+                for key in ['code_execution', 'file_search', 'url_context', 'api_version']:
+                    if key in dumped_config:
+                        del dumped_config[key]
+                cfg = genai_types.GenerateContentConfig(**dumped_config)
 
         if request.output:
             if not cfg:
diff --git a/py/plugins/google-genai/src/genkit/plugins/google_genai/models/utils.py b/py/plugins/google-genai/src/genkit/plugins/google_genai/models/utils.py
index ec2ab72ed1..76cabf75b3 100644
--- a/py/plugins/google-genai/src/genkit/plugins/google_genai/models/utils.py
+++ b/py/plugins/google-genai/src/genkit/plugins/google_genai/models/utils.py
@@ -90,6 +90,49 @@ def to_gemini(cls, part: Part) -> genai.types.Part:
                 thought_signature=cls._extract_thought_signature(part.root.metadata),
             )
         if isinstance(part.root, ToolResponsePart):
+            tool_output = part.root.tool_response.output
+            parts_to_return = []
+
+            # Check for a multimodal content structure: {content: [{media: ...}]}
+            if isinstance(tool_output, dict) and 'content' in tool_output:
+                content_list = tool_output['content']
+                if isinstance(content_list, list):
+                    # Copy so we don't mutate the caller's output; we only need to
+                    # split the media content out from the remaining fields.
+                    clean_output = tool_output.copy()
+                    clean_output.pop('content')
+
+                    # Heuristic: if media is found, extract it into separate parts.
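+                    # For instance, the sample's `screenshot` tool returns:
+                    #   {'output': 'success',
+                    #    'content': [{'media': {'url': 'data:image/png;base64,...',
+                    #                           'contentType': 'image/png'}}]}
+                    # and each media entry becomes its own inline_data part below.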
+                    has_media = False
+                    for item in content_list:
+                        if isinstance(item, dict) and 'media' in item:
+                            has_media = True
+                            media_info = item['media']
+                            url = media_info.get('url')
+                            content_type = media_info.get('contentType') or media_info.get('content_type')
+
+                            if url and url.startswith(cls.DATA):
+                                _, data_str = url.split(',', 1)
+                                data = base64.b64decode(data_str)
+                                parts_to_return.append(
+                                    genai.types.Part(inline_data=genai.types.Blob(mime_type=content_type, data=data))
+                                )
+
+                    if has_media:
+                        # Put the function response part first so it precedes the
+                        # extracted media parts.
+                        parts_to_return.insert(
+                            0,
+                            genai.types.Part(
+                                function_response=genai.types.FunctionResponse(
+                                    id=part.root.tool_response.ref,
+                                    name=part.root.tool_response.name.replace('/', '__'),
+                                    response=clean_output,
+                                )
+                            ),
+                        )
+                        return parts_to_return
+
+            # Default behavior for standard tool responses
             return genai.types.Part(
                 function_response=genai.types.FunctionResponse(
                     id=part.root.tool_response.ref,
@@ -167,7 +210,7 @@ def from_gemini(cls, part: genai.types.Part, ref: str | None = None) -> Part:
                     metadata=cls._encode_thought_signature(part.thought_signature),
                 )
             )
-        if part.text:
+        if part.text is not None:
             return Part(root=TextPart(text=part.text))
         if part.function_call:
             return Part(
diff --git a/py/samples/google-genai-hello/my_room.png b/py/samples/google-genai-hello/my_room.png
new file mode 100644
index 0000000000..4b54e0e932
Binary files /dev/null and b/py/samples/google-genai-hello/my_room.png differ
diff --git a/py/samples/google-genai-hello/palm_tree.png b/py/samples/google-genai-hello/palm_tree.png
new file mode 100644
index 0000000000..79388bf0f8
Binary files /dev/null and b/py/samples/google-genai-hello/palm_tree.png differ
diff --git a/py/samples/google-genai-hello/photo.jpg b/py/samples/google-genai-hello/photo.jpg
new file mode 100644
index 0000000000..fcf6dfa6ad
Binary files /dev/null and b/py/samples/google-genai-hello/photo.jpg differ
diff --git a/py/samples/google-genai-hello/src/main.py b/py/samples/google-genai-hello/src/main.py
index de7eb51d2f..a4e88a9c5f 100755
--- a/py/samples/google-genai-hello/src/main.py
+++ b/py/samples/google-genai-hello/src/main.py
@@ -43,11 +43,20 @@
 """
 
 import argparse
+import asyncio
+import base64
+import os
+import pathlib
+import sys
+from enum import Enum
+from typing import Annotated, Any
 
 import structlog
+from google import genai
+from google.genai import types as genai_types
 from pydantic import BaseModel, Field
 
 from genkit.ai import Document, Genkit, ToolRunContext, tool_response
+from genkit.core.action import ActionRunContext
 from genkit.plugins.evaluators import (
     GenkitEvaluators,
     GenkitMetricType,
@@ -58,12 +67,16 @@
 from genkit.plugins.google_genai import (
     EmbeddingTaskType,
     GeminiConfigSchema,
+    GeminiImageConfigSchema,
     GoogleAI,
 )
 from genkit.types import (
-    ActionRunContext,
+    GenerateRequest,
     GenerationCommonConfig,
+    Media,
+    MediaPart,
     Message,
+    Part,
     Role,
     TextPart,
 )
@@ -82,7 +95,7 @@
             ])
         ),
     ],
-    model='googleai/gemini-3-flash-preview',
+    model='googleai/gemini-flash-latest',
 )
@@ -93,16 +106,13 @@ class GablorkenInput(BaseModel):
 
 
 @ai.tool(name='gablorkenTool')
-def gablorken_tool(input_: GablorkenInput) -> int:
+def gablorken_tool(input_: GablorkenInput) -> dict[str, int]:
     """Calculate a gablorken.
 
-    Args:
-        input_: The input to calculate gablorken for.
-
     Returns:
         The calculated gablorken.
""" - return input_.value * 3 - 5 + return {'result': input_.value * 3 - 5} @ai.flow() @@ -160,7 +170,7 @@ async def simple_generate_with_interrupts(value: int) -> str: if len(response1.interrupts) == 0: return response1.text - tr = tool_response(response1.interrupts[0], 178) + tr = tool_response(response1.interrupts[0], {'output': 178}) response = await ai.generate( messages=response1.messages, tool_responses=[tr], @@ -193,8 +203,13 @@ async def say_hi(name: str): return resp.text +from typing import Annotated + +from pydantic import Field + + @ai.flow() -async def embed_docs(docs: list[str]): +async def embed_docs(docs: Annotated[list[str], Field(default=[''], description='List of texts to embed')] = ['']): """Generate an embedding for the words in a list. Args: @@ -324,14 +339,368 @@ async def generate_images(name: str, ctx): Returns: The generated response with a function. """ + result = await ai.generate( - model='googleai/gemini-2.5-flash-image', + model='googleai/gemini-3-flash-image-preview', prompt=f'tell me about {name} with photos', - config=GeminiConfigSchema(response_modalities=['text', 'image']).model_dump(exclude_none=True), + config=GeminiConfigSchema(response_modalities=['text', 'image'], api_version='v1alpha').model_dump( + exclude_none=True + ), ) return result +@ai.tool(name='screenshot') +def screenshot() -> dict: + """Takes a screenshot.""" + room_path = pathlib.Path(__file__).parent.parent / 'my_room.png' + with open(room_path, 'rb') as f: + room_b64 = base64.b64encode(f.read()).decode('utf-8') + + return { + 'output': 'success', + 'content': [{'media': {'url': f'data:image/png;base64,{room_b64}', 'contentType': 'image/png'}}], + } + + +@ai.flow() +async def multipart_tool_calling(): + """Multipart tool calling.""" + response = await ai.generate( + model='googleai/gemini-3-pro-preview', + tools=['screenshot'], + config=GenerationCommonConfig(temperature=1), + prompt="Tell me what I'm seeing on the screen.", + ) + return response.text + + +class ThinkingLevel(str, Enum): + LOW = 'LOW' + HIGH = 'HIGH' + + +@ai.flow() +async def thinking_level_pro(level: ThinkingLevel): + """Gemini 3.0 thinkingLevel config (Pro).""" + response = await ai.generate( + model='googleai/gemini-3-pro-preview', + prompt=( + 'Alice, Bob, and Carol each live in a different house on the ' + 'same street: red, green, and blue. The person who lives in the red house ' + 'owns a cat. Bob does not live in the green house. Carol owns a dog. The ' + 'green house is to the left of the red house. Alice does not own a cat. ' + 'The person in the blue house owns a fish. ' + 'Who lives in each house, and what pet do they own? Provide your ' + 'step-by-step reasoning.' + ), + config={ + 'thinking_config': { + 'include_thoughts': True, + 'thinking_level': level.value, + } + }, + ) + return response.text + + +class ThinkingLevelFlash(str, Enum): + MINIMAL = 'MINIMAL' + LOW = 'LOW' + MEDIUM = 'MEDIUM' + HIGH = 'HIGH' + + +@ai.flow() +async def thinking_level_flash(level: ThinkingLevelFlash): + """Gemini 3.0 thinkingLevel config (Flash).""" + response = await ai.generate( + model='googleai/gemini-3-flash-preview', + prompt=( + 'Alice, Bob, and Carol each live in a different house on the ' + 'same street: red, green, and blue. The person who lives in the red house ' + 'owns a cat. Bob does not live in the green house. Carol owns a dog. The ' + 'green house is to the left of the red house. Alice does not own a cat. ' + 'The person in the blue house owns a fish. 
+            'Who lives in each house, and what pet do they own? Provide your '
+            'step-by-step reasoning.'
+        ),
+        config={
+            'thinking_config': {
+                'include_thoughts': True,
+                'thinking_level': level.value,
+            }
+        },
+    )
+    return response.text
+
+
+@ai.flow()
+async def gemini_image_editing():
+    """Image editing with Gemini."""
+    plant_path = pathlib.Path(__file__).parent.parent / 'palm_tree.png'
+    room_path = pathlib.Path(__file__).parent.parent / 'my_room.png'
+
+    with open(plant_path, 'rb') as f:
+        plant_b64 = base64.b64encode(f.read()).decode('utf-8')
+    with open(room_path, 'rb') as f:
+        room_b64 = base64.b64encode(f.read()).decode('utf-8')
+
+    response = await ai.generate(
+        model='googleai/gemini-2.5-flash-image-preview',
+        prompt=[
+            TextPart(text='add the plant to my room'),
+            MediaPart(media=Media(url=f'data:image/png;base64,{plant_b64}')),
+            MediaPart(media=Media(url=f'data:image/png;base64,{room_b64}')),
+        ],
+        config=GeminiImageConfigSchema(
+            response_modalities=['TEXT', 'IMAGE'],
+            image_config={'aspect_ratio': '1:1'},
+            api_version='v1alpha',
+        ).model_dump(exclude_none=True),
+    )
+    for part in response.message.content:
+        if isinstance(part.root, MediaPart):
+            return part.root.media
+
+    return None
+
+
+@ai.flow()
+async def nano_banana_pro():
+    """Nano banana pro config."""
+    response = await ai.generate(
+        model='googleai/gemini-3-pro-image-preview',
+        prompt='Generate a picture of a sunset in the mountains by a lake',
+        config={
+            'response_modalities': ['TEXT', 'IMAGE'],
+            'image_config': {
+                'aspect_ratio': '21:9',
+                'image_size': '4K',
+            },
+            'api_version': 'v1alpha',
+        },
+    )
+    for part in response.message.content:
+        if isinstance(part.root, MediaPart):
+            return part.root.media
+    return response.media
+
+
+@ai.flow()
+async def photo_move_veo(_: Any, context: Any = None):
+    """An example of using a Veo 3 model to make a static photo move."""
+    # Locate the sample image, falling back to alternate paths.
+    room_path = pathlib.Path(__file__).parent.parent / 'my_room.png'
+    if not room_path.exists():
+        room_path = pathlib.Path('samples/google-genai-hello/my_room.png')
+    if not room_path.exists():
+        room_path = pathlib.Path('my_room.png')
+
+    encoded_image = ''
+    if room_path.exists():
+        with open(room_path, 'rb') as f:
+            encoded_image = base64.b64encode(f.read()).decode('utf-8')
+    else:
+        # Fall back to a dummy one-pixel PNG.
+        encoded_image = (
+            'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=='
+        )
+
+    api_key = os.environ.get('GEMINI_API_KEY') or os.environ.get('GOOGLE_GENAI_API_KEY')
+    if not api_key:
+        raise ValueError('GEMINI_API_KEY not set')
+
+    # Use v1alpha for Veo.
+    client = genai.Client(api_key=api_key, http_options={'api_version': 'v1alpha'})
+
+    # Report progress if a streaming context is available.
+    if context:
+        context.send_chunk('Starting generation with veo-3.0-generate-001...')
+
+    try:
+        operation = await client.aio.models.generate_videos(
+            model='veo-3.0-generate-001',
+            prompt='make the subject in the photo move',
+            image=genai_types.Image(image_bytes=base64.b64decode(encoded_image), mime_type='image/png'),
+            config={
+                # 'aspect_ratio': '9:16',
+            },
+        )
+
+        if not operation:
+            raise ValueError('Expected operation to be returned')
+
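+        # generate_videos returns a long-running operation: poll it until it completes,
+        # emitting a status chunk on each pass so streaming callers can follow progress.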
+        while not operation.done:
+            op_id = operation.name.split('/')[-1] if operation.name else 'unknown'
+            if context:
+                context.send_chunk(f'check status of operation {op_id}')
+
+            operation = await client.aio.operations.get(operation)
+            await asyncio.sleep(5)
+
+        if operation.error:
+            if context:
+                context.send_chunk(f'Error: {operation.error.message}')
+            raise ValueError(f'Failed to generate video: {operation.error.message}')
+
+        # Done: summarize the result, including the video URI when present.
+        result_info = 'Video generated successfully.'
+        if hasattr(operation, 'result') and operation.result:
+            if hasattr(operation.result, 'generated_videos') and operation.result.generated_videos:
+                vid = operation.result.generated_videos[0]
+                if vid.video and vid.video.uri:
+                    result_info += f' URI: {vid.video.uri}'
+
+        if context:
+            context.send_chunk(f'Done! {result_info}')
+
+        return operation
+
+    except Exception as e:
+        raise ValueError(f'Flow failed: {e}') from e
+
+
+@ai.flow()
+async def gemini_media_resolution():
+    """Media resolution."""
+    plant_path = pathlib.Path(__file__).parent.parent / 'palm_tree.png'
+    with open(plant_path, 'rb') as f:
+        plant_b64 = base64.b64encode(f.read()).decode('utf-8')
+    response = await ai.generate(
+        model='googleai/gemini-3-pro-preview',
+        prompt=[
+            TextPart(text='What is in this picture?'),
+            MediaPart(
+                media=Media(url=f'data:image/png;base64,{plant_b64}'),
+                metadata={'mediaResolution': {'level': 'MEDIA_RESOLUTION_HIGH'}},
+            ),
+        ],
+        config={'api_version': 'v1alpha'},
+    )
+    return response.text
+
+
+@ai.flow()
+async def search_grounding():
+    """Search grounding."""
+    response = await ai.generate(
+        model='googleai/gemini-3-flash-preview',
+        prompt='Who is Albert Einstein?',
+        config={'tools': [{'googleSearch': {}}], 'api_version': 'v1alpha'},
+    )
+    return response.text
+
+
+@ai.flow()
+async def url_context():
+    """Url context."""
+    response = await ai.generate(
+        model='googleai/gemini-3-flash-preview',
+        prompt='Compare the ingredients and cooking times from the recipes at https://www.foodnetwork.com/recipes/ina-garten/perfect-roast-chicken-recipe-1940592 and https://www.allrecipes.com/recipe/70679/simple-whole-roasted-chicken/',
+        config={'url_context': {}, 'api_version': 'v1alpha'},
+    )
+    return response.text
+
+
+@ai.flow()
+async def file_search():
+    """File Search."""
+    # TODO: add file search store
+    store_name = 'fileSearchStores/sample-store'
+    response = await ai.generate(
+        model='googleai/gemini-3-flash-preview',
+        prompt="What is the character's name in the story?",
+        config={
+            'file_search': {
+                'file_search_store_names': [store_name],
+                'metadata_filter': 'author=foo',
+            },
+            'api_version': 'v1alpha',
+        },
+    )
+    return response.text
+
+
+@ai.flow()
+async def multimodal_input():
+    """Multimodal input."""
+    photo_path = pathlib.Path(__file__).parent.parent / 'photo.jpg'
+    with open(photo_path, 'rb') as f:
+        photo_b64 = base64.b64encode(f.read()).decode('utf-8')
+
+    response = await ai.generate(
+        model='googleai/gemini-2.5-flash',
+        prompt=[
+            TextPart(text='describe this photo'),
+            MediaPart(media=Media(url=f'data:image/jpeg;base64,{photo_b64}', content_type='image/jpeg')),
+        ],
+    )
+    return response.text
+
+
+@ai.flow()
+async def youtube_videos():
+    """YouTube videos."""
+    response = await ai.generate(
+        model='googleai/gemini-3-flash-preview',
+        prompt=[
+            TextPart(text='transcribe this video'),
+            MediaPart(media=Media(url='https://www.youtube.com/watch?v=3p1P5grjXIQ', content_type='video/mp4')),
+        ],
+        config={'api_version': 'v1alpha'},
+    )
+    return response.text
+
+
+class WeatherInput(BaseModel):
+    """Input for getting weather."""
+
+    location: str = Field(description='The city and state, e.g. San Francisco, CA')
+
+
+@ai.tool(name='getWeather')
+def get_weather(input_: WeatherInput) -> dict:
+    """Used to get current weather for a location."""
+    return {
+        'location': input_.location,
+        'temperature_celsius': 21.5,
+        'conditions': 'cloudy',
+    }
+
+
+@ai.tool(name='celsiusToFahrenheit')
+def celsius_to_fahrenheit(celsius: float) -> float:
+    """Converts Celsius to Fahrenheit."""
+    return (celsius * 9) / 5 + 32
+
+
+@ai.flow()
+async def tool_calling(location: Annotated[str, Field(default='Paris, France')]):
+    """Tool calling with Gemini."""
+    response = await ai.generate(
+        model='googleai/gemini-2.5-flash',
+        tools=['getWeather', 'celsiusToFahrenheit'],
+        prompt=f"What's the weather in {location}? Convert the temperature to Fahrenheit.",
+        config=GenerationCommonConfig(temperature=1),
+    )
+    return response.text
+
+
 async def main() -> None:
     """Main function."""
     await logger.ainfo(await say_hi(', tell me a joke'))
diff --git a/py/samples/google-genai-hello/src/main_vertexai.py b/py/samples/google-genai-hello/src/main_vertexai.py
new file mode 100644
index 0000000000..9661566b07
--- /dev/null
+++ b/py/samples/google-genai-hello/src/main_vertexai.py
@@ -0,0 +1,262 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Hello Google GenAI Vertex AI sample."""
+
+import base64
+import pathlib
+from enum import Enum
+
+import structlog
+
+from genkit.ai import Genkit, Media, MediaPart, TextPart
+from genkit.plugins.google_genai import GeminiImageConfigSchema, VertexAI
+from genkit.types import GenerationCommonConfig
+
+logger = structlog.get_logger(__name__)
+
+
+ai = Genkit(
+    plugins=[
+        VertexAI(location='us-central1'),
+    ],
+    model='vertexai/gemini-2.5-flash',
+)
+
+
+class ThinkingLevel(str, Enum):
+    LOW = 'LOW'
+    HIGH = 'HIGH'
+
+
+@ai.flow()
+async def thinking_level_pro(level: ThinkingLevel):
+    """Gemini 3.0 thinkingLevel config (Pro)."""
+    response = await ai.generate(
+        model='vertexai/gemini-3-pro-preview',
+        prompt=(
+            'Alice, Bob, and Carol each live in a different house on the '
+            'same street: red, green, and blue. The person who lives in the red house '
+            'owns a cat. Bob does not live in the green house. Carol owns a dog. The '
+            'green house is to the left of the red house. Alice does not own a cat. '
+            'The person in the blue house owns a fish. '
+            'Who lives in each house, and what pet do they own? Provide your '
+            'step-by-step reasoning.'
+        ),
+        config={
+            'thinking_config': {
+                'include_thoughts': True,
+                'thinking_level': level.value,
+            }
+        },
+    )
+    return response.text
+
+
+class ThinkingLevelFlash(str, Enum):
+    MINIMAL = 'MINIMAL'
+    LOW = 'LOW'
+    MEDIUM = 'MEDIUM'
+    HIGH = 'HIGH'
+
+
+@ai.flow()
+async def thinking_level_flash(level: ThinkingLevelFlash):
+    """Gemini 3.0 thinkingLevel config (Flash)."""
+    response = await ai.generate(
+        model='vertexai/gemini-3-flash-preview',
+        prompt=(
+            'Alice, Bob, and Carol each live in a different house on the '
+            'same street: red, green, and blue. The person who lives in the red house '
+            'owns a cat. Bob does not live in the green house. Carol owns a dog. The '
+            'green house is to the left of the red house. Alice does not own a cat. '
+            'The person in the blue house owns a fish. '
+            'Who lives in each house, and what pet do they own? Provide your '
+            'step-by-step reasoning.'
+        ),
+        config={
+            'thinking_config': {
+                'include_thoughts': True,
+                'thinking_level': level.value,
+            }
+        },
+    )
+    return response.text
+
+
+@ai.flow()
+async def video_understanding_metadata():
+    """Video understanding with metadata."""
+    response = await ai.generate(
+        model='vertexai/gemini-2.5-flash',
+        prompt=[
+            MediaPart(
+                media=Media(url='gs://cloud-samples-data/video/animals.mp4', content_type='video/mp4'),
+                metadata={
+                    'videoMetadata': {
+                        'fps': 0.5,
+                        'startOffset': '3.5s',
+                        'endOffset': '10.2s',
+                    }
+                },
+            ),
+            TextPart(text='describe this video'),
+        ],
+    )
+    return response.text
+
+
+@ai.flow()
+async def maps_grounding():
+    """Google maps grounding."""
+    response = await ai.generate(
+        model='vertexai/gemini-2.5-flash',
+        prompt='Describe some sights near me',
+        config={
+            'tools': [{'googleMaps': {'enableWidget': True}}],
+            'retrieval_config': {
+                'latLng': {
+                    'latitude': 43.0896,
+                    'longitude': -79.0849,
+                },
+            },
+        },
+    )
+    return response.text
+
+
+@ai.flow()
+async def search_grounding():
+    """Search grounding."""
+    response = await ai.generate(
+        model='vertexai/gemini-2.5-flash',
+        prompt='Who is Albert Einstein?',
+        config={'tools': [{'googleSearch': {}}]},
+    )
+    return response.text
+
+
+@ai.flow()
+async def gemini_media_resolution():
+    """Media resolution."""
+    # Placeholder base64 for sample
+    plant_b64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII='
+    response = await ai.generate(
+        model='vertexai/gemini-3-pro-preview',
+        prompt=[
+            TextPart(text='What is in this picture?'),
+            MediaPart(
+                media=Media(url=f'data:image/png;base64,{plant_b64}'),
+                metadata={'mediaResolution': {'level': 'MEDIA_RESOLUTION_HIGH'}},
+            ),
+        ],
+    )
+    return response.text
+
+
+@ai.flow()
+async def gemini_image_editing():
+    """Image editing with Gemini."""
+    plant_path = pathlib.Path(__file__).parent.parent / 'palm_tree.png'
+    room_path = pathlib.Path(__file__).parent.parent / 'my_room.png'
+
+    with open(plant_path, 'rb') as f:
+        plant_b64 = base64.b64encode(f.read()).decode('utf-8')
+    with open(room_path, 'rb') as f:
+        room_b64 = base64.b64encode(f.read()).decode('utf-8')
+
+    response = await ai.generate(
+        model='vertexai/gemini-2.5-flash-image-preview',
+        prompt=[
+            TextPart(text='add the plant to my room'),
+            MediaPart(media=Media(url=f'data:image/png;base64,{plant_b64}')),
+            MediaPart(media=Media(url=f'data:image/png;base64,{room_b64}')),
+        ],
+        config=GeminiImageConfigSchema(
+            response_modalities=['TEXT', 'IMAGE'],
+            image_config={'aspect_ratio': '1:1'},
+        ).model_dump(exclude_none=True),
+    )
+
+    for part in response.message.content:
+        if isinstance(part.root, MediaPart):
+            return part.root.media
+
+    return None
+
+
+@ai.flow()
+async def nano_banana_pro():
+    """Nano banana pro config."""
+    response = await ai.generate(
+        model='vertexai/gemini-3-pro-image-preview',
+        prompt='Generate a picture of a sunset in the mountains by a lake',
+        config={
+            'response_modalities': ['TEXT', 'IMAGE'],
+            'image_config': {
+                'aspect_ratio': '3:4',
+                'image_size': '1K',
+            },
+        },
+    )
+    return response.media
+
+
+@ai.flow()
+async def imagen_image_generation():
+    """A simple example of image generation with Gemini (Imagen)."""
+    response = await ai.generate(
+        model='vertexai/imagen-3.0-generate-002',
+        prompt='generate an image of a banana riding a bicycle',
+    )
+    return response.media
+
+
+@ai.tool(name='getWeather')
+def get_weather(location: str) -> dict:
+    """Used to get current weather for a location."""
+    return {
+        'location': location,
+        'temperature_celsius': 21.5,
+        'conditions': 'cloudy',
+    }
+
+
+@ai.tool(name='celsiusToFahrenheit')
+def celsius_to_fahrenheit(celsius: float) -> float:
+    """Converts Celsius to Fahrenheit."""
+    return (celsius * 9) / 5 + 32
+
+
+@ai.flow()
+async def tool_calling(location: str = 'Paris, France'):
+    """Tool calling with Gemini."""
+    response = await ai.generate(
+        model='vertexai/gemini-2.5-flash',
+        tools=['getWeather', 'celsiusToFahrenheit'],
+        prompt=f"What's the weather in {location}? Convert the temperature to Fahrenheit.",
+        config=GenerationCommonConfig(temperature=1),
+    )
+    return response.text
+
+
+async def main() -> None:
+    """Main function."""
+    # Example run logic can go here, or stay empty when only serving flows.
+    pass
+
+
+if __name__ == '__main__':
+    ai.run_main(main())