diff --git a/core/src/main/java/com/google/adk/events/Event.java b/core/src/main/java/com/google/adk/events/Event.java index cde4e4452..be2883f43 100644 --- a/core/src/main/java/com/google/adk/events/Event.java +++ b/core/src/main/java/com/google/adk/events/Event.java @@ -33,6 +33,7 @@ import com.google.genai.types.FunctionResponse; import com.google.genai.types.GenerateContentResponseUsageMetadata; import com.google.genai.types.GroundingMetadata; +import com.google.genai.types.Transcription; import java.time.Instant; import java.util.List; import java.util.Objects; @@ -42,6 +43,7 @@ import org.jspecify.annotations.Nullable; // TODO - b/413761119 update Agent.java when resolved. + /** Represents an event in a session. */ @JsonDeserialize(builder = Event.Builder.class) public class Event extends JsonBaseModel { @@ -64,6 +66,9 @@ public class Event extends JsonBaseModel { private @Nullable GroundingMetadata groundingMetadata; private @Nullable List customMetadata; private @Nullable String modelVersion; + private @Nullable Transcription inputTranscription; + private @Nullable Transcription outputTranscription; + private long timestamp; private Event() {} @@ -266,6 +271,32 @@ public void setModelVersion(@Nullable String modelVersion) { this.modelVersion = modelVersion; } + /** + * Input transcription. The transcription is independent of the model turn which means it doesn’t + * imply any ordering between transcription and model turn. + */ + @JsonProperty("inputTranscription") + public Optional inputTranscription() { + return Optional.ofNullable(inputTranscription); + } + + public void setInputTranscription(@Nullable Transcription inputTranscription) { + this.inputTranscription = inputTranscription; + } + + /** + * Output transcription. The transcription is independent of the model turn which means it doesn’t + * imply any ordering between transcription and model turn. 
+ */ + @JsonProperty("outputTranscription") + public Optional outputTranscription() { + return Optional.ofNullable(outputTranscription); + } + + public void setOutputTranscription(@Nullable Transcription outputTranscription) { + this.outputTranscription = outputTranscription; + } + /** The timestamp of the event. */ @JsonProperty("timestamp") public long timestamp() { @@ -362,6 +393,8 @@ public static class Builder { private @Nullable GroundingMetadata groundingMetadata; private @Nullable List customMetadata; private @Nullable String modelVersion; + private @Nullable Transcription inputTranscription; + private @Nullable Transcription outputTranscription; private @Nullable Long timestamp; @JsonCreator @@ -520,6 +553,20 @@ public Builder modelVersion(@Nullable String value) { return this; } + @CanIgnoreReturnValue + @JsonProperty("inputTranscription") + public Builder inputTranscription(@Nullable Transcription value) { + this.inputTranscription = value; + return this; + } + + @CanIgnoreReturnValue + @JsonProperty("outputTranscription") + public Builder outputTranscription(@Nullable Transcription value) { + this.outputTranscription = value; + return this; + } + public Event build() { Event event = new Event(); event.setId(id); @@ -541,6 +588,8 @@ public Event build() { event.setModelVersion(modelVersion); event.setActions(actions().orElseGet(() -> EventActions.builder().build())); event.setTimestamp(timestamp().orElseGet(() -> Instant.now().toEpochMilli())); + event.setInputTranscription(inputTranscription); + event.setOutputTranscription(outputTranscription); return event; } } @@ -575,7 +624,9 @@ public Builder toBuilder() { .branch(this.branch) .groundingMetadata(this.groundingMetadata) .customMetadata(this.customMetadata) - .modelVersion(this.modelVersion); + .modelVersion(this.modelVersion) + .inputTranscription(this.inputTranscription) + .outputTranscription(this.outputTranscription); if (this.timestamp != 0) { builder.timestamp(this.timestamp); } @@ -608,7 
+659,9 @@ public boolean equals(Object obj) { && Objects.equals(branch, other.branch) && Objects.equals(groundingMetadata, other.groundingMetadata) && Objects.equals(customMetadata, other.customMetadata) - && Objects.equals(modelVersion, other.modelVersion); + && Objects.equals(modelVersion, other.modelVersion) + && Objects.equals(inputTranscription, other.inputTranscription) + && Objects.equals(outputTranscription, other.outputTranscription); } @Override @@ -637,6 +690,8 @@ public int hashCode() { groundingMetadata, customMetadata, modelVersion, + inputTranscription, + outputTranscription, timestamp); } } diff --git a/core/src/main/java/com/google/adk/flows/llmflows/BaseLlmFlow.java b/core/src/main/java/com/google/adk/flows/llmflows/BaseLlmFlow.java index 48d141819..fffeab698 100644 --- a/core/src/main/java/com/google/adk/flows/llmflows/BaseLlmFlow.java +++ b/core/src/main/java/com/google/adk/flows/llmflows/BaseLlmFlow.java @@ -677,7 +677,9 @@ private Flowable buildPostprocessingEvents( && updatedResponse.errorCode().isEmpty() && !updatedResponse.interrupted().orElse(false) && !updatedResponse.turnComplete().orElse(false) - && updatedResponse.usageMetadata().isEmpty()) { + && updatedResponse.usageMetadata().isEmpty() + && updatedResponse.inputTranscription().isEmpty() + && updatedResponse.outputTranscription().isEmpty()) { return processorEvents; } @@ -740,7 +742,9 @@ private Event buildModelResponseEvent( .avgLogprobs(llmResponse.avgLogprobs().orElse(null)) .finishReason(llmResponse.finishReason().orElse(null)) .usageMetadata(llmResponse.usageMetadata().orElse(null)) - .modelVersion(llmResponse.modelVersion().orElse(null)); + .modelVersion(llmResponse.modelVersion().orElse(null)) + .inputTranscription(llmResponse.inputTranscription().orElse(null)) + .outputTranscription(llmResponse.outputTranscription().orElse(null)); Event event = eventBuilder.build(); diff --git a/core/src/main/java/com/google/adk/models/GeminiLlmConnection.java 
b/core/src/main/java/com/google/adk/models/GeminiLlmConnection.java index 7585d3a7a..1f3d0b8c5 100644 --- a/core/src/main/java/com/google/adk/models/GeminiLlmConnection.java +++ b/core/src/main/java/com/google/adk/models/GeminiLlmConnection.java @@ -192,6 +192,8 @@ private static LlmResponse createServerContentResponse(LiveServerContent serverC .partial(serverContent.turnComplete().map(completed -> !completed).orElse(false)) .turnComplete(serverContent.turnComplete().orElse(false)) .interrupted(serverContent.interrupted().orElse(null)) + .inputTranscription(serverContent.inputTranscription().orElse(null)) + .outputTranscription(serverContent.outputTranscription().orElse(null)) .build(); } diff --git a/core/src/main/java/com/google/adk/models/LlmResponse.java b/core/src/main/java/com/google/adk/models/LlmResponse.java index 560e2abcc..37577edc8 100644 --- a/core/src/main/java/com/google/adk/models/LlmResponse.java +++ b/core/src/main/java/com/google/adk/models/LlmResponse.java @@ -31,6 +31,7 @@ import com.google.genai.types.GenerateContentResponsePromptFeedback; import com.google.genai.types.GenerateContentResponseUsageMetadata; import com.google.genai.types.GroundingMetadata; +import com.google.genai.types.Transcription; import java.util.List; import java.util.Optional; import org.jspecify.annotations.Nullable; @@ -115,6 +116,20 @@ public abstract class LlmResponse extends JsonBaseModel { @JsonProperty("modelVersion") public abstract Optional modelVersion(); + /** + * Input transcription. The transcription is independent of the model turn which means it doesn’t + * imply any ordering between transcription and model turn. + */ + @JsonProperty("inputTranscription") + public abstract Optional inputTranscription(); + + /** + * Output transcription. The transcription is independent of the model turn which means it doesn’t + * imply any ordering between transcription and model turn. 
+ */ + @JsonProperty("outputTranscription") + public abstract Optional outputTranscription(); + public abstract Builder toBuilder(); /** Builder for constructing {@link LlmResponse} instances. */ @@ -164,6 +179,12 @@ public abstract Builder usageMetadata( @JsonProperty("modelVersion") public abstract Builder modelVersion(@Nullable String modelVersion); + @JsonProperty("inputTranscription") + public abstract Builder inputTranscription(@Nullable Transcription inputTranscription); + + @JsonProperty("outputTranscription") + public abstract Builder outputTranscription(@Nullable Transcription outputTranscription); + @CanIgnoreReturnValue public final Builder response(GenerateContentResponse response) { Optional> candidatesOpt = response.candidates(); diff --git a/core/src/test/java/com/google/adk/events/EventTest.java b/core/src/test/java/com/google/adk/events/EventTest.java index a4feab5c1..38186c682 100644 --- a/core/src/test/java/com/google/adk/events/EventTest.java +++ b/core/src/test/java/com/google/adk/events/EventTest.java @@ -26,6 +26,7 @@ import com.google.genai.types.FunctionCall; import com.google.genai.types.GenerateContentResponseUsageMetadata; import com.google.genai.types.Part; +import com.google.genai.types.Transcription; import java.time.Instant; import java.util.concurrent.ConcurrentHashMap; import org.junit.Test; @@ -192,6 +193,81 @@ public void event_json_serialization_works() throws Exception { assertThat(deserializedEvent).isEqualTo(EVENT); } + @Test + public void event_builder_with_transcriptions_works() { + Transcription inputTranscription = + Transcription.builder().text("user said hello").finished(true).build(); + Transcription outputTranscription = + Transcription.builder().text("model said hi").finished(false).build(); + Event event = + Event.builder() + .id("event_id") + .invocationId("invocation_id") + .author("agent") + .timestamp(123456789L) + .inputTranscription(inputTranscription) + .outputTranscription(outputTranscription) + .build(); + 
+ assertThat(event.inputTranscription()).hasValue(inputTranscription); + assertThat(event.outputTranscription()).hasValue(outputTranscription); + } + + @Test + public void event_transcriptions_empty_by_default() { + Event event = + Event.builder().id("event_id").invocationId("invocation_id").author("agent").build(); + + assertThat(event.inputTranscription()).isEmpty(); + assertThat(event.outputTranscription()).isEmpty(); + } + + @Test + public void event_equals_differentiates_transcriptions() { + Transcription transcription = Transcription.builder().text("hello").finished(true).build(); + Event eventWithTranscription = + Event.builder() + .id("event_id") + .invocationId("invocation_id") + .author("agent") + .timestamp(123456789L) + .inputTranscription(transcription) + .build(); + Event eventWithoutTranscription = + Event.builder() + .id("event_id") + .invocationId("invocation_id") + .author("agent") + .timestamp(123456789L) + .build(); + + assertThat(eventWithTranscription).isNotEqualTo(eventWithoutTranscription); + } + + @Test + public void event_json_serialization_with_transcriptions_works() throws Exception { + Transcription inputTranscription = + Transcription.builder().text("user said hello").finished(true).build(); + Transcription outputTranscription = + Transcription.builder().text("model said hi").finished(false).build(); + Event event = + Event.builder() + .id("event_id") + .invocationId("invocation_id") + .author("agent") + .timestamp(123456789L) + .inputTranscription(inputTranscription) + .outputTranscription(outputTranscription) + .build(); + + String json = event.toJson(); + Event deserialized = Event.fromJson(json); + + assertThat(deserialized).isEqualTo(event); + assertThat(deserialized.inputTranscription()).hasValue(inputTranscription); + assertThat(deserialized.outputTranscription()).hasValue(outputTranscription); + } + @Test public void finalResponse_returnsTrueIfNoToolCalls() { Event event = diff --git 
a/core/src/test/java/com/google/adk/flows/llmflows/BaseLlmFlowTest.java b/core/src/test/java/com/google/adk/flows/llmflows/BaseLlmFlowTest.java index 6cae6c88a..2a06c1f0a 100644 --- a/core/src/test/java/com/google/adk/flows/llmflows/BaseLlmFlowTest.java +++ b/core/src/test/java/com/google/adk/flows/llmflows/BaseLlmFlowTest.java @@ -43,6 +43,7 @@ import com.google.genai.types.FunctionDeclaration; import com.google.genai.types.GenerateContentResponseUsageMetadata; import com.google.genai.types.Part; +import com.google.genai.types.Transcription; import io.opentelemetry.context.Context; import io.opentelemetry.context.ContextKey; import io.opentelemetry.context.Scope; @@ -641,6 +642,94 @@ public void run_contextPropagation() { assertThat(events.get(0).content()).hasValue(content); } + @Test + public void postprocess_onlyInputTranscription_returnsEvent() { + Transcription inputTranscription = + Transcription.builder().text("user said hello").finished(true).build(); + LlmResponse llmResponse = LlmResponse.builder().inputTranscription(inputTranscription).build(); + InvocationContext invocationContext = + createInvocationContext(createTestAgent(createTestLlm(llmResponse))); + BaseLlmFlow baseLlmFlow = createBaseLlmFlowWithoutProcessors(); + Event baseEvent = + Event.builder() + .invocationId(invocationContext.invocationId()) + .author(invocationContext.agent().name()) + .build(); + + List events = + baseLlmFlow + .postprocess( + invocationContext, + baseEvent, + LlmRequest.builder().build(), + llmResponse, + Context.current()) + .toList() + .blockingGet(); + + assertThat(events).hasSize(1); + Event event = getOnlyElement(events); + assertThat(event.inputTranscription()).hasValue(inputTranscription); + assertThat(event.outputTranscription()).isEmpty(); + } + + @Test + public void postprocess_onlyOutputTranscription_returnsEvent() { + Transcription outputTranscription = + Transcription.builder().text("model replied hi").finished(false).build(); + LlmResponse llmResponse = + 
LlmResponse.builder().outputTranscription(outputTranscription).build(); + InvocationContext invocationContext = + createInvocationContext(createTestAgent(createTestLlm(llmResponse))); + BaseLlmFlow baseLlmFlow = createBaseLlmFlowWithoutProcessors(); + Event baseEvent = + Event.builder() + .invocationId(invocationContext.invocationId()) + .author(invocationContext.agent().name()) + .build(); + + List events = + baseLlmFlow + .postprocess( + invocationContext, + baseEvent, + LlmRequest.builder().build(), + llmResponse, + Context.current()) + .toList() + .blockingGet(); + + assertThat(events).hasSize(1); + Event event = getOnlyElement(events); + assertThat(event.outputTranscription()).hasValue(outputTranscription); + assertThat(event.inputTranscription()).isEmpty(); + } + + @Test + public void run_responseWithTranscriptions_propagatesTranscriptionsToEvent() { + Transcription inputTranscription = + Transcription.builder().text("user said hello").finished(true).build(); + Transcription outputTranscription = + Transcription.builder().text("model replied hi").finished(true).build(); + Content content = Content.fromParts(Part.fromText("model replied hi")); + LlmResponse llmResponse = + LlmResponse.builder() + .content(content) + .inputTranscription(inputTranscription) + .outputTranscription(outputTranscription) + .build(); + TestLlm testLlm = createTestLlm(llmResponse); + InvocationContext invocationContext = createInvocationContext(createTestAgent(testLlm)); + BaseLlmFlow baseLlmFlow = createBaseLlmFlowWithoutProcessors(); + + List events = baseLlmFlow.run(invocationContext).toList().blockingGet(); + + assertThat(events).hasSize(1); + Event event = getOnlyElement(events); + assertThat(event.inputTranscription()).hasValue(inputTranscription); + assertThat(event.outputTranscription()).hasValue(outputTranscription); + } + @Test public void postprocess_noResponseProcessors_onlyUsageMetadata_returnsEvent() { GenerateContentResponseUsageMetadata usageMetadata = diff --git 
a/core/src/test/java/com/google/adk/models/LlmResponseTest.java b/core/src/test/java/com/google/adk/models/LlmResponseTest.java index 646b89602..4ff758c27 100644 --- a/core/src/test/java/com/google/adk/models/LlmResponseTest.java +++ b/core/src/test/java/com/google/adk/models/LlmResponseTest.java @@ -28,6 +28,7 @@ import com.google.genai.types.FunctionCall; import com.google.genai.types.GenerateContentResponseUsageMetadata; import com.google.genai.types.Part; +import com.google.genai.types.Transcription; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -141,6 +142,47 @@ public void testSerializationAndDeserialization_optionalFieldsEmpty() assertThat(deserializedResponse.usageMetadata()).isEmpty(); } + @Test + public void testSerializationAndDeserialization_withTranscriptions() + throws JsonProcessingException { + Transcription inputTranscription = + Transcription.builder().text("user said hello").finished(true).build(); + Transcription outputTranscription = + Transcription.builder().text("model replied hi").finished(false).build(); + LlmResponse originalResponse = + LlmResponse.builder() + .content(createSampleContent("hello")) + .inputTranscription(inputTranscription) + .outputTranscription(outputTranscription) + .build(); + + String json = originalResponse.toJson(); + JsonNode jsonNode = objectMapper.readTree(json); + + assertThat(jsonNode.has("inputTranscription")).isTrue(); + assertThat(jsonNode.get("inputTranscription").get("text").asText()) + .isEqualTo("user said hello"); + assertThat(jsonNode.get("inputTranscription").get("finished").asBoolean()).isTrue(); + assertThat(jsonNode.has("outputTranscription")).isTrue(); + assertThat(jsonNode.get("outputTranscription").get("text").asText()) + .isEqualTo("model replied hi"); + assertThat(jsonNode.get("outputTranscription").get("finished").asBoolean()).isFalse(); + + LlmResponse deserializedResponse = LlmResponse.fromJsonString(json, LlmResponse.class); + + 
assertThat(deserializedResponse).isEqualTo(originalResponse); + assertThat(deserializedResponse.inputTranscription()).hasValue(inputTranscription); + assertThat(deserializedResponse.outputTranscription()).hasValue(outputTranscription); + } + + @Test + public void testTranscriptions_emptyByDefault() { + LlmResponse response = LlmResponse.builder().content(createSampleContent("hello")).build(); + + assertThat(response.inputTranscription()).isEmpty(); + assertThat(response.outputTranscription()).isEmpty(); + } + @Test public void testDeserialization_optionalFieldsNullInJson() throws JsonProcessingException {