diff --git a/app/build.gradle.kts b/app/build.gradle.kts index 64afb7c..5362a46 100644 --- a/app/build.gradle.kts +++ b/app/build.gradle.kts @@ -103,6 +103,10 @@ android { useLegacyPackaging = false } } + + testOptions { + unitTests.isReturnDefaultValues = true + } } fun parseLoadAlignments(readelfOutput: String): List { diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt index 021875d..4e48cc8 100644 --- a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt +++ b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureApiClients.kt @@ -239,13 +239,35 @@ internal suspend fun callPuterApi(modelName: String, apiKey: String, chatHistory @Serializable data class ServiceGroqRequest( val model: String, - val messages: List, + val messages: List, val max_tokens: Int = 4096, val temperature: Double = 0.7, val top_p: Double = 1.0, val stream: Boolean = false ) +@Serializable +data class ServiceGroqMessage( + val role: String, + val content: List +) + +@Serializable +@OptIn(ExperimentalSerializationApi::class) +@JsonClassDiscriminator("type") +sealed class ServiceGroqContent + +@Serializable +@SerialName("text") +data class ServiceGroqTextContent(@SerialName("text") val text: String) : ServiceGroqContent() + +@Serializable +@SerialName("image_url") +data class ServiceGroqImageContent(@SerialName("image_url") val imageUrl: ServiceGroqImageUrl) : ServiceGroqContent() + +@Serializable +data class ServiceGroqImageUrl(val url: String) + internal suspend fun callGroqApi(modelName: String, apiKey: String, chatHistory: List, inputContent: Content): Pair { var responseText: String? = null var errorMessage: String? 
= null @@ -254,12 +276,22 @@ internal suspend fun callGroqApi(modelName: String, apiKey: String, chatHistory: val supportsScreenshot = currentModelOption?.supportsScreenshot ?: true try { - val apiMessages = mutableListOf() + val apiMessages = mutableListOf() (chatHistory + inputContent).forEach { content -> val parts = content.parts.mapNotNull { part -> when (part) { - is TextPart -> if (part.text.isNotBlank()) ServiceMistralTextContent(text = part.text) else null - is ImagePart -> if (supportsScreenshot) ServiceMistralImageContent(imageUrl = "data:image/jpeg;base64,${com.google.ai.sample.util.ImageUtils.bitmapToBase64(part.image)}") else null + is TextPart -> if (part.text.isNotBlank()) ServiceGroqTextContent(text = part.text) else null + is ImagePart -> { + if (supportsScreenshot) { + ServiceGroqImageContent( + imageUrl = ServiceGroqImageUrl( + url = "data:image/jpeg;base64,${com.google.ai.sample.util.ImageUtils.bitmapToBase64(part.image)}" + ) + ) + } else { + null + } + } else -> null } } @@ -269,12 +301,20 @@ internal suspend fun callGroqApi(modelName: String, apiKey: String, chatHistory: "system" -> "system" else -> "assistant" } - apiMessages.add(ServiceMistralMessage(role = role, content = parts)) + apiMessages.add(ServiceGroqMessage(role = role, content = parts)) } } val requestBody = ServiceGroqRequest(model = modelName, messages = apiMessages) - val json = Json { ignoreUnknownKeys = true; serializersModule = SerializersModule { polymorphic(ServiceMistralContent::class) { subclass(ServiceMistralTextContent::class); subclass(ServiceMistralImageContent::class) } } } + val json = Json { + ignoreUnknownKeys = true + serializersModule = SerializersModule { + polymorphic(ServiceGroqContent::class) { + subclass(ServiceGroqTextContent::class) + subclass(ServiceGroqImageContent::class) + } + } + } val mediaType = "application/json".toMediaType() val client = OkHttpClient() val request = Request.Builder() diff --git 
a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt index ac65f2f..17374ff 100644 --- a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt +++ b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt @@ -75,8 +75,9 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { fun clearCommandQueue() { val instance = serviceInstance if (instance != null) { + instance.cancelPendingDelayedScreenshot() instance.commandQueue.clearAndUnlock() - Log.d(TAG, "Command queue cleared and processing flag reset.") + Log.d(TAG, "Command queue cleared, delayed screenshot cancelled, and processing flag reset.") } else { Log.w(TAG, "clearCommandQueue: serviceInstance is null, nothing to clear.") } @@ -141,6 +142,9 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { // Handler for delayed operations private val handler = Handler(Looper.getMainLooper()) // Instance handler + private var pendingScreenshotDelayMillis: Long = 0L + private var pendingDelayedScreenshotRunnable: Runnable? 
= null + // App name to package mapper private lateinit var appNamePackageMapper: AppNamePackageMapper @@ -234,38 +238,14 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { this.tapAtCoordinates(point.xPx, point.yPx) true // Asynchronous } - is Command.TakeScreenshot -> { - val currentModel = GenerativeAiViewModelFactory.getCurrentModel() - if (!currentModel.supportsScreenshot) { - Log.d(TAG, "Command.TakeScreenshot: Model has no screenshot support, capturing screen info only.") - this.showToast("Capturing screen info...", false) - val screenInfo = captureScreenInformation() - val mainActivity = MainActivity.getInstance() - mainActivity?.getPhotoReasoningViewModel()?.addScreenshotToConversation( - Uri.EMPTY, - applicationContext, - screenInfo - ) - false - } else { - Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.") - this.showToast("Preparing screenshot...", false) // Updated toast message - - val screenInfo = captureScreenInformation() // Capture fresh screen info - - val intent = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT).apply { - putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo) - // Set package to ensure only our app's receiver gets it - `package` = applicationContext.packageName - } - applicationContext.sendBroadcast(intent) - Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.") - - // The command is considered "handled" once the broadcast is sent. - // MainActivity and ScreenCaptureService will handle the rest asynchronously. - // Return false to allow the command queue to proceed immediately. 
- false - } + is Command.TakeScreenshot -> executeTakeScreenshotCommand() + is Command.Wait -> { + /* Clamp to [0, Long.MAX_VALUE / 2000] seconds: the millisecond value then stays <= Long.MAX_VALUE / 2, leaving headroom when the scheduler adds it to a clock reading (presumably what Handler.postDelayed does; TODO confirm). */ + val waitSeconds = command.seconds.coerceAtLeast(0L).coerceAtMost(Long.MAX_VALUE / 2000L) + pendingScreenshotDelayMillis = waitSeconds * 1000L + Log.d(TAG, "Command.Wait: Delaying the next takeScreenshot command by ${waitSeconds} seconds.") + showToast("Delaying next screenshot by ${waitSeconds} seconds", false) + false } is Command.PressHomeButton -> { executeSyncCommandAction( @@ -438,6 +418,63 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { } } + private fun executeTakeScreenshotCommand(): Boolean { + val delayMillis = pendingScreenshotDelayMillis + pendingScreenshotDelayMillis = 0L + + val captureAndRequestScreenshot = { + val currentModel = GenerativeAiViewModelFactory.getCurrentModel() + if (!currentModel.supportsScreenshot) { + Log.d(TAG, "Command.TakeScreenshot: Model has no screenshot support, capturing screen info only.") + showToast("Capturing screen info...", false) + val screenInfo = captureScreenInformation() + val mainActivity = MainActivity.getInstance() + mainActivity?.getPhotoReasoningViewModel()?.addScreenshotToConversation( + Uri.EMPTY, + applicationContext, + screenInfo + ) + } else { + Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.") + showToast("Preparing screenshot...", false) + + val screenInfo = captureScreenInformation() + + val intent = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT).apply { + putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo) + `package` = applicationContext.packageName + } + applicationContext.sendBroadcast(intent) + Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.") + } + } + + if (delayMillis <= 0L) { + captureAndRequestScreenshot() + return false + } + + Log.d(TAG, "Command.TakeScreenshot: Waiting ${delayMillis}ms before capturing screen info and screenshot.") + showToast("Waiting 
${delayMillis / 1000L} seconds before screenshot...", false) + val delayedScreenshotRunnable = Runnable { + pendingDelayedScreenshotRunnable = null + captureAndRequestScreenshot() + scheduleNextCommandProcessing() + } + pendingDelayedScreenshotRunnable = delayedScreenshotRunnable + handler.postDelayed(delayedScreenshotRunnable, delayMillis) + return true + } + + private fun cancelPendingDelayedScreenshot() { + pendingScreenshotDelayMillis = 0L + pendingDelayedScreenshotRunnable?.let { runnable -> + handler.removeCallbacks(runnable) + Log.d(TAG, "Cancelled pending delayed screenshot.") + } + pendingDelayedScreenshotRunnable = null + } + private fun executeSyncCommandAction( logMessage: String, toastMessage: String, diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningHistoryBuilder.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningHistoryBuilder.kt index 7e76089..8849bcb 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningHistoryBuilder.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningHistoryBuilder.kt @@ -28,7 +28,9 @@ internal object PhotoReasoningHistoryBuilder { var currentUserContent = "" var currentModelContent = "" - for (message in messages) { + val sanitizedMessages = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages) + + for (message in sanitizedMessages) { when (message.participant) { PhotoParticipant.USER -> { if (currentModelContent.isNotEmpty()) { diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningMessageMutations.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningMessageMutations.kt index b7f2e18..6790cc8 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningMessageMutations.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningMessageMutations.kt @@ -5,14 +5,16 @@ internal object 
PhotoReasoningMessageMutations { chatState: PhotoReasoningChatState, userMessage: PhotoReasoningMessage ): List { - chatState.addMessage(userMessage) - chatState.addMessage( + val messages = chatState.getAllMessages().toMutableList() + messages.add(userMessage) + messages.add( PhotoReasoningMessage( text = "", participant = PhotoParticipant.MODEL, isPending = true ) ) + chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages)) return chatState.getAllMessages() } diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt index 5cc1d2c..abb9985 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt @@ -349,6 +349,7 @@ fun PhotoReasoningScreen( is Command.ClickButton -> "Click on button: \"${command.buttonText}\"" is Command.TapCoordinates -> "Tap coordinates: (${command.x}, ${command.y})" is Command.TakeScreenshot -> "Take screenshot" + is Command.Wait -> "Wait: ${command.seconds} seconds" is Command.Retrieve -> "Retrieve: \"${command.heading}\"" else -> command::class.simpleName ?: "Unknown Command" } diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicy.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicy.kt new file mode 100644 index 0000000..4e577fe --- /dev/null +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicy.kt @@ -0,0 +1,52 @@ +package com.google.ai.sample.feature.multimodal + +internal object PhotoReasoningScreenElementHistoryPolicy { + private const val MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES = 3 + private const val MARKER = "Screen elements:" + private const val NO_LONGER_RELEVANT = "no longer relevant" + 
private val screenElementsSectionRegex = Regex( + pattern = "(${Regex.escape(MARKER)}\\s*).*", + options = setOf(RegexOption.IGNORE_CASE, RegexOption.DOT_MATCHES_ALL) + ) + + /** + * Keeps the screen-element section of the newest [MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES] messages that + * still carry one and rewrites every older, not yet obsolete section to the + * [NO_LONGER_RELEVANT] marker. + * + * Messages are selected by id; a new list is returned and [messages] is left untouched. + */ + fun sanitizeMessages(messages: List): List { + val keepRelevantIds = messages + .asReversed() + .filter { hasScreenElements(it.text) && !isAlreadyObsolete(it.text) } + .take(MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES) + .map { it.id } + .toSet() + + return messages.map { message -> + if (hasScreenElements(message.text) && !isAlreadyObsolete(message.text) && message.id !in keepRelevantIds) { + message.copy(text = replaceScreenElementsWithObsoleteMarker(message.text)) + } else { + message + } + } + } + + fun hasScreenElements(text: String): Boolean { + return text.contains(MARKER, ignoreCase = true) + } + + private fun isAlreadyObsolete(text: String): Boolean { + val markerIndex = text.indexOf(MARKER, ignoreCase = true) + if (markerIndex < 0) return false + val sectionText = text.substring(markerIndex + MARKER.length).trim() + return sectionText.equals(NO_LONGER_RELEVANT, ignoreCase = true) + } + + private fun replaceScreenElementsWithObsoleteMarker(text: String): String { + return screenElementsSectionRegex.replace(text) { match -> + "${match.groupValues[1]}$NO_LONGER_RELEVANT" + } + } +} diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt index a05462b..5b1b2b6 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt @@ -735,7 +735,7 @@ class PhotoReasoningViewModel( isPending = true ) messages.add(pendingAiMessage) - _chatState.setAllMessages(messages) + 
_chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages)) _chatMessagesFlow.value = _chatState.getAllMessages() currentReasoningJob?.cancel() // Cancel any previous reasoning job @@ -1181,7 +1181,7 @@ class PhotoReasoningViewModel( val formattedDbEntries = PhotoReasoningTextPolicies.formatDatabaseEntriesAsText(context) if (formattedDbEntries.isNotBlank()) apiMessages.add(CerebrasMessage(role = "user", content = formattedDbEntries)) - _chatState.getAllMessages() + PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages()) .filter { !it.isPending && it.participant != PhotoParticipant.ERROR } .forEach { message -> val role = if (message.participant == PhotoParticipant.USER) "user" else "assistant" @@ -1315,7 +1315,7 @@ class PhotoReasoningViewModel( if (systemContent.isNotEmpty()) apiMessages.add(MistralMessage(role = "system", content = systemContent)) - _chatState.getAllMessages() + PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages()) .filter { !it.isPending && it.participant != PhotoParticipant.ERROR } .forEach { message -> val role = if (message.participant == PhotoParticipant.USER) "user" else "assistant" @@ -1466,15 +1466,7 @@ class PhotoReasoningViewModel( imageUris = if (currentModel.supportsScreenshot) (imageUrisForChat ?: emptyList()) else emptyList(), isPending = false ) - _chatState.addMessage(userMessage) - - val pendingAiMessage = PhotoReasoningMessage( - text = "", - participant = PhotoParticipant.MODEL, - isPending = true - ) - _chatState.addMessage(pendingAiMessage) - _chatMessagesFlow.value = _chatState.getAllMessages() + appendUserAndPendingModelMessages(userMessage) _uiState.value = PhotoReasoningUiState.Loading @@ -1499,7 +1491,7 @@ class PhotoReasoningViewModel( } // Add Chat History (exclude the last added user message) - val allMessages = _chatState.getAllMessages() + val allMessages = 
PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages()) // exclude the last pending message and the last user message we just added val historyMessages = allMessages.filter { !it.isPending && it.participant != PhotoParticipant.ERROR }.dropLast(1) @@ -1627,7 +1619,7 @@ class PhotoReasoningViewModel( } // Add chat history - val messages = _chatState.getAllMessages() + val messages = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages()) messages.forEach { msg -> when (msg.participant) { PhotoParticipant.USER -> { @@ -2431,7 +2423,7 @@ private fun processCommands(text: String) { fun loadChatHistory(context: Context) { val savedMessages = ChatHistoryPreferences.loadChatMessages(context) if (savedMessages.isNotEmpty()) { - _chatState.setAllMessages(savedMessages) + _chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(savedMessages)) _chatMessagesFlow.value = _chatState.getAllMessages() if (isLiveMode) { @@ -2487,6 +2479,12 @@ private fun processCommands(text: String) { * Clear the chat history */ fun clearChatHistory(context: Context? 
= null) { + stopExecutionFlag.set(true) + currentReasoningJob?.cancel() + commandProcessingJob?.cancel() + ScreenOperatorAccessibilityService.clearCommandQueue() + _showStopNotificationFlow.value = false + // Clear visible messages completely for UI _chatState.setAllMessages(emptyList()) @@ -2532,14 +2530,11 @@ private fun processCommands(text: String) { // Reset retry attempt counter currentRetryAttempt = 0 - // Clear any pending jobs - currentReasoningJob?.cancel() - commandProcessingJob?.cancel() - // Reset UI state _uiState.value = PhotoReasoningUiState.Initial _commandExecutionStatus.value = "" _detectedCommands.value = emptyList() + refreshStopButtonState() } /** diff --git a/app/src/main/kotlin/com/google/ai/sample/util/Command.kt b/app/src/main/kotlin/com/google/ai/sample/util/Command.kt index a44cb09..f82628f 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/Command.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/Command.kt @@ -8,6 +8,7 @@ sealed class Command { data class LongClickButton(val buttonText: String) : Command() data class TapCoordinates(val x: String, val y: String) : Command() object TakeScreenshot : Command() + data class Wait(val seconds: Long) : Command() object PressHomeButton : Command() object PressBackButton : Command() object ShowRecentApps : Command() diff --git a/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt b/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt index 9619e1c..62c920c 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt @@ -11,7 +11,7 @@ object CommandParser { // Enum to represent different command types private enum class CommandTypeEnum { - CLICK_BUTTON, LONG_CLICK_BUTTON, TAP_COORDINATES, TAKE_SCREENSHOT, PRESS_HOME, PRESS_BACK, + CLICK_BUTTON, LONG_CLICK_BUTTON, TAP_COORDINATES, TAKE_SCREENSHOT, WAIT, PRESS_HOME, PRESS_BACK, SHOW_RECENT_APPS, SCROLL_DOWN, SCROLL_UP, SCROLL_LEFT, 
SCROLL_RIGHT, SCROLL_DOWN_FROM_COORDINATES, SCROLL_UP_FROM_COORDINATES, SCROLL_LEFT_FROM_COORDINATES, SCROLL_RIGHT_FROM_COORDINATES, @@ -53,8 +53,9 @@ object CommandParser { // Tap coordinates patterns PatternInfo("tapCoords1", Regex("(?i)\\btapAtCoordinates\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)"), { match -> Command.TapCoordinates(match.groupValues[1], match.groupValues[2]) }, CommandTypeEnum.TAP_COORDINATES), - // Screenshot patterns + // Screenshot and wait patterns PatternInfo("screenshot1", Regex("(?i)\\btakeScreenshot\\(\\)"), { Command.TakeScreenshot }, CommandTypeEnum.TAKE_SCREENSHOT), + PatternInfo("wait1", Regex("(?i)\\bWait\\(\\s*(\\d+)\\s*\\)"), { match -> /* \d+ accepts digit runs longer than Long can hold; saturate instead of throwing NumberFormatException */ Command.Wait(match.groupValues[1].toLongOrNull() ?: Long.MAX_VALUE) }, CommandTypeEnum.WAIT), // Home button patterns PatternInfo("home1", Regex("(?i)\\bhome\\(\\)"), { Command.PressHomeButton }, CommandTypeEnum.PRESS_HOME), @@ -150,6 +151,7 @@ object CommandParser { is Command.LongClickButton -> Log.d(TAG, "Command details: LongClickButton(\"${command.buttonText}\")") is Command.TapCoordinates -> Log.d(TAG, "Command details: TapCoordinates(${command.x}, ${command.y})") is Command.TakeScreenshot -> Log.d(TAG, "Command details: TakeScreenshot") + is Command.Wait -> Log.d(TAG, "Command details: Wait(${command.seconds})") is Command.PressHomeButton -> Log.d(TAG, "Command details: PressHomeButton") is Command.PressBackButton -> Log.d(TAG, "Command details: PressBackButton") is Command.ShowRecentApps -> Log.d(TAG, "Command details: ShowRecentApps") diff --git a/app/src/test/java/com/google/ai/sample/ScreenCaptureApiClientsTest.kt b/app/src/test/java/com/google/ai/sample/ScreenCaptureApiClientsTest.kt new file mode 100644 index 0000000..fde282a --- /dev/null +++ b/app/src/test/java/com/google/ai/sample/ScreenCaptureApiClientsTest.kt @@ -0,0 +1,40 @@ +package com.google.ai.sample + +import kotlinx.serialization.encodeToString +import kotlinx.serialization.json.Json +import kotlinx.serialization.modules.SerializersModule 
+import kotlinx.serialization.modules.polymorphic +import kotlinx.serialization.modules.subclass +import org.junit.Assert.assertTrue +import org.junit.Test + +class ScreenCaptureApiClientsTest { + @Test + fun serviceGroqRequest_serializesImageUrlAsObject() { + val json = Json { + serializersModule = SerializersModule { + polymorphic(ServiceGroqContent::class) { + subclass(ServiceGroqTextContent::class) + subclass(ServiceGroqImageContent::class) + } + } + } + val request = ServiceGroqRequest( + model = "meta-llama/llama-4-scout-17b-16e-instruct", + messages = listOf( + ServiceGroqMessage( + role = "user", + content = listOf( + ServiceGroqTextContent("look"), + ServiceGroqImageContent(ServiceGroqImageUrl("data:image/jpeg;base64,abc")) + ) + ) + ) + ) + + val encoded = json.encodeToString(ServiceGroqRequest.serializer(), request) + + assertTrue(encoded.contains("\"type\":\"image_url\"")) + assertTrue(encoded.contains("\"image_url\":{\"url\":\"data:image/jpeg;base64,abc\"}")) + } +} diff --git a/app/src/test/java/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicyTest.kt b/app/src/test/java/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicyTest.kt new file mode 100644 index 0000000..15973ae --- /dev/null +++ b/app/src/test/java/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicyTest.kt @@ -0,0 +1,24 @@ +package com.google.ai.sample.feature.multimodal + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertTrue +import org.junit.Test + +class PhotoReasoningScreenElementHistoryPolicyTest { + @Test + fun sanitizeMessages_keepsOnlyThreeLatestScreenElementSectionsRelevant() { + val messages = (1..4).map { index -> + PhotoReasoningMessage( + text = "Screenshot $index\n\nScreen elements:\n$index. 
Button $index", + participant = PhotoParticipant.USER + ) + } + + val sanitized = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages) + + assertEquals("Screenshot 1\n\nScreen elements:\nno longer relevant", sanitized[0].text) + assertTrue(sanitized[1].text.contains("Button 2")) + assertTrue(sanitized[2].text.contains("Button 3")) + assertTrue(sanitized[3].text.contains("Button 4")) + } +} diff --git a/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt b/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt index f51db6b..361b894 100644 --- a/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt +++ b/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt @@ -67,4 +67,15 @@ class CommandParserTest { assertEquals(1, commands.size) assertTrue(commands.first() is Command.Retrieve) } + + @Test + fun parseCommands_extractsWaitCommand() { + val commands = CommandParser.parseCommands("Wait(7) takeScreenshot()", clearBuffer = true) + + assertEquals(2, commands.size) + val wait = commands.first() + assertTrue(wait is Command.Wait) + assertEquals(7L, (wait as Command.Wait).seconds) + assertTrue(commands[1] is Command.TakeScreenshot) + } }