From 0a049d210e81e11448e38d949c6e054dd71c314a Mon Sep 17 00:00:00 2001 From: Android PowerUser <88908510+Android-PowerUser@users.noreply.github.com> Date: Wed, 6 May 2026 14:21:49 +0200 Subject: [PATCH 1/2] Rename pause command to wait --- app/build.gradle.kts | 4 + .../ScreenOperatorAccessibilityService.kt | 87 ++++++++++++------- .../PhotoReasoningHistoryBuilder.kt | 4 +- .../PhotoReasoningMessageMutations.kt | 6 +- .../multimodal/PhotoReasoningScreen.kt | 1 + ...hotoReasoningScreenElementHistoryPolicy.kt | 52 +++++++++++ .../multimodal/PhotoReasoningViewModel.kt | 22 ++--- .../com/google/ai/sample/util/Command.kt | 1 + .../google/ai/sample/util/CommandParser.kt | 6 +- ...ReasoningScreenElementHistoryPolicyTest.kt | 24 +++++ .../ai/sample/util/CommandParserTest.kt | 11 +++ 11 files changed, 166 insertions(+), 52 deletions(-) create mode 100644 app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicy.kt create mode 100644 app/src/test/java/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicyTest.kt diff --git a/app/build.gradle.kts b/app/build.gradle.kts index 64afb7c5..5362a462 100644 --- a/app/build.gradle.kts +++ b/app/build.gradle.kts @@ -103,6 +103,10 @@ android { useLegacyPackaging = false } } + + testOptions { + unitTests.isReturnDefaultValues = true + } } fun parseLoadAlignments(readelfOutput: String): List { diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt index ac65f2f1..77f3bd97 100644 --- a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt +++ b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt @@ -141,6 +141,8 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { // Handler for delayed operations private val handler = Handler(Looper.getMainLooper()) // Instance handler + private var 
pendingScreenshotDelayMillis: Long = 0L + // App name to package mapper private lateinit var appNamePackageMapper: AppNamePackageMapper @@ -234,38 +236,14 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { this.tapAtCoordinates(point.xPx, point.yPx) true // Asynchronous } - is Command.TakeScreenshot -> { - val currentModel = GenerativeAiViewModelFactory.getCurrentModel() - if (!currentModel.supportsScreenshot) { - Log.d(TAG, "Command.TakeScreenshot: Model has no screenshot support, capturing screen info only.") - this.showToast("Capturing screen info...", false) - val screenInfo = captureScreenInformation() - val mainActivity = MainActivity.getInstance() - mainActivity?.getPhotoReasoningViewModel()?.addScreenshotToConversation( - Uri.EMPTY, - applicationContext, - screenInfo - ) - false - } else { - Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.") - this.showToast("Preparing screenshot...", false) // Updated toast message - - val screenInfo = captureScreenInformation() // Capture fresh screen info - - val intent = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT).apply { - putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo) - // Set package to ensure only our app's receiver gets it - `package` = applicationContext.packageName - } - applicationContext.sendBroadcast(intent) - Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.") - - // The command is considered "handled" once the broadcast is sent. - // MainActivity and ScreenCaptureService will handle the rest asynchronously. - // Return false to allow the command queue to proceed immediately. 
- false - } + is Command.TakeScreenshot -> executeTakeScreenshotCommand() + is Command.Wait -> { + pendingScreenshotDelayMillis = command.seconds + .coerceAtLeast(0L) + .coerceAtMost(Long.MAX_VALUE / 1000L) * 1000L + Log.d(TAG, "Command.Wait: Delaying the next takeScreenshot command by ${command.seconds} seconds.") + showToast("Delaying next screenshot by ${command.seconds} seconds", false) + false } is Command.PressHomeButton -> { executeSyncCommandAction( @@ -438,6 +416,51 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { } } + private fun executeTakeScreenshotCommand(): Boolean { + val delayMillis = pendingScreenshotDelayMillis + pendingScreenshotDelayMillis = 0L + + val captureAndRequestScreenshot = { + val currentModel = GenerativeAiViewModelFactory.getCurrentModel() + if (!currentModel.supportsScreenshot) { + Log.d(TAG, "Command.TakeScreenshot: Model has no screenshot support, capturing screen info only.") + showToast("Capturing screen info...", false) + val screenInfo = captureScreenInformation() + val mainActivity = MainActivity.getInstance() + mainActivity?.getPhotoReasoningViewModel()?.addScreenshotToConversation( + Uri.EMPTY, + applicationContext, + screenInfo + ) + } else { + Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.") + showToast("Preparing screenshot...", false) + + val screenInfo = captureScreenInformation() + + val intent = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT).apply { + putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo) + `package` = applicationContext.packageName + } + applicationContext.sendBroadcast(intent) + Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.") + } + } + + if (delayMillis <= 0L) { + captureAndRequestScreenshot() + return false + } + + Log.d(TAG, "Command.TakeScreenshot: Waiting ${delayMillis}ms before capturing screen info and screenshot.") + showToast("Waiting 
${delayMillis / 1000L} seconds before screenshot...", false) + handler.postDelayed({ + captureAndRequestScreenshot() + scheduleNextCommandProcessing() + }, delayMillis) + return true + } + private fun executeSyncCommandAction( logMessage: String, toastMessage: String, diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningHistoryBuilder.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningHistoryBuilder.kt index 7e760893..8849bcb5 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningHistoryBuilder.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningHistoryBuilder.kt @@ -28,7 +28,9 @@ internal object PhotoReasoningHistoryBuilder { var currentUserContent = "" var currentModelContent = "" - for (message in messages) { + val sanitizedMessages = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages) + + for (message in sanitizedMessages) { when (message.participant) { PhotoParticipant.USER -> { if (currentModelContent.isNotEmpty()) { diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningMessageMutations.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningMessageMutations.kt index b7f2e18a..6790cc81 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningMessageMutations.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningMessageMutations.kt @@ -5,14 +5,16 @@ internal object PhotoReasoningMessageMutations { chatState: PhotoReasoningChatState, userMessage: PhotoReasoningMessage ): List { - chatState.addMessage(userMessage) - chatState.addMessage( + val messages = chatState.getAllMessages().toMutableList() + messages.add(userMessage) + messages.add( PhotoReasoningMessage( text = "", participant = PhotoParticipant.MODEL, isPending = true ) ) + 
chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages)) return chatState.getAllMessages() } diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt index 5cc1d2ce..abb9985e 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt @@ -349,6 +349,7 @@ fun PhotoReasoningScreen( is Command.ClickButton -> "Click on button: \"${command.buttonText}\"" is Command.TapCoordinates -> "Tap coordinates: (${command.x}, ${command.y})" is Command.TakeScreenshot -> "Take screenshot" + is Command.Wait -> "Wait: ${command.seconds} seconds" is Command.Retrieve -> "Retrieve: \"${command.heading}\"" else -> command::class.simpleName ?: "Unknown Command" } diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicy.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicy.kt new file mode 100644 index 00000000..4e577fe1 --- /dev/null +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicy.kt @@ -0,0 +1,52 @@ +package com.google.ai.sample.feature.multimodal + +internal object PhotoReasoningScreenElementHistoryPolicy { + private const val MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES = 3 + private const val MARKER = "Screen elements:" + private const val NO_LONGER_RELEVANT = "no longer relevant" + private val screenElementsSectionRegex = Regex( + pattern = "(?is)(Screen elements:\\s*).*", + options = setOf(RegexOption.IGNORE_CASE) + ) + + fun sanitizeMessages(messages: List): List { + var remainingRelevant = MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES + val keepRelevantIds = messages + .asReversed() + .filter { hasScreenElements(it.text) && 
!isAlreadyObsolete(it.text) } + .mapNotNull { message -> + if (remainingRelevant > 0) { + remainingRelevant-- + message.id + } else { + null + } + } + .toSet() + + return messages.map { message -> + if (hasScreenElements(message.text) && !isAlreadyObsolete(message.text) && message.id !in keepRelevantIds) { + message.copy(text = replaceScreenElementsWithObsoleteMarker(message.text)) + } else { + message + } + } + } + + fun hasScreenElements(text: String): Boolean { + return text.contains(MARKER, ignoreCase = true) + } + + private fun isAlreadyObsolete(text: String): Boolean { + val markerIndex = text.indexOf(MARKER, ignoreCase = true) + if (markerIndex < 0) return false + val sectionText = text.substring(markerIndex + MARKER.length).trim() + return sectionText.equals(NO_LONGER_RELEVANT, ignoreCase = true) + } + + private fun replaceScreenElementsWithObsoleteMarker(text: String): String { + return screenElementsSectionRegex.replace(text) { match -> + "${match.groupValues[1]}$NO_LONGER_RELEVANT" + } + } +} diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt index a05462b5..659f53b3 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt @@ -735,7 +735,7 @@ class PhotoReasoningViewModel( isPending = true ) messages.add(pendingAiMessage) - _chatState.setAllMessages(messages) + _chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages)) _chatMessagesFlow.value = _chatState.getAllMessages() currentReasoningJob?.cancel() // Cancel any previous reasoning job @@ -1181,7 +1181,7 @@ class PhotoReasoningViewModel( val formattedDbEntries = PhotoReasoningTextPolicies.formatDatabaseEntriesAsText(context) if (formattedDbEntries.isNotBlank()) apiMessages.add(CerebrasMessage(role 
= "user", content = formattedDbEntries)) - _chatState.getAllMessages() + PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages()) .filter { !it.isPending && it.participant != PhotoParticipant.ERROR } .forEach { message -> val role = if (message.participant == PhotoParticipant.USER) "user" else "assistant" @@ -1315,7 +1315,7 @@ class PhotoReasoningViewModel( if (systemContent.isNotEmpty()) apiMessages.add(MistralMessage(role = "system", content = systemContent)) - _chatState.getAllMessages() + PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages()) .filter { !it.isPending && it.participant != PhotoParticipant.ERROR } .forEach { message -> val role = if (message.participant == PhotoParticipant.USER) "user" else "assistant" @@ -1466,15 +1466,7 @@ class PhotoReasoningViewModel( imageUris = if (currentModel.supportsScreenshot) (imageUrisForChat ?: emptyList()) else emptyList(), isPending = false ) - _chatState.addMessage(userMessage) - - val pendingAiMessage = PhotoReasoningMessage( - text = "", - participant = PhotoParticipant.MODEL, - isPending = true - ) - _chatState.addMessage(pendingAiMessage) - _chatMessagesFlow.value = _chatState.getAllMessages() + appendUserAndPendingModelMessages(userMessage) _uiState.value = PhotoReasoningUiState.Loading @@ -1499,7 +1491,7 @@ class PhotoReasoningViewModel( } // Add Chat History (exclude the last added user message) - val allMessages = _chatState.getAllMessages() + val allMessages = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages()) // exclude the last pending message and the last user message we just added val historyMessages = allMessages.filter { !it.isPending && it.participant != PhotoParticipant.ERROR }.dropLast(1) @@ -1627,7 +1619,7 @@ class PhotoReasoningViewModel( } // Add chat history - val messages = _chatState.getAllMessages() + val messages = 
PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages()) messages.forEach { msg -> when (msg.participant) { PhotoParticipant.USER -> { @@ -2431,7 +2423,7 @@ private fun processCommands(text: String) { fun loadChatHistory(context: Context) { val savedMessages = ChatHistoryPreferences.loadChatMessages(context) if (savedMessages.isNotEmpty()) { - _chatState.setAllMessages(savedMessages) + _chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(savedMessages)) _chatMessagesFlow.value = _chatState.getAllMessages() if (isLiveMode) { diff --git a/app/src/main/kotlin/com/google/ai/sample/util/Command.kt b/app/src/main/kotlin/com/google/ai/sample/util/Command.kt index a44cb09b..f82628f4 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/Command.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/Command.kt @@ -8,6 +8,7 @@ sealed class Command { data class LongClickButton(val buttonText: String) : Command() data class TapCoordinates(val x: String, val y: String) : Command() object TakeScreenshot : Command() + data class Wait(val seconds: Long) : Command() object PressHomeButton : Command() object PressBackButton : Command() object ShowRecentApps : Command() diff --git a/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt b/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt index 9619e1c8..62c920cf 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt @@ -11,7 +11,7 @@ object CommandParser { // Enum to represent different command types private enum class CommandTypeEnum { - CLICK_BUTTON, LONG_CLICK_BUTTON, TAP_COORDINATES, TAKE_SCREENSHOT, PRESS_HOME, PRESS_BACK, + CLICK_BUTTON, LONG_CLICK_BUTTON, TAP_COORDINATES, TAKE_SCREENSHOT, WAIT, PRESS_HOME, PRESS_BACK, SHOW_RECENT_APPS, SCROLL_DOWN, SCROLL_UP, SCROLL_LEFT, SCROLL_RIGHT, SCROLL_DOWN_FROM_COORDINATES, SCROLL_UP_FROM_COORDINATES, 
SCROLL_LEFT_FROM_COORDINATES, SCROLL_RIGHT_FROM_COORDINATES, @@ -53,8 +53,9 @@ object CommandParser { // Tap coordinates patterns PatternInfo("tapCoords1", Regex("(?i)\\btapAtCoordinates\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)"), { match -> Command.TapCoordinates(match.groupValues[1], match.groupValues[2]) }, CommandTypeEnum.TAP_COORDINATES), - // Screenshot patterns + // Screenshot and wait patterns (Wait values that overflow Long clamp to Long.MAX_VALUE) PatternInfo("screenshot1", Regex("(?i)\\btakeScreenshot\\(\\)"), { Command.TakeScreenshot }, CommandTypeEnum.TAKE_SCREENSHOT), + PatternInfo("wait1", Regex("(?i)\\bWait\\(\\s*(\\d+)\\s*\\)"), { match -> Command.Wait(match.groupValues[1].toLongOrNull() ?: Long.MAX_VALUE) }, CommandTypeEnum.WAIT), // Home button patterns PatternInfo("home1", Regex("(?i)\\bhome\\(\\)"), { Command.PressHomeButton }, CommandTypeEnum.PRESS_HOME), @@ -150,6 +151,7 @@ object CommandParser { is Command.LongClickButton -> Log.d(TAG, "Command details: LongClickButton(\"${command.buttonText}\")") is Command.TapCoordinates -> Log.d(TAG, "Command details: TapCoordinates(${command.x}, ${command.y})") is Command.TakeScreenshot -> Log.d(TAG, "Command details: TakeScreenshot") + is Command.Wait -> Log.d(TAG, "Command details: Wait(${command.seconds})") is Command.PressHomeButton -> Log.d(TAG, "Command details: PressHomeButton") is Command.PressBackButton -> Log.d(TAG, "Command details: PressBackButton") is Command.ShowRecentApps -> Log.d(TAG, "Command details: ShowRecentApps") diff --git a/app/src/test/java/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicyTest.kt b/app/src/test/java/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicyTest.kt new file mode 100644 index 00000000..15973aef --- /dev/null +++ b/app/src/test/java/com/google/ai/sample/feature/multimodal/PhotoReasoningScreenElementHistoryPolicyTest.kt @@ -0,0 +1,24 @@ +package com.google.ai.sample.feature.multimodal + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertTrue +import 
org.junit.Test + +class PhotoReasoningScreenElementHistoryPolicyTest { + @Test + fun sanitizeMessages_keepsOnlyThreeLatestScreenElementSectionsRelevant() { + val messages = (1..4).map { index -> + PhotoReasoningMessage( + text = "Screenshot $index\n\nScreen elements:\n$index. Button $index", + participant = PhotoParticipant.USER + ) + } + + val sanitized = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages) + + assertEquals("Screenshot 1\n\nScreen elements:\nno longer relevant", sanitized[0].text) + assertTrue(sanitized[1].text.contains("Button 2")) + assertTrue(sanitized[2].text.contains("Button 3")) + assertTrue(sanitized[3].text.contains("Button 4")) + } +} diff --git a/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt b/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt index f51db6b7..361b894f 100644 --- a/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt +++ b/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt @@ -67,4 +67,15 @@ class CommandParserTest { assertEquals(1, commands.size) assertTrue(commands.first() is Command.Retrieve) } + + @Test + fun parseCommands_extractsWaitCommand() { + val commands = CommandParser.parseCommands("Wait(7) takeScreenshot()", clearBuffer = true) + + assertEquals(2, commands.size) + val wait = commands.first() + assertTrue(wait is Command.Wait) + assertEquals(7L, (wait as Command.Wait).seconds) + assertTrue(commands[1] is Command.TakeScreenshot) + } } From 3465a955e0e70aca5b87d327eb999c96bf1f8afd Mon Sep 17 00:00:00 2001 From: Android PowerUser <88908510+Android-PowerUser@users.noreply.github.com> Date: Wed, 6 May 2026 14:26:49 +0200 Subject: [PATCH 2/2] Update default system message on first start --- .../com/google/ai/sample/util/SystemMessagePreferences.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt 
b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt index 9c231ec1..31defa68 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt @@ -14,7 +14,7 @@ object SystemMessagePreferences { private const val KEY_FIRST_START_COMPLETED = "first_start_completed" // New flag // Content from pasted_content.txt - private const val DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START = """You are on an App on a Smartphone. Your app is called Screen Operator. You start from this app. Proceed step by step! DON'T USE TOOL CODE! You must operate the screen with exactly following commands: "home()" "back()" "recentApps()" "openApp("sample")" for buttons and words: "click("sample")" "longClick("sample")" "tapAtCoordinates(x, y)" "tapAtCoordinates(x percent of screen%, y percent of screen%)" "scrollDown()" "scrollUp()" "scrollLeft()" "scrollRight()" "scrollDown(x, y, how much pixel to scroll, duration in milliseconds)" "scrollUp(x, y, how much pixel to scroll, duration in milliseconds)" "scrollLeft(x, y, how much pixel to scroll, duration in milliseconds)" "scrollRight(x, y, how much pixel to scroll, duration in milliseconds)" "scrollDown(x percent of screen%, y percent of screen%, how much percent to scroll%, duration in milliseconds)" "scrollUp(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollLeft(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollRight(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" scroll status bar down: "scrollUp(540, 0, 1100, 50)" "takeScreenshot()" Termux("command") To write text, search and click the textfield thereafter: "writeText("sample text")" You need to write the already existing text, if it should continue exist. If the keyboard is displayed, you can press "Enter()". 
Otherwise, you have to open the keyboard by clicking on the text field. Don't write the commands if you're just planing about it or messaging me. If you have questions, open Screen Operator, ask your question(s), and do not use takeScreenshot() until you receive an answer. Retrieve information using "retrieve("sample")" if some is passed to your task. You can see the screen and get additional Informations about them with: "takeScreenshot()" You need this command at the end of every message until you are finish. When you're done don't say "takeScreenshot()"""" + private const val DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START = """You are on an App on a Smartphone. Your app is called Screen Operator. You start from this app. Proceed step by step! DON'T USE TOOL CODE! You must operate the screen with exactly following commands: "home()" "back()" "recentApps()" "openApp("sample")" for buttons and words: "click("sample")" "longClick("sample")" "tapAtCoordinates(x, y)" "tapAtCoordinates(x percent of screen%, y percent of screen%)" "scrollDown()" "scrollUp()" "scrollLeft()" "scrollRight()" "scrollDown(x, y, how much pixel to scroll, duration in milliseconds)" "scrollUp(x, y, how much pixel to scroll, duration in milliseconds)" "scrollLeft(x, y, how much pixel to scroll, duration in milliseconds)" "scrollRight(x, y, how much pixel to scroll, duration in milliseconds)" "scrollDown(x percent of screen%, y percent of screen%, how much percent to scroll%, duration in milliseconds)" "scrollUp(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollLeft(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollRight(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" scroll status bar down: "scrollUp(540, 0, 1100, 50)" "takeScreenshot()" "Wait(seconds)" "Termux("command")" To write text, search and click the textfield thereafter: 
"writeText("sample text")" You need to write the already existing text, if it should continue exist. If the keyboard is displayed, you can press "Enter()". Otherwise, you have to open the keyboard by clicking on the text field. Don't write the commands if you're just planing about it or messaging me. If you have questions, open Screen Operator, ask your question(s), and do not use takeScreenshot() until you receive an answer. Retrieve information using "retrieve("sample")" if some is passed to your task. You can see the screen and get additional Informations about them with: "takeScreenshot()" You need this command at the end of every message until you are finish. When you're done don't say "takeScreenshot()"""" private fun prefs(context: Context) = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) /**