Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ android {
useLegacyPackaging = false
}
}

testOptions {
unitTests.isReturnDefaultValues = true
}
}

fun parseLoadAlignments(readelfOutput: String): List<Long> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
// Handler for delayed operations
private val handler = Handler(Looper.getMainLooper()) // Instance handler

private var pendingScreenshotDelayMillis: Long = 0L

// App name to package mapper
private lateinit var appNamePackageMapper: AppNamePackageMapper

Expand Down Expand Up @@ -234,38 +236,14 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
this.tapAtCoordinates(point.xPx, point.yPx)
true // Asynchronous
}
is Command.TakeScreenshot -> {
val currentModel = GenerativeAiViewModelFactory.getCurrentModel()
if (!currentModel.supportsScreenshot) {
Log.d(TAG, "Command.TakeScreenshot: Model has no screenshot support, capturing screen info only.")
this.showToast("Capturing screen info...", false)
val screenInfo = captureScreenInformation()
val mainActivity = MainActivity.getInstance()
mainActivity?.getPhotoReasoningViewModel()?.addScreenshotToConversation(
Uri.EMPTY,
applicationContext,
screenInfo
)
false
} else {
Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.")
this.showToast("Preparing screenshot...", false) // Updated toast message

val screenInfo = captureScreenInformation() // Capture fresh screen info

val intent = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT).apply {
putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo)
// Set package to ensure only our app's receiver gets it
`package` = applicationContext.packageName
}
applicationContext.sendBroadcast(intent)
Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.")

// The command is considered "handled" once the broadcast is sent.
// MainActivity and ScreenCaptureService will handle the rest asynchronously.
// Return false to allow the command queue to proceed immediately.
false
}
is Command.TakeScreenshot -> executeTakeScreenshotCommand()
is Command.Wait -> {
pendingScreenshotDelayMillis = command.seconds
.coerceAtLeast(0L)
.coerceAtMost(Long.MAX_VALUE / 1000L) * 1000L
Log.d(TAG, "Command.Wait: Delaying the next takeScreenshot command by ${command.seconds} seconds.")
showToast("Delaying next screenshot by ${command.seconds} seconds", false)
false
}
is Command.PressHomeButton -> {
executeSyncCommandAction(
Expand Down Expand Up @@ -438,6 +416,51 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
}
}

/**
 * Runs a takeScreenshot command, applying the delay stored in
 * [pendingScreenshotDelayMillis] if one is pending.
 *
 * The stored delay is read and reset to zero up front, so it affects this call only.
 *
 * @return `false` when the capture ran synchronously inside this call; `true` when it was
 * posted to [handler] to run after the delay, in which case the posted task calls
 * `scheduleNextCommandProcessing()` itself after requesting the capture.
 */
private fun executeTakeScreenshotCommand(): Boolean {
    // Consume the one-shot delay before doing anything else.
    val waitMillis = pendingScreenshotDelayMillis
    pendingScreenshotDelayMillis = 0L

    // Captures screen information and either hands it straight to the view model
    // (model without screenshot support) or asks MainActivity for a screenshot via broadcast.
    fun performCapture() {
        val activeModel = GenerativeAiViewModelFactory.getCurrentModel()
        if (activeModel.supportsScreenshot) {
            Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.")
            showToast("Preparing screenshot...", false)

            val screenInfo = captureScreenInformation()

            val request = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT)
            request.putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo)
            // Restrict delivery to this app's own receivers.
            request.`package` = applicationContext.packageName
            applicationContext.sendBroadcast(request)
            Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.")
            return
        }

        Log.d(TAG, "Command.TakeScreenshot: Model has no screenshot support, capturing screen info only.")
        showToast("Capturing screen info...", false)
        val screenInfo = captureScreenInformation()
        MainActivity.getInstance()
            ?.getPhotoReasoningViewModel()
            ?.addScreenshotToConversation(Uri.EMPTY, applicationContext, screenInfo)
    }

    if (waitMillis > 0L) {
        Log.d(TAG, "Command.TakeScreenshot: Waiting ${waitMillis}ms before capturing screen info and screenshot.")
        showToast("Waiting ${waitMillis / 1000L} seconds before screenshot...", false)
        handler.postDelayed({
            performCapture()
            scheduleNextCommandProcessing()
        }, waitMillis)
        return true
    }

    performCapture()
    return false
}

private fun executeSyncCommandAction(
logMessage: String,
toastMessage: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ internal object PhotoReasoningHistoryBuilder {
var currentUserContent = ""
var currentModelContent = ""

for (message in messages) {
val sanitizedMessages = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages)

for (message in sanitizedMessages) {
when (message.participant) {
PhotoParticipant.USER -> {
if (currentModelContent.isNotEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ internal object PhotoReasoningMessageMutations {
chatState: PhotoReasoningChatState,
userMessage: PhotoReasoningMessage
): List<PhotoReasoningMessage> {
chatState.addMessage(userMessage)
chatState.addMessage(
val messages = chatState.getAllMessages().toMutableList()
messages.add(userMessage)
messages.add(
PhotoReasoningMessage(
text = "",
participant = PhotoParticipant.MODEL,
isPending = true
)
)
chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages))
return chatState.getAllMessages()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ fun PhotoReasoningScreen(
is Command.ClickButton -> "Click on button: \"${command.buttonText}\""
is Command.TapCoordinates -> "Tap coordinates: (${command.x}, ${command.y})"
is Command.TakeScreenshot -> "Take screenshot"
is Command.Wait -> "Wait: ${command.seconds} seconds"
is Command.Retrieve -> "Retrieve: \"${command.heading}\""
else -> command::class.simpleName ?: "Unknown Command"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package com.google.ai.sample.feature.multimodal

/**
 * Limits how many chat messages keep their "Screen elements:" section.
 *
 * Only the newest [MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES] messages that carry a screen
 * elements section keep it. In every older such message, everything following the first
 * marker is replaced by the text "no longer relevant".
 */
internal object PhotoReasoningScreenElementHistoryPolicy {
    private const val MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES = 3
    private const val MARKER = "Screen elements:"
    private const val NO_LONGER_RELEVANT = "no longer relevant"

    // Matches from the first marker (case-insensitive) through the end of the text.
    // Group 1 is the marker plus any whitespace after it, which is preserved on replacement.
    // The pattern is built from MARKER so the regex and the plain-text checks cannot drift apart.
    private val screenElementsSectionRegex = Regex(
        "(${Regex.escape(MARKER)}\\s*).*",
        setOf(RegexOption.IGNORE_CASE, RegexOption.DOT_MATCHES_ALL)
    )

    /**
     * Returns a copy of [messages] in which only the most recent
     * [MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES] messages with a live screen elements section
     * keep that section; older ones have it replaced by the obsolete marker.
     *
     * Messages without a section, or whose section is already obsolete, are returned
     * unchanged and do not count towards the limit. The input list is not modified.
     *
     * @param messages chat messages in chronological order (oldest first).
     * @return a new list of the same size and order.
     */
    fun sanitizeMessages(messages: List<PhotoReasoningMessage>): List<PhotoReasoningMessage> {
        // Walk newest-first and remember the ids of the messages allowed to keep their section.
        val keepRelevantIds = messages
            .asReversed()
            .asSequence()
            .filter { hasRelevantScreenElements(it.text) }
            .take(MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES)
            .mapNotNull { it.id }
            .toSet()

        return messages.map { message ->
            if (hasRelevantScreenElements(message.text) && message.id !in keepRelevantIds) {
                message.copy(text = replaceScreenElementsWithObsoleteMarker(message.text))
            } else {
                message
            }
        }
    }

    /** Returns `true` when [text] contains the screen elements marker, ignoring case. */
    fun hasScreenElements(text: String): Boolean = text.contains(MARKER, ignoreCase = true)

    /** A section is relevant when it exists and has not already been marked obsolete. */
    private fun hasRelevantScreenElements(text: String): Boolean =
        hasScreenElements(text) && !isAlreadyObsolete(text)

    /**
     * Returns `true` when everything after the first marker in [text], once trimmed, is
     * exactly the obsolete placeholder (ignoring case). Returns `false` if there is no marker.
     */
    private fun isAlreadyObsolete(text: String): Boolean {
        val markerIndex = text.indexOf(MARKER, ignoreCase = true)
        if (markerIndex < 0) return false
        val sectionText = text.substring(markerIndex + MARKER.length).trim()
        return sectionText.equals(NO_LONGER_RELEVANT, ignoreCase = true)
    }

    /** Replaces everything after the first marker in [text] with the obsolete placeholder. */
    private fun replaceScreenElementsWithObsoleteMarker(text: String): String =
        screenElementsSectionRegex.replace(text) { match ->
            "${match.groupValues[1]}$NO_LONGER_RELEVANT"
        }
}
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ class PhotoReasoningViewModel(
isPending = true
)
messages.add(pendingAiMessage)
_chatState.setAllMessages(messages)
_chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages))
_chatMessagesFlow.value = _chatState.getAllMessages()

currentReasoningJob?.cancel() // Cancel any previous reasoning job
Expand Down Expand Up @@ -1181,7 +1181,7 @@ class PhotoReasoningViewModel(
val formattedDbEntries = PhotoReasoningTextPolicies.formatDatabaseEntriesAsText(context)
if (formattedDbEntries.isNotBlank())
apiMessages.add(CerebrasMessage(role = "user", content = formattedDbEntries))
_chatState.getAllMessages()
PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages())
.filter { !it.isPending && it.participant != PhotoParticipant.ERROR }
.forEach { message ->
val role = if (message.participant == PhotoParticipant.USER) "user" else "assistant"
Expand Down Expand Up @@ -1315,7 +1315,7 @@ class PhotoReasoningViewModel(
if (systemContent.isNotEmpty())
apiMessages.add(MistralMessage(role = "system", content = systemContent))

_chatState.getAllMessages()
PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages())
.filter { !it.isPending && it.participant != PhotoParticipant.ERROR }
.forEach { message ->
val role = if (message.participant == PhotoParticipant.USER) "user" else "assistant"
Expand Down Expand Up @@ -1466,15 +1466,7 @@ class PhotoReasoningViewModel(
imageUris = if (currentModel.supportsScreenshot) (imageUrisForChat ?: emptyList()) else emptyList(),
isPending = false
)
_chatState.addMessage(userMessage)

val pendingAiMessage = PhotoReasoningMessage(
text = "",
participant = PhotoParticipant.MODEL,
isPending = true
)
_chatState.addMessage(pendingAiMessage)
_chatMessagesFlow.value = _chatState.getAllMessages()
appendUserAndPendingModelMessages(userMessage)

_uiState.value = PhotoReasoningUiState.Loading

Expand All @@ -1499,7 +1491,7 @@ class PhotoReasoningViewModel(
}

// Add Chat History (exclude the last added user message)
val allMessages = _chatState.getAllMessages()
val allMessages = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages())
// exclude the last pending message and the last user message we just added
val historyMessages = allMessages.filter { !it.isPending && it.participant != PhotoParticipant.ERROR }.dropLast(1)

Expand Down Expand Up @@ -1627,7 +1619,7 @@ class PhotoReasoningViewModel(
}

// Add chat history
val messages = _chatState.getAllMessages()
val messages = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages())
messages.forEach { msg ->
when (msg.participant) {
PhotoParticipant.USER -> {
Expand Down Expand Up @@ -2431,7 +2423,7 @@ private fun processCommands(text: String) {
fun loadChatHistory(context: Context) {
val savedMessages = ChatHistoryPreferences.loadChatMessages(context)
if (savedMessages.isNotEmpty()) {
_chatState.setAllMessages(savedMessages)
_chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(savedMessages))
_chatMessagesFlow.value = _chatState.getAllMessages()

if (isLiveMode) {
Expand Down
1 change: 1 addition & 0 deletions app/src/main/kotlin/com/google/ai/sample/util/Command.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ sealed class Command {
data class LongClickButton(val buttonText: String) : Command()
data class TapCoordinates(val x: String, val y: String) : Command()
object TakeScreenshot : Command()
data class Wait(val seconds: Long) : Command()
object PressHomeButton : Command()
object PressBackButton : Command()
object ShowRecentApps : Command()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ object CommandParser {

// Enum to represent different command types
private enum class CommandTypeEnum {
CLICK_BUTTON, LONG_CLICK_BUTTON, TAP_COORDINATES, TAKE_SCREENSHOT, PRESS_HOME, PRESS_BACK,
CLICK_BUTTON, LONG_CLICK_BUTTON, TAP_COORDINATES, TAKE_SCREENSHOT, WAIT, PRESS_HOME, PRESS_BACK,
SHOW_RECENT_APPS, SCROLL_DOWN, SCROLL_UP, SCROLL_LEFT, SCROLL_RIGHT,
SCROLL_DOWN_FROM_COORDINATES, SCROLL_UP_FROM_COORDINATES,
SCROLL_LEFT_FROM_COORDINATES, SCROLL_RIGHT_FROM_COORDINATES,
Expand Down Expand Up @@ -53,8 +53,9 @@ object CommandParser {
// Tap coordinates patterns
PatternInfo("tapCoords1", Regex("(?i)\\btapAtCoordinates\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)"), { match -> Command.TapCoordinates(match.groupValues[1], match.groupValues[2]) }, CommandTypeEnum.TAP_COORDINATES),

// Screenshot patterns
// Screenshot and wait patterns
PatternInfo("screenshot1", Regex("(?i)\\btakeScreenshot\\(\\)"), { Command.TakeScreenshot }, CommandTypeEnum.TAKE_SCREENSHOT),
PatternInfo("wait1", Regex("(?i)\\bWait\\(\\s*(\\d+)\\s*\\)"), { match -> Command.Wait(match.groupValues[1].toLong()) }, CommandTypeEnum.WAIT),

// Home button patterns
PatternInfo("home1", Regex("(?i)\\bhome\\(\\)"), { Command.PressHomeButton }, CommandTypeEnum.PRESS_HOME),
Expand Down Expand Up @@ -150,6 +151,7 @@ object CommandParser {
is Command.LongClickButton -> Log.d(TAG, "Command details: LongClickButton(\"${command.buttonText}\")")
is Command.TapCoordinates -> Log.d(TAG, "Command details: TapCoordinates(${command.x}, ${command.y})")
is Command.TakeScreenshot -> Log.d(TAG, "Command details: TakeScreenshot")
is Command.Wait -> Log.d(TAG, "Command details: Wait(${command.seconds})")
is Command.PressHomeButton -> Log.d(TAG, "Command details: PressHomeButton")
is Command.PressBackButton -> Log.d(TAG, "Command details: PressBackButton")
is Command.ShowRecentApps -> Log.d(TAG, "Command details: ShowRecentApps")
Expand Down
Loading
Loading