Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ android {
useLegacyPackaging = false
}
}

testOptions {
unitTests.isReturnDefaultValues = true
}
}

fun parseLoadAlignments(readelfOutput: String): List<Long> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,35 @@ internal suspend fun callPuterApi(modelName: String, apiKey: String, chatHistory
@Serializable
data class ServiceGroqRequest(
val model: String,
val messages: List<ServiceMistralMessage>,
val messages: List<ServiceGroqMessage>,
val max_tokens: Int = 4096,
val temperature: Double = 0.7,
val top_p: Double = 1.0,
val stream: Boolean = false
)

/**
 * One chat message in a [ServiceGroqRequest].
 *
 * @property role Speaker of the message as a plain string (for example "system" or "assistant").
 * @property content Ordered content parts (text and/or image) that make up the message.
 */
@Serializable
data class ServiceGroqMessage(val role: String, val content: List<ServiceGroqContent>)

/**
 * Base type for the parts of a [ServiceGroqMessage].
 *
 * Serialized polymorphically; the JSON class discriminator is written under the "type" key,
 * and each subclass supplies its discriminator value through its own `@SerialName`.
 */
@OptIn(ExperimentalSerializationApi::class)
@Serializable
@JsonClassDiscriminator("type")
sealed class ServiceGroqContent

/**
 * Plain-text content part, registered under the serial name "text".
 *
 * @property text The text payload, serialized under the "text" key.
 */
@Serializable
@SerialName("text")
data class ServiceGroqTextContent(
    @SerialName("text") val text: String
) : ServiceGroqContent()

/**
 * Image content part, registered under the serial name "image_url".
 *
 * @property imageUrl Wrapper holding the image location, serialized under the "image_url" key.
 */
@Serializable
@SerialName("image_url")
data class ServiceGroqImageContent(
    @SerialName("image_url") val imageUrl: ServiceGroqImageUrl
) : ServiceGroqContent()

/**
 * Wrapper object around the URL string of an image content part.
 *
 * @property url The image URL; the caller in this file passes a `data:image/jpeg;base64,...` URI.
 */
@Serializable
data class ServiceGroqImageUrl(
    val url: String
)

internal suspend fun callGroqApi(modelName: String, apiKey: String, chatHistory: List<Content>, inputContent: Content): Pair<String?, String?> {
var responseText: String? = null
var errorMessage: String? = null
Expand All @@ -254,12 +276,22 @@ internal suspend fun callGroqApi(modelName: String, apiKey: String, chatHistory:
val supportsScreenshot = currentModelOption?.supportsScreenshot ?: true

try {
val apiMessages = mutableListOf<ServiceMistralMessage>()
val apiMessages = mutableListOf<ServiceGroqMessage>()
(chatHistory + inputContent).forEach { content ->
val parts = content.parts.mapNotNull { part ->
when (part) {
is TextPart -> if (part.text.isNotBlank()) ServiceMistralTextContent(text = part.text) else null
is ImagePart -> if (supportsScreenshot) ServiceMistralImageContent(imageUrl = "data:image/jpeg;base64,${com.google.ai.sample.util.ImageUtils.bitmapToBase64(part.image)}") else null
is TextPart -> if (part.text.isNotBlank()) ServiceGroqTextContent(text = part.text) else null
is ImagePart -> {
if (supportsScreenshot) {
ServiceGroqImageContent(
imageUrl = ServiceGroqImageUrl(
url = "data:image/jpeg;base64,${com.google.ai.sample.util.ImageUtils.bitmapToBase64(part.image)}"
)
)
} else {
null
}
}
else -> null
}
}
Expand All @@ -269,12 +301,20 @@ internal suspend fun callGroqApi(modelName: String, apiKey: String, chatHistory:
"system" -> "system"
else -> "assistant"
}
apiMessages.add(ServiceMistralMessage(role = role, content = parts))
apiMessages.add(ServiceGroqMessage(role = role, content = parts))
}
}

val requestBody = ServiceGroqRequest(model = modelName, messages = apiMessages)
val json = Json { ignoreUnknownKeys = true; serializersModule = SerializersModule { polymorphic(ServiceMistralContent::class) { subclass(ServiceMistralTextContent::class); subclass(ServiceMistralImageContent::class) } } }
val json = Json {
ignoreUnknownKeys = true
serializersModule = SerializersModule {
polymorphic(ServiceGroqContent::class) {
subclass(ServiceGroqTextContent::class)
subclass(ServiceGroqImageContent::class)
}
}
}
val mediaType = "application/json".toMediaType()
val client = OkHttpClient()
val request = Request.Builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
fun clearCommandQueue() {
val instance = serviceInstance
if (instance != null) {
instance.cancelPendingDelayedScreenshot()
instance.commandQueue.clearAndUnlock()
Log.d(TAG, "Command queue cleared and processing flag reset.")
Log.d(TAG, "Command queue cleared, delayed screenshot cancelled, and processing flag reset.")
} else {
Log.w(TAG, "clearCommandQueue: serviceInstance is null, nothing to clear.")
}
Expand Down Expand Up @@ -141,6 +142,9 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
// Handler for delayed operations
private val handler = Handler(Looper.getMainLooper()) // Instance handler

private var pendingScreenshotDelayMillis: Long = 0L
private var pendingDelayedScreenshotRunnable: Runnable? = null

// App name to package mapper
private lateinit var appNamePackageMapper: AppNamePackageMapper

Expand Down Expand Up @@ -234,38 +238,14 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
this.tapAtCoordinates(point.xPx, point.yPx)
true // Asynchronous
}
is Command.TakeScreenshot -> {
val currentModel = GenerativeAiViewModelFactory.getCurrentModel()
if (!currentModel.supportsScreenshot) {
Log.d(TAG, "Command.TakeScreenshot: Model has no screenshot support, capturing screen info only.")
this.showToast("Capturing screen info...", false)
val screenInfo = captureScreenInformation()
val mainActivity = MainActivity.getInstance()
mainActivity?.getPhotoReasoningViewModel()?.addScreenshotToConversation(
Uri.EMPTY,
applicationContext,
screenInfo
)
false
} else {
Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.")
this.showToast("Preparing screenshot...", false) // Updated toast message

val screenInfo = captureScreenInformation() // Capture fresh screen info

val intent = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT).apply {
putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo)
// Set package to ensure only our app's receiver gets it
`package` = applicationContext.packageName
}
applicationContext.sendBroadcast(intent)
Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.")

// The command is considered "handled" once the broadcast is sent.
// MainActivity and ScreenCaptureService will handle the rest asynchronously.
// Return false to allow the command queue to proceed immediately.
false
}
is Command.TakeScreenshot -> executeTakeScreenshotCommand()
is Command.Wait -> {
pendingScreenshotDelayMillis = command.seconds
.coerceAtLeast(0L)
.coerceAtMost(Long.MAX_VALUE / 1000L) * 1000L
Comment on lines +243 to +245
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

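🛑 Logic Error (claimed): Integer overflow risk when multiplying seconds by 1000. The concern is that a value of command.seconds equal to Long.MAX_VALUE / 1000L (9223372036854775) could overflow to a negative value when multiplied by 1000, because the coerceAtMost check happens before the multiplication. Note, however, that the existing clamp already prevents this: the integer division truncates, so 9223372036854775 * 1000 = 9223372036854775000, which is still below Long.MAX_VALUE (9223372036854775807). The suggested `- 1L` below therefore only lowers the cap by one second; it is harmless but not required.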

Suggested change
pendingScreenshotDelayMillis = command.seconds
.coerceAtLeast(0L)
.coerceAtMost(Long.MAX_VALUE / 1000L) * 1000L
pendingScreenshotDelayMillis = command.seconds
.coerceAtLeast(0L)
.coerceAtMost(Long.MAX_VALUE / 1000L - 1L) * 1000L

Log.d(TAG, "Command.Wait: Delaying the next takeScreenshot command by ${command.seconds} seconds.")
showToast("Delaying next screenshot by ${command.seconds} seconds", false)
false
}
is Command.PressHomeButton -> {
executeSyncCommandAction(
Expand Down Expand Up @@ -438,6 +418,63 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
}
}

/**
 * Handles a take-screenshot command, honouring any delay left in [pendingScreenshotDelayMillis].
 *
 * The pending delay is read and reset to zero first, so it applies to this call only.
 * With no delay the capture runs immediately; otherwise it is posted on [handler] and the
 * posted runnable calls [scheduleNextCommandProcessing] itself once the capture is done.
 *
 * @return `false` when the capture ran immediately, `true` when it was deferred.
 */
private fun executeTakeScreenshotCommand(): Boolean {
    // Consume the stored delay up front.
    val delayMillis = pendingScreenshotDelayMillis
    pendingScreenshotDelayMillis = 0L

    // Performs the actual capture; shared by the immediate and the deferred path.
    fun captureNow() {
        val model = GenerativeAiViewModelFactory.getCurrentModel()
        if (model.supportsScreenshot) {
            Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.")
            showToast("Preparing screenshot...", false)

            val screenInfo = captureScreenInformation()

            // Restrict the broadcast to this app's own package.
            val request = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT)
            request.putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo)
            request.setPackage(applicationContext.packageName)
            applicationContext.sendBroadcast(request)
            Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.")
            return
        }

        // Model without screenshot support: pass on the screen info with an empty Uri.
        Log.d(TAG, "Command.TakeScreenshot: Model has no screenshot support, capturing screen info only.")
        showToast("Capturing screen info...", false)
        val screenInfo = captureScreenInformation()
        MainActivity.getInstance()
            ?.getPhotoReasoningViewModel()
            ?.addScreenshotToConversation(Uri.EMPTY, applicationContext, screenInfo)
    }

    if (delayMillis > 0L) {
        Log.d(TAG, "Command.TakeScreenshot: Waiting ${delayMillis}ms before capturing screen info and screenshot.")
        showToast("Waiting ${delayMillis / 1000L} seconds before screenshot...", false)
        val deferredCapture = Runnable {
            // Clear the stored reference first: once running there is nothing left to cancel.
            pendingDelayedScreenshotRunnable = null
            captureNow()
            scheduleNextCommandProcessing()
        }
        // Keep the reference so cancelPendingDelayedScreenshot() can remove it from the handler.
        pendingDelayedScreenshotRunnable = deferredCapture
        handler.postDelayed(deferredCapture, delayMillis)
        return true
    }

    captureNow()
    return false
}

/**
 * Drops any delayed screenshot state: resets [pendingScreenshotDelayMillis] to zero and,
 * if a delayed runnable is stored, removes it from [handler] and clears the reference.
 */
private fun cancelPendingDelayedScreenshot() {
    pendingScreenshotDelayMillis = 0L

    val scheduled = pendingDelayedScreenshotRunnable
    if (scheduled != null) {
        handler.removeCallbacks(scheduled)
        Log.d(TAG, "Cancelled pending delayed screenshot.")
    }
    pendingDelayedScreenshotRunnable = null
}

private fun executeSyncCommandAction(
logMessage: String,
toastMessage: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ internal object PhotoReasoningHistoryBuilder {
var currentUserContent = ""
var currentModelContent = ""

for (message in messages) {
val sanitizedMessages = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages)

for (message in sanitizedMessages) {
when (message.participant) {
PhotoParticipant.USER -> {
if (currentModelContent.isNotEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ internal object PhotoReasoningMessageMutations {
chatState: PhotoReasoningChatState,
userMessage: PhotoReasoningMessage
): List<PhotoReasoningMessage> {
chatState.addMessage(userMessage)
chatState.addMessage(
val messages = chatState.getAllMessages().toMutableList()
messages.add(userMessage)
messages.add(
PhotoReasoningMessage(
text = "",
participant = PhotoParticipant.MODEL,
isPending = true
)
)
chatState.setAllMessages(PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(messages))
return chatState.getAllMessages()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ fun PhotoReasoningScreen(
is Command.ClickButton -> "Click on button: \"${command.buttonText}\""
is Command.TapCoordinates -> "Tap coordinates: (${command.x}, ${command.y})"
is Command.TakeScreenshot -> "Take screenshot"
is Command.Wait -> "Wait: ${command.seconds} seconds"
is Command.Retrieve -> "Retrieve: \"${command.heading}\""
else -> command::class.simpleName ?: "Unknown Command"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package com.google.ai.sample.feature.multimodal

/**
 * Limits how many chat messages keep their "Screen elements:" section.
 *
 * Only the last [MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES] messages (in list order) that carry a
 * not-yet-obsolete section keep it. In every earlier such message, everything after the
 * marker up to the end of the text is replaced by the phrase "no longer relevant".
 */
internal object PhotoReasoningScreenElementHistoryPolicy {
    private const val MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES = 3
    private const val MARKER = "Screen elements:"
    private const val NO_LONGER_RELEVANT = "no longer relevant"

    // Group 1 keeps the marker plus trailing whitespace; the rest of the text (dot matches
    // newlines) is what gets replaced.
    private val screenElementsSectionRegex = Regex(
        "(?is)(Screen elements:\\s*).*",
        setOf(RegexOption.IGNORE_CASE)
    )

    /**
     * Returns a list of the same size and order as [messages] in which all but the last
     * [MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES] messages with a live screen-elements section
     * have that section replaced by the obsolete phrase. Other messages are returned as-is.
     */
    fun sanitizeMessages(messages: List<PhotoReasoningMessage>): List<PhotoReasoningMessage> {
        // Walk from the end of the list so the last qualifying messages are the ones kept.
        val idsToKeep = messages
            .asReversed()
            .filter { hasLiveScreenElements(it.text) }
            .take(MAX_RELEVANT_SCREEN_ELEMENT_MESSAGES)
            .mapNotNull { it.id }
            .toSet()

        return messages.map { message ->
            val outdated = hasLiveScreenElements(message.text) && message.id !in idsToKeep
            if (outdated) {
                message.copy(text = replaceScreenElementsWithObsoleteMarker(message.text))
            } else {
                message
            }
        }
    }

    /** Returns `true` when [text] contains the screen-elements marker, ignoring case. */
    fun hasScreenElements(text: String): Boolean = text.contains(MARKER, ignoreCase = true)

    /** A section is "live" when the marker is present and has not been marked obsolete yet. */
    private fun hasLiveScreenElements(text: String): Boolean =
        hasScreenElements(text) && !isAlreadyObsolete(text)

    /**
     * Returns `true` when the text after the first marker, once trimmed, equals the obsolete
     * phrase (ignoring case). Returns `false` when the marker is absent.
     */
    private fun isAlreadyObsolete(text: String): Boolean {
        val markerStart = text.indexOf(MARKER, ignoreCase = true)
        return markerStart >= 0 &&
            text.substring(markerStart + MARKER.length).trim().equals(NO_LONGER_RELEVANT, ignoreCase = true)
    }

    /** Keeps the marker (and the whitespace after it) and swaps the rest for the obsolete phrase. */
    private fun replaceScreenElementsWithObsoleteMarker(text: String): String =
        screenElementsSectionRegex.replace(text) { it.groupValues[1] + NO_LONGER_RELEVANT }
}
Loading
Loading