diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt index 17374ff..fad120c 100644 --- a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt +++ b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt @@ -7,6 +7,7 @@ import android.accessibilityservice.GestureDescription import android.content.ComponentName import android.content.Context import android.content.Intent +import android.app.PendingIntent import android.content.pm.PackageManager import android.content.pm.ResolveInfo import android.graphics.Bitmap @@ -29,6 +30,7 @@ import com.google.ai.sample.util.AppOpenFeedbackPreferences import com.google.ai.sample.util.Command import com.google.ai.sample.util.CoordinateParser import com.google.ai.sample.util.TermuxFeedbackPreferences +import com.google.ai.sample.util.TermuxOutputPreferences import java.io.File import java.text.SimpleDateFormat import com.google.ai.sample.GenerativeViewModelFactory @@ -384,7 +386,7 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { } } is Command.TermuxCommand -> { - executeSyncCommandAction( + executeAsyncCommandAction( logMessage = "Executing Termux command: ${command.command}", toastMessage = "Executing Termux command..." ) { @@ -529,6 +531,13 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { } private fun executeTermuxCommand(command: String) { + Log.i(TAG, "Termux command requested. 
Raw command length=${command.length}")
+        val trimmedCommand = command.trim()
+        if (trimmedCommand.isEmpty()) {
+            Log.w(TAG, "Skipping Termux command dispatch because command is empty after trim.")
+            return
+        }
+
         val termuxPackage = "com.termux"
         val pm = packageManager
         val launchIntent = pm.getLaunchIntentForPackage(termuxPackage)
@@ -537,23 +546,196 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
             Log.w(TAG, "Termux not found for command execution.")
             return
         }
+
+        val runCommandServiceClass = "com.termux.app.RunCommandService"
+        val serviceProbeIntent = Intent("com.termux.RUN_COMMAND").apply {
+            `package` = termuxPackage
+            setClassName(termuxPackage, runCommandServiceClass)
+        }
+        val resolvedService = pm.resolveService(serviceProbeIntent, PackageManager.MATCH_DEFAULT_ONLY)
+        if (resolvedService == null) {
+            Log.e(TAG, "Termux RunCommandService not resolvable. package=$termuxPackage class=$runCommandServiceClass")
+            TermuxFeedbackPreferences.markTermuxNotFound(applicationContext)
+            return
+        }
+
+        Log.i(
+            TAG,
+            "Resolved Termux RunCommandService=${resolvedService.serviceInfo?.name}, app=${resolvedService.serviceInfo?.packageName}"
+        )
+
+        // Termux is asked to report the command result by firing this PendingIntent; the intent
+        // is restricted to this app's own package.
+        val callbackAction = "com.google.ai.sample.TERMUX_COMMAND_RESULT"
+        val callbackIntent = Intent(callbackAction).apply {
+            `package` = packageName
+        }
+        val callbackFlags = PendingIntent.FLAG_UPDATE_CURRENT or PendingIntent.FLAG_MUTABLE
+        val pendingResultIntent = PendingIntent.getBroadcast(applicationContext, 7001, callbackIntent, callbackFlags)
+
+        // NOTE(review): Context.RECEIVER_NOT_EXPORTED was added in API 33 - confirm minSdk, or
+        // register through ContextCompat.registerReceiver instead.
+        val callbackReceiver = TermuxResultReceiver(applicationContext)
+        val receiverRegistered = try {
+            applicationContext.registerReceiver(callbackReceiver, android.content.IntentFilter(callbackAction), Context.RECEIVER_NOT_EXPORTED)
+            Log.i(TAG, "Registered Termux result receiver for action=$callbackAction")
+            true
+        } catch (t: Throwable) {
+            Log.e(TAG, "Failed to register Termux result receiver", t)
+            false
+        }
+
         val intent = Intent("com.termux.RUN_COMMAND").apply {
             `package` = termuxPackage
-            setClassName(termuxPackage, "com.termux.app.RunCommandService")
+            setClassName(termuxPackage, runCommandServiceClass)
             putExtra("com.termux.RUN_COMMAND_PATH", "/data/data/com.termux/files/usr/bin/bash")
-            putExtra("com.termux.RUN_COMMAND_ARGUMENTS", arrayOf("-lc", command))
+            putExtra("com.termux.RUN_COMMAND_ARGUMENTS", arrayOf("-lc", trimmedCommand))
             putExtra("com.termux.RUN_COMMAND_WORKDIR", "/data/data/com.termux/files/home")
             putExtra("com.termux.RUN_COMMAND_BACKGROUND", true)
             putExtra("com.termux.RUN_COMMAND_SESSION_ACTION", 0)
+            putExtra("com.termux.RUN_COMMAND_RUNNER", "app-shell")
+            putExtra("com.termux.RUN_COMMAND_PENDING_INTENT", pendingResultIntent)
+            putExtra("com.termux.RUN_COMMAND_BACKGROUND_CUSTOM_LOG_LEVEL", 0)
+            putExtra("com.termux.RUN_COMMAND_RETURN_STDOUT", true)
+            putExtra("com.termux.RUN_COMMAND_RETURN_STDERR", true)
         }
+
+        Log.i(
+            TAG,
+            "Dispatching Termux RUN_COMMAND with path=${intent.getStringExtra("com.termux.RUN_COMMAND_PATH")}, " +
+                "workdir=${intent.getStringExtra("com.termux.RUN_COMMAND_WORKDIR")}, " +
+                "background=${intent.getBooleanExtra("com.termux.RUN_COMMAND_BACKGROUND", false)}, " +
+                "runner=${intent.getStringExtra("com.termux.RUN_COMMAND_RUNNER")}, " +
+                "argsCount=${intent.getStringArrayExtra("com.termux.RUN_COMMAND_ARGUMENTS")?.size ?: 0}"
+        )
+
         try {
             startService(intent)
+            Log.i(TAG, "Termux command dispatch succeeded.")
+        } catch (se: SecurityException) {
+            Log.e(TAG, "Failed to dispatch Termux command due to security restriction. Check Termux RUN_COMMAND permission grant.", se)
+            TermuxFeedbackPreferences.markTermuxNotFound(applicationContext)
+            // The command was never dispatched, so no result broadcast will release the receiver.
+            if (receiverRegistered) callbackReceiver.unregister()
         } catch (t: Throwable) {
             Log.e(TAG, "Failed to dispatch Termux command", t)
             TermuxFeedbackPreferences.markTermuxNotFound(applicationContext)
+            // The command was never dispatched, so no result broadcast will release the receiver.
+            if (receiverRegistered) callbackReceiver.unregister()
         }
     }
 
+    /**
+     * One-shot receiver for the result broadcast fired through the PendingIntent handed to
+     * Termux as "com.termux.RUN_COMMAND_PENDING_INTENT".
+     *
+     * Each delivery is parsed for stdout, stderr and exit code; a non-empty summary is stored
+     * via TermuxOutputPreferences.appendOutput and echoed line by line to the MainActivity
+     * status message. Every delivery, usable or not, ends with a request to schedule the next
+     * command and with the receiver unregistering itself from [appContext].
+     */
+    private class TermuxResultReceiver(private val appContext: Context) : android.content.BroadcastReceiver() {
+
+        /** Removes this receiver from [appContext]; failures are logged, never rethrown. */
+        fun unregister() {
+            try {
+                appContext.unregisterReceiver(this)
+                Log.i(TAG, "Termux result receiver unregistered")
+            } catch (t: Throwable) {
+                Log.w(TAG, "Failed to unregister Termux result receiver", t)
+            }
+        }
+
+        override fun onReceive(context: Context?, intent: Intent?) {
+            try {
+                publishResult(intent)
+            } finally {
+                // NOTE(review): the early-exit paths used to skip this post; presumably the command
+                // queue waits for it after an async Termux dispatch - confirm against
+                // executeAsyncCommandAction.
+                serviceInstance?.handler?.post {
+                    Log.d(TAG, "Termux result received, scheduling next command processing.")
+                    serviceInstance?.scheduleNextCommandProcessing()
+                }
+                unregister()
+            }
+        }
+
+        /**
+         * Extracts stdout, stderr and exit code from [intent] and publishes them. Returns
+         * without publishing when the intent, the result bundle or all three values are missing.
+         */
+        private fun publishResult(intent: Intent?) {
+            if (intent == null) {
+                Log.w(TAG, "Termux result receiver invoked with null intent")
+                return
+            }
+            val resultBundle = intent.getBundleExtra("com.termux.app.extra.TERMUX_SERVICE.EXTRA_PLUGIN_RESULT_BUNDLE")
+                ?: intent.getBundleExtra("result")
+            if (resultBundle == null) {
+                Log.w(TAG, "Termux result bundle missing; available extras=${intent.extras?.keySet()?.joinToString()}")
+                return
+            }
+
+            val stdout = resultBundle.getString("com.termux.app.extra.TERMUX_SERVICE.EXTRA_PLUGIN_RESULT_BUNDLE_STDOUT")
+                ?: resultBundle.getString("stdout")
+                ?: ""
+            val stderr = resultBundle.getString("com.termux.app.extra.TERMUX_SERVICE.EXTRA_PLUGIN_RESULT_BUNDLE_STDERR")
+                ?: resultBundle.getString("stderr")
+                ?: ""
+            // Int.MIN_VALUE stands for "no exit code reported".
+            val exitCode = when {
+                resultBundle.containsKey("com.termux.app.extra.TERMUX_SERVICE.EXTRA_PLUGIN_RESULT_BUNDLE_EXIT_CODE") -> {
+                    resultBundle.getInt("com.termux.app.extra.TERMUX_SERVICE.EXTRA_PLUGIN_RESULT_BUNDLE_EXIT_CODE", Int.MIN_VALUE)
+                }
+                resultBundle.containsKey("exitCode") -> resultBundle.getInt("exitCode", Int.MIN_VALUE)
+                else -> Int.MIN_VALUE
+            }
+
+            Log.i(TAG, "Termux result received: exitCode=$exitCode stdoutLen=${stdout.length} stderrLen=${stderr.length} keys=${resultBundle.keySet().joinToString()}")
+
+            val hasKnownResult = stdout.isNotBlank() || stderr.isNotBlank() || exitCode != Int.MIN_VALUE
+            if (!hasKnownResult) {
+                Log.w(TAG, "Ignoring Termux callback without stdout/stderr/exitCode to avoid polluting pending output.")
+                return
+            }
+
+            val combined = buildString {
+                append("Termux finished")
+                if (exitCode != Int.MIN_VALUE) {
+                    append(" (exit=")
+                    append(exitCode)
+                    append(")")
+                }
+                if (stdout.isNotBlank()) {
+                    append("\nstdout:\n")
+                    append(stdout)
+                }
+                if (stderr.isNotBlank()) {
+                    append("\nstderr:\n")
+                    append(stderr)
+                }
+            }
+
+            val aiRelevantOutput = combined.trim()
+            if (aiRelevantOutput.isNotBlank()) {
+                TermuxOutputPreferences.appendOutput(appContext, aiRelevantOutput)
+                Log.i(TAG, "Stored Termux output for next screenshot bubble. chars=${aiRelevantOutput.length}")
+            }
+
+            mainHandler.post {
+                MainActivity.getInstance()?.updateStatusMessage("Termux stream start", false)
+            }
+            combined.lineSequence().forEachIndexed { idx, line ->
+                val framed = "Termux[$idx]: $line"
+                Log.d(TAG, framed)
+                mainHandler.post {
+                    MainActivity.getInstance()?.updateStatusMessage(framed, false)
+                }
+            }
+        }
+    }
+
     private fun processCommandQueue() {
         if (!commandQueue.tryAcquireProcessing()) {
 
diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
index 5ed8f1f..3393a9b 100644
--- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
@@ -31,6 +31,7 @@ import com.google.ai.sample.util.CommandParser
 import com.google.ai.sample.util.SystemMessagePreferences
 import com.google.ai.sample.util.SystemMessageEntry
 import com.google.ai.sample.util.TermuxFeedbackPreferences
+import com.google.ai.sample.util.TermuxOutputPreferences
 import com.google.ai.sample.util.UserInputPreferences
 import com.google.ai.sample.feature.multimodal.ModelDownloadManager
 import com.google.ai.sample.ModelOption
@@ -2558,9 +2559,8 @@ private fun processCommands(text: String) {
         context: Context,
         screenInfo: String? 
= null ) { - val enrichedScreenInfo = buildEnrichedScreenInfo(screenInfo) - if (screenshotUri == Uri.EMPTY) { + val enrichedScreenInfo = buildEnrichedScreenInfo(screenInfo) // This case is for offline models, where we don't have a screenshot. // We just want to send the screen info. val genericAnalysisPrompt = createGenericScreenshotPrompt() @@ -2578,6 +2578,9 @@ private fun processCommands(text: String) { return // Exit the function early if it's a duplicate call within the window } + val enrichedScreenInfo = buildEnrichedScreenInfo(screenInfo) + Log.d(TAG, "addScreenshotToConversation: Using enrichedScreenInfo=${!enrichedScreenInfo.isNullOrBlank()} for URI=$screenshotUri") + PhotoReasoningApplication.applicationScope.launch(Dispatchers.Main) { try { Log.d(TAG, "Adding screenshot to conversation: $screenshotUri") @@ -2658,15 +2661,16 @@ private fun processCommands(text: String) { } else { null } + val termuxOutputInfo = TermuxOutputPreferences.consumeOutput(appContext)?.let { "Termux output:\n$it" } + if (!termuxOutputInfo.isNullOrBlank()) { + Log.i(TAG, "buildEnrichedScreenInfo: Injecting Termux output into next screen-info bubble. 
chars=${termuxOutputInfo.length}")
+        }
         val missingInfo = listOfNotNull(appNotFoundInfo, termuxNotFoundInfo).joinToString("\n").ifBlank { null }
+        val extraInfo = listOfNotNull(missingInfo, retrievedInfo, termuxOutputInfo).joinToString("\n\n").ifBlank { null }
 
         return when {
-            !missingInfo.isNullOrBlank() && !retrievedInfo.isNullOrBlank() && !screenInfo.isNullOrBlank() -> "$missingInfo\n\n$retrievedInfo\n\n$screenInfo"
-            !missingInfo.isNullOrBlank() && !retrievedInfo.isNullOrBlank() -> "$missingInfo\n\n$retrievedInfo"
-            !missingInfo.isNullOrBlank() && !screenInfo.isNullOrBlank() -> "$missingInfo\n\n$screenInfo"
-            !missingInfo.isNullOrBlank() -> missingInfo
-            !retrievedInfo.isNullOrBlank() && !screenInfo.isNullOrBlank() -> "$retrievedInfo\n\n$screenInfo"
-            !retrievedInfo.isNullOrBlank() -> retrievedInfo
+            !extraInfo.isNullOrBlank() && !screenInfo.isNullOrBlank() -> "$extraInfo\n\n$screenInfo"
+            !extraInfo.isNullOrBlank() -> extraInfo
             !screenInfo.isNullOrBlank() -> screenInfo
             else -> null
         }
diff --git a/app/src/main/kotlin/com/google/ai/sample/util/TermuxOutputPreferences.kt b/app/src/main/kotlin/com/google/ai/sample/util/TermuxOutputPreferences.kt
new file mode 100644
index 0000000..ec2d0ab
--- /dev/null
+++ b/app/src/main/kotlin/com/google/ai/sample/util/TermuxOutputPreferences.kt
@@ -0,0 +1,42 @@
+package com.google.ai.sample.util
+
+import android.content.Context
+
+/**
+ * SharedPreferences-backed buffer for Termux command output that has been stored with
+ * [appendOutput] but not yet handed out through [consumeOutput].
+ *
+ * Both operations are read-modify-write sequences on the same key, so they are serialized on
+ * this object's monitor; otherwise an append interleaved with a consume inside this process
+ * could drop or repeat output.
+ */
+object TermuxOutputPreferences {
+    private const val PREF_NAME = "termux_output_prefs"
+    private const val KEY_PENDING_OUTPUT = "pending_output"
+
+    /**
+     * Appends [output] to the pending buffer, separated from earlier output by a blank line.
+     * Blank [output] is ignored.
+     */
+    @Synchronized
+    fun appendOutput(context: Context, output: String) {
+        if (output.isBlank()) return
+        val prefs = context.getSharedPreferences(PREF_NAME, Context.MODE_PRIVATE)
+        val existing = prefs.getString(KEY_PENDING_OUTPUT, "").orEmpty()
+        val merged = if (existing.isBlank()) output else "$existing\n\n$output"
+        prefs.edit().putString(KEY_PENDING_OUTPUT, merged).apply()
+    }
+
+    /**
+     * Returns the trimmed pending output and clears it, or returns null when nothing but
+     * whitespace is pending.
+     */
+    @Synchronized
+    fun consumeOutput(context: Context): String? {
+        val prefs = context.getSharedPreferences(PREF_NAME, Context.MODE_PRIVATE)
+        val value = prefs.getString(KEY_PENDING_OUTPUT, "").orEmpty().trim()
+        if (value.isBlank()) return null
+        prefs.edit().remove(KEY_PENDING_OUTPUT).apply()
+        return value
+    }
+}