Skip to content

Commit 2862a2c

Browse files
Here's a proposed change to your codebase:
Feat: Implement comprehensive DTOs and Mappers for all SDK Part types

I've expanded the DTOs in `PhotoReasoningDtos.kt` and the mappers in `PhotoReasoningMappers.kt` to support a wider range of `com.google.ai.client.generativeai.type.Part` subtypes. This is intended to fix issues where your chat history context was being lost during serialization if it contained parts other than Text or Image, leading to degraded AI model performance.

Key changes:

1. **Updated DTOs (`PhotoReasoningDtos.kt`):**
   * I added `@Serializable` data classes for `BlobPartDto` (handles `ByteArray` data), `FunctionCallPartDto` (handles function name and arguments map), and `FunctionResponsePartDto` (handles function name and a JSON string representation of the function's response).
   * These new DTOs are included in the `PartDto` sealed interface.
2. **Updated Mappers (`PhotoReasoningMappers.kt`):**
   * `Part.toDto(context: Context)`: Now includes cases to map SDK `BlobPart`, `FunctionCallPart`, and `FunctionResponsePart` to their respective DTOs. For `FunctionResponsePart`, the `org.json.JSONObject` response is converted to a string.
   * `PartDto.toSdk()`: Now includes cases to map `BlobPartDto`, `FunctionCallPartDto`, and `FunctionResponsePartDto` back to their SDK `Part` equivalents. For `FunctionResponsePartDto`, the JSON string response is parsed back into an `org.json.JSONObject`. I've included error handling for JSON parsing.
3. **Impact:**
   * This ensures that when `PhotoReasoningViewModel` serializes `chat.history` and `inputContent` for `ScreenCaptureService`, all common SDK `Part` types are preserved.
   * `ScreenCaptureService` can now correctly reconstruct these parts, providing a complete history and input to the Gemini model.
   * This should resolve issues of "context reduction" and thereby improve the consistency and quality of AI responses.
The file-based handling for `ImagePartDto` (to prevent `TransactionTooLargeException`) remains in place and is complemented by this more complete handling of other data types within the DTO structure.
1 parent 3030db5 commit 2862a2c

5 files changed

Lines changed: 631 additions & 388 deletions

File tree

app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@ import android.util.DisplayMetrics
2525
import android.util.Log
2626
import android.view.WindowManager
2727
import android.widget.Toast
28+
import com.google.ai.client.generativeai.GenerativeModel
29+
import com.google.ai.client.generativeai.type.Content
30+
import com.google.ai.client.generativeai.type.ImagePart // For instance check
31+
import com.google.ai.sample.feature.multimodal.dtos.ContentDto
32+
import com.google.ai.sample.feature.multimodal.dtos.toSdk
33+
import kotlinx.coroutines.CoroutineScope
34+
import kotlinx.coroutines.Dispatchers
35+
import kotlinx.coroutines.SupervisorJob
36+
import kotlinx.coroutines.cancel
37+
import kotlinx.coroutines.launch
38+
import kotlinx.serialization.json.Json
39+
import kotlinx.serialization.decodeFromString
2840
import androidx.core.app.NotificationCompat
2941
import java.io.File
3042
import java.io.FileOutputStream
@@ -37,13 +49,28 @@ class ScreenCaptureService : Service() {
3749
private const val TAG = "ScreenCaptureService"
3850
private const val CHANNEL_ID = "ScreenCaptureChannel"
3951
private const val NOTIFICATION_ID = 2001
52+
private const val NOTIFICATION_ID_AI = NOTIFICATION_ID + 1 // Or any distinct ID
4053
const val ACTION_START_CAPTURE = "com.google.ai.sample.START_CAPTURE"
4154
const val ACTION_TAKE_SCREENSHOT = "com.google.ai.sample.TAKE_SCREENSHOT" // New action
4255
const val ACTION_STOP_CAPTURE = "com.google.ai.sample.STOP_CAPTURE" // New action
4356
const val EXTRA_RESULT_CODE = "result_code"
4457
const val EXTRA_RESULT_DATA = "result_data"
4558
const val EXTRA_TAKE_SCREENSHOT_ON_START = "take_screenshot_on_start"
4659

60+
// For triggering AI call execution in the service
61+
const val ACTION_EXECUTE_AI_CALL = "com.google.ai.sample.EXECUTE_AI_CALL"
62+
const val EXTRA_AI_INPUT_CONTENT_JSON = "com.google.ai.sample.EXTRA_AI_INPUT_CONTENT_JSON"
63+
const val EXTRA_AI_CHAT_HISTORY_JSON = "com.google.ai.sample.EXTRA_AI_CHAT_HISTORY_JSON"
64+
const val EXTRA_AI_MODEL_NAME = "com.google.ai.sample.EXTRA_AI_MODEL_NAME" // For service to create model
65+
const val EXTRA_AI_API_KEY = "com.google.ai.sample.EXTRA_AI_API_KEY" // For service to create model
66+
const val EXTRA_TEMP_FILE_PATHS = "com.google.ai.sample.EXTRA_TEMP_FILE_PATHS"
67+
68+
69+
// For broadcasting AI call results from the service
70+
const val ACTION_AI_CALL_RESULT = "com.google.ai.sample.AI_CALL_RESULT"
71+
const val EXTRA_AI_RESPONSE_TEXT = "com.google.ai.sample.EXTRA_AI_RESPONSE_TEXT"
72+
const val EXTRA_AI_ERROR_MESSAGE = "com.google.ai.sample.EXTRA_AI_ERROR_MESSAGE"
73+
4774
private var instance: ScreenCaptureService? = null
4875

4976
fun isRunning(): Boolean = instance != null && instance?.isReady == true
@@ -54,6 +81,7 @@ class ScreenCaptureService : Service() {
5481
private var imageReader: ImageReader? = null
5582
private var isReady = false // Flag to indicate if MediaProjection is set up and active
5683
private val isScreenshotRequestedRef = java.util.concurrent.atomic.AtomicBoolean(false)
84+
private val serviceScope = CoroutineScope(Dispatchers.IO + SupervisorJob())
5785

5886
// Callback for MediaProjection
5987
private val mediaProjectionCallback = object : MediaProjection.Callback() {
@@ -117,6 +145,153 @@ class ScreenCaptureService : Service() {
117145
Log.d(TAG, "Received ACTION_STOP_CAPTURE. Cleaning up.")
118146
cleanup()
119147
}
148+
ACTION_EXECUTE_AI_CALL -> {
149+
Log.d(TAG, "ACTION_EXECUTE_AI_CALL: Ensuring foreground state for AI processing.")
150+
val aiNotification = createAiOperationNotification()
151+
// Comment: Attempt to start foreground for the AI call.
152+
// If the service is already in foreground (e.g., for screen capture), this updates the notification
153+
// or is a no-op depending on exact state. The goal is to elevate priority for the network call.
154+
// We will not explicitly call stopForeground() after the AI call in this handler to keep service
155+
// lifecycle management simple and rely on existing cleanup/stop mechanisms.
156+
// This might mean the "AI processing" notification persists if no other action stops/changes foreground state.
157+
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) {
158+
// Using a generic type like DATA_SYNC or SPECIAL_USE if not mediaProjection related.
159+
// However, to avoid permission issues if service was started for mediaProjection,
160+
// sticking to mediaProjection type might be safer if it's already in that mode.
161+
// For simplicity and if this call path doesn't define its own service type, we rely on the OS.
162+
// Let's use a generic type if possible, but be mindful of existing foreground state.
163+
// Re-evaluating: The service is already declared with mediaProjection.
164+
// It's safer to re-assert this type or one compatible.
165+
// Given this service *can* do media projection, reusing that type is safest.
166+
startForeground(NOTIFICATION_ID_AI, aiNotification, ServiceInfo.FOREGROUND_SERVICE_TYPE_MEDIA_PROJECTION)
167+
} else {
168+
startForeground(NOTIFICATION_ID_AI, aiNotification)
169+
}
170+
171+
Log.d(TAG, "Received ACTION_EXECUTE_AI_CALL")
172+
// This service, already a Foreground Service for MediaProjection,
173+
// is now also responsible for executing AI calls to leverage foreground network priority.
174+
val inputContentJson = intent.getStringExtra(EXTRA_AI_INPUT_CONTENT_JSON)
175+
val chatHistoryJson = intent.getStringExtra(EXTRA_AI_CHAT_HISTORY_JSON)
176+
val modelName = intent.getStringExtra(EXTRA_AI_MODEL_NAME)
177+
val apiKey = intent.getStringExtra(EXTRA_AI_API_KEY)
178+
val tempFilePaths = intent.getStringArrayListExtra(EXTRA_TEMP_FILE_PATHS) ?: ArrayList()
179+
Log.d(TAG, "Received tempFilePaths for cleanup: $tempFilePaths")
180+
181+
if (inputContentJson == null || chatHistoryJson == null || modelName == null || apiKey == null) {
182+
Log.e(TAG, "Missing necessary data for AI call. inputContentJson: ${inputContentJson != null}, chatHistoryJson: ${chatHistoryJson != null}, modelName: ${modelName != null}, apiKey: ${apiKey != null}")
183+
// Optionally broadcast an error back immediately
184+
broadcastAiCallError("Missing parameters for AI call in service.")
185+
return START_STICKY // Or START_NOT_STICKY if this is a fatal error for this call
186+
}
187+
188+
serviceScope.launch {
189+
var responseText: String? = null
190+
var errorMessage: String? = null
191+
try {
192+
// Deserialize JSON to DTOs.
193+
val chatHistoryDtos = Json.decodeFromString<List<ContentDto>>(chatHistoryJson)
194+
val inputContentDto = Json.decodeFromString<ContentDto>(inputContentJson)
195+
196+
// Convert DTOs back to SDK types.
197+
val chatHistory = chatHistoryDtos.map { it.toSdk() } // Uses ContentDto.toSdk()
198+
val inputContent = inputContentDto.toSdk() // Uses ContentDto.toSdk()
199+
200+
Log.d(TAG, "ACTION_EXECUTE_AI_CALL: Logging reloaded Bitmap properties after DTO conversion from file:")
201+
202+
// Log properties for inputContent's images
203+
inputContent.parts.filterIsInstance<com.google.ai.client.generativeai.type.ImagePart>().forEachIndexed { index, imagePart ->
204+
val bitmap = imagePart.image // This is the reloaded Bitmap
205+
Log.d(TAG, " InputContent Reloaded Image[${index}]: Width=${bitmap.width}, Height=${bitmap.height}, Config=${bitmap.config?.name ?: "null"}, HasAlpha=${bitmap.hasAlpha()}, IsMutable=${bitmap.isMutable}")
206+
}
207+
208+
// Log properties for chat.history images
209+
chatHistory.forEachIndexed { historyIndex, contentItem ->
210+
contentItem.parts.filterIsInstance<com.google.ai.client.generativeai.type.ImagePart>().forEachIndexed { partIndex, imagePart ->
211+
val bitmap = imagePart.image // This is the reloaded Bitmap
212+
Log.d(TAG, " History[${historyIndex}] Reloaded Image[${partIndex}]: Width=${bitmap.width}, Height=${bitmap.height}, Config=${bitmap.config?.name ?: "null"}, HasAlpha=${bitmap.hasAlpha()}, IsMutable=${bitmap.isMutable}")
213+
}
214+
}
215+
216+
Log.d(TAG, "ACTION_EXECUTE_AI_CALL: Saving reloaded Bitmaps for visual integrity check.")
217+
218+
// Save reloaded bitmaps from inputContent
219+
inputContent.parts.filterIsInstance<com.google.ai.client.generativeai.type.ImagePart>().forEachIndexed { index, imagePart ->
220+
val reloadedBitmap = imagePart.image
221+
val reloadedBitmapDebugPath = com.google.ai.sample.util.ImageUtils.saveBitmapToTempFile(applicationContext, reloadedBitmap)
222+
if (reloadedBitmapDebugPath != null) {
223+
Log.d(TAG, " InputContent Reloaded Image[${index}] (for debug) also saved to: $reloadedBitmapDebugPath. Compare with original.")
224+
}
225+
}
226+
227+
// Save reloaded bitmaps from chat.history
228+
chatHistory.forEachIndexed { historyIndex, contentItem ->
229+
contentItem.parts.filterIsInstance<com.google.ai.client.generativeai.type.ImagePart>().forEachIndexed { partIndex, imagePart ->
230+
val reloadedBitmap = imagePart.image
231+
val reloadedBitmapDebugPath = com.google.ai.sample.util.ImageUtils.saveBitmapToTempFile(applicationContext, reloadedBitmap)
232+
if (reloadedBitmapDebugPath != null) {
233+
Log.d(TAG, " History[${historyIndex}] Reloaded Image[${partIndex}] (for debug) also saved to: $reloadedBitmapDebugPath. Compare with original.")
234+
}
235+
}
236+
}
237+
238+
// Create a GenerativeModel instance for this specific call.
239+
// This ensures the call uses the API key and model name provided by the ViewModel.
240+
// Consider a default GenerationConfig or make it configurable too if needed.
241+
val generativeModel = GenerativeModel(
242+
modelName = modelName,
243+
apiKey = apiKey
244+
// generationConfig = generationConfig { ... } // Optional: add default config
245+
)
246+
247+
// Start a new chat session with the provided history for this call.
248+
val tempChat = generativeModel.startChat(history = chatHistory) // Use the mapped SDK history
249+
Log.d(TAG, "Executing AI sendMessage with history size: ${chatHistory.size}")
250+
val aiResponse = tempChat.sendMessage(inputContent) // Use the mapped SDK inputContent
251+
responseText = aiResponse.text
252+
Log.d(TAG, "AI call successful. Response text available: ${responseText != null}")
253+
254+
} catch (e: Exception) {
255+
// Catching general exceptions from model/chat operations or serialization
256+
Log.e(TAG, "Error during AI call execution in service", e)
257+
errorMessage = e.localizedMessage ?: "Unknown error during AI call in service"
258+
// More specific error handling (like API key failure leading to trying another key via ApiKeyManager)
259+
// could be added here if this service becomes responsible for ApiKeyManager interactions.
260+
// For "minimal changes", we just report the error back.
261+
}
262+
finally {
263+
// Broadcast the result (success or error) back to the ViewModel.
264+
val resultIntent = Intent(ACTION_AI_CALL_RESULT).apply {
265+
`package` = applicationContext.packageName // Ensure only our app receives it
266+
if (responseText != null) {
267+
putExtra(EXTRA_AI_RESPONSE_TEXT, responseText)
268+
}
269+
if (errorMessage != null) {
270+
putExtra(EXTRA_AI_ERROR_MESSAGE, errorMessage)
271+
}
272+
}
273+
applicationContext.sendBroadcast(resultIntent)
274+
Log.d(TAG, "Broadcast sent for AI_CALL_RESULT. Error: $errorMessage, Response: ${responseText != null}")
275+
276+
// Comment: Clean up temporary image files passed from the ViewModel.
277+
if (tempFilePaths.isNotEmpty()) {
278+
Log.d(TAG, "Cleaning up ${tempFilePaths.size} temporary image files.")
279+
for (filePath in tempFilePaths) {
280+
val deleted = com.google.ai.sample.util.ImageUtils.deleteFile(filePath)
281+
if (!deleted) {
282+
Log.w(TAG, "Failed to delete temporary file: $filePath")
283+
}
284+
}
285+
} else {
286+
Log.d(TAG, "No temporary image files to clean up.")
287+
}
288+
}
289+
}
290+
// START_STICKY is appropriate if the service is also managing MediaProjection independently.
291+
// If it becomes purely command-driven, START_NOT_STICKY might be considered after all commands processed.
292+
// For now, keep START_STICKY consistent with existing behavior.
293+
return START_STICKY
294+
}
120295
else -> {
121296
Log.w(TAG, "Unknown or null action received: ${intent?.action}.")
122297
// If service is started with unknown action and not ready, stop it.
@@ -128,6 +303,25 @@ class ScreenCaptureService : Service() {
128303
return START_STICKY
129304
}
130305

306+
/**
 * Broadcasts an AI-call failure using the [ACTION_AI_CALL_RESULT] action.
 *
 * The intent carries only [EXTRA_AI_ERROR_MESSAGE]; no response-text extra is attached.
 * It is sent through the application context and a debug log line is written afterwards.
 *
 * @param message error description placed in the [EXTRA_AI_ERROR_MESSAGE] extra.
 */
private fun broadcastAiCallError(message: String) {
307+
val errorIntent = Intent(ACTION_AI_CALL_RESULT).apply {
308+
`package` = applicationContext.packageName // Explicit package: limits delivery to receivers in this app
309+
putExtra(EXTRA_AI_ERROR_MESSAGE, message)
310+
}
311+
applicationContext.sendBroadcast(errorIntent)
312+
Log.d(TAG, "Broadcast error sent for AI_CALL_RESULT: $message")
313+
}
314+
315+
/**
 * Builds the notification shown while the service processes an AI request.
 *
 * The notification is posted on the existing [CHANNEL_ID] channel with low priority
 * and is not marked as ongoing.
 *
 * @return the built [Notification].
 */
private fun createAiOperationNotification(): Notification {
316+
return NotificationCompat.Builder(this, CHANNEL_ID) // Reuse the existing channel instead of creating a new one
317+
.setContentTitle("Screen Operator")
318+
.setContentText("Processing AI request...")
319+
.setSmallIcon(android.R.drawable.ic_dialog_info) // TODO: replace this system placeholder with a proper app icon
320+
.setPriority(NotificationCompat.PRIORITY_LOW)
321+
.setOngoing(false) // Not ongoing: the AI operation is not typically as long as screen capture
322+
.build()
323+
}
324+
131325
private fun createNotification(): Notification {
132326
return NotificationCompat.Builder(this, CHANNEL_ID)
133327
.setContentTitle("Screen Capture Active")
@@ -422,6 +616,7 @@ private fun takeScreenshot() {
422616
if (isReady || mediaProjection != null) { // Check if cleanup is actually needed
423617
cleanup()
424618
}
619+
serviceScope.cancel() // Cancel all coroutines in this scope
425620
instance = null // Ensure instance is cleared
426621
super.onDestroy()
427622
}

0 commit comments

Comments
 (0)