Skip to content

Commit 87fda45

Browse files
Add files via upload
1 parent f4b3ed0 commit 87fda45

1 file changed

Lines changed: 148 additions & 141 deletions

File tree

app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt

Lines changed: 148 additions & 141 deletions
Original file line number | Diff line number | Diff line change
@@ -10,95 +10,87 @@ object CommandParser {
1010

1111
// Regex patterns for different command formats
1212

13-
// Click button patterns - reorganized by priority
13+
// Click button patterns - significantly expanded to catch more variations
1414
private val CLICK_BUTTON_PATTERNS = listOf(
15-
// Function-like patterns (highest priority)
16-
Regex("(?i)\\b(?:clickOnButton|tapOnButton|pressButton)\\([\"']([^\"']+)[\"']\\)"),
17-
18-
// Standard patterns with quotes (high priority)
19-
Regex("(?i)\\b(?:click|tap|press|klick|tippe auf|drücke|klicke auf|drücke auf) (?:on )?(?:the )?(?:button|knopf|schaltfläche)?(?: labeled| mit text)? [\"']([^\"']+)[\"']"),
15+
// Standard patterns with quotes
16+
Regex("(?i)\\b(?:click|tap|press|klick|tippe auf|drücke|klicke auf|drücke auf) (?:on )?(?:the )?(?:button|knopf|schaltfläche|button labeled|knopf mit text|schaltfläche mit text)? [\"']([^\"']+)[\"']"),
2017
Regex("(?i)\\b(?:click|tap|press|klick|tippe auf|drücke|klicke auf|drücke auf) (?:on )?(?:the )?[\"']([^\"']+)[\"'] (?:button|knopf|schaltfläche)?"),
2118

22-
// Patterns with "labeled" keyword (medium priority)
23-
Regex("(?i)\\b(?:click|tap|press|klick|tippe auf|drücke|klicke auf|drücke auf) (?:on )?(?:the )?(?:button|knopf|schaltfläche) labeled ([\\w\\s\\-]+)(?:\\b|$)"),
19+
// Patterns without quotes
20+
Regex("(?i)\\b(?:click|tap|press|klick|tippe auf|drücke|klicke auf|drücke auf) (?:on )?(?:the )?(?:button|knopf|schaltfläche) ([\\w\\s\\-]+)\\b"),
21+
Regex("(?i)\\b(?:click|tap|press|klick|tippe auf|drücke|klicke auf|drücke auf) (?:on )?(?:the )?(?:button|knopf|schaltfläche) labeled ([\\w\\s\\-]+)\\b"),
2422

25-
// Direct command patterns (lower priority)
26-
Regex("(?i)\\b(?:click|tap|press|klick|tippe auf|drücke|klicke auf|drücke auf) (?:on )?(?:the )?([\\w\\s\\-]+) (?:button|knopf|schaltfläche)\\b"),
23+
// Direct command patterns
24+
Regex("(?i)\\b(?:click|tap|press|klick|tippe auf|drücke|klicke auf|drücke auf) ([\\w\\s\\-]+) (?:button|knopf|schaltfläche)\\b"),
2725

28-
// Patterns without quotes (lowest priority)
29-
Regex("(?i)\\b(?:click|tap|press|klick|tippe auf|drücke|klicke auf|drücke auf) (?:on )?(?:the )?(?:button|knopf|schaltfläche) ([\\w\\s\\-]+)(?:\\b|$)")
26+
// Function-like patterns
27+
Regex("(?i)\\bclickOnButton\\([\"']([^\"']+)[\"']\\)"),
28+
Regex("(?i)\\btapOnButton\\([\"']([^\"']+)[\"']\\)"),
29+
Regex("(?i)\\bpressButton\\([\"']([^\"']+)[\"']\\)")
3030
)
3131

32-
// Tap coordinates patterns - updated to support decimal numbers
32+
// Tap coordinates patterns - expanded to catch more variations
3333
private val TAP_COORDINATES_PATTERNS = listOf(
34-
// Function-like patterns
35-
Regex("(?i)\\b(?:tapAtCoordinates|clickAtPosition|tapAt)\\(\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)"),
36-
3734
// Standard patterns
3835
Regex("(?i)\\b(?:tap|click|press|tippe|klicke|tippe auf|klicke auf) (?:at|on|auf) (?:coordinates?|koordinaten|position|stelle|punkt)[:\\s]\\s*\\(?\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)?"),
39-
Regex("(?i)\\b(?:tap|click|press|tippe|klicke|tippe auf|klicke auf) (?:at|on|auf) \\(?\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)?")
36+
Regex("(?i)\\b(?:tap|click|press|tippe|klicke|tippe auf|klicke auf) (?:at|on|auf) \\(?\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)?"),
37+
38+
// Function-like patterns
39+
Regex("(?i)\\btapAtCoordinates\\(\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)"),
40+
Regex("(?i)\\bclickAtPosition\\(\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)"),
41+
Regex("(?i)\\btapAt\\(\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)")
4042
)
4143

42-
// Take screenshot patterns - expanded German language support
44+
// Screenshot patterns - expanded for consistency
4345
private val TAKE_SCREENSHOT_PATTERNS = listOf(
44-
// English patterns
45-
Regex("(?i)\\b(?:take|capture|make) (?:a )?(?:screenshot|screen shot|screen-shot)(?:\\s|$)"),
46-
47-
// German patterns (expanded)
48-
Regex("(?i)\\b(?:nimm|erstelle|mache|nehme|erzeuge) (?:ein(?:e)? )?(?:bildschirmfoto|screenshot|bildschirmaufnahme|bildschirmabbild)(?:\\s|$)"),
49-
50-
// Function-like patterns
51-
Regex("(?i)\\b(?:takeScreenshot|captureScreen)\\(\\)")
46+
Regex("(?i)\\b(?:take|capture|make|nimm|erstelle|mache|nehme|erzeuge) (?:a |ein(?:e)? )?(?:screenshot|bildschirmfoto|bildschirmaufnahme|bildschirmabbild)"),
47+
Regex("(?i)\\btakeScreenshot\\(\\)"),
48+
Regex("(?i)\\bcaptureScreen\\(\\)")
5249
)
5350

51+
// Buffer for storing partial text between calls
52+
private var textBuffer = ""
53+
54+
// Flag to indicate if we should clear the buffer on next call
55+
private var shouldClearBuffer = false
56+
5457
/**
5558
* Parse commands from the given text
5659
*
5760
* @param text The text to parse for commands
61+
* @param clearBuffer Whether to clear the buffer before parsing (default: false)
5862
* @return A list of commands found in the text
5963
*/
60-
fun parseCommands(text: String): List<Command> {
64+
fun parseCommands(text: String, clearBuffer: Boolean = false): List<Command> {
6165
val commands = mutableListOf<Command>()
6266

6367
try {
64-
// Debug the input text
65-
Log.d(TAG, "Parsing text for commands: $text")
68+
// Clear buffer if requested or if flag is set
69+
if (clearBuffer || shouldClearBuffer) {
70+
textBuffer = ""
71+
shouldClearBuffer = false
72+
Log.d(TAG, "Buffer cleared")
73+
}
6674

67-
// Process the text line by line to improve context separation
68-
val lines = text.split("\n")
69-
for (line in lines) {
70-
// Skip empty lines
71-
if (line.trim().isEmpty()) continue
72-
73-
// Track if we found a command in this line
74-
var commandFoundInLine = false
75-
76-
// Look for click button commands
77-
val clickButtonCommand = findClickButtonCommand(line)
78-
if (clickButtonCommand != null) {
79-
commands.add(clickButtonCommand)
80-
commandFoundInLine = true
81-
Log.d(TAG, "Found click button command in line: $line")
82-
continue // Move to next line after finding a command
83-
}
84-
85-
// Look for tap coordinates commands
86-
val tapCoordinatesCommand = findTapCoordinatesCommand(line)
87-
if (tapCoordinatesCommand != null) {
88-
commands.add(tapCoordinatesCommand)
89-
commandFoundInLine = true
90-
Log.d(TAG, "Found tap coordinates command in line: $line")
91-
continue // Move to next line after finding a command
92-
}
93-
94-
// Look for take screenshot commands
95-
val takeScreenshotCommand = findTakeScreenshotCommand(line)
96-
if (takeScreenshotCommand != null) {
97-
commands.add(takeScreenshotCommand)
98-
commandFoundInLine = true
99-
Log.d(TAG, "Found take screenshot command in line: $line")
100-
continue // Move to next line after finding a command
101-
}
75+
// Normalize the text (trim whitespace, normalize line breaks)
76+
val normalizedText = normalizeText(text)
77+
78+
// Append to buffer
79+
textBuffer += normalizedText
80+
81+
// Debug the buffer
82+
Log.d(TAG, "Current buffer for command parsing: $textBuffer")
83+
84+
// Process the buffer line by line
85+
val lines = textBuffer.split("\n")
86+
87+
// Process the combined buffer as a single text (the per-line split above is not used)
88+
processText(textBuffer, commands)
89+
90+
// If we found commands, clear the buffer for next time
91+
if (commands.isNotEmpty()) {
92+
shouldClearBuffer = true
93+
Log.d(TAG, "Commands found, buffer will be cleared on next call")
10294
}
10395

10496
Log.d(TAG, "Found ${commands.size} commands in text")
@@ -119,115 +111,130 @@ object CommandParser {
119111
}
120112

121113
/**
122-
* Find a click button command in the text
123-
* Returns the first valid match or null if none found
114+
* Process text to find commands
115+
*/
116+
private fun processText(text: String, commands: MutableList<Command>) {
117+
// Look for click button commands
118+
findClickButtonCommands(text, commands)
119+
120+
// Look for tap coordinates commands
121+
findTapCoordinatesCommands(text, commands)
122+
123+
// Look for take screenshot commands
124+
findTakeScreenshotCommands(text, commands)
125+
}
126+
127+
/**
128+
* Normalize text by trimming whitespace and normalizing line breaks
124129
*/
125-
private fun findClickButtonCommand(text: String): Command.ClickButton? {
130+
private fun normalizeText(text: String): String {
131+
// Collapse every run of whitespace (spaces, tabs, and line breaks) into a single space
132+
var normalized = text.replace(Regex("\\s+"), " ")
133+
134+
// Ensure consistent line breaks
135+
normalized = normalized.replace(Regex("\\r\\n|\\r"), "\n")
136+
137+
return normalized
138+
}
139+
140+
/**
141+
* Find click button commands in the text
142+
*/
143+
private fun findClickButtonCommands(text: String, commands: MutableList<Command>) {
144+
// Try each pattern
126145
for (pattern in CLICK_BUTTON_PATTERNS) {
127-
val match = pattern.find(text)
128-
if (match != null && match.groupValues.size > 1) {
129-
val buttonText = match.groupValues[1].trim()
130-
if (buttonText.isNotEmpty()) {
131-
Log.d(TAG, "Found click button command with pattern ${pattern.pattern}: \"$buttonText\"")
132-
return Command.ClickButton(buttonText)
146+
val matches = pattern.findAll(text)
147+
for (match in matches) {
148+
try {
149+
if (match.groupValues.size > 1) {
150+
val buttonText = match.groupValues[1].trim()
151+
if (buttonText.isNotEmpty()) {
152+
// Check if this command is already in the list (avoid duplicates)
153+
if (!commands.any { it is Command.ClickButton && it.buttonText == buttonText }) {
154+
Log.d(TAG, "Found click button command with pattern ${pattern.pattern}: \"$buttonText\"")
155+
commands.add(Command.ClickButton(buttonText))
156+
}
157+
}
158+
}
159+
} catch (e: Exception) {
160+
Log.e(TAG, "Error processing click button match: ${e.message}", e)
133161
}
134162
}
135163
}
136-
return null
137164
}
138165

139166
/**
140-
* Find a tap coordinates command in the text
141-
* Returns the first valid match or null if none found
167+
* Find tap coordinates commands in the text
142168
*/
143-
private fun findTapCoordinatesCommand(text: String): Command.TapCoordinates? {
169+
private fun findTapCoordinatesCommands(text: String, commands: MutableList<Command>) {
170+
// Try each pattern
144171
for (pattern in TAP_COORDINATES_PATTERNS) {
145-
val match = pattern.find(text)
146-
if (match != null && match.groupValues.size > 2) {
172+
val matches = pattern.findAll(text)
173+
for (match in matches) {
147174
try {
148-
val x = match.groupValues[1].trim().toFloat()
149-
val y = match.groupValues[2].trim().toFloat()
150-
Log.d(TAG, "Found tap coordinates command with pattern ${pattern.pattern}: ($x, $y)")
151-
return Command.TapCoordinates(x, y)
175+
if (match.groupValues.size > 2) {
176+
val x = match.groupValues[1].trim().toFloat()
177+
val y = match.groupValues[2].trim().toFloat()
178+
179+
// Check if this command is already in the list (avoid duplicates)
180+
if (!commands.any { it is Command.TapCoordinates && it.x == x && it.y == y }) {
181+
Log.d(TAG, "Found tap coordinates command with pattern ${pattern.pattern}: ($x, $y)")
182+
commands.add(Command.TapCoordinates(x, y))
183+
}
184+
}
152185
} catch (e: Exception) {
153186
Log.e(TAG, "Error processing tap coordinates match: ${e.message}", e)
154187
}
155188
}
156189
}
157-
return null
158190
}
159191

160192
/**
161-
* Find a take screenshot command in the text
162-
* Returns a command if found or null if none found
193+
* Find take screenshot commands in the text
163194
*/
164-
private fun findTakeScreenshotCommand(text: String): Command.TakeScreenshot? {
195+
private fun findTakeScreenshotCommands(text: String, commands: MutableList<Command>) {
196+
// Try each pattern
165197
for (pattern in TAKE_SCREENSHOT_PATTERNS) {
166198
if (pattern.containsMatchIn(text)) {
167-
Log.d(TAG, "Found take screenshot command with pattern ${pattern.pattern}")
168-
return Command.TakeScreenshot
199+
// Check if this command is already in the list (avoid duplicates)
200+
if (!commands.any { it is Command.TakeScreenshot }) {
201+
Log.d(TAG, "Found take screenshot command with pattern ${pattern.pattern}")
202+
commands.add(Command.TakeScreenshot)
203+
// Only add one screenshot command even if multiple matches are found
204+
break
205+
}
169206
}
170207
}
171-
return null
208+
}
209+
210+
/**
211+
* Clear the text buffer
212+
*/
213+
fun clearBuffer() {
214+
textBuffer = ""
215+
shouldClearBuffer = false
216+
Log.d(TAG, "Buffer manually cleared")
172217
}
173218

174219
/**
175220
* Debug method to test if a specific command would be recognized
176-
* Returns detailed information about the matching process
177221
*/
178-
fun testCommandRecognition(commandText: String): String {
179-
val result = StringBuilder()
180-
result.append("Testing command recognition for: \"$commandText\"\n")
222+
fun testCommandRecognition(commandText: String): List<Command> {
223+
Log.d(TAG, "Testing command recognition for: \"$commandText\"")
181224

182-
// Test click button patterns
183-
result.append("\nTesting Click Button Patterns:\n")
184-
for (i in CLICK_BUTTON_PATTERNS.indices) {
185-
val pattern = CLICK_BUTTON_PATTERNS[i]
186-
val match = pattern.find(commandText)
187-
if (match != null && match.groupValues.size > 1) {
188-
val buttonText = match.groupValues[1].trim()
189-
result.append(" Pattern ${i+1}: Matched button text: \"$buttonText\"\n")
190-
} else if (match != null) {
191-
result.append(" Pattern ${i+1}: Matched but no capture group\n")
192-
}
193-
}
225+
// Clear buffer for testing
226+
clearBuffer()
194227

195-
// Test tap coordinates patterns
196-
result.append("\nTesting Tap Coordinates Patterns:\n")
197-
for (i in TAP_COORDINATES_PATTERNS.indices) {
198-
val pattern = TAP_COORDINATES_PATTERNS[i]
199-
val match = pattern.find(commandText)
200-
if (match != null && match.groupValues.size > 2) {
201-
val x = match.groupValues[1].trim()
202-
val y = match.groupValues[2].trim()
203-
result.append(" Pattern ${i+1}: Matched coordinates: x=$x, y=$y\n")
204-
} else if (match != null) {
205-
result.append(" Pattern ${i+1}: Matched but insufficient capture groups\n")
206-
}
207-
}
208-
209-
// Test take screenshot patterns
210-
result.append("\nTesting Take Screenshot Patterns:\n")
211-
for (i in TAKE_SCREENSHOT_PATTERNS.indices) {
212-
val pattern = TAKE_SCREENSHOT_PATTERNS[i]
213-
val match = pattern.find(commandText)
214-
if (match != null) {
215-
result.append(" Pattern ${i+1}: Matched\n")
216-
}
217-
}
218-
219-
// Parse commands using the normal method
220228
val commands = parseCommands(commandText)
221-
result.append("\nFinal Recognition Result: ${commands.size} commands found\n")
222-
commands.forEach { command ->
223-
when (command) {
224-
is Command.ClickButton -> result.append(" Command: ClickButton(\"${command.buttonText}\")\n")
225-
is Command.TapCoordinates -> result.append(" Command: TapCoordinates(${command.x}, ${command.y})\n")
226-
is Command.TakeScreenshot -> result.append(" Command: TakeScreenshot\n")
227-
}
228-
}
229-
230-
return result.toString()
229+
Log.d(TAG, "Recognition test result: ${commands.size} commands found")
230+
return commands
231+
}
232+
233+
/**
234+
* Get the current buffer content (for debugging)
235+
*/
236+
fun getBufferContent(): String {
237+
return textBuffer
231238
}
232239
}
233240

0 commit comments

Comments
 (0)