From 17a399794908bd584386bba1ac8e4d97dbcb46bc Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Fri, 13 Mar 2026 11:39:38 +0000 Subject: [PATCH] fix(describe-ui): Add point option support Expose the documented --point option on describe-ui and route point lookups through the simulator accessibility API instead of relying on full-tree output alone. Add command-surface and E2E coverage for help, validation, and point-specific payload shape so the CLI contract stays aligned with the runtime behavior and bundled skill docs. Fixes #38 Co-Authored-By: OpenAI Codex --- CHANGELOG.md | 6 ++ Skills/CLI/axe/SKILL.md | 6 +- Sources/AXe/Commands/DescribeUI.swift | 68 ++++++++++++------- Sources/AXe/Resources/skills/axe/SKILL.md | 4 +- .../AXe/Utilities/AccessibilityFetcher.swift | 48 +++++++++---- Tests/DescribeUITests.swift | 68 +++++++++++++++++++ Tests/TestUtilities.swift | 66 +++++++++++++++--- 7 files changed, 216 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e15bfd6..f816b2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to the AXe iOS testing framework will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- Fixed `describe-ui` to expose and implement the documented `--point` option in command help and runtime behavior ([#38](https://github.com/cameroncooke/AXe/issues/38)) + ## [v1.5.0] - 2026-03-04 ### Added diff --git a/Skills/CLI/axe/SKILL.md b/Skills/CLI/axe/SKILL.md index 19b1167..3ea4e12 100644 --- a/Skills/CLI/axe/SKILL.md +++ b/Skills/CLI/axe/SKILL.md @@ -5,8 +5,8 @@ description: Provides agent-ready AXe CLI usage guidance for iOS Simulator autom ## Step 1: Confirm runtime context 1. Identify simulator UDID target first (`axe list-simulators`). -2. Every AXe command requires `--udid `. -3. Run `axe describe-ui --udid ` to inspect the current screen. Use the output to discover available `--id` and `--label` values for selector taps, and to confirm coordinates for coordinate-based taps. +2. Simulator-interaction AXe commands require `--udid `. Commands like `list-simulators` and `init` do not. +3. Run `axe describe-ui --udid ` to inspect the full current screen. Use `axe describe-ui --point --udid ` to inspect the element at a specific coordinate. Use the output to discover available `--id` and `--label` values for selector taps, and to confirm coordinates for coordinate-based taps. 4. Prefer selector taps (`tap --id` / `tap --label`) over raw coordinates. Selectors are resilient to layout changes, work across device sizes, and support element waiting (`--wait-timeout`) in batch flows. ## Step 2: Choose the right command @@ -20,6 +20,7 @@ axe tap --label --udid axe tap -x -y --udid axe type 'text' --udid axe describe-ui --udid +axe describe-ui --point --udid axe screenshot --udid --output screenshot.png ``` @@ -64,6 +65,7 @@ Batch and individual commands are execution-focused, not assertion-focused. Alwa ```bash axe describe-ui --udid +axe describe-ui --point --udid # or axe screenshot --udid --output post-state.png ``` diff --git a/Sources/AXe/Commands/DescribeUI.swift b/Sources/AXe/Commands/DescribeUI.swift index 34b05e9..9f8b228 100644 --- a/Sources/AXe/Commands/DescribeUI.swift +++ b/Sources/AXe/Commands/DescribeUI.swift @@ -1,7 +1,5 @@ import ArgumentParser import Foundation -import FBControlCore -import FBSimulatorControl struct DescribeUI: AsyncParsableCommand { static let configuration = CommandConfiguration( @@ -11,36 +9,54 @@ struct DescribeUI: AsyncParsableCommand { @Option(name: .customLong("udid"), help: "The UDID of the simulator.") var simulatorUDID: String + @Option( + name: .customLong("point"), + help: ArgumentHelp( + "Describe only the accessibility element at screen coordinates x,y.", + valueName: "x,y" + ) + ) + var point: String? + + func validate() throws { + _ = try parsedPoint() + } + func run() async throws { let logger = AxeLogger() - try await performGlobalSetup(logger: logger) - // Check Xcode availability - do { - let isXcodeAvailable: NSString = try await FutureBridge.value(FBXcodeDirectory.xcodeSelectDeveloperDirectory()) - if isXcodeAvailable.length == 0 { - logger.error().log("Xcode is not available, idb will not be able to use Simulators") - throw CLIError(errorDescription: "Xcode is not available, idb will not be able to use Simulators") - } - } catch { - logger.error().log("Xcode is not available, idb will not be able to use Simulators: \(error.localizedDescription)") - throw CLIError(errorDescription: "Xcode is not available, idb will not be able to use Simulators") - } - - // Load essential frameworks - do { - try FBSimulatorControlFrameworkLoader.essentialFrameworks.loadPrivateFrameworks(logger) - } catch { - logger.info().log("Essential private frameworks failed to loaded.") - throw error - } - - // Fetch accessibility information - let jsonData = try await AccessibilityFetcher.fetchAccessibilityInfoJSONData(for: simulatorUDID, logger: logger) + let jsonData = try await AccessibilityFetcher.fetchAccessibilityInfoJSONData( + for: simulatorUDID, + point: try parsedPoint(), + logger: logger + ) guard let jsonString = String(data: jsonData, encoding: .utf8) else { throw CLIError(errorDescription: "Failed to convert accessibility info to JSON string.") } print(jsonString) } -} + + private func parsedPoint() throws -> AccessibilityPoint? { + guard let point else { + return nil + } + + let coordinates = point + .split(separator: ",", omittingEmptySubsequences: false) + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + + guard coordinates.count == 2, + let x = Double(coordinates[0]), + let y = Double(coordinates[1]), + x.isFinite, + y.isFinite, + x >= 0, + y >= 0 + else { + throw ValidationError("--point must be in the form x,y using non-negative numbers.") + } + + return AccessibilityPoint(x: x, y: y) + } +} diff --git a/Sources/AXe/Resources/skills/axe/SKILL.md b/Sources/AXe/Resources/skills/axe/SKILL.md index c2900be..d5d1bde 100644 --- a/Sources/AXe/Resources/skills/axe/SKILL.md +++ b/Sources/AXe/Resources/skills/axe/SKILL.md @@ -6,7 +6,7 @@ description: Provides agent-ready AXe CLI usage guidance for iOS Simulator autom ## Step 1: Confirm runtime context 1. Identify simulator UDID target first (`axe list-simulators`). 2. Simulator-interaction AXe commands require `--udid `. Commands like `list-simulators` and `init` do not. -3. Run `axe describe-ui --udid ` to inspect the current screen. Use the output to discover available `--id` and `--label` values for selector taps, and to confirm coordinates for coordinate-based taps. +3. Run `axe describe-ui --udid ` to inspect the full current screen. Use `axe describe-ui --point --udid ` to inspect the element at a specific coordinate. Use the output to discover available `--id` and `--label` values for selector taps, and to confirm coordinates for coordinate-based taps. 4. Prefer selector taps (`tap --id` / `tap --label`) over raw coordinates. Selectors are resilient to layout changes, work across device sizes, and support element waiting (`--wait-timeout`) in batch flows. ## Step 2: Choose the right command @@ -20,6 +20,7 @@ axe tap --label --udid axe tap -x -y --udid axe type 'text' --udid axe describe-ui --udid +axe describe-ui --point --udid axe screenshot --udid --output screenshot.png ``` @@ -64,6 +65,7 @@ Batch and individual commands are execution-focused, not assertion-focused. Alwa ```bash axe describe-ui --udid +axe describe-ui --point --udid # or axe screenshot --udid --output post-state.png ``` diff --git a/Sources/AXe/Utilities/AccessibilityFetcher.swift b/Sources/AXe/Utilities/AccessibilityFetcher.swift index c30e01d..e81da5c 100644 --- a/Sources/AXe/Utilities/AccessibilityFetcher.swift +++ b/Sources/AXe/Utilities/AccessibilityFetcher.swift @@ -2,10 +2,23 @@ import Foundation import FBControlCore import FBSimulatorControl +struct AccessibilityPoint: Equatable { + let x: Double + let y: Double + + var cgPoint: CGPoint { + CGPoint(x: x, y: y) + } +} + // MARK: - Accessibility Fetcher @MainActor struct AccessibilityFetcher { - static func fetchAccessibilityInfoJSONData(for simulatorUDID: String, logger: AxeLogger) async throws -> Data { + static func fetchAccessibilityInfoJSONData( + for simulatorUDID: String, + point: AccessibilityPoint? = nil, + logger: AxeLogger + ) async throws -> Data { let simulatorSet = try await getSimulatorSet(deviceSetPath: nil, logger: logger, reporter: EmptyEventReporter.shared) guard let target = simulatorSet.allSimulators.first(where: { $0.udid == simulatorUDID }) else { @@ -13,21 +26,19 @@ struct AccessibilityFetcher { } // FBSimulator conforms to FBAccessibilityCommands. - let accessibilityInfoFuture: FBFuture = target.accessibilityElements(withNestedFormat: true) - let infoAnyObject: AnyObject = try await FutureBridge.value(accessibilityInfoFuture) - - if let nsDict = infoAnyObject as? NSDictionary { - return try JSONSerialization.data(withJSONObject: nsDict, options: [.prettyPrinted]) + let accessibilityInfoFuture: FBFuture + if let point { + accessibilityInfoFuture = target.accessibilityElement(at: point.cgPoint, nestedFormat: true) + } else { + accessibilityInfoFuture = target.accessibilityElements(withNestedFormat: true) } - if let nsArray = infoAnyObject as? NSArray { - return try JSONSerialization.data(withJSONObject: nsArray, options: [.prettyPrinted]) - } - - throw CLIError(errorDescription: "Accessibility info was not a dictionary or array as expected.") + + let infoAnyObject: AnyObject = try await FutureBridge.value(accessibilityInfoFuture) + return try serializeAccessibilityInfo(infoAnyObject) } - + static func fetchAccessibilityElements(for simulatorUDID: String, logger: AxeLogger) async throws -> [AccessibilityElement] { - let jsonData = try await fetchAccessibilityInfoJSONData(for: simulatorUDID, logger: logger) + let jsonData = try await fetchAccessibilityInfoJSONData(for: simulatorUDID, point: nil, logger: logger) let decoder = JSONDecoder() if let roots = try? decoder.decode([AccessibilityElement].self, from: jsonData) { @@ -37,4 +48,15 @@ struct AccessibilityFetcher { let root = try decoder.decode(AccessibilityElement.self, from: jsonData) return [root] } + + private static func serializeAccessibilityInfo(_ accessibilityInfo: AnyObject) throws -> Data { + if let nsDict = accessibilityInfo as? NSDictionary { + return try JSONSerialization.data(withJSONObject: nsDict, options: [.prettyPrinted]) + } + if let nsArray = accessibilityInfo as? NSArray { + return try JSONSerialization.data(withJSONObject: nsArray, options: [.prettyPrinted]) + } + + throw CLIError(errorDescription: "Accessibility info was not a dictionary or array as expected.") + } } diff --git a/Tests/DescribeUITests.swift b/Tests/DescribeUITests.swift index 38381bf..17b07d8 100644 --- a/Tests/DescribeUITests.swift +++ b/Tests/DescribeUITests.swift @@ -1,6 +1,28 @@ import Testing import Foundation +@Suite("Describe UI Command Surface Tests") +struct DescribeUICommandSurfaceTests { + @Test("--point appears in describe-ui --help") + func describeUIHelpIncludesPoint() async throws { + let result = try await TestHelpers.runAxeCommand("describe-ui --help") + #expect(result.output.contains("--point ")) + } + + @Test("--point appears in help describe-ui") + func helpDescribeUIIncludesPoint() async throws { + let result = try await TestHelpers.runAxeCommand("help describe-ui") + #expect(result.output.contains("--point ")) + } + + @Test("Invalid --point format fails with guidance") + func invalidPointFormatFails() async throws { + let result = try await TestHelpers.runAxeCommandAllowFailure("describe-ui --udid invalid --point nope") + #expect(result.exitCode != 0) + #expect(result.output.contains("--point must be in the form x,y using non-negative numbers.")) + } +} + @Suite("Describe UI Command Tests", .serialized, .enabled(if: isE2EEnabled)) struct DescribeUITests { @Test("Basic describe-ui returns valid JSON") @@ -28,4 +50,50 @@ struct DescribeUITests { #expect(uiState.children != nil, "Root element should have children") #expect(uiState.children?.count ?? 0 > 0, "Should have at least one child element") } + + @Test("Describe-ui --point returns the targeted element") + func describeUIAtPoint() async throws { + let simulatorUDID = try TestHelpers.requireSimulatorUDID() + try await TestHelpers.launchPlaygroundApp(to: "tap-test", simulatorUDID: simulatorUDID) + + let uiState = try await TestHelpers.getUIState(simulatorUDID: simulatorUDID) + guard let backButton = UIStateParser.findElement(in: uiState, withIdentifier: "BackButton"), + let frame = backButton.frame + else { + throw TestError.elementNotFound("BackButton with frame was not found in describe-ui output") + } + + let centerX = frame.x + (frame.width / 2) + let centerY = frame.y + (frame.height / 2) + let point = "\(centerX),\(centerY)" + + let result = try await TestHelpers.runAxeCommand( + "describe-ui --point \(point)", + simulatorUDID: simulatorUDID + ) + + let roots = try UIStateParser.parseDescribeUIRoots(result.output) + #expect(roots.count == 1, "Point-based describe-ui should return a single top-level element") + + let targetedElement = try #require(roots.first) + let targetedFrame = try #require(targetedElement.frame) + + #expect(targetedElement.identifier == "BackButton") + #expect(targetedElement.label == "AXe Playground") + #expect(targetedElement.type == "Button") + #expect(targetedElement.role == "AXButton") + #expect(targetedElement.roleDescription == "back button") + #expect(targetedElement.enabled == true) + #expect(targetedElement.contentRequired == false) + #expect(targetedElement.title == nil) + #expect(targetedElement.helpText == nil) + #expect(targetedElement.subrole == nil) + #expect(targetedElement.AXFrame == "{{16, 62}, {44, 44}}") + #expect(targetedElement.children?.isEmpty == true) + #expect(targetedElement.customActions?.isEmpty == true) + #expect(targetedFrame.x == 16) + #expect(targetedFrame.y == 62) + #expect(targetedFrame.width == 44) + #expect(targetedFrame.height == 44) + } } diff --git a/Tests/TestUtilities.swift b/Tests/TestUtilities.swift index f2ec89b..7f9c455 100644 --- a/Tests/TestUtilities.swift +++ b/Tests/TestUtilities.swift @@ -97,11 +97,40 @@ struct UIElement: Codable { let type: String let frame: Frame? let children: [UIElement]? + let role: String? + let enabled: Bool? + let title: String? + let subrole: String? + let contentRequired: Bool? + let roleDescription: String? + let helpText: String? + let AXFrame: String? + let customActions: [String]? // The actual JSON uses AX prefixed fields let AXLabel: String? let AXValue: String? + let AXUniqueId: String? let AXIdentifier: String? + + enum CodingKeys: String, CodingKey { + case type + case frame + case children + case role + case enabled + case title + case subrole + case contentRequired = "content_required" + case roleDescription = "role_description" + case helpText = "help" + case AXFrame + case customActions = "custom_actions" + case AXLabel + case AXValue + case AXUniqueId + case AXIdentifier + } struct Frame: Codable { let x: Double @@ -120,18 +149,15 @@ struct UIElement: Codable { } var identifier: String? { - return AXIdentifier + return AXUniqueId ?? AXIdentifier } } struct UIStateParser { - static func parseDescribeUIOutput(_ jsonString: String) throws -> UIElement { - // The describe-ui command outputs a header "Accessibility Information (JSON):" - // followed by the JSON array. We need to extract just the JSON part. + static func parseDescribeUIRoots(_ jsonString: String) throws -> [UIElement] { var jsonContent = jsonString - // Find the first '[' which marks the start of the JSON array - if let jsonStart = jsonString.firstIndex(of: "[") { + if let jsonStart = jsonString.firstIndex(where: { $0 == "[" || $0 == "{" }) { jsonContent = String(jsonString[jsonStart...]) } @@ -140,8 +166,16 @@ struct UIStateParser { } let decoder = JSONDecoder() - // The output is an array, so decode it and return the first element - let elements = try decoder.decode([UIElement].self, from: data) + if let elements = try? decoder.decode([UIElement].self, from: data) { + return elements + } + + let element = try decoder.decode(UIElement.self, from: data) + return [element] + } + + static func parseDescribeUIOutput(_ jsonString: String) throws -> UIElement { + let elements = try parseDescribeUIRoots(jsonString) guard let firstElement = elements.first else { throw TestError.invalidJSON("No UI elements found") } @@ -181,6 +215,22 @@ struct UIStateParser { element.label?.contains(containing) == true } } + + static func findElement(in roots: [UIElement], matching predicate: (UIElement) -> Bool) -> UIElement? { + for root in roots { + if let element = findElement(in: root, matching: predicate) { + return element + } + } + + return nil + } + + static func findElement(in roots: [UIElement], withIdentifier identifier: String) -> UIElement? { + findElement(in: roots) { element in + element.identifier == identifier + } + } } // MARK: - Test Helpers