Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
797ae4c
Add mobench BrowserStack benchmark pipeline
Apr 3, 2026
85e3e79
Wire Browserstack env into mobile bench CI
Apr 3, 2026
5129b19
Add missing iOS simulator Rust target for CI
Apr 3, 2026
1c52323
Install Rust directly in mobile bench workflow
Apr 3, 2026
7d7ab5c
Fix mobile benchmark artifact builds in CI
Apr 4, 2026
27c4af0
Forward BrowserStack secrets to reusable workflow
Apr 4, 2026
1ef658f
Run PR benchmarks via reusable workflow
Apr 4, 2026
852641b
Fix fork mobile benchmark runners
Apr 4, 2026
52e1a5d
Use portable checks in mobile bench install
Apr 4, 2026
d33dbb5
Add bench-mobile UniFFI exports for CI builds
Apr 4, 2026
f85f034
Fix BrowserStack mobile benchmark recovery
Apr 4, 2026
316951f
Serialize Android mobile benchmark scope
Apr 4, 2026
73c340c
Use mobench 0.1.29 release in CI
Apr 5, 2026
ea7f265
Simplify mobench 0.1.29 release install
Apr 5, 2026
541ceae
Fix mobench 0.1.29 install in Android and summary jobs
Apr 7, 2026
9ecee23
Use short prove-only profile for mobile CI
Apr 8, 2026
c14fac5
Fix mobile bench empty-sample CI handling
Apr 9, 2026
6cea0e5
Fix mobench patch application in CI
Apr 9, 2026
3c27dc9
Clarify mobench 0.1.30 upstream requirements
Apr 9, 2026
c61852c
Accept nested mobench summary layout in validation
Apr 9, 2026
927b69e
Fix iOS embedded bench spec path
Apr 9, 2026
8ce085d
Handle nested summary layout in summary job
Apr 9, 2026
e74d28e
Fix iOS mobench spec propagation
Apr 9, 2026
8206aae
Fix mobench install assertions in summary jobs
Apr 9, 2026
ecf9105
Restore iOS mobench resource metrics
Apr 9, 2026
d21f468
Preserve raw iOS peak memory in summaries
Apr 9, 2026
210837a
Add mobench 0.1.30 upgrade agent brief
Apr 10, 2026
b661dd0
Add worst BrowserStack device profile
Apr 11, 2026
03320df
Adjust worst iOS device profile
Apr 11, 2026
7937d66
Use oldest viable iOS worst profile
Apr 11, 2026
931bb7a
Rewrite mobench 0.1.30 upstream brief
Apr 11, 2026
df8cbda
Upgrade mobench to 0.1.30
Apr 11, 2026
33c6219
Adjust mobench device profiles
Apr 11, 2026
7515da0
Use iPhone SE 2020 for worst iOS bench
Apr 11, 2026
5fc783b
Validate mobench CSV resource metrics
Apr 12, 2026
f5e4d58
Use upstream mobench summary rendering
Apr 12, 2026
9aee147
Pin mobench CPU total formatting fix
Apr 12, 2026
c3286c2
fix mobile bench reporting and CI fetch retries
Apr 13, 2026
499c351
fix mobench 0.1.31 workflow compatibility
Apr 13, 2026
487ee0b
fix mobench android abi selection
Apr 13, 2026
08d56dd
fix: align mobile bench workflow with mobench compatibility
Apr 14, 2026
15019d6
ci: default mobile benchmarks to mobench 0.1.32
Apr 14, 2026
bdffd56
ci: default mobile benchmarks to mobench 0.1.33
Apr 17, 2026
73df4bc
chore: bump mobench to 0.1.34
Apr 23, 2026
767a8e2
chore: bump mobench to 0.1.35
Apr 24, 2026
843fb6b
chore: test mobench 0.1.36 branch
Apr 27, 2026
776cbce
chore: repin mobench harness fix
Apr 27, 2026
68ae241
chore: update mobench integration to 0.1.37
Apr 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
317 changes: 317 additions & 0 deletions .github/scripts/validate_mobile_bench_outputs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
#!/usr/bin/env bash

set -euo pipefail

# Validate the artifacts produced by a BrowserStack mobile benchmark run.
#
# usage: validate_mobile_bench_outputs.sh <platform> <results-dir> <browserstack-dir>
#   platform          - label used in log and ::error::/::warning:: messages
#   results-dir       - tree searched for summary.json and results.csv
#   browserstack-dir  - tree searched for fetched build.json artifacts
if [ "$#" -ne 3 ]; then
echo "usage: $0 <platform> <results-dir> <browserstack-dir>" >&2
exit 2
fi

platform="$1"
results_dir="$2"
browserstack_dir="$3"
# Overall verdict; flipped to 1 by error() and used as the script's exit code.
failed=0
# State accumulated while inspecting artifacts; the hard checks at the bottom
# of the script turn these into ::error:: annotations.
device_summaries_count=0
csv_data_rows=0
recovered_payloads=0
# 1 while every benchmark result matches the requested spec; cleared otherwise.
spec_matches_requested=1
requested_spec='{}'
actual_specs='[]'
benchmarks_with_resource_usage=0
benchmarks_missing_resource_usage=0
csv_rows_with_resource_usage=0
csv_rows_missing_resource_usage=0

error() {
  # Emit a GitHub Actions error annotation and mark the overall run as failed.
  local message="$1"
  printf '::error::%s\n' "$message"
  failed=1
}

warn() {
  # Emit a non-fatal GitHub Actions warning annotation; does not set `failed`.
  printf '::warning::%s\n' "$1"
}

first_match() {
  # Print the lexicographically first file named "$2" anywhere under
  # directory "$1" (followed by a newline), or nothing when no file matches
  # or the directory does not exist.
  #
  # Reads a single line instead of piping through `head -1`: with
  # `set -o pipefail` in effect, head exiting after one line can SIGPIPE the
  # find|sort producers, turning a successful lookup into a failed pipeline
  # that `set -e` then treats as fatal at the call site.
  local search_root="$1"
  local name="$2"
  local match=''
  IFS= read -r match < <(find "$search_root" -type f -name "$name" 2>/dev/null | sort) || true
  if [ -n "$match" ]; then
    printf '%s\n' "$match"
  fi
}

# True (exit 0) when the JSON report at $1 holds at least one benchmark entry
# that has a name (.function or .spec.name) AND some timing data: a non-empty
# samples array or any summary statistic (mean/median/p95/min/max ns).
# Accepts either a single result object or an array of results. Silent on
# every failure mode (missing file, invalid JSON, no valid entry) — callers
# only consult the exit status.
has_valid_bench_payload() {
local report_path="$1"
jq -e '
def valid_result:
((.function? // .spec?.name?) != null)
and (
((.samples? // []) | length) > 0
or (.mean_ns? != null)
or (.median_ns? != null)
or (.p95_ns? != null)
or (.min_ns? != null)
or (.max_ns? != null)
);

if type == "array" then
any(.[]; valid_result)
else
valid_result
end
' "$report_path" >/dev/null 2>&1
}

echo "Inspecting ${platform} results"
echo " results_dir=${results_dir}"
echo " browserstack_dir=${browserstack_dir}"

# Locate the two primary artifacts anywhere under the results tree.
# Each is empty when the file was not produced; checked at the bottom.
summary_json="$(first_match "$results_dir" summary.json)"
results_csv="$(first_match "$results_dir" results.csv)"

if [ -n "$summary_json" ]; then
# Device-summary count; the summary may be flat (.device_summaries) or
# nested (.summary.device_summaries), so take the larger of the two.
device_summaries_count="$(
jq -r '
[
((.device_summaries // []) | length),
((.summary?.device_summaries // []) | length)
] | max
' "$summary_json"
)"
# Spec the run was asked to execute, normalized to {function, iterations,
# warmup} with sentinel defaults so missing fields still compare cleanly.
requested_spec="$(
jq -c '
{
function: (.spec.function // ""),
iterations: (.spec.iterations // -1),
warmup: (.spec.warmup // -1)
}
' "$summary_json"
)"
# Unique specs actually observed across all benchmark results, normalized
# the same way (field may live at .function/.iterations or under .spec).
actual_specs="$(
jq -c '
[
(.benchmark_results // {})
| to_entries[]?
| .value[]?
| {
function: (.function // .spec?.name // ""),
iterations: (.spec?.iterations // .iterations // -1),
warmup: (.spec?.warmup // .warmup // -1)
}
] | unique
' "$summary_json"
)"
# Pass only when there is at least one result and every result's spec
# equals the requested spec (jq object equality is key-order independent).
if ! jq -e '
def requested:
{
function: (.spec.function // ""),
iterations: (.spec.iterations // -1),
warmup: (.spec.warmup // -1)
};
def actual_specs:
[
(.benchmark_results // {})
| to_entries[]?
| .value[]?
| {
function: (.function // .spec?.name // ""),
iterations: (.spec?.iterations // .iterations // -1),
warmup: (.spec?.warmup // .warmup // -1)
}
] | unique;
requested as $requested
| actual_specs as $actual
| ($actual | length) > 0
and all($actual[]; . == $requested)
' "$summary_json" >/dev/null; then
spec_matches_requested=0
fi
echo " summary_json=${summary_json}"
echo " summary_device_summaries=${device_summaries_count}"
echo " requested_spec=${requested_spec}"
echo " actual_specs=${actual_specs}"
# Count benchmarks that report BOTH resource-usage metrics (cpu_total_ms
# and peak_memory_kb), again accepting the nested or flat summary layout.
benchmarks_with_resource_usage="$(
jq -r '
[
((.summary?.device_summaries // .device_summaries // [])[]?.benchmarks[]?)
| select(
(.resource_usage?.cpu_total_ms // null) != null
and (.resource_usage?.peak_memory_kb // null) != null
)
] | length
' "$summary_json"
)"
# Complement of the above: benchmarks missing either metric.
benchmarks_missing_resource_usage="$(
jq -r '
[
((.summary?.device_summaries // .device_summaries // [])[]?.benchmarks[]?)
| select(
(.resource_usage?.cpu_total_ms // null) == null
or (.resource_usage?.peak_memory_kb // null) == null
)
] | length
' "$summary_json"
)"
echo " benchmarks_with_resource_usage=${benchmarks_with_resource_usage}"
echo " benchmarks_missing_resource_usage=${benchmarks_missing_resource_usage}"
else
warn "${platform}: summary.json was not found under ${results_dir}"
fi

if [ -n "$results_csv" ]; then
# Data rows = physical lines minus the header row.
# NOTE(review): wc -l counts newlines, so a final row without a trailing
# newline would be missed — presumably the generator always terminates
# the file; confirm against the producer.
csv_line_count="$(wc -l < "$results_csv" | tr -d ' ')"
if [ "$csv_line_count" -gt 0 ]; then
csv_data_rows=$((csv_line_count - 1))
fi
# Count rows that do / do not carry both resource-usage columns. Parsed
# with Python's csv module so quoted fields and embedded commas are
# handled correctly; the heredoc script prints "<with> <missing>".
read -r csv_rows_with_resource_usage csv_rows_missing_resource_usage < <(
python3 - "$results_csv" <<'PY'
import csv
import sys

path = sys.argv[1]
with_usage = 0
missing = 0

with open(path, newline="", encoding="utf-8") as handle:
reader = csv.DictReader(handle)
for row in reader:
cpu_total_ms = (row.get("cpu_total_ms") or "").strip()
peak_memory_kb = (row.get("peak_memory_kb") or "").strip()
if cpu_total_ms and peak_memory_kb:
with_usage += 1
else:
missing += 1

print(with_usage, missing)
PY
)
echo " results_csv=${results_csv}"
echo " csv_data_rows=${csv_data_rows}"
echo " csv_rows_with_resource_usage=${csv_rows_with_resource_usage}"
echo " csv_rows_missing_resource_usage=${csv_rows_missing_resource_usage}"
else
warn "${platform}: results.csv was not found under ${results_dir}"
fi

# Walk every fetched BrowserStack build.json, then each session listed inside
# it, recording (a) whether any build artifact exists at all, (b) whether any
# build/session/testcase is in a non-terminal or failed state, and (c) how
# many per-session bench-report.json payloads contain usable benchmark data.
build_found=0
has_incomplete_browserstack_state=0

while IFS= read -r build_json; do
[ -n "$build_json" ] || continue
build_found=1
build_dir="$(dirname "$build_json")"
build_id="$(jq -r '.build_id // .id // "unknown"' "$build_json")"
build_status="$(jq -r '.status // "unknown"' "$build_json")"
# Status comparisons are case-insensitive.
build_status_lc="$(printf '%s' "$build_status" | tr '[:upper:]' '[:lower:]')"

echo " browserstack_build id=${build_id} status=${build_status} dir=${build_dir}"

# Any of these build states means the run did not complete cleanly.
case "$build_status_lc" in
running|failed|error|timeout|timedout)
has_incomplete_browserstack_state=1
;;
esac

# Inner loop consumes TSV rows (session id, device label, status) emitted
# by the jq program in the process substitution below.
while IFS=$'\t' read -r session_id device_name session_status; do
[ -n "$session_id" ] || continue

session_dir="${build_dir}/session-${session_id}"
session_json="${session_dir}/session.json"
testcase_status='{}'
testcase_problem_count=0
payload_found=false

# Sum the testcase states that indicate trouble (running/failed/
# error/timedout); session.json may legitimately be absent.
if [ -f "$session_json" ]; then
testcase_status="$(jq -c '.testcases.status // {}' "$session_json")"
testcase_problem_count="$(
jq -r '(
(.testcases.status.running // 0)
+ (.testcases.status.failed // 0)
+ (.testcases.status.error // 0)
+ (.testcases.status.timedout // 0)
)' "$session_json"
)"
fi

# A session counts as recovered only when its bench-report.json holds
# at least one named result with timing data.
bench_report="${session_dir}/bench-report.json"
if [ -f "$bench_report" ] && has_valid_bench_payload "$bench_report"; then
payload_found=true
recovered_payloads=$((recovered_payloads + 1))
fi

echo " browserstack_session device=${device_name} session=${session_id} status=${session_status} testcase_status=${testcase_status} payload=${payload_found}"

session_status_lc="$(printf '%s' "$session_status" | tr '[:upper:]' '[:lower:]')"
case "$session_status_lc" in
running|failed|error|timeout|timedout)
has_incomplete_browserstack_state=1
;;
esac

if [ "$testcase_problem_count" -gt 0 ]; then
has_incomplete_browserstack_state=1
fi
done < <(
jq -r '
(.devices // [])[]? as $device
| ($device.device // $device.name // "unknown") as $device_name
| ($device.os_version // "") as $os_version
| ($device.sessions // [])[]?
| [
(.id // .session_id // .sessionId // ""),
($device_name + (if $os_version == "" then "" else "-" + $os_version end)),
(.status // "unknown")
]
| @tsv
' "$build_json"
)
done < <(find "$browserstack_dir" -type f -name build.json 2>/dev/null | sort)

echo " recovered_benchmark_payloads=${recovered_payloads}"

# Hard validation: each failed check emits an ::error:: annotation via
# error() and flips `failed`; all checks run so every problem is reported
# in a single pass before the script exits with the combined verdict.

# summary.json must exist, contain device summaries, and every benchmark
# result must match the requested spec.
if [ -z "$summary_json" ]; then
error "${platform}: summary.json was not produced"
fi

if [ "$device_summaries_count" -le 0 ]; then
error "${platform}: summary.json has no device_summaries"
fi

if [ "$spec_matches_requested" -eq 0 ]; then
error "${platform}: benchmark results do not match requested spec ${requested_spec}; actual ${actual_specs}"
fi

# results.csv must exist, contain data rows, and every row must report
# both resource-usage columns.
if [ -z "$results_csv" ]; then
error "${platform}: results.csv was not produced"
fi

if [ "$csv_data_rows" -le 0 ]; then
error "${platform}: results.csv has no benchmark data rows"
fi

if [ "$csv_rows_with_resource_usage" -le 0 ]; then
error "${platform}: no results.csv rows reported both cpu_total_ms and peak_memory_kb"
fi

if [ "$csv_rows_missing_resource_usage" -gt 0 ]; then
error "${platform}: ${csv_rows_missing_resource_usage} results.csv row(s) were missing cpu_total_ms or peak_memory_kb"
fi

# Every benchmark in summary.json must likewise carry both metrics.
if [ "$benchmarks_with_resource_usage" -le 0 ]; then
error "${platform}: no benchmark rows reported both cpu_total_ms and peak_memory_kb"
fi

if [ "$benchmarks_missing_resource_usage" -gt 0 ]; then
error "${platform}: ${benchmarks_missing_resource_usage} benchmark row(s) were missing cpu_total_ms or peak_memory_kb"
fi

# BrowserStack artifacts must have been fetched and be in a terminal,
# successful state.
if [ "$build_found" -eq 0 ]; then
error "${platform}: no BrowserStack build.json artifacts were fetched"
fi

if [ "$has_incomplete_browserstack_state" -ne 0 ]; then
error "${platform}: BrowserStack build/session state is incomplete or failed"
fi

# Missing recovered payloads is only a warning: the primary artifacts above
# may still have been produced through the normal (non-recovery) path.
if [ "$recovered_payloads" -le 0 ]; then
warn "${platform}: no bench-report.json payloads were recovered from fetched BrowserStack artifacts"
fi

exit "$failed"
Loading
Loading