diff --git a/benchmark/MLPerfTiny_Rules.adoc b/benchmark/MLPerfTiny_Rules.adoc
index df39cb07..44529521 100644
--- a/benchmark/MLPerfTiny_Rules.adoc
+++ b/benchmark/MLPerfTiny_Rules.adoc
@@ -177,9 +177,16 @@ The suite includes the following benchmarks:
 | Visual Wake Words | Binary image classification | Visual Wake Words Dataset | MobileNet | 80% (Top 1)
 | Image Classification | Small image classification | Cifar10 | ResNet | 85% (Top 1)
 | Anomaly Detection | Detecting anomalies in machine operating sounds | ToyADMOS | Deep AutoEncoder | 0.85 (AUC)
-| Streaming Wakeword | Detecting wakewords in a continuous stream of audio| Custom | 1D DS-CNN | TBD
+| Streaming Wakeword | Detecting wakewords in a continuous stream of audio | Custom | 1D DS-CNN | <= 8 FP, <= 8 FN
 |===
+
+For the quality target, keyword spotting, visual wakewords, and image classification all use top-1 accuracy as the key metric. Anomaly detection
+uses the area under the ROC curve (true positive rate vs false positive rate), as computed by
+https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html[sklearn.metrics.roc_auc_score].
+
+The streaming wakeword benchmark uses a combination of false positives and false negatives, requiring no more than 8 of either.
+
 ==== Relaxed constraints for the Open division
 
 1. An Open benchmark must perform a task matching an existing Closed benchmark, and be substitutable in LoadGen for that benchmark.
@@ -193,7 +200,8 @@ The suite includes the following benchmarks:
 
 === EnergyRunner™ benchmark framework
 
-The benchmark suite is run using the EnergyRunner™ benchmark framework from EEMBC, which detects the DUT, sends inputs, and reads outputs over UART. The EEMBC runner is being phased out. It will be permitted for teh KWS, VWW, IC, and AD benchmarks in the summer 2015 submission. After that, only the MLCommons Runner will be permitted. The EEMBC runner does not support the streaming wakeword benchmark.
+
+The benchmark suite is run using the EnergyRunner™ benchmark framework from EEMBC, which detects the DUT, sends inputs, and reads outputs over UART. The EEMBC runner is being phased out. It will be permitted for the KWS, VWW, IC, and AD benchmarks in the summer 2025 submission. After that, only the MLCommons Runner will be permitted. The EEMBC runner does not support the streaming wakeword benchmark.
 
 The EEMBC runner is available here: https://github.com/eembc/energyrunner
 The MLCommons runner is available in this repository: https://github.com/mlcommons/tiny/tree/master/benchmark/runner
diff --git a/benchmark/runner/README.md b/benchmark/runner/README.md
index 86af4b71..ffe879f5 100644
--- a/benchmark/runner/README.md
+++ b/benchmark/runner/README.md
@@ -164,6 +164,25 @@ The device file defines available devices that are automatically detected by the
 - **`usb`**: `dict` where the key is `vid` and the value is a `pid` or a list of `pid`s.
 - **`usb_description`**: A string used to match the USB description.
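+
+As a sketch, a device entry combining these fields might look like the following (the `name`, `type` value, VID, and PID here are illustrative placeholders, not a real device):
+```
+- name: example_power_monitor
+  type: power
+  usb:
+    0x1234: 0x5678            # vid: pid -- a list of pids is also accepted
+  usb_description: "Example Vendor Virtual ComPort"
+```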
+
+#### Adding a New Device
+You can use the PySerial module's list_ports function to get the VID and PID of a device, as long as it presents as a serial interface:
+```
+jeremy@macbook-pro-16%>python -m serial.tools.list_ports -v
+/dev/cu.Bluetooth-Incoming-Port
+   desc: n/a
+   hwid: n/a
+/dev/cu.usbmodem1403 <<==== This is the reference DUT
+   desc: STLINK-V3
+   hwid: USB VID:PID=0483:374E SER=005300313532511531333430 LOCATION=0-1.4
+/dev/cu.usbmodem2061398A4D431 <<==== This is the LPM01A power monitor
+   desc: PowerShield (Virtual ComPort in FS Mode)
+   hwid: USB VID:PID=0483:5740 SER=2061398A4D43 LOCATION=1-1
+/dev/cu.wlan-debug
+   desc: n/a
+   hwid: n/a
+4 ports found
+```
 
 ---
 
 ### Device Under Test Configuration `dut.yml`
@@ -272,3 +291,6 @@ If the I2S transfer appears not to be working, here are a few things to try.
 
 ### Baud Rate for Interface board:
 Located in file /application/user/core/usart.c
+### A device with vid:pid XX:YY failed to provide a serial number.
+In some cases, multiple devices may have the same VID and PID. For example, on an MCU development board, the VID/PID may be linked to the vendor's debugger/programmer (e.g. ST-Link) rather than to the development board specifically, so without a serial number the runner cannot reliably tell such devices apart.
+Workaround: Use a USB-serial converter so that the offending device presents with a different VID:PID.
diff --git a/benchmark/runner/device_manager.py b/benchmark/runner/device_manager.py
index b936940c..2c17560b 100644
--- a/benchmark/runner/device_manager.py
+++ b/benchmark/runner/device_manager.py
@@ -17,7 +17,7 @@ def precheck_device_name(dev_cfg, serial_device, mode):
   return True.  If the device on does not respond to the "name%" command, or
   responds but the name does not match check_name return False.  If the
   response matches check_name, return True.
-  Note that this function uses teh 'check_name' property, not 'name', which
+  Note that this function uses the 'check_name' property, not 'name', which
   is mostly arbitrary
   ** Arguments:
   - dev_cfg: device configuration dict from devices.yaml
@@ -129,10 +129,8 @@ def scan(self):
     """Scan for both serial and USB-only devices and initialize them."""
     pending_serial = [p for p in list_ports.comports(True) if p.vid]
     matched = []
-    comport_serial_numbers = []
 
     for p in pending_serial:
-      comport_serial_numbers.append(p.serial_number)
       for d in self._device_defs:
         found = False
         for vid, pids in d.get("usb", {}).items():
@@ -154,12 +152,16 @@ def scan(self):
     # Additional scan for USB-only devices (non-serial)
     all_usb = usb.core.find(find_all=True)
     for dev in all_usb:
-      if dev.serial_number in comport_serial_numbers:
-        # we already handled this device in the loop on list_ports.comports()
-        continue
       vid = dev.idVendor
       pid = dev.idProduct
 
+      for d in self._device_defs:
+        if d.get("interface", "") != "direct_usb":
+          # this association logic is only for direct (non-serial) devices, like the JS-220,
+          # so skip it if interface is unspecified or not "direct_usb".
+          # Without this block, a VID/PID match that has been previously rejected based on
+          # "name" mismatch can be incorrectly associated here.
+ continue for k, v in d.get("usb", {}).items(): if isinstance(v, list): if pid in v and vid == k: diff --git a/benchmark/runner/device_under_test.py b/benchmark/runner/device_under_test.py index c8aab9b9..54333118 100644 --- a/benchmark/runner/device_under_test.py +++ b/benchmark/runner/device_under_test.py @@ -40,6 +40,7 @@ def _retry(self, method, retries=3): def _get_name(self): name_retrieved = False + print("Retrieving name from DUT ...") for l in self._port.send_command("name"): match = re.match(r'^m-(name)-dut-\[([^]]+)]$', l) if match: diff --git a/benchmark/runner/devices_ad.yaml b/benchmark/runner/devices_ad.yaml index d0b665ee..31b89299 100644 --- a/benchmark/runner/devices_ad.yaml +++ b/benchmark/runner/devices_ad.yaml @@ -33,6 +33,7 @@ 0x0483: 0x374B - name: js220 type: power + interface: direct_usb preference: 1 # set to higher preference thatn lpm01a to use js220 raw_sampling_rate: 1000000 virtual_sampling_rate: 1000 diff --git a/benchmark/runner/devices_kws_ic_vww.yaml b/benchmark/runner/devices_kws_ic_vww.yaml index c8c76905..8a4c2264 100644 --- a/benchmark/runner/devices_kws_ic_vww.yaml +++ b/benchmark/runner/devices_kws_ic_vww.yaml @@ -33,6 +33,7 @@ 0x0483: 0x374B - name: js220 type: power + interface: direct_usb preference: 1 # set to higher preference thatn lpm01a to use js220 raw_sampling_rate: 1000000 virtual_sampling_rate: 1000 diff --git a/benchmark/runner/devices_sww.yaml b/benchmark/runner/devices_sww.yaml index f17d69dc..7bd18fdb 100644 --- a/benchmark/runner/devices_sww.yaml +++ b/benchmark/runner/devices_sww.yaml @@ -33,6 +33,7 @@ 0x0483: 0x374B - name: js220 type: power + interface: direct_usb preference: 1 # set to higher preference thatn lpm01a to use js220 raw_sampling_rate: 1000000 virtual_sampling_rate: 1000 diff --git a/benchmark/runner/img/L4R5Zi.png b/benchmark/runner/img/L4R5Zi.png index 809418cd..fb9a838d 100644 Binary files a/benchmark/runner/img/L4R5Zi.png and b/benchmark/runner/img/L4R5Zi.png differ diff --git a/benchmark/runner/main.py b/benchmark/runner/main.py index 24f9b531..2445311b 100644 --- a/benchmark/runner/main.py +++ b/benchmark/runner/main.py @@ -203,7 +203,6 @@ def print_energy_results(l_results, energy_sampling_freq=1000, req_cycles=5, res total_inference_energy = np.sum(inference_energy_samples) num_inferences = res['infer']['iterations'] energy_per_inf = total_inference_energy / num_inferences - latency_per_inf = elapsed_time / num_inferences inf_energies[inf_num] = energy_per_inf inf_times[inf_num] = elapsed_time @@ -226,6 +225,7 @@ def print_energy_results(l_results, energy_sampling_freq=1000, req_cycles=5, res # Summarize results def summarize_result(result, power, mode, results_file=None): + print(20*'-') num_correct_files = 0 total_files = 0 y_pred = [] @@ -252,7 +252,7 @@ def summarize_result(result, power, mode, results_file=None): print_energy_results(result, energy_sampling_freq=1000, results_file=results_file) return - for r in result: + for res_num,r in enumerate(result): if 'infer' not in r or 'class' not in r or 'file' not in r: continue # Skip malformed or error-only entries infer_data = r['infer'] @@ -266,7 +266,13 @@ def summarize_result(result, power, mode, results_file=None): if 'throughput' in infer_data: throughput_values.append(infer_data['throughput']) - + print_tee(f"Performance results for window {res_num+1}", outfile=results_file) + print_tee(f" # Inferences : {infer_data['iterations']}", outfile=results_file) + print_tee(f" Runtime: {infer_data['elapsed_time']/1e6} sec.", outfile=results_file) + 
print_tee(f" Throughput: {infer_data['throughput']} inf./sec.", outfile=results_file) + if infer_data['elapsed_time']/1e6 > 10.0: + print_tee(f" Runtime requirements have been met.", outfile=results_file) + if file_name not in file_infer_results: file_infer_results[file_name] = {'true_class': true_class, 'results': []} @@ -307,8 +313,11 @@ def summarize_result(result, power, mode, results_file=None): total_files += 1 accuracy = calculate_accuracy(np.array(y_pred), np.array(y_true)) - auc = roc_auc_score(np.array(y_true), np.array(y_pred), multi_class='ovr') - + + if np.array(y_pred).shape[1] == 2: + auc =roc_auc_score(np.array(y_true), np.array(y_pred)[:,1]) + else: + auc =roc_auc_score(np.array(y_true), np.array(y_pred), multi_class='ovr') current_time = datetime.now() formatted_time = current_time.strftime("%m%d.%H%M%S ") diff --git a/benchmark/runner/power_manager/power_manager_js220.py b/benchmark/runner/power_manager/power_manager_js220.py index 67fb936e..289e70f8 100644 --- a/benchmark/runner/power_manager/power_manager_js220.py +++ b/benchmark/runner/power_manager/power_manager_js220.py @@ -1,375 +1,380 @@ -import time -from joulescope import scan -import numpy as np - -class JS220PortWrapper: - def __init__(self, device): - self._device = device - - def __enter__(self): - return self - - def __exit__(self, *args): - return self - - def close(self): - return self._device.close() - - def write_line(self, line): - pass - - def read_line(self, timeout=None): - return None - - -class JoulescopeCommands: - def __init__(self, manager, js_device, config=None): - self.m = manager - self._device = js_device - self._triggered = False - self._last_sample_id = None - self._last_gpi = None - js_config = config or {} - self.raw_rate = int(js_config.get("raw_sampling_rate", 1000000)) - self.virtual_rate = int(js_config.get("virtual_sampling_rate", 1000)) - self.emit_stride = max(1, self.raw_rate // self.virtual_rate) - - self._device.parameter_set("source", "raw") - self._device.parameter_set("sensor_power", "on") - self._device.parameter_set("sampling_frequency", self.raw_rate) - self._device.parameter_set("io_voltage", "3.3V") - self._device.parameter_set("trigger_source", "gpi0") - self._device.parameter_set("current_lsb", "gpi0") - - try: - self._device.start() - except Exception as e: - print(f"[JS220] Failed to start device: {e}") - - def get_port(self): - return JS220PortWrapper(self._device) - - def setup(self): - pass - - def tear_down(self): - print("[JS220] Shutting down stream...") - self.m._running = False - try: - self._device.stop() - except Exception: - pass - try: - self._device.close() - except Exception: - pass - print("[JS220] Shutdown complete.") - - def read_loop(self): - sb = self._device.stream_buffer - print("[JS220] Stream reading started (trigger mimic)...") - - current_buffer = [] - voltage_buffer = [] - timestamp_buffer = [] - - ts_counter = 0 - last_emit_time = time.time() - event_counter = 0 - - while self.m._running: - - sample_id_range = sb.sample_id_range - if sample_id_range is None: - time.sleep(0.001) - continue - - start_id, end_id = sample_id_range - if self._last_sample_id is None or end_id < self._last_sample_id: - self._last_sample_id = start_id - - if end_id <= self._last_sample_id: - time.sleep(0.001) - continue - - try: - data = sb.samples_get(self._last_sample_id, end_id, fields=["current", "voltage", "current_lsb"]) - except ValueError: - self._last_sample_id = None - continue - - current = data["signals"]["current"]["value"] - voltage = 
data["signals"]["voltage"]["value"] - gpi0_vals = data["signals"]["current_lsb"]["value"] - t0 = time.time() - count = min(len(current), len(voltage), len(gpi0_vals)) - self._last_sample_id = end_id - - for i in range(count): - current_buffer.append(current[i]) - voltage_buffer.append(voltage[i]) - timestamp_buffer.append(t0) - - gpi = int(gpi0_vals[i] > 0) - if self._last_gpi is None: - self._last_gpi = gpi - continue - - # ✅ Emit event string (on rising edge only) - if self._triggered: - if self._last_gpi == 0 and gpi == 1: - event_line = f"event {event_counter:02} ris" - self.m._data_queue.put(event_line) - event_counter += 1 - self._triggered = False - else: - if self._last_gpi == 1 and gpi == 0: - self._triggered = True - - self._last_gpi = gpi - - if len(current_buffer) >= self.emit_stride: - avg_time = float(np.mean(timestamp_buffer)) - # ✅ Compute energy over 1ms: E = I * V * dt - energy = float(np.mean([i * v * 0.001 for i, v in zip(current_buffer, voltage_buffer)])) - self.m._data_queue.put(energy) - current_buffer.clear() - voltage_buffer.clear() - timestamp_buffer.clear() - - print("[JS220] Stream reading loop exited.") - - - - - def power_on(self): - return True - - def power_off(self): - return True - - def start(self): - return True - - def stop(self): - return True - - def get_board_id(self): - return self._device.device_path() - - def get_version(self): - try: - return self._device.info().get("fw") - except Exception: - return None - - def get_status(self): - return None - - def set_lcd(self, *args): - return [None, None] - - - - -""" -################################################################################ -# Joulescope Commands # -################################################################################ -# Parameter: sensor_power -# Options: -# [0] ('off', 0, []) -# [1] ('on', 1, []) - -# Parameter: source -# Options: -# [0] ('off', 0, []) -# [1] ('raw', 192, ['on']) -# [2] ('pattern_usb', 9, []) -# [3] ('pattern_control', 10, []) -# [4] ('pattern_sensor', 175, []) - -# Parameter: i_range -# Options: -# [0] ('auto', 128, ['on']) -# [1] ('10 A', 1, ['0', 0]) -# [2] ('2 A', 2, ['1', 1]) -# [3] ('180 mA', 4, ['2', 2]) -# [4] ('18 mA', 8, ['3', 3]) -# [5] ('1.8 mA', 16, ['4', 4]) -# [6] ('180 µA', 32, ['5', 5]) -# [7] ('18 µA', 64, ['6', 6]) -# [8] ('off', 0, []) - -# Parameter: v_range -# Options: -# [0] ('15V', 0, ['low', 0]) -# [1] ('5V', 1, ['high', 1]) - -# Parameter: ovr_to_lsb -# Options: -# [0] ('off', 0, []) -# [1] ('on', 1, []) - -# Parameter: trigger_source -# Options: -# [0] ('auto', 0, []) -# [1] ('gpi0', 2, []) -# [2] ('gpi1', 3, []) - -# Parameter: io_voltage -# Options: -# [0] ('1.8V', 1800, []) -# [1] ('2.1V', 2100, []) -# [2] ('2.5V', 2500, []) -# [3] ('2.7V', 2700, []) -# [4] ('3.0V', 3000, []) -# [5] ('3.3V', 3300, []) -# [6] ('3.6V', 3600, []) -# [7] ('5.0V', 5000, []) - -# Parameter: gpo0 -# Options: -# [0] ('0', 0, [0]) -# [1] ('1', 1, [1]) - -# Parameter: gpo1 -# Options: -# [0] ('0', 0, [0]) -# [1] ('1', 1, [1]) - -# Parameter: current_lsb -# Options: -# [0] ('normal', 0, []) -# [1] ('gpi0', 2, []) -# [2] ('gpi1', 3, []) - -# Parameter: voltage_lsb -# Options: -# [0] ('normal', 0, []) -# [1] ('gpi0', 2, []) -# [2] ('gpi1', 3, []) - -# Parameter: control_test_mode -# Options: -# [0] ('normal', 3, []) -# [1] ('usb', 129, []) -# [2] ('fpga', 130, []) -# [3] ('both', 131, []) - -# Parameter: transfer_length -# Options: -# [0] ('1', 1, []) -# [1] ('2', 2, []) -# [2] ('4', 4, []) -# [3] ('8', 8, []) -# [4] ('16', 16, []) -# [5] ('32', 32, 
[]) -# [6] ('64', 64, []) -# [7] ('128', 128, []) -# [8] ('256', 256, []) - -# Parameter: transfer_outstanding -# Options: -# [0] ('1', 1, []) -# [1] ('2', 2, []) -# [2] ('4', 4, []) -# [3] ('8', 8, []) - -# Parameter: current_ranging -# Current Value: interp_1_n_1 - -# Parameter: current_ranging_type -# Options: -# [0] ('off', 'off', []) -# [1] ('mean', 'mean', []) -# [2] ('interp', 'interp', ['interpolate']) -# [3] ('NaN', 'nan', ['nan']) - -# Parameter: current_ranging_samples_pre -# Options: -# [0] ('0', 0, [0]) -# [1] ('1', 1, [1]) -# [2] ('2', 2, [2]) -# [3] ('3', 3, [3]) -# [4] ('4', 4, [4]) -# [5] ('5', 5, [5]) -# [6] ('6', 6, [6]) -# [7] ('7', 7, [7]) -# [8] ('8', 8, [8]) - -# Parameter: current_ranging_samples_window -# Options: -# [0] ('m', 'm', []) -# [1] ('n', 'n', []) -# [2] ('0', 0, [0]) -# [3] ('1', 1, [1]) -# [4] ('2', 2, [2]) -# [5] ('3', 3, [3]) -# [6] ('4', 4, [4]) -# [7] ('5', 5, [5]) -# [8] ('6', 6, [6]) -# [9] ('7', 7, [7]) -# [10] ('8', 8, [8]) -# [11] ('9', 9, [9]) -# [12] ('10', 10, [10]) -# [13] ('11', 11, [11]) -# [14] ('12', 12, [12]) - -# Parameter: current_ranging_samples_post -# Options: -# [0] ('0', 0, [0]) -# [1] ('1', 1, [1]) -# [2] ('2', 2, [2]) -# [3] ('3', 3, [3]) -# [4] ('4', 4, [4]) -# [5] ('5', 5, [5]) -# [6] ('6', 6, [6]) -# [7] ('7', 7, [7]) -# [8] ('8', 8, [8]) - -# Parameter: buffer_duration -# Options: -# [0] ('15 seconds', 15, [15]) -# [1] ('30 seconds', 30, [30]) -# [2] ('1 minute', 60, [60]) -# [3] ('2 minutes', 120, [120]) -# [4] ('5 minutes', 300, [300]) -# [5] ('10 minutes', 600, [600]) -# [6] ('20 minutes', 1200, [1200]) -# [7] ('1 hour', 3600, [3600]) -# [8] ('2 hours', 7200, [7200]) -# [9] ('5 hours', 18000, [18000]) -# [10] ('10 hours', 36000, [36000]) -# [11] ('1 day', 86400, [86400]) - -# Parameter: reduction_frequency -# Options: -# [0] ('100 Hz', 100, [100]) -# [1] ('50 Hz', 50, [50]) -# [2] ('20 Hz', 20, [20]) -# [3] ('10 Hz', 10, [10]) -# [4] ('5 Hz', 5, [5]) -# [5] ('2 Hz', 2, [2]) -# [6] ('1 Hz', 1, [1]) - -# Parameter: sampling_frequency -# Options: -# [0] ('2 MHz', 2000000, [2000000, 'auto', None, 'default']) -# [1] ('1 MHz', 1000000, [1000000]) -# [2] ('500 kHz', 500000, [500000]) -# [3] ('200 kHz', 200000, [200000]) -# [4] ('100 kHz', 100000, [100000]) -# [5] ('50 kHz', 50000, [50000]) -# [6] ('20 kHz', 20000, [20000]) -# [7] ('10 kHz', 10000, [10000]) -# [8] ('5 kHz', 5000, [5000]) -# [9] ('2 kHz', 2000, [2000]) -# [10] ('1 kHz', 1000, [1000]) -# [11] ('500 Hz', 500, [500]) -# [12] ('200 Hz', 200, [200]) -# [13] ('100 Hz', 100, [100]) -# [14] ('50 Hz', 50, [50]) -# [15] ('20 Hz', 20, [20]) -# [16] ('10 Hz', 10, [10]) +import time +from joulescope import scan +import numpy as np + + +class JS220PortWrapper: + def __init__(self, device): + self._device = device + + def __enter__(self): + return self + + def __exit__(self, *args): + return self + + def close(self): + return self._device.close() + + def write_line(self, line): + pass + + def read_line(self, timeout=None): + return None + + +class JoulescopeCommands: + def __init__(self, manager, js_device, config=None): + self.m = manager + self._device = js_device + self._triggered = False + self._last_sample_id = None + self._last_gpi = None + self._shutdown_in_progress = False + + js_config = config or {} + self.raw_rate = int(js_config.get("raw_sampling_rate", 1000000)) + self.virtual_rate = int(js_config.get("virtual_sampling_rate", 1000)) + self.emit_stride = max(1, self.raw_rate // self.virtual_rate) + + self._device.parameter_set("source", "raw") + 
self._device.parameter_set("sensor_power", "on") + self._device.parameter_set("sampling_frequency", self.raw_rate) + self._device.parameter_set("io_voltage", "3.3V") + self._device.parameter_set("trigger_source", "gpi0") + self._device.parameter_set("current_lsb", "gpi0") + + try: + self._device.start() + except Exception as e: + print(f"[JS220] Failed to start device: {e}") + + def get_port(self): + return JS220PortWrapper(self._device) + + def setup(self): + pass + + def tear_down(self): + print("[JS220] Shutting down stream...") + self._shutdown_in_progress = True + self.m._running = False + + # Let read loop catch the stop signal + time.sleep(0.05) + + try: + self._device.stop() + except Exception: + pass + + try: + self._device.close() + except Exception: + pass + + print("[JS220] Shutdown complete.") + + def read_loop(self): + print("[JS220] Stream reading started...") + self._last_sample_id = None + self._last_gpi = None + event_counter = 0 + + energy_acc = 0.0 + sample_counter = 0 + pending_event_in_stride = False + + try: + while self.m._running and not self._shutdown_in_progress: + sb = self._device.stream_buffer + if sb is None: + break + + sample_id_range = sb.sample_id_range + if sample_id_range is None: + time.sleep(0.001) + continue + + start_id, end_id = sample_id_range + + if self._last_sample_id is None or self._last_sample_id < start_id: + self._last_sample_id = start_id + + if self._last_sample_id >= end_id: + time.sleep(0.0005) + continue + + try: + data = sb.samples_get( + self._last_sample_id, + end_id, + fields=["current", "voltage", "current_lsb"] + ) + except ValueError as e: + print(f"[JS220] Sample fetch failed: {e}") + self._last_sample_id = end_id + continue + + self._last_sample_id = end_id + current = data["signals"]["current"]["value"] + voltage = data["signals"]["voltage"]["value"] + gpi0_vals = data["signals"]["current_lsb"]["value"] + + count = min(len(current), len(voltage), len(gpi0_vals)) + + for i in range(count): + cur = current[i] + volt = voltage[i] + gpi = int(gpi0_vals[i] > 0) + + energy_acc += cur * volt / self.raw_rate + sample_counter += 1 + + if self._last_gpi is not None and self._last_gpi == 0 and gpi == 1: + pending_event_in_stride = True + + self._last_gpi = gpi + + if sample_counter >= self.emit_stride or (pending_event_in_stride and sample_counter > 0): + self.m._data_queue.put(float(energy_acc)) + if pending_event_in_stride: + self.m._data_queue.put(f"event {event_counter:02} ris") + event_counter += 1 + energy_acc = 0.0 + sample_counter = 0 + pending_event_in_stride = False + finally: + print("[JS220] Stream reading loop exited.") + + def power_on(self): + return True + + def power_off(self): + return True + + def start(self): + return True + + def stop(self): + return True + + def get_board_id(self): + return self._device.device_path() + + def get_version(self): + try: + return self._device.info().get("fw") + except Exception: + return None + + def get_status(self): + return None + + def set_lcd(self, *args): + return [None, None] + + + + +""" +################################################################################ +# Joulescope Commands # +################################################################################ +# Parameter: sensor_power +# Options: +# [0] ('off', 0, []) +# [1] ('on', 1, []) + +# Parameter: source +# Options: +# [0] ('off', 0, []) +# [1] ('raw', 192, ['on']) +# [2] ('pattern_usb', 9, []) +# [3] ('pattern_control', 10, []) +# [4] ('pattern_sensor', 175, []) + +# Parameter: i_range +# Options: +# [0] 
('auto', 128, ['on']) +# [1] ('10 A', 1, ['0', 0]) +# [2] ('2 A', 2, ['1', 1]) +# [3] ('180 mA', 4, ['2', 2]) +# [4] ('18 mA', 8, ['3', 3]) +# [5] ('1.8 mA', 16, ['4', 4]) +# [6] ('180 µA', 32, ['5', 5]) +# [7] ('18 µA', 64, ['6', 6]) +# [8] ('off', 0, []) + +# Parameter: v_range +# Options: +# [0] ('15V', 0, ['low', 0]) +# [1] ('5V', 1, ['high', 1]) + +# Parameter: ovr_to_lsb +# Options: +# [0] ('off', 0, []) +# [1] ('on', 1, []) + +# Parameter: trigger_source +# Options: +# [0] ('auto', 0, []) +# [1] ('gpi0', 2, []) +# [2] ('gpi1', 3, []) + +# Parameter: io_voltage +# Options: +# [0] ('1.8V', 1800, []) +# [1] ('2.1V', 2100, []) +# [2] ('2.5V', 2500, []) +# [3] ('2.7V', 2700, []) +# [4] ('3.0V', 3000, []) +# [5] ('3.3V', 3300, []) +# [6] ('3.6V', 3600, []) +# [7] ('5.0V', 5000, []) + +# Parameter: gpo0 +# Options: +# [0] ('0', 0, [0]) +# [1] ('1', 1, [1]) + +# Parameter: gpo1 +# Options: +# [0] ('0', 0, [0]) +# [1] ('1', 1, [1]) + +# Parameter: current_lsb +# Options: +# [0] ('normal', 0, []) +# [1] ('gpi0', 2, []) +# [2] ('gpi1', 3, []) + +# Parameter: voltage_lsb +# Options: +# [0] ('normal', 0, []) +# [1] ('gpi0', 2, []) +# [2] ('gpi1', 3, []) + +# Parameter: control_test_mode +# Options: +# [0] ('normal', 3, []) +# [1] ('usb', 129, []) +# [2] ('fpga', 130, []) +# [3] ('both', 131, []) + +# Parameter: transfer_length +# Options: +# [0] ('1', 1, []) +# [1] ('2', 2, []) +# [2] ('4', 4, []) +# [3] ('8', 8, []) +# [4] ('16', 16, []) +# [5] ('32', 32, []) +# [6] ('64', 64, []) +# [7] ('128', 128, []) +# [8] ('256', 256, []) + +# Parameter: transfer_outstanding +# Options: +# [0] ('1', 1, []) +# [1] ('2', 2, []) +# [2] ('4', 4, []) +# [3] ('8', 8, []) + +# Parameter: current_ranging +# Current Value: interp_1_n_1 + +# Parameter: current_ranging_type +# Options: +# [0] ('off', 'off', []) +# [1] ('mean', 'mean', []) +# [2] ('interp', 'interp', ['interpolate']) +# [3] ('NaN', 'nan', ['nan']) + +# Parameter: current_ranging_samples_pre +# Options: +# [0] ('0', 0, [0]) +# [1] ('1', 1, [1]) +# [2] ('2', 2, [2]) +# [3] ('3', 3, [3]) +# [4] ('4', 4, [4]) +# [5] ('5', 5, [5]) +# [6] ('6', 6, [6]) +# [7] ('7', 7, [7]) +# [8] ('8', 8, [8]) + +# Parameter: current_ranging_samples_window +# Options: +# [0] ('m', 'm', []) +# [1] ('n', 'n', []) +# [2] ('0', 0, [0]) +# [3] ('1', 1, [1]) +# [4] ('2', 2, [2]) +# [5] ('3', 3, [3]) +# [6] ('4', 4, [4]) +# [7] ('5', 5, [5]) +# [8] ('6', 6, [6]) +# [9] ('7', 7, [7]) +# [10] ('8', 8, [8]) +# [11] ('9', 9, [9]) +# [12] ('10', 10, [10]) +# [13] ('11', 11, [11]) +# [14] ('12', 12, [12]) + +# Parameter: current_ranging_samples_post +# Options: +# [0] ('0', 0, [0]) +# [1] ('1', 1, [1]) +# [2] ('2', 2, [2]) +# [3] ('3', 3, [3]) +# [4] ('4', 4, [4]) +# [5] ('5', 5, [5]) +# [6] ('6', 6, [6]) +# [7] ('7', 7, [7]) +# [8] ('8', 8, [8]) + +# Parameter: buffer_duration +# Options: +# [0] ('15 seconds', 15, [15]) +# [1] ('30 seconds', 30, [30]) +# [2] ('1 minute', 60, [60]) +# [3] ('2 minutes', 120, [120]) +# [4] ('5 minutes', 300, [300]) +# [5] ('10 minutes', 600, [600]) +# [6] ('20 minutes', 1200, [1200]) +# [7] ('1 hour', 3600, [3600]) +# [8] ('2 hours', 7200, [7200]) +# [9] ('5 hours', 18000, [18000]) +# [10] ('10 hours', 36000, [36000]) +# [11] ('1 day', 86400, [86400]) + +# Parameter: reduction_frequency +# Options: +# [0] ('100 Hz', 100, [100]) +# [1] ('50 Hz', 50, [50]) +# [2] ('20 Hz', 20, [20]) +# [3] ('10 Hz', 10, [10]) +# [4] ('5 Hz', 5, [5]) +# [5] ('2 Hz', 2, [2]) +# [6] ('1 Hz', 1, [1]) + +# Parameter: sampling_frequency +# Options: +# [0] ('2 MHz', 2000000, 
[2000000, 'auto', None, 'default']) +# [1] ('1 MHz', 1000000, [1000000]) +# [2] ('500 kHz', 500000, [500000]) +# [3] ('200 kHz', 200000, [200000]) +# [4] ('100 kHz', 100000, [100000]) +# [5] ('50 kHz', 50000, [50000]) +# [6] ('20 kHz', 20000, [20000]) +# [7] ('10 kHz', 10000, [10000]) +# [8] ('5 kHz', 5000, [5000]) +# [9] ('2 kHz', 2000, [2000]) +# [10] ('1 kHz', 1000, [1000]) +# [11] ('500 Hz', 500, [500]) +# [12] ('200 Hz', 200, [200]) +# [13] ('100 Hz', 100, [100]) +# [14] ('50 Hz', 50, [50]) +# [15] ('20 Hz', 20, [20]) +# [16] ('10 Hz', 10, [10]) """ \ No newline at end of file diff --git a/benchmark/runner/streaming_ww_utils.py b/benchmark/runner/streaming_ww_utils.py index fac3543f..2700049b 100644 --- a/benchmark/runner/streaming_ww_utils.py +++ b/benchmark/runner/streaming_ww_utils.py @@ -46,7 +46,8 @@ def array_from_strings(raw_info, header_str, end_str='m-ready', data_type=None): except ValueError: # Just replace an invalid value with the previous value to avoid corrupting the timing print(f"WARNING: Invalid element '{val}' for conversion to {converter} at element {len(number_lists)}. Replacing with previous value.") - number_lists.append(number_lists[-1]) + if len(number_lists) > 0: + number_lists.append(number_lists[-1]) if len(number_lists) == 1: number_lists = number_lists[0] # only 1 array, don't make it 2D @@ -87,7 +88,8 @@ def process_dutycycle(raw_result): # end of start-times, beginning of stop-times. target_list = proc_stop_times continue - if line.find('m-ready') > 0: + + if line.find('m-ready') >= 0: line = line.replace('m-ready', '') endstr_found = True @@ -95,11 +97,17 @@ def process_dutycycle(raw_result): proc_start_times = np.array(proc_start_times)*10e-6 proc_stop_times = np.array(proc_stop_times)*10e-6 - + if len(proc_stop_times) != len(proc_start_times): err_str = f"Number of start times ({len(proc_start_times)}) and number of " err_str += f"stop times ({len(proc_stop_times)}) should be equal" - raise RuntimeError(err_str) + # raise RuntimeError(err_str) + print(f"Warning: {err_str}") + num_pulses = min(len(proc_stop_times), len(proc_start_times)) + proc_start_times = proc_start_times[:num_pulses] + proc_stop_times = proc_stop_times[:num_pulses] + + on_times = proc_stop_times - proc_start_times periods = np.diff(proc_start_times) periods_fractional_var = (np.max(periods) - np.min(periods))/np.mean(periods) @@ -186,13 +194,14 @@ def summarize_sww_result(results_list, power, results_file=None): inf_res["detections"], inf_res["detection_windows"]) print(f"== File {inf_res['wav_file']} ({inf_res['length_sec']:2.1f} s) == ") with np.printoptions(precision=3): - print_tee(f" True positives: {true_pos_sec}", outfile=results_file) - print_tee(f" False negatives: {false_neg_sec}", outfile=results_file) - print_tee(f" False positives: {false_pos_sec}", outfile=results_file) - print_tee(f"{len(true_pos_sec)} True positives, {len(false_neg_sec)} False negatives, {len(false_pos_sec)} False positives", outfile=results_file) - + print_tee(f"Accuracy: {len(true_pos_sec)} True positives, {len(false_neg_sec)} False negatives, {len(false_pos_sec)} False positives", outfile=results_file) + print(f" False negatives: {false_neg_sec}") # these are useful for debugging but are not needed in results.txt + print(f" False positives: {false_pos_sec}") + if 'dutycycle' in res: - print_tee(f" Average duty cycle: {res['dutycycle'].get('duty_cycle'):1.5}") - print_tee(f" Average period: {res['dutycycle'].get('period'):1.5} s") + throughput = 1.0/res['dutycycle']['processing_time'] + 
print_tee(f"Average duty cycle: {res['dutycycle'].get('duty_cycle'):1.5}", outfile=results_file) + print_tee(f"Average period: {res['dutycycle'].get('period'):1.5} s", outfile=results_file) + print_tee(f"Estimated throughput: {throughput:1.5} inf./sec.", outfile=results_file) else: print_tee(f"No duty cycle data recorded") \ No newline at end of file diff --git a/benchmark/runner/tests_performance.yaml b/benchmark/runner/tests_performance.yaml index cd66948f..4dd23264 100644 --- a/benchmark/runner/tests_performance.yaml +++ b/benchmark/runner/tests_performance.yaml @@ -3,7 +3,7 @@ ad01: model: ad01 truth_file: y_labels.csv script: - - loop 10: + - loop 5: - download - infer 1500 10 ic01: @@ -11,7 +11,7 @@ ic01: model: ic01 truth_file: y_labels.csv script: - - loop 10: + - loop 5: - download - infer 20 5 kws01: @@ -19,7 +19,7 @@ kws01: model: kws01 truth_file: y_labels.csv script: - - loop 10: + - loop 5: - download - infer 70 10 vww01: @@ -27,12 +27,12 @@ vww01: model: vww01 truth_file: y_labels.csv script: - - loop 10: + - loop 5: - download - infer 20 5 sww01: name: streaming_wakeword model: sww01 - truth_file: sww_short_test.json + truth_file: sww_long_test.json script: - stream \ No newline at end of file diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 0ee5e9f0..4b612a82 100644 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -4,6 +4,9 @@ import os import re import sys +import pandas as pd +# can't import csv because we have a variable named csv, so ... +from csv import QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE logging.basicConfig(level=logging.INFO) log = logging.getLogger("main") @@ -11,7 +14,7 @@ MODEL_CONFIG = { "v1.0": { - "models": ["ad", "ic", "kws", "vww"], + "models": ["ad", "ic", "kws", "vww"], "required-scenarios": { # anything goes }, @@ -30,6 +33,7 @@ "image_classification": "ic", "keyword_spotting": "kws", "visual_wake_words": "vww", + "streaming_wakeword_detection": "sww", }, }, "v1.1": { @@ -65,27 +69,67 @@ }, "model_mapping": { }, + }, + "v1.3": { + "models": ["ad", "ic", "kws", "vww", "sww"], + "required-scenarios": { + # anything goes + }, + "optional-scenarios": { + # anything goes + }, + "accuracy-target": { + "ad": ("auc", 0.85), + "ic": ("top-1", 85), + "kws": ("top-1", 90), + "vww": ("top-1", 80), + "sww": ("fps_fns", (8,8)), + }, + "model_mapping": { + }, + "required_tests": { + "ad": ["accuracy", "performance"], + "ic": ["accuracy", "performance"], + "kws": ["accuracy", "performance"], + "vww": ["accuracy", "performance"], + "sww": [] + }, + "optional_tests": { + "ad": ["energy"], + "ic": ["energy"], + "kws": ["energy"], + "vww": ["energy"], + "sww": ["energy", "performance"] + }, + "required_files": ["log.txt", "results.json", "results.txt"] } } VALID_DIVISIONS = ["open", "closed"] VALID_AVAILABILITIES = ["available", "preview", "rdi"] -REQUIRED_ACC_FILES = [ +EEMBC_REQUIRED_ACC_FILES = [ "log.txt", "results.txt", "script.async", ] +MLC_REQUIRED_ACC_FILES = [ + "log.txt", "results.txt", + "results.json", +] ACC_FILE = "results.txt" ACC_PATTERN = { "top-1": - r".* Top-1: ([\d\.]+).*", + r".* Top[- ]1%?\s?[:=] ([\d\.]+).*", # match "Top-1: 91.1%" (old) or "Top 1% = 85.4" (new) "auc": - r".* AUC: ([\d\.]+).*", + r".* AUC\s?[:=] ([\d\.]+).*", # match "AUC: 0.93" (old) or "AUC = 0.862" (new) } FILE_SIZE_LIMIT_MB = 500 MB_TO_BYTES = 1024*1024 -REQUIRED_PERF_FILES = REQUIRED_ACC_FILES -OPTIONAL_PERF_FILES = [""] -REQUIRED_PERF_POWER_FILES = REQUIRED_ACC_FILES 
+EEMBC_REQUIRED_PERF_FILES = EEMBC_REQUIRED_ACC_FILES
+EEMBC_REQUIRED_PERF_POWER_FILES = EEMBC_REQUIRED_ACC_FILES
+MLC_REQUIRED_PERF_FILES = MLC_REQUIRED_ACC_FILES
+MLC_REQUIRED_PERF_POWER_FILES = MLC_REQUIRED_ACC_FILES
+
+OPTIONAL_PERF_FILES = [""]
 
 def list_dir(*path):
   path = os.path.join(*path)
@@ -115,6 +159,64 @@ def list_files_recursively(*path):
 def split_path(m):
   return m.replace("\\", "/").split("/")
 
+def compare_versions(ver_a, ver_b):
+  """
+  compare versions ver_a and ver_b
+  if ver_a < ver_b  => return -1
+  if ver_a == ver_b => return 0
+  if ver_a > ver_b  => return +1
+  Versions should be strings that look like
+  "vA.B.C..." or "A.B.C"
+  The leading "v" is optional and meaningless: "A.B.C" == "vA.B.C"
+  Numbers earlier in the string take precedence. "v1.2" > "v0.9"
+  Any subversion at a given level (even 0) is greater than nothing: "v1.2.0" > "v1.2"
+  """
+  ## These should all pass
+  # assert compare_versions("v1.2.5", "v1.2") == +1
+  # assert compare_versions("v1.2.5", "v1.2.6") == -1
+  # assert compare_versions("v1.2.5", "v1.2.10") == -1
+  # assert compare_versions("1.2.5.55", "v1.2.10") == -1
+  # assert compare_versions("1.3", "1.2.10") == +1
+  # assert compare_versions("72.23.99", "v72.23.99") == 0
+
+  parts_a = ver_a.lstrip("v").split('.')
+  parts_b = ver_b.lstrip("v").split('.')
+  num_sub_vers = max(len(parts_a), len(parts_b))
+  for i in range(num_sub_vers):
+
+    if len(parts_a) > i:
+      try:
+        sub_ver_a = int(parts_a[i])
+      except ValueError:
+        raise ValueError(f"Could not convert subfield {parts_a[i]} of version {ver_a}")
+    else:
+      sub_ver_a = None
+    if len(parts_b) > i:
+      try:
+        sub_ver_b = int(parts_b[i])
+      except ValueError:
+        raise ValueError(f"Could not convert subfield {parts_b[i]} of version {ver_b}")
+    else:
+      sub_ver_b = None
+
+    # print(f"Step {i}: Comparing A: {sub_ver_a} to B: {sub_ver_b}")
+    # NB: test explicitly against None so that a subversion of 0 still counts as
+    # present (e.g. "v1.2.0" > "v1.2"), as the docstring promises.
+    if sub_ver_a is not None and sub_ver_b is None:
+      return +1
+    elif sub_ver_b is not None and sub_ver_a is None:
+      return -1
+    elif sub_ver_a is None and sub_ver_b is None:
+      return 0  # should not reach this line
+
+    if sub_ver_a > sub_ver_b:
+      return +1
+    elif sub_ver_a < sub_ver_b:
+      return -1
+  # we made it all the way through the version string without breaking the comparison,
+  # so they should be equivalent
+  return 0
+
+
+
 class Config():
   """Select config value by mlperf version and submission type."""
 
@@ -139,7 +241,7 @@ def __init__(self,
     self.more_power_check = more_power_check
 
   def set_type(self, submission_type):
-    if submission_type is None and self.version in ["v1.0", "v1.1", "v1.2"]:
+    if submission_type is None and self.version in ["v1.0", "v1.1", "v1.2", "v1.3"]:
       self.required = self.base["required-scenarios"]
       self.optional = self.base["optional-scenarios"]
     else:
@@ -226,7 +328,7 @@ def get_args():
   parser.add_argument("--input", required=True, help="submission directory")
   parser.add_argument(
       "--version",
-      default="v1.2",
+      default="v1.3",
      choices=list(MODEL_CONFIG.keys()),
      help="mlperf version")
   parser.add_argument("--submitter", help="filter to submitter")
@@ -257,7 +359,7 @@ def get_args():
 def check_results_dir(config,
                       filter_submitter,
                       skip_compliance,
-                      csv,
+                      df_results,
                       debug=False):
   """
   Walk the results directory and do the checking.
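
For a quick sanity check of `compare_versions` above, the following assertions (mirroring the examples already in its docstring, and not part of the checker itself) should all pass:
```
assert compare_versions("v1.2.5", "v1.2") == +1     # extra subversion wins
assert compare_versions("v1.2.5", "v1.2.10") == -1  # comparison is numeric, not lexical
assert compare_versions("1.2.0", "v1.2") == +1      # an explicit 0 still outranks nothing
assert compare_versions("72.23.99", "v72.23.99") == 0
```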
@@ -280,6 +382,7 @@ check_results_dir(config,
   if there are errors write a None as result so we can report
   later what failed
   """
+
   head = [
       "Organization", "Availability", "Division", "BoardName",
       "SystemDesc", "Model", "MlperfModel",
@@ -290,8 +393,16 @@ check_results_dir(config,
       "HardwareNotes", "InferenceFramework", "SoftwareLibraries", "SoftwareNotes",
   ]
-  fmt = ",".join(["\"{}\""] * len(head)) + "\n"
-  csv.write(",".join(head) + "\n")
+  # Add each column in 'head', appending to the right side
+  for col in head:
+    df_results.insert(len(df_results.columns), col, None)
+
+  if compare_versions(config.version, "v1.3") >= 0:  # version >= v1.3
+    df_results.insert(len(df_results.columns), "FalsePositives", None)
+    df_results.insert(len(df_results.columns), "FalseNegatives", None)
+    df_results.insert(len(df_results.columns), "DutyCycle", None)
+
+
   results = {}
 
   def log_result(submitter,
@@ -310,14 +421,18 @@ def log_result(submitter,
                  errors,
                  config,
                  inferred=0,
-                 power_metric=0):
-
+                 power_metric=0,
+                 results_dict=None):
+    # default to None rather than {} to avoid sharing one mutable dict between calls
+    results_dict = results_dict or {}
+
     notes = system_json.get("hw_notes", "")
     if system_json.get("sw_notes"):
       notes = notes + ". " if notes else ""
       notes = notes + system_json.get("sw_notes")
     unit_dict = {
         "": "inf./sec.",
+        "streaming": "inf./sec.",
         "SingleStream": "Latency (ms)",
         "MultiStream": "Latency (ms)",
         "Offline": "Samples/s",
@@ -325,6 +438,7 @@
     }
     power_unit_dict = {
         "": "uJ/inf.",
+        "streaming": "mW",
         "SingleStream": "millijoules",
         "MultiStream": "millijoules",
         "Offline": "Watts",
@@ -333,48 +447,36 @@
 
     unit = unit_dict[scenario_fixed]
     power_unit = power_unit_dict[scenario_fixed]
 
-    csv.write(
-        fmt.format(submitter,
-                   available,
-                   division,
-                   system_json.get("Board Name"),
-                   system_desc,
-                   model_name,
-                   mlperf_model,
-                   r,
-                   unit,
-                   acc,
-                   power_metric > 0,
-                   power_metric,
-                   power_unit,
-                   system_json.get("Processor(s) Name"),  # HostProcessorModelName
-                   system_json.get("Processor(s) Frequencies"),
-                   system_json.get("Processor memory type and capacity"),
-                   system_json.get("Accelerator"),
-                   system_json.get("Accelerator(s) Frequencies"),
-                   system_json.get("Accelerator memory type and capacity"),
-                   system_json.get("Hardware Notes"),
-                   system_json.get("Inference Framework"),
-                   system_json.get("Software Libraries"),
-                   system_json.get("Software Notes"),
-                   ))
-
-    # if power_metric > 0:
-    #   csv.write(
-    #       fmt.format(submitter, available, division, '\"' + system_type + '\"',
-    #                  '\"' + system_name + '\"', system_desc, model_name,
-    #                  mlperf_model, scenario_fixed, power_metric, acc,
-    #                  system_json.get("number_of_nodes"),
-    #                  '"' + system_json.get("host_processor_model_name") + '"',
-    #                  system_json.get("host_processors_per_node"),
-    #                  system_json.get("host_processor_core_count"),
-    #                  '"' + system_json.get("accelerator_model_name") + '"',
-    #                  '"' + str(system_json.get("accelerators_per_node")) + '"',
-    #                  name.replace("\\", "/"),
-    #                  '"' + system_json.get("framework", "") + '"',
-    #                  '"' + system_json.get("operating_system", "") + '"',
-    #                  '"' + notes + '"', compliance, errors, config.version,
-    #                  inferred, power_metric > 0, power_unit))
+    df_results.loc[len(df_results)] = {
+      "Organization": submitter,
+      "Availability": available,
+      "Division": division,
+      "BoardName": system_json.get("Board Name"),
+      "SystemDesc": system_desc,
+      "Model": model_name,
+      "MlperfModel": mlperf_model,
+      "Result": r,
+      "ResultUnit": unit,
+      "Accuracy": acc,
+      "HasPower": power_metric > 0,
+      "Power": power_metric,
+      "PowerUnit": power_unit,
+      "HostProcessorModelName": 
system_json.get("Processor(s) Name"), # HostProcessorModelName + "HostProcessorFrequency": system_json.get("Processor(s) Frequencies"), + "HostProcessorMemory": system_json.get("Processor memory type and capacity"), + "AcceleratorModelName": system_json.get("Accelerator"), + "AcceleratorFrequency": system_json.get("Accelerator(s) Frequencies"), + "AcceleratorMemory": system_json.get("Accelerator memory type and capacity"), + "HardwareNotes": system_json.get("Hardware Notes"), + "InferenceFramework": system_json.get("Inference Framework"), + "SoftwareLibraries": system_json.get("Software Libraries"), + "SoftwareNotes": system_json.get("Software Notes"), + "FalsePositives": results_dict.get("fp"), + "FalseNegatives": results_dict.get("fn"), + "DutyCycle": results_dict.get("duty_cycle") + } + print() + ## end of log_result() # we are at the top of the submission directory for division in list_dir("."): @@ -506,20 +608,7 @@ def log_result(submitter, results[name] = None continue system_type = system_json.get("system_type") - # if config.version not in ["v0.5"]: - # valid_system_types = ["datacenter", "edge"] - # if config.version not in ["v0.7"]: - # valid_system_types += ["datacenter,edge", "edge,datacenter"] - # if system_type not in valid_system_types: - # log.error("%s has invalid system type (%s)", system_id_json, - # system_type) - # results[name] = None - # continue config.set_type(system_type) - # if not check_system_desc_id(name, system_json, submitter, division, - # config.version): - # results[name] = None - # continue # # Look at each model @@ -529,6 +618,16 @@ def log_result(submitter, # we are looking at ./$division/$submitter/results/$system_desc/$model, # ie ./closed/mlperf_org/results/t4-ort/bert name = os.path.join(results_path, system_desc, model_name) + if os.path.exists(os.path.join(name, "EEMBC_RUNNER")): + runner_type = "EEMBC_RUNNER" + REQUIRED_ACC_FILES = EEMBC_REQUIRED_ACC_FILES + REQUIRED_PERF_FILES = EEMBC_REQUIRED_PERF_FILES + REQUIRED_PERF_POWER_FILES = EEMBC_REQUIRED_PERF_POWER_FILES + else: + runner_type = "MLC_RUNNER" + REQUIRED_ACC_FILES = MLC_REQUIRED_ACC_FILES + REQUIRED_PERF_FILES = MLC_REQUIRED_PERF_FILES + REQUIRED_PERF_POWER_FILES = MLC_REQUIRED_PERF_POWER_FILES mlperf_model = config.get_mlperf_model(model_name, extra_model_mapping) if is_closed_or_network and mlperf_model not in config.models: @@ -543,53 +642,21 @@ def log_result(submitter, # Look at each scenario # required_scenarios = config.get_required(mlperf_model) - # if required_scenarios is None: - # log.error("%s has an invalid model %s, system_type=%s", name, - # mlperf_model, system_type) - # results[name] = None - # continue errors = 0 # all_scenarios = set( # # list(required_scenarios) + # list(config.get_optional(mlperf_model))) - for scenario in [""]: + scenario_list = ["streaming"] if mlperf_model == "sww" else [""] + for scenario in scenario_list: scenario_fixed = scenario - # for scenario in list_dir(results_path, system_desc, model_name): - # # some submissions in v0.5 use lower case scenarios - map them for now - # scenario_fixed = SCENARIO_MAPPING.get(scenario, scenario) - - # we are looking at ./$division/$submitter/results/$system_desc/$model/$scenario, - # ie ./closed/mlperf_org/results/t4-ort/bert/Offline - # name = os.path.join(results_path, system_desc, model_name, scenario) - # results[name] = None - # if is_closed_or_network and scenario_fixed not in all_scenarios: - # log.warning( - # "%s ignoring scenario %s (neither required nor optional)", - # name, scenario) - # 
continue
-
-          # check if measurement_dir is good.
-          # measurement_dir = os.path.join(division, submitter, "measurements",
-          #                                system_desc, model_name, scenario)
-          # if not os.path.exists(measurement_dir):
-          #   log.error("no measurement_dir for %s", measurement_dir)
-          #   results[measurement_dir] = None
-          #   errors += 1
-          # else:
-          #   if not check_measurement_dir(measurement_dir, name, system_desc,
-          #                                os.path.join(division, submitter),
-          #                                model_name, scenario):
-          #     log.error("%s measurement_dir has issues", measurement_dir)
-          #     # results[measurement_dir] = None
-          #     errors += 1
-          #     # FIXME: we should not accept this submission
-          #     # continue
-
           # check accuracy
           accuracy_is_valid = False
           acc_path = os.path.join(name, "accuracy")
-          if not os.path.exists(os.path.join(acc_path, ACC_FILE)):
+
+          if "accuracy" not in config.base["required_tests"][model_name]:
+            pass  # accuracy run is not required for this benchmark
+          elif not os.path.exists(os.path.join(acc_path, ACC_FILE)):
             log.error(
                 "%s has no results.txt.", acc_path)
           else:
@@ -622,39 +689,85 @@
             log.info("Detected power logs for %s", name)
 
           for i in n:
+            results_dict = {}
             perf_path = os.path.join(name, "performance", i)
-            if not os.path.exists(perf_path):
-              log.error("%s is missing", perf_path)
-              continue
+            has_performance = os.path.exists(perf_path)
+            requires_performance = "performance" in config.base["required_tests"][model_name]
+            allowed_tests = set.union(
+                set(config.base["required_tests"][model_name]),
+                set(config.base["optional_tests"][model_name])
+            )
+
             if has_power:
               required_perf_files = REQUIRED_PERF_FILES + REQUIRED_PERF_POWER_FILES
             else:
              required_perf_files = REQUIRED_PERF_FILES
-            diff = files_diff(
-                list_files(perf_path), required_perf_files,
-                OPTIONAL_PERF_FILES)
-            if diff:
-              log.error("%s has file list mismatch (%s)", perf_path, diff)
-
-            try:
-              is_valid, r, is_inferred = check_performance_dir(
-                  config, mlperf_model, perf_path, scenario_fixed, division,
-                  system_json)
-              if is_inferred:
-                inferred = 1
-                log.info("%s has inferred results, qps=%s", perf_path, r)
-            except Exception as e:
-              log.error("%s caused exception in check_performance_dir: %s",
-                        perf_path, e)
-              is_valid, r = False, None
+            if requires_performance and not has_performance:
+              log.error("%s is missing", perf_path)
+              continue
+            elif model_name == "sww":
+              if has_power:
+                sww_results_path = power_path
+                extra_required_files = ["energy_inf_000.png"]
+              else:
+                sww_results_path = perf_path
+                extra_required_files = []
+              diff = files_diff(list_files(sww_results_path),
+                                required_perf_files + extra_required_files,
+                                OPTIONAL_PERF_FILES
+                                )
+              if diff:
+                log.error("%s has file list mismatch (%s)", sww_results_path, diff)
+
+              try:
+                is_valid, results_dict = sww_perf_acc(
+                    config, mlperf_model, sww_results_path)
+                r = results_dict['throughput']
+                acc = results_dict['accuracy']  # this is the F1 score for sww
+                if results_dict['fp'] > 8 or results_dict['fn'] > 8:
+                  if is_closed_or_network:
+                    log.error(f"FP={results_dict['fp']} and FN={results_dict['fn']} should both be <= 8 in closed division SWW.")
+                    accuracy_is_valid = False
+                  else:
+                    log.info(f"FP={results_dict['fp']} and FN={results_dict['fn']} exceed closed-division limits, but acceptable since this is an open division submission.")
+                    accuracy_is_valid = True
+                else:
+                  accuracy_is_valid = True
+
+              except Exception as e:
+                log.error("%s caused exception in sww_perf_acc(): %s",
+                          perf_path, str(e))
+                is_valid, r = False, None
+
+            elif has_performance:
+              diff = files_diff(list_files(perf_path),
+                                required_perf_files,
+                                OPTIONAL_PERF_FILES
+                                )
+              if diff:
+                log.error("%s has file list mismatch (%s)", perf_path, diff)
+
+              try:
+                is_valid, r, is_inferred = check_performance_dir(
+                    config, mlperf_model, perf_path, scenario_fixed, division,
+                    system_json)
+                if is_inferred:
+                  inferred = 1
+                  log.info("%s has inferred results, qps=%s", perf_path, r)
+              except Exception as e:
+                log.error("%s caused exception in check_performance_dir: %s",
+                          perf_path, str(e))
+                is_valid, r = False, None
+            else:
+              is_valid = False
+              log.warning("The script should never reach this point. This is an unaccounted-for condition.")
 
             power_metric = 0
             if has_power:
               try:
                 ranging_path = os.path.join(name, "performance", "ranging")
                 power_is_valid, power_metric = check_power_dir(
-                    power_path, scenario_fixed,
-                    config)
+                    power_path, model_name)
                 if not power_is_valid:
                   is_valid = False
                   power_metric = 0
@@ -711,7 +824,8 @@
                        errors,
                        config,
                        inferred=inferred,
-                       power_metric=power_metric)
+                       power_metric=power_metric,
+                       results_dict=results_dict)
             else:
               results[name] = None
               log.error("%s is OK but accuracy has issues", name)
@@ -726,7 +840,7 @@
         log.warning("%s ignoring missing scenarios in open division (%s)",
                     name, required_scenarios)
 
-  return results
+  return results, is_valid
 
 
 def main():
@@ -741,12 +855,14 @@
   # compliance not yet supported
   args.skip_compliance = True
 
+  df_results = pd.DataFrame()
   with open(args.csv, "w") as csv:
     os.chdir(args.input)
     # check results directory
-    results = check_results_dir(config, args.submitter, args.skip_compliance,
-                                csv, args.debug)
-
+    results, is_valid = check_results_dir(config, args.submitter, args.skip_compliance,
+                                          df_results, args.debug)
+    df_results[:0].to_csv(csv, index=False, quoting=QUOTE_NONE)  # just headers w/o ""
+    df_results.to_csv(csv, index=False, header=False, quoting=QUOTE_ALL, quotechar='"')
     # log results
     log.info("---")
     with_results = 0
@@ -763,7 +879,7 @@
   log.info("---")
   log.info("Results=%d, NoResults=%d", with_results,
            len(results) - with_results)
-  if len(results) != with_results:
+  if len(results) != with_results or not is_valid:
     log.error("SUMMARY: submission has errors")
     return 1
   else:
@@ -795,6 +911,8 @@
 
   return is_valid, acc
 
+
+
 def check_performance_dir(config, model, path, scenario_fixed, division,
                           system_json):
   is_valid = False
@@ -804,22 +922,63 @@
   fname = os.path.join(path, "results.txt")
   with open(fname, "r") as f:
     for line in f:
-      m = re.match(r".* Median throughput is ([\d\.]+) inf\./sec\..*", line)
+      m = re.match(r".* Median throughput is\s+([\d\.]+) inf\./sec\..*", line)
       if m:
         is_valid = True
         res = m.group(1)
 
   return is_valid, float(res), inferred
 
-def check_power_dir(power_path, scenario_fixed,
-                    config):
+def sww_perf_acc(config, model, path):
+  """
+  Extract performance and accuracy data from the energy dir. Currently (July 2025) this
+  is really only for the streaming wakeword benchmark.
+  """
+  is_valid = False
+  has_throughput = has_accuracy = has_duty_cycle = False
+  # initialize everything we may return, in case a line is missing from results.txt
+  throughput = tp = fn = fp = f1_acc = duty_cycle = None
+
+  fname = os.path.join(path, "results.txt")
+  with open(fname, "r") as f:
+    for line in f:
+      m = re.search(r"Estimated throughput:\s*([\d\.]+) inf\./sec\..*", line)
+      if m:
+        throughput = float(m.group(1))
+        has_throughput = True
+        continue
+      m = re.search(r"Accuracy: ([\d]+) True positives, ([\d]+) False negatives, ([\d]+) False positives", line)
+      if m:
+        groups = m.groups()
+        if len(groups) == 3:
+          tp, fn, fp = (int(mm) for mm in groups)
+          f1_acc = 2*tp/(2*tp+fp+fn)
+          if tp+fn == 50:  # TP + FN should equal the 50 keyword occurrences in the test set
+            has_accuracy = True
+          else:
+            log.error(f"True Positives ({tp}) + False Negatives ({fn}) != 50")
+        else:
+          log.warning(f"Accuracy line should have three integers: TP, FN, FP.\n{line}")
+      m = re.search(r"Average duty cycle:\s*([\d\.]+)", line)
+      if m:
+        duty_cycle = float(m.group(1))
+        has_duty_cycle = True
+  is_valid = has_throughput and has_accuracy and has_duty_cycle
+  return is_valid, dict(throughput=throughput, tp=tp, fn=fn, fp=fp, accuracy=f1_acc, duty_cycle=duty_cycle)
+
+
+def check_power_dir(power_path, model_name):
   is_valid = False
   res = None
 
   fname = os.path.join(power_path, "results.txt")
+
+  if model_name == "sww":
+    pattern = r"Power\s+: ([\d\.]+) mW.*"
+  else:
+    pattern = r"Median energy cost is ([\d\.]+) uJ/inf"
   with open(fname, "r") as f:
     for line in f:
-      m = re.match(r".* Median energy cost is ([\d\.]+) uJ/inf\..*", line)
+      m = re.search(pattern, line)
       if m:
         is_valid = True
         res = m.group(1)
@@ -833,4 +992,6 @@ def files_diff(list1, list2, optional=None):
     return set(list1).symmetric_difference(set(list2)) - set(optional)
 
 if __name__ == "__main__":
-  sys.exit(main())
+  main_result = main()
+  print(f"function main() returned {main_result}")
+  sys.exit(main_result)
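
For reference, a worked example of the scoring math that `sww_perf_acc()` above applies to the streaming wakeword results (the counts are hypothetical, chosen only to illustrate the formulas):
```
tp, fn, fp = 46, 4, 3             # hypothetical counts; TP + FN must equal the 50 keyword occurrences
assert tp + fn == 50
f1 = 2 * tp / (2 * tp + fp + fn)  # same F1 formula used in sww_perf_acc()
print(f"F1 = {f1:.3f}")           # -> F1 = 0.929
print(fp <= 8 and fn <= 8)        # -> True: within the closed-division limit of <= 8 each
```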