diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 2a67a86af5dce..9ce0eae0e9ae7 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -307,6 +307,7 @@ def run_onnxruntime( device, data_type, warm_up_repeat, + provider=provider, ) logger.info(result) results.append(result) diff --git a/onnxruntime/python/tools/transformers/benchmark_helper.py b/onnxruntime/python/tools/transformers/benchmark_helper.py index a6716c8df3bc2..d88cc62988f6f 100644 --- a/onnxruntime/python/tools/transformers/benchmark_helper.py +++ b/onnxruntime/python/tools/transformers/benchmark_helper.py @@ -338,48 +338,70 @@ def inference_ort_with_io_binding( device, data_type=numpy.longlong, warm_up_repeat=0, + provider=None, ): result = {} - # Bind inputs and outputs to onnxruntime session - io_binding = ort_session.io_binding() - # Bind inputs to device - for name in ort_inputs: - np_input = torch.from_numpy(ort_inputs[name]).to(device) - input_type = IO_BINDING_DATA_TYPE_MAP.get(str(ort_inputs[name].dtype), data_type) - io_binding.bind_input( - name, - np_input.device.type, - 0, - input_type, - np_input.shape, - np_input.data_ptr(), - ) - # Bind outputs buffers with the sizes needed if not allocated already - if len(output_buffers) == 0: - allocateOutputBuffers(output_buffers, output_buffer_max_sizes, device) - - for i, ort_output_name in enumerate(ort_output_names): - io_binding.bind_output( - ort_output_name, - output_buffers[i].device.type, - 0, - numpy.float32, - ort_outputs[i].shape, - output_buffers[i].data_ptr(), - ) + if provider == "migraphx": + input_tensors = {} + for name in ort_inputs: + input_tensors[name] = { + "tensor": torch.from_numpy(ort_inputs[name]).to(device), + "type": IO_BINDING_DATA_TYPE_MAP.get(str(ort_inputs[name].dtype), data_type), + } - timeit.repeat( - lambda: ort_session.run_with_iobinding(io_binding), - number=1, - repeat=warm_up_repeat, - ) # Dry run + def run_migraphx(): + iob = ort_session.io_binding() + for name, info in input_tensors.items(): + t = info["tensor"] + iob.bind_input(name, t.device.type, 0, info["type"], list(t.shape), t.data_ptr()) + for oname in ort_output_names: + iob.bind_output(oname, device) + ort_session.run_with_iobinding(iob) + + timeit.repeat(run_migraphx, number=1, repeat=warm_up_repeat) # Dry run + latency_list = timeit.repeat(run_migraphx, number=1, repeat=repeat_times) + else: + # Bind inputs and outputs to onnxruntime session + io_binding = ort_session.io_binding() + # Bind inputs to device + for name in ort_inputs: + np_input = torch.from_numpy(ort_inputs[name]).to(device) + input_type = IO_BINDING_DATA_TYPE_MAP.get(str(ort_inputs[name].dtype), data_type) + io_binding.bind_input( + name, + np_input.device.type, + 0, + input_type, + np_input.shape, + np_input.data_ptr(), + ) + # Bind outputs buffers with the sizes needed if not allocated already + if len(output_buffers) == 0: + allocateOutputBuffers(output_buffers, output_buffer_max_sizes, device) + + for i, ort_output_name in enumerate(ort_output_names): + io_binding.bind_output( + ort_output_name, + output_buffers[i].device.type, + 0, + numpy.float32, + ort_outputs[i].shape, + output_buffers[i].data_ptr(), + ) + + timeit.repeat( + lambda: ort_session.run_with_iobinding(io_binding), + number=1, + repeat=warm_up_repeat, + ) # Dry run + + latency_list = timeit.repeat( + lambda: ort_session.run_with_iobinding(io_binding), + number=1, + repeat=repeat_times, + ) - latency_list = timeit.repeat( - lambda: ort_session.run_with_iobinding(io_binding), - number=1, - repeat=repeat_times, - ) result.update(result_template) result.update({"io_binding": True}) result.update(get_latency_result(latency_list, batch_size))