diff --git a/src/xtc/graphs/xtc/operators.py b/src/xtc/graphs/xtc/operators.py
index ff2bd16d..ca9a9f4f 100644
--- a/src/xtc/graphs/xtc/operators.py
+++ b/src/xtc/graphs/xtc/operators.py
@@ -452,14 +452,22 @@ def __init__(self, **attrs: XTCOperatorAttr) -> None:
         if isinstance(padding, int):
             padding = {axes[0]: (padding, padding), axes[1]: (padding, padding)}
         else:
-            assert isinstance(padding, tuple), (
-                f"padding for pad2d of wrong type, expect int or tuple: {padding}"
+            assert isinstance(padding, (tuple, dict)), (
+                f"padding for pad2d of wrong type, expect int or tuple or dict: {padding}"
             )
             if len(padding) == 1:
                 padding = {
                     axes[0]: (padding[0], padding[0]),
                     axes[1]: (padding[0], padding[0]),
                 }
+            elif isinstance(padding, dict) and len(padding) == 2:
+                # Keys are axes; an int entry expands to the pair (pad, pad).
+                # NOTE(review): a 1-entry dict is taken by the `len(padding) == 1`
+                # branch above and indexed as padding[0] -- confirm this is intended.
+                padding = {
+                    axis: (pad, pad) if isinstance(pad, int) else pad
+                    for axis, pad in padding.items()
+                }
             elif all(isinstance(pad, int) for pad in padding) and len(padding) == 2:
                 padding = {
                     axes[0]: (padding[0], padding[1]),
diff --git a/tests/filecheck/backends/padding/test_pad2d_dict_matmul_unpad_mlir.py b/tests/filecheck/backends/padding/test_pad2d_dict_matmul_unpad_mlir.py
new file mode 100644
index 00000000..90961dc8
--- /dev/null
+++ b/tests/filecheck/backends/padding/test_pad2d_dict_matmul_unpad_mlir.py
@@ -0,0 +1,239 @@
+# RUN: python %s 2>&1 | filecheck %s
+# REQUIRES: module_mlir
+
+import xtc.graphs.xtc.op as O
+from xtc.backends.mlir import Backend
+
+I, J, K, dtype = 14, 14, 14, "float32"
+a = O.tensor((I, K), dtype, name="A")
+b = O.tensor((K, J), dtype, name="B")
+
+with O.graph(name="pad_matmul_unpad") as gb:
+    p1 = O.pad2d(a, padding={-2: (0, 2), -1: (0, 2)}, name="A_pad")
+    p2 = O.pad2d(b, padding=(0, 2), axes=(-2, -1), name="B_pad")
+    m_pad = O.matmul(p1, p2, name="matmul_padded")
+    O.unpad(m_pad, padding={-2: (0, 2), -1: (0, 2)}, name="C")
+graph = gb.graph
+print(graph)
+
+impl = Backend(graph)
+sch = impl.get_scheduler(default_node="matmul_padded")
+sched = sch.schedule()
+
+comp = impl.get_compiler(
+    shared_lib=True,
+    dump_file="pad2d_dict_matmul_unpad_mlir",
+    print_source_ir=True,
+    print_transformed_ir=True,
+)
+module = comp.compile(sched)
+executor = module.get_executor(validate=True)
+res = executor.execute()
+print(f"CODE: {res}")
+
+# CHECK: // -----// IR Dump Before transform //----- //
+# CHECK-NEXT: module attributes {transform.with_named_sequence} {
+# CHECK-NEXT: func.func @pad_matmul_unpad(%arg0: memref<14x14xf32> {llvm.noalias}, %arg1: memref<14x14xf32> {llvm.noalias}, %arg2: memref<14x14xf32> {llvm.noalias}) {
+# CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32>
+# CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32
+# CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%alloca : memref<16x16xf32>)
+# CHECK-NEXT: %subview = memref.subview %alloca[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>>
+# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%arg0 : memref<14x14xf32>) outs(%subview : memref<14x14xf32, strided<[16, 1]>>)
+# CHECK-NEXT: %alloca_0 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32>
+# CHECK-NEXT: %cst_1 = arith.constant 0.000000e+00 : f32
+# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_1 : f32) outs(%alloca_0 : memref<16x16xf32>)
+# CHECK-NEXT: %subview_2 = memref.subview %alloca_0[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>>
+# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%arg1 : memref<14x14xf32>) outs(%subview_2 : memref<14x14xf32, strided<[16, 1]>>)
+# CHECK-NEXT: %alloca_3 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32>
+# CHECK-NEXT: %cst_4 = arith.constant 0.000000e+00 : f32
+# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_4 : f32) outs(%alloca_3 : memref<16x16xf32>)
+# CHECK-NEXT: linalg.matmul {__xtc_id_matmul_padded_} ins(%alloca, %alloca_0 : memref<16x16xf32>, memref<16x16xf32>) outs(%alloca_3 : memref<16x16xf32>)
+# CHECK-NEXT: %subview_5
= memref.subview %alloca_3[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_5 : memref<14x14xf32, strided<[16, 1]>>) outs(%arg2 : memref<14x14xf32>) +# CHECK-NEXT: return +# CHECK-NEXT: } +# CHECK-NEXT: transform.named_sequence @_vecto(%arg0: !transform.any_op {transform.consumed}) { +# CHECK-NEXT: transform.structured.vectorize %arg0 : !transform.any_op +# CHECK-NEXT: transform.yield +# CHECK-NEXT: } +# CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_A_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./b" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "./h" : !transform.any_op +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_A_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "./b" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "./h" : !transform.any_op +# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_B_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = 
transform.structured.tile_using_for %2 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op +# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_B_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %3 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./b" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./h" : !transform.any_op +# CHECK-NEXT: %4 = transform.structured.match attributes {__xtc_id_matmul_padded_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %4 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_17 "./j" : !transform.any_op +# CHECK-NEXT: %5 = transform.structured.match attributes {__xtc_id_matmul_padded_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %5 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, 
!transform.any_op) +# CHECK-NEXT: transform.annotate %loops_19 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_21 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_22, %loops_23 = transform.structured.tile_using_for %tiled_linalg_op_20 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_23 "./k" : !transform.any_op +# CHECK-NEXT: %6 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_24, %loops_25 = transform.structured.tile_using_for %6 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_25 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_26, %loops_27 = transform.structured.tile_using_for %tiled_linalg_op_24 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_27 "./j" : !transform.any_op +# CHECK-NEXT: transform.yield +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: +# CHECK-NEXT: // -----// IR Dump After transform //----- // +# CHECK-NEXT: module attributes {transform.with_named_sequence} { +# CHECK-NEXT: func.func @pad_matmul_unpad(%arg0: memref<14x14xf32> {llvm.noalias}, %arg1: memref<14x14xf32> {llvm.noalias}, %arg2: memref<14x14xf32> {llvm.noalias}) { +# CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> +# CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: %c0 = arith.constant 0 : index +# CHECK-NEXT: %c16 = arith.constant 16 : index +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c16 step %c1 { +# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] 
[1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index +# CHECK-NEXT: %c16_25 = arith.constant 16 : index +# CHECK-NEXT: %c1_26 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { +# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: } {"./h"} +# CHECK-NEXT: } {"./b"} +# CHECK-NEXT: %subview = memref.subview %alloca[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %c0_0 = arith.constant 0 : index +# CHECK-NEXT: %c14 = arith.constant 14 : index +# CHECK-NEXT: %c1_1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c14 step %c1_1 { +# CHECK-NEXT: %subview_23 = memref.subview %arg0[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_24 = memref.subview %subview[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_25 = arith.constant 0 : index +# CHECK-NEXT: %c14_26 = arith.constant 14 : index +# CHECK-NEXT: %c1_27 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { +# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, 
strided<[16, 1], offset: ?>>) +# CHECK-NEXT: } {"./h"} +# CHECK-NEXT: } {"./b"} +# CHECK-NEXT: %alloca_2 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> +# CHECK-NEXT: %cst_3 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: %c0_4 = arith.constant 0 : index +# CHECK-NEXT: %c16_5 = arith.constant 16 : index +# CHECK-NEXT: %c1_6 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_4 to %c16_5 step %c1_6 { +# CHECK-NEXT: %subview_23 = memref.subview %alloca_2[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index +# CHECK-NEXT: %c16_25 = arith.constant 16 : index +# CHECK-NEXT: %c1_26 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { +# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_3 : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: } {"./h"} +# CHECK-NEXT: } {"./b"} +# CHECK-NEXT: %subview_7 = memref.subview %alloca_2[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %c0_8 = arith.constant 0 : index +# CHECK-NEXT: %c14_9 = arith.constant 14 : index +# CHECK-NEXT: %c1_10 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c14_9 step %c1_10 { +# CHECK-NEXT: %subview_23 = memref.subview %arg1[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_24 = memref.subview %subview_7[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_25 = arith.constant 0 : index +# CHECK-NEXT: %c14_26 = arith.constant 14 : index +# CHECK-NEXT: %c1_27 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 
step %c1_27 { +# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: } {"./h"} +# CHECK-NEXT: } {"./b"} +# CHECK-NEXT: %alloca_11 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> +# CHECK-NEXT: %cst_12 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: %c0_13 = arith.constant 0 : index +# CHECK-NEXT: %c16_14 = arith.constant 16 : index +# CHECK-NEXT: %c1_15 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_13 to %c16_14 step %c1_15 { +# CHECK-NEXT: %subview_23 = memref.subview %alloca_11[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index +# CHECK-NEXT: %c16_25 = arith.constant 16 : index +# CHECK-NEXT: %c1_26 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { +# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_12 : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: } {"./j"} +# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c16_17 = arith.constant 16 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_16 to %c16_17 step %c1_18 { +# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to 
memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_24 = memref.subview %alloca_2[0, 0] [16, 16] [1, 1] : memref<16x16xf32> to memref<16x16xf32, strided<[16, 1]>> +# CHECK-NEXT: %subview_25 = memref.subview %alloca_11[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_26 = arith.constant 0 : index +# CHECK-NEXT: %c16_27 = arith.constant 16 : index +# CHECK-NEXT: %c1_28 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_26 to %c16_27 step %c1_28 { +# CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, %arg4] [16, 1] [1, 1] : memref<16x16xf32, strided<[16, 1]>> to memref<16x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_32 = arith.constant 0 : index +# CHECK-NEXT: %c16_33 = arith.constant 16 : index +# CHECK-NEXT: %c1_34 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_32 to %c16_33 step %c1_34 { +# CHECK-NEXT: %subview_35 = memref.subview %subview_29[0, %arg5] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_36 = memref.subview %subview_30[%arg5, 0] [1, 1] [1, 1] : memref<16x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_37 = memref.subview %subview_31[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_matmul_padded_} ins(%subview_35, %subview_36 : memref<1x1xf32, strided<[16, 1], offset: ?>>, memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_37 : 
memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: } {"./k"} +# CHECK-NEXT: } {"./j"} +# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: %subview_19 = memref.subview %alloca_11[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %c0_20 = arith.constant 0 : index +# CHECK-NEXT: %c14_21 = arith.constant 14 : index +# CHECK-NEXT: %c1_22 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_20 to %c14_21 step %c1_22 { +# CHECK-NEXT: %subview_23 = memref.subview %subview_19[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_24 = memref.subview %arg2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %c0_25 = arith.constant 0 : index +# CHECK-NEXT: %c14_26 = arith.constant 14 : index +# CHECK-NEXT: %c1_27 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { +# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_28 : memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[14, 1], offset: ?>>) +# CHECK-NEXT: } {"./j"} +# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: return +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: +# CHECK-NEXT: graph: +# CHECK-NEXT: name: pad_matmul_unpad +# CHECK-NEXT: inputs: +# CHECK-NEXT: - %0 : 14x14xfloat32 +# CHECK-NEXT: - %1 : 14x14xfloat32 +# CHECK-NEXT: outputs: +# CHECK-NEXT: - %5 : 14x14xfloat32 +# CHECK-NEXT: nodes: +# CHECK-NEXT: - %2: pad2d(%0, padding={-2: (0, 2), -1: (0, 2)}, constant_value=0) {name = 'A_pad'} : [14x14xfloat32] -> 
[16x16xfloat32] +# CHECK-NEXT: - %3: pad2d(%1, padding={-2: (0, 2), -1: (0, 2)}, constant_value=0) {name = 'B_pad'} : [14x14xfloat32] -> [16x16xfloat32] +# CHECK-NEXT: - %4: matmul(%2, %3) {name = 'matmul_padded'} : [16x16xfloat32, 16x16xfloat32] -> [16x16xfloat32] +# CHECK-NEXT: - %5: unpad(%4, padding={-2: (0, 2), -1: (0, 2)}) {name = 'C'} : [16x16xfloat32] -> [14x14xfloat32] +# CHECK-NEXT: +# CHECK-NEXT: CODE: 0