
Commit a6a56d5

Added base scheduler example

1 parent 995cc42 · commit a6a56d5
9 files changed: 219 additions & 1 deletion

mlir/optimization/CMakeLists.txt
Lines changed: 1 addition & 1 deletion

@@ -35,4 +35,4 @@ include(AddLLVM)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -fno-rtti")
 # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
 
-add_subdirectory(explore)
+add_subdirectory(scheduler)
Lines changed: 45 additions & 0 deletions

@@ -0,0 +1,45 @@
# For a better template to copy, see examples/standalone
include_directories(include)
add_subdirectory(include)

set(LLVM_LINK_COMPONENTS Core Support nativecodegen OrcJIT)

# set(LLVM_TARGET_DEFINITIONS mlir/ToyCombine.td)
# mlir_tablegen(ToyCombine.inc -gen-rewriters)
# add_public_tablegen_target(ToyCh6CombineIncGen)

add_executable(
  lab-scheduler
  lab-opt.cpp
  lib/OpStatsPass.cpp
)

# add_dependencies(lab-scheduler ToyCh6ShapeInferenceInterfaceIncGen
#   ToyCh6OpsIncGen ToyCh6CombineIncGen)

include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include/)
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
target_link_libraries(
  lab-scheduler
  PRIVATE ${dialect_libs}
          ${conversion_libs}
          ${extension_libs}
          MLIRAnalysis
          MLIRBuiltinToLLVMIRTranslation
          MLIRCallInterfaces
          MLIRCastInterfaces
          MLIRExecutionEngine
          MLIRIR
          MLIRLLVMCommonConversion
          MLIRLLVMDialect
          MLIRLLVMToLLVMIRTranslation
          MLIRMemRefDialect
          MLIRParser
          MLIRPass
          MLIRSideEffectInterfaces
          MLIRSupport
          MLIRTargetLLVMIRExport
          MLIRTransforms)
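
Building this presumably requires configuring the tree against an existing LLVM/MLIR build; the paths below are illustrative placeholders, not part of the commit:

    cmake -G Ninja -B build \
        -DMLIR_DIR=<llvm-build>/lib/cmake/mlir \
        -DLLVM_DIR=<llvm-build>/lib/cmake/llvm
    ninja -C build lab-scheduler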
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
add_subdirectory(lab)
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
(a new file consisting of a single blank line)
Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
#pragma once

#include "mlir/Pass/Pass.h"
#include <memory>

namespace mlir {

class Pass;

std::unique_ptr<Pass> createLabOpStatsPass();
std::unique_ptr<Pass> createLabBufferStatsPass();
std::unique_ptr<Pass> createLabFusionFeasibilityPass();
std::unique_ptr<Pass> createLabMatmulTilePass();
std::unique_ptr<Pass> createLabPipelinePlanPass();

} // namespace mlir
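
Of the five factories declared here, only createLabOpStatsPass is defined in this commit (in lib/OpStatsPass.cpp below); calling any of the others would fail at link time until they are implemented. A minimal placeholder definition, following the same PassWrapper pattern the commit uses, might look like this (hypothetical sketch, not part of the commit):

    #include "lab/LabPasses.h"
    #include "mlir/Pass/Pass.h"

    using namespace mlir;

    namespace {
    // Placeholder body; the actual buffer analysis is still TODO.
    struct LabBufferStatsPass
        : public PassWrapper<LabBufferStatsPass, OperationPass<>> {
      MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LabBufferStatsPass)
      void runOnOperation() override {}
    };
    } // namespace

    namespace mlir {
    std::unique_ptr<Pass> createLabBufferStatsPass() {
      return std::make_unique<LabBufferStatsPass>();
    }
    } // namespace mlir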
Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
#include "lab/LabPasses.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/InitAllDialects.h"
#include "mlir/InitAllPasses.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassRegistry.h"
#include "mlir/Tools/mlir-opt/MlirOptMain.h"

int main(int argc, char **argv) {
  mlir::DialectRegistry registry;
  registry.insert<mlir::func::FuncDialect, mlir::linalg::LinalgDialect,
                  mlir::arith::ArithDialect, mlir::tensor::TensorDialect,
                  mlir::memref::MemRefDialect, mlir::scf::SCFDialect,
                  mlir::affine::AffineDialect>();

  mlir::registerAllPasses();
  mlir::PassPipelineRegistration<>("lab-op-stats", "Lab Op Stats Pass",
                                   [](mlir::OpPassManager &pm) {
                                     pm.addPass(mlir::createLabOpStatsPass());
                                   });

  return mlir::asMainReturnCode(
      mlir::MlirOptMain(argc, argv, "Lab optimizer\n", registry));
}
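
Since PassPipelineRegistration exposes the registered pipeline as a command-line option named after its first argument, the tool should be invokable in the usual mlir-opt style; a plausible run (the input file name here stands in for either of the test files added below):

    lab-scheduler --lab-op-stats input.mlir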
Lines changed: 88 additions & 0 deletions

@@ -0,0 +1,88 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/Pass/Pass.h"

using namespace mlir;

namespace {
struct LabOpStatsPass
    : public PassWrapper<LabOpStatsPass, OperationPass<func::FuncOp>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LabOpStatsPass)

  void runOnOperation() override {
    func::FuncOp func = getOperation();

    func.walk([&](Operation *op) {
      if (auto matmul = dyn_cast<linalg::MatmulOp>(op)) {
        analyzeMatmul(matmul);
      } else if (auto generic = dyn_cast<linalg::GenericOp>(op)) {
        analyzeGeneric(generic);
      }
    });
  }

  static int64_t getElementBytes(Type t) {
    if (auto ft = dyn_cast<FloatType>(t))
      return ft.getWidth() / 8;
    if (auto it = dyn_cast<IntegerType>(t))
      return it.getWidth() / 8;
    return 0;
  }

  void analyzeMatmul(linalg::MatmulOp op) {
    auto aType =
        dyn_cast<ShapedType>(op.getDpsInputOperand(0)->get().getType());
    auto bType =
        dyn_cast<ShapedType>(op.getDpsInputOperand(1)->get().getType());
    auto cType = dyn_cast<ShapedType>(op.getDpsInitOperand(0)->get().getType());

    if (!aType || !bType || !cType || !aType.hasStaticShape() ||
        !bType.hasStaticShape() || !cType.hasStaticShape()) {
      op.emitRemark() << "[lab-op-stats] dynamic shape matmul, skip";
      return;
    }

    int64_t M = aType.getShape()[0];
    int64_t K = aType.getShape()[1];
    int64_t N = bType.getShape()[1];

    int64_t elemBytes = getElementBytes(aType.getElementType());
    if (elemBytes == 0) {
      op.emitRemark() << "[lab-op-stats] unsupported element type";
      return;
    }

    int64_t flops = 2 * M * N * K;
    int64_t aBytes = aType.getNumElements() * elemBytes;
    int64_t bBytes = bType.getNumElements() * elemBytes;
    int64_t cBytes = cType.getNumElements() * elemBytes;
    int64_t totalBytes = aBytes + bBytes + cBytes;

    double intensity = totalBytes > 0 ? static_cast<double>(flops) /
                                            static_cast<double>(totalBytes)
                                      : 0.0;

    op.emitRemark() << "[lab-op-stats] matmul "
                    << "M=" << M << " N=" << N << " K=" << K
                    << " flops=" << flops << " bytes=" << totalBytes
                    << " intensity=" << intensity;
  }

  void analyzeGeneric(linalg::GenericOp op) {
    unsigned numLoops = op.getNumLoops();
    unsigned numParallel = op.getNumParallelLoops();
    unsigned numReduction = numLoops - numParallel;

    op.emitRemark() << "[lab-op-stats] generic "
                    << "loops=" << numLoops << " parallel=" << numParallel
                    << " reduction=" << numReduction;
  }
};
} // namespace

namespace mlir {
std::unique_ptr<Pass> createLabOpStatsPass() {
  return std::make_unique<LabOpStatsPass>();
}
} // namespace mlir
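
As a concrete check of the formulas, applying them by hand to the 128x256 by 256x128 f32 matmul from the test input added later in this commit gives:

    flops      = 2 * M * N * K = 2 * 128 * 128 * 256        = 8,388,608
    totalBytes = (128*256 + 256*128 + 128*128) * 4           = 327,680
    intensity  = 8,388,608 / 327,680                         = 25.6 flops/byte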
Lines changed: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
module {
  func.func @conv_relu(%input: tensor<1x16x32x32xf32>,
                       %filter: tensor<32x16x3x3xf32>,
                       %init: tensor<1x32x30x30xf32>) -> tensor<1x32x30x30xf32> {
    %0 = linalg.conv_2d_nchw_fchw
        ins(%input, %filter : tensor<1x16x32x32xf32>, tensor<32x16x3x3xf32>)
        outs(%init : tensor<1x32x30x30xf32>) -> tensor<1x32x30x30xf32>

    %cst = arith.constant 0.0 : f32
    %1 = linalg.generic
        {indexing_maps = [
           affine_map<(n, c, h, w) -> (n, c, h, w)>,
           affine_map<(n, c, h, w) -> ()>,
           affine_map<(n, c, h, w) -> (n, c, h, w)>
         ],
         iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
        ins(%0, %cst : tensor<1x32x30x30xf32>, f32)
        outs(%init : tensor<1x32x30x30xf32>) {
      ^bb0(%x: f32, %zero: f32, %out: f32):
        %cmp = arith.cmpf oge, %x, %zero : f32
        %sel = arith.select %cmp, %x, %zero : f32
        linalg.yield %sel : f32
    } -> tensor<1x32x30x30xf32>

    return %1 : tensor<1x32x30x30xf32>
  }
}
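
On this file the pass should stay silent for the convolution — linalg.conv_2d_nchw_fchw is a named op that matches neither dyn_cast in the walk — and emit one remark for the ReLU generic, presumably along the lines of:

    remark: [lab-op-stats] generic loops=4 parallel=4 reduction=0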
Lines changed: 10 additions & 0 deletions

@@ -0,0 +1,10 @@
module {
  func.func @matmul(%A: tensor<128x256xf32>,
                    %B: tensor<256x128xf32>,
                    %C: tensor<128x128xf32>) -> tensor<128x128xf32> {
    %0 = linalg.matmul
        ins(%A, %B : tensor<128x256xf32>, tensor<256x128xf32>)
        outs(%C : tensor<128x128xf32>) -> tensor<128x128xf32>
    return %0 : tensor<128x128xf32>
  }
}
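
Running the tool over this file (lab-scheduler --lab-op-stats, as sketched above) should produce a single remark on the linalg.matmul, matching the arithmetic worked out earlier; the exact float formatting of the intensity may differ:

    remark: [lab-op-stats] matmul M=128 N=128 K=256 flops=8388608 bytes=327680 intensity=25.6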
