Skip to content

Commit 07c722f

Browse files
committed
Added the make_tensor_view op to the Toy-to-CudaTile lowering
1 parent bdb9a66 commit 07c722f

File tree

2 files changed

+57
-26
lines changed

2 files changed

+57
-26
lines changed

mlir/cuda-tile/Toy/mlir/LowerToCudaTile.cpp

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "mlir/Support/TypeID.h"
1616
#include "toy/Dialect.h"
1717
#include "toy/Passes.h"
18+
#include "llvm/ADT/ArrayRef.h"
1819
#include "llvm/ADT/STLExtras.h"
1920
#include "llvm/ADT/SmallPtrSet.h"
2021
#include "llvm/ADT/SmallSet.h"
@@ -27,6 +28,7 @@
2728
#include "cuda_tile/Dialect/CudaTile/IR/Dialect.h"
2829
#include "cuda_tile/Dialect/CudaTile/IR/Ops.h"
2930

31+
#include <cstdint>
3032
#include <memory>
3133
#include <string>
3234

@@ -60,7 +62,7 @@ mlir::cuda_tile::ModuleOp createCudaModuleOp(mlir::OpBuilder &builder,
6062
auto cudaTileModuleOp = mlir::cuda_tile::ModuleOp::create(
6163
builder, moduleOp.getLoc(), "cuda_tile_module");
6264

63-
LDBG() << "Created CudaTile Module: \n" << cudaTileModuleOp << "\n";
65+
LDBG() << "Created CudaTile Module: \n" << cudaTileModuleOp;
6466
return cudaTileModuleOp;
6567
}
6668

@@ -70,7 +72,7 @@ void ToyToCudaTileLoweringPass::runOnOperation() {
7072
// Here we would implement the actual lowering logic from Toy GPUFuncOp
7173
// to CudaTile operations. For now, we just log that the pass is running.
7274
// LDBG() << "Running Toy to CudaTile lowering on GPUFuncOp: " << moduleOp
73-
// << "\n";
75+
// ;
7476

7577
mlir::OpBuilder builder(moduleOp.getContext());
7678
// 1. Create new cuda_tile.module Op in the last section.
@@ -86,28 +88,33 @@ void ToyToCudaTileLoweringPass::runOnOperation() {
8688
gfunOp->getAttrOfType<mlir::StringAttr>("sym_name").getValue();
8789
llvm::SmallVector<mlir::Type, 8> newArgTypes;
8890

89-
LDBG() << "Lowering GPU function: " << gfunc_name << "\n";
90-
LDBG() << "Converting input type into cuda tile type" << "\n";
91+
LDBG() << "Lowering GPU function: " << gfunc_name;
92+
LDBG() << "Converting input type into cuda tile type";
93+
94+
llvm::SmallVector<llvm::ArrayRef<int64_t>, 4> inputShapes;
95+
// llvm::SmallVector<llvm::ArrayRef<int64_t>, 4> resultShapes;
9196

9297
for (mlir::Type t : gfunOp.getFunctionType().getInputs()) {
93-
LDBG() << "Original arg type: " << t << "\n";
98+
LDBG() << "Original arg type: " << t;
9499
auto tt = llvm::dyn_cast<mlir::TensorType>(t);
95100
auto elemType = tt.getElementType();
96101
auto ptrElem = mlir::cuda_tile::PointerType::get(elemType);
97102
auto newType = mlir::cuda_tile::TileType::get({}, ptrElem);
98-
LDBG() << "The new arg type for cuda tile: " << newType << "\n";
103+
LDBG() << "The new arg type for cuda tile: " << newType;
99104
newArgTypes.push_back(newType);
105+
inputShapes.push_back(tt.getShape());
100106
}
101107

102-
LDBG() << "Converting result type into cuda tile type" << "\n";
108+
LDBG() << "Converting result type into cuda tile type";
103109
for (mlir::Type t : gfunOp.getFunctionType().getResults()) {
104-
LDBG() << "Original result type: " << t << "\n";
110+
LDBG() << "Original result type: " << t;
105111
auto tt = llvm::dyn_cast<mlir::TensorType>(t);
106112
auto elemType = tt.getElementType();
107113
auto ptrElem = mlir::cuda_tile::PointerType::get(elemType);
108114
auto newType = mlir::cuda_tile::TileType::get({}, ptrElem);
109-
LDBG() << "The new arg type for cuda tile: " << newType << "\n";
115+
LDBG() << "The new arg type for cuda tile: " << newType;
110116
newArgTypes.push_back(newType);
117+
inputShapes.push_back(tt.getShape());
111118
}
112119

113120
auto newFnType = builder.getFunctionType(newArgTypes, {});
@@ -118,9 +125,35 @@ void ToyToCudaTileLoweringPass::runOnOperation() {
118125
/*arg_attrs=*/{}, /*res_attrs=*/{}, {});
119126
auto bb = cudaEntryOp.addEntryBlock();
120127
builder.setInsertionPointToStart(bb);
128+
// 1. create a get_tile_block_id op
129+
auto tileBlockId = mlir::cuda_tile::GetTileBlockIdOp::create(
130+
builder, gfunOp->getLoc(),
131+
{mlir::cuda_tile::TileType::get({}, builder.getI32Type()),
132+
mlir::cuda_tile::TileType::get({}, builder.getI32Type()),
133+
mlir::cuda_tile::TileType::get({}, builder.getI32Type())});
134+
for (auto [idx, arg] : llvm::enumerate(bb->getArguments())) {
135+
// 2. create a make_tensor_view op
136+
auto resultType = builder.getI64ArrayAttr(inputShapes[idx]);
137+
LDBG() << "Argument " << idx << " : " << arg << ", shape: " << resultType;
138+
auto ptrElem = llvm::dyn_cast<mlir::cuda_tile::TileType>(arg.getType())
139+
.getElementType();
140+
auto eleType = llvm::dyn_cast<mlir::cuda_tile::PointerType>(ptrElem)
141+
.getPointeeType();
142+
mlir::cuda_tile::TensorViewType tensorViewType =
143+
mlir::cuda_tile::TensorViewType::get(
144+
builder.getContext(), eleType, inputShapes[idx],
145+
/*strides=*/{inputShapes[idx].back(), 1});
146+
// LDBG() << "Creating TensorViewType: " << tensorViewType;
147+
auto make_tensor_view = mlir::cuda_tile::MakeTensorViewOp::create(
148+
builder, gfunOp->getLoc(), tensorViewType, arg,
149+
/*dynamicShape=*/mlir::ValueRange{},
150+
/*dynamicStrides=*/mlir::ValueRange{});
151+
// LDBG() << "Created MakeTensorViewOp: \n" << make_tensor_view ;
152+
}
153+
121154
auto retOp = mlir::cuda_tile::ReturnOp::create(builder, gfunOp.getLoc());
122155

123-
LDBG() << "Created CudaTile Entry Op: \n" << cudaEntryOp << "\n";
156+
LDBG() << "Created CudaTile Entry Op: \n" << cudaEntryOp;
124157
});
125158
}
126159

mlir/cuda-tile/Toy/mlir/LowerToGpu.cpp

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,7 @@ struct GpuOutlinePass
5858

5959
llvm::StringRef getArgument() const override { return "toy-gpu-outline"; }
6060

61-
void initializeOptions(std::string grid) {
62-
this->grid = grid;
63-
}
61+
void initializeOptions(std::string grid) { this->grid = grid; }
6462

6563
void runOnOperation() override {
6664
auto func = getOperation();
@@ -124,11 +122,11 @@ struct GpuOutlinePass
124122
}
125123

126124
for (const auto &gpuSubgraph : gpuSubgraphs) {
127-
LDBG() << "----GPU subgraph----\n";
125+
LDBG() << "----GPU subgraph----";
128126
for (const auto &op : gpuSubgraph) {
129-
LDBG() << *op << "\n";
127+
LDBG() << *op;
130128
}
131-
LDBG() << "--------------------\n";
129+
LDBG() << "--------------------";
132130
}
133131

134132
llvm::SmallVector<std::string> outlinedFuncNames;
@@ -144,9 +142,9 @@ struct GpuOutlinePass
144142

145143
for (const auto &[index, gpuSubgraph] : llvm::enumerate(gpuSubgraphs)) {
146144
if (!gpuSubgraph.empty()) {
147-
LDBG() << "----GPU subgraph----\n";
145+
LDBG() << "----GPU subgraph----";
148146
for (const auto &op : gpuSubgraph) {
149-
LDBG() << *op << "\n";
147+
LDBG() << *op;
150148
}
151149

152150
// Identify its operands.
@@ -162,9 +160,9 @@ struct GpuOutlinePass
162160
}
163161
}
164162

165-
LDBG() << "Operands:\n";
163+
LDBG() << "Operands:";
166164
for (mlir::Value &operand : Operands) {
167-
LDBG() << " " << operand << "\n";
165+
LDBG() << " " << operand;
168166
}
169167

170168
llvm::SmallVector<mlir::Value, 2> Results;
@@ -181,16 +179,16 @@ struct GpuOutlinePass
181179
}
182180
}
183181

184-
LDBG() << "Results:\n";
182+
LDBG() << "Results:";
185183
for (mlir::Value &result : Results) {
186-
LDBG() << " " << result << "\n";
184+
LDBG() << " " << result;
187185
}
188186

189187
if (Results.size() != 1) {
190188
llvm::errs()
191189
<< "Currently only support single result GPU kernel "
192190
<< "Since the toy return op only supports single return value "
193-
<< "Found " << Results.size() << " results\n";
191+
<< "Found " << Results.size() << " results";
194192
return signalPassFailure();
195193
}
196194

@@ -244,7 +242,7 @@ struct GpuOutlinePass
244242
mlir::toy::ReturnOp::create(kernelBuilder, func.getLoc(),
245243
mappedResults);
246244

247-
LDBG() << "Created GPU kernel: " << gpuFunc << "\n";
245+
LDBG() << "Created GPU kernel: " << gpuFunc;
248246
}
249247

250248
outlinedFuncNames.push_back(outline_func_name);
@@ -269,9 +267,9 @@ struct GpuOutlinePass
269267

270268
for (mlir::Operation *op : llvm::reverse(gpuSubgraph))
271269
op->erase();
272-
LDBG() << "Inserted LaunchGpuOp: " << launch << "\n";
270+
LDBG() << "Inserted LaunchGpuOp: " << launch;
273271
}
274-
LDBG() << "--------------------\n";
272+
LDBG() << "--------------------";
275273
}
276274
}
277275
};

0 commit comments

Comments
 (0)