Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -2406,6 +2406,10 @@ defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32<MAX_GR
def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;

// Reads the PTX special register %total_smem_size as a 32-bit value.
def int_nvvm_read_ptx_sreg_total_smem_size : PTXReadSRegIntrinsicNB_r32;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: perhaps we should give these things names like "nvvm.read.ptx.sreg.total_smem_size". The underscore is more consistent with PTX, while the "." seems to imply that each word is a subfield.

// Read the PTX special registers %aggr_smem_size and %dynamic_smem_size as
// 32-bit values.
def int_nvvm_read_ptx_sreg_aggr_smem_size : PTXReadSRegIntrinsicNB_r32;
def int_nvvm_read_ptx_sreg_dynamic_smem_size : PTXReadSRegIntrinsicNB_r32;

//
// SHUFFLE
//
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -4761,6 +4761,14 @@ def INT_PTX_SREG_CLUSTER_NCTARANK:
int_nvvm_read_ptx_sreg_cluster_nctarank,
[hasSM<90>, hasPTX<78>]>;

// Shared-memory size special registers. Each record maps a PTX special
// register name to the intrinsic that reads it. Only %aggr_smem_size carries
// explicit target predicates (sm_90 and PTX 8.1).
// NOTE(review): the PTX ISA appears to list %total_smem_size and
// %dynamic_smem_size as introduced in PTX 4.1 -- confirm whether these two
// records also need a hasPTX predicate.
def INT_PTX_SREG_TOTAL_SMEM_SIZE :
    PTX_READ_SREG_R32<"total_smem_size",
                      int_nvvm_read_ptx_sreg_total_smem_size>;
def INT_PTX_SREG_DYNAMIC_SMEM_SIZE :
    PTX_READ_SREG_R32<"dynamic_smem_size",
                      int_nvvm_read_ptx_sreg_dynamic_smem_size>;
def INT_PTX_SREG_AGGR_SMEM_SIZE :
    PTX_READ_SREG_R32<"aggr_smem_size",
                      int_nvvm_read_ptx_sreg_aggr_smem_size,
                      [hasSM<90>, hasPTX<81>]>;

def SREG_LANEID : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def SREG_WARPID : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/NVPTX/intrinsics-sm90-ptx81.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx81| FileCheck --check-prefixes=CHECK %s
; RUN: %if ptxas-sm_90 && ptxas-isa-8.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx81| %ptxas-verify -arch=sm_90 %}

; Checks that llvm.nvvm.read.ptx.sreg.aggr.smem.size lowers to a single
; mov.u32 from the %aggr_smem_size special register, which is then returned.
define i32 @test_aggr_smem_size() {
; CHECK-LABEL: test_aggr_smem_size(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.u32 %r1, %aggr_smem_size;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%a = tail call i32 @llvm.nvvm.read.ptx.sreg.aggr.smem.size()
ret i32 %a
}

declare i32 @llvm.nvvm.read.ptx.sreg.aggr.smem.size()
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/NVPTX/intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,32 @@ define i64 @test_steadycounter() {
ret i64 %ret
}

; Checks that llvm.nvvm.read.ptx.sreg.total.smem.size lowers to a single
; mov.u32 from the %total_smem_size special register, which is then returned.
define i32 @test_total_smem_size() {
; CHECK-LABEL: test_total_smem_size(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.u32 %r1, %total_smem_size;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%a = tail call i32 @llvm.nvvm.read.ptx.sreg.total.smem.size()
ret i32 %a
}

; Checks that llvm.nvvm.read.ptx.sreg.dynamic.smem.size lowers to a single
; mov.u32 from the %dynamic_smem_size special register, which is then returned.
define i32 @test_dynamic_smem_size() {
; CHECK-LABEL: test_dynamic_smem_size(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.u32 %r1, %dynamic_smem_size;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%a = tail call i32 @llvm.nvvm.read.ptx.sreg.dynamic.smem.size()
ret i32 %a
}

declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
Expand All @@ -335,3 +361,5 @@ declare void @llvm.nvvm.exit()
declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
declare i64 @llvm.readcyclecounter()
declare i64 @llvm.readsteadycounter()
declare i32 @llvm.nvvm.read.ptx.sreg.total.smem.size()
declare i32 @llvm.nvvm.read.ptx.sreg.dynamic.smem.size()
6 changes: 6 additions & 0 deletions mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,12 @@ def NVVM_ClusterDimBlocksZOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sre
def NVVM_ClusterId : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctarank", [NVVMRequiresSM<90>]>;
def NVVM_ClusterDim : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctarank">;

//===----------------------------------------------------------------------===//
// Shared memory size registers (total, dynamic, and aggr). Only the aggr-size
// op is marked as requiring SM 90.
def NVVM_TotalSmemSize : NVVM_PureSpecialRegisterOp<"read.ptx.sreg.total.smem.size">;
def NVVM_DynamicSmemSize : NVVM_PureSpecialRegisterOp<"read.ptx.sreg.dynamic.smem.size">;
def NVVM_AggrSmemSize : NVVM_PureSpecialRegisterOp<"read.ptx.sreg.aggr.smem.size", [NVVMRequiresSM<90>]>;

//===----------------------------------------------------------------------===//
// Clock registers
def NVVM_ClockOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock">;
Expand Down
6 changes: 6 additions & 0 deletions mlir/test/Target/LLVMIR/nvvmir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ llvm.func @nvvm_special_regs() -> i32 {
%76 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 0> : i32
// CHECK: %77 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%77 = nvvm.read.ptx.sreg.tid.x range <i32, 4294967295, 4294967295> : i32
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.total.smem.size()
%78 = nvvm.read.ptx.sreg.total.smem.size : i32
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.dynamic.smem.size()
%79 = nvvm.read.ptx.sreg.dynamic.smem.size : i32
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.aggr.smem.size()
%80 = nvvm.read.ptx.sreg.aggr.smem.size : i32
llvm.return %1 : i32
}

Expand Down