Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInline(RCID, type) && !hasModifiers();
}

bool isVGPR32_Lo128() const;

bool isSCSrcB16() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
}
Expand Down Expand Up @@ -2243,7 +2245,19 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
if (!isRegKind() ||
!AsmParser->getMRI()->getRegClass(RCID).contains(getReg()))
return false;
if (RCID == AMDGPU::VGPR_32_Lo128RegClassID ||
RCID == AMDGPU::VS_32_Lo128RegClassID)
return !AMDGPU::isHi128VGPR32(getReg());
return true;
}

/// Operand predicate: true when this operand is a register and that register
/// satisfies AMDGPU::isLo128VGPR32. Non-register operands never match.
bool AMDGPUOperand::isVGPR32_Lo128() const {
  // Short-circuit keeps getReg() from being queried on non-register operands.
  return isRegKind() && AMDGPU::isLo128VGPR32(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,11 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
const MCRegisterClass &RC = MRI.getRegClass(RCID);
auto Reg = mc2PseudoReg(Op.getReg());
if (!RC.contains(Reg) && !isInlineValue(Reg)) {
bool Err = !RC.contains(Reg) && !isInlineValue(Reg);
if (!Err && (RCID == AMDGPU::VGPR_32_Lo128RegClassID ||
RCID == AMDGPU::VS_32_Lo128RegClassID))
Err = AMDGPU::isHi128VGPR32(Reg);
if (Err) {
bool IsWaveSizeOp = OpInfo.isLookupRegClassByHwMode() &&
(OpInfo.RegClass == AMDGPU::SReg_1 ||
OpInfo.RegClass == AMDGPU::SReg_1_XEXEC);
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,12 @@ def CPolBit {

class VOPDstOperand<RegisterClassLike rc> : RegisterOperand<rc, "printVOPDst">;

// Assembler match class named "Vgpr32Lo128". The asm matcher accepts a parsed
// operand for this class when AMDGPUOperand::isVGPR32_Lo128() returns true and
// emits it into the MCInst through addRegOperands.
def Vgpr32Lo128 : AsmOperandClass {
let Name = "Vgpr32Lo128";
let PredicateMethod = "isVGPR32_Lo128";
let RenderMethod = "addRegOperands";
}

def VOPDstOperand_t16 : VOPDstOperand <VGPR_16> {
let EncoderMethod = "getMachineOpValueT16";
let DecoderMethod = "DecodeVGPR_16RegisterClass";
Expand All @@ -333,12 +339,27 @@ def VOPDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
}

// VOP destination operand over the VGPR_32_Lo128 register class. The assembly
// parser matches it through the Vgpr32Lo128 operand class instead of the
// match class that would otherwise be derived from the register class.
def VOPDstOperand_Vgpr32Lo128 : VOPDstOperand <VGPR_32_Lo128> {
let ParserMatchClass = Vgpr32Lo128;
}

// Source-encoded destination operand for instructions like v_swap_b16.
def VOPSrcEncodedDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
let EncoderMethod = VSrcT_b16_Lo128.EncoderMethod;
let DecoderMethod = VSrcT_b16_Lo128.DecoderMethod;
}


// Register operand over VGPR_16_Lo128 with dedicated decoder and encoder
// hooks (DecodeVGPR_16_Lo128RegisterClass / getMachineOpValueT16Lo128).
def VGPROp_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
let EncoderMethod = "getMachineOpValueT16Lo128";
}

// Register operand over VGPR_32_Lo128. It is decoded with the plain VGPR_32
// decoder, while the assembly parser matches it through the Vgpr32Lo128
// operand class.
def VGPROp_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
let DecoderMethod = "DecodeVGPR_32RegisterClass";
let ParserMatchClass = Vgpr32Lo128;
}

class VINTRPe <bits<2> op> : Enc32 {
bits<8> vdst;
bits<8> vsrc;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1825,7 +1825,7 @@ class getVALUDstForVT_fake16<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128_AlignTarget>,
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64_AlignTarget>,
!if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32_Lo128>,
!if(!eq(VT.Size, 16), VOPDstOperand_Vgpr32Lo128,
VOPDstS64orS32)))); // else VT == i1
}

Expand Down
20 changes: 9 additions & 11 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -632,13 +632,20 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types
let BaseClassOrder = 32;
}

// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
// Identical to VGPR_32 except only the low 128 (Lo128) registers in each
// register bank are allocatable.
def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 127))> {
(add (sequence "VGPR%u", 0, 895))> {
let AltOrders = [(add (sequence "VGPR%u", 0, 127),
(sequence "VGPR%u", 256, 383),
(sequence "VGPR%u", 512, 639),
(sequence "VGPR%u", 768, 895))];
let AltOrderSelect = [{ return 1; }];
let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
let GeneratePressureSet = 0;
let Size = 32;
let Weight = 1;
let BaseClassOrder = 33;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we go with say 64 to leave us some space between 32 and this one?
(Happy to go with 33, just asking)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Frankly, it does not work, whatever number I set. In theory it has to be higher than VGPR_32. In practice it neither works nor matters; I've tried 10000. Thus I have shrunk the RC size to 896 registers instead of 1024 so tablegen will not make it a base class for anything. I have to note, it is counterintuitive, but if it had 1024 registers, the size of the generated reginfo would be 1/3 smaller, because it would become indistinguishable from VGPR_32. I wish we could get rid of this RC altogether though, and pass the operand type to getRawAllocationOrder() instead, but its current uses by RA do not seem to collect operands at all. At least some of them.

Copy link
Collaborator

@qcolombet qcolombet Dec 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wish we could get rid of this RC altogether though, and pass operand type to the getRawAllocationOrder()

What would the best solution look like if you had infinite time?

Where I'm going is RA is definitely not flexible enough and may need an overhaul, so I'm trying to gauge what would be your ideas on this side.
That said, here it is more a TableGen issue or more precisely how we can compose subregs, but still.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a good question, but I do not have a good answer. For one it looks like subreg mechanism does not work well for our huge register file and huge register tuples. The amount of permutations is just insanely large. A separate issue is that we have subreg liveness on the one hand, but have to collect reg/subreg pairs from MOs on another, which duplicates logic in many places. We might have something simpler, but llvm has to serve all other targets too with their own register file organization.

This case is specifically tough because it requires different register numbers allocated for specific operand types in specific instruction forms, even though it is a GPR. So, theoretically a GPR shall be useful for any use, but it is not.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we need to review all our uses of the TRI interfaces and see if we even need many of these generated functions matching sub- and super-regs. I have a gut feeling we are often using them as a substitute for simpler things, like 'do they even alias?'. Then, if we see we can get rid of most such uses, we could probably ask tablegen not to generate the whole permutation matrix. Of course, when you need to extract a subreg from a tuple you need to do it, but do we really need to extract something like sub3_sub4_sub5 from a 1024-bit wide register?

}

// Identical to VGPR_32 except it only contains the low 256 (Lo256) registers.
Expand Down Expand Up @@ -1487,15 +1494,6 @@ foreach size = ["64", "96", "128", "160", "192", "224", "256", "288", "320", "35
def VGPROp_#size#_Align2 : RegisterOperand<!cast<RegisterClassLike>("VReg_"#size#_Align2)>;
}

def VGPROp_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
let EncoderMethod = "getMachineOpValueT16Lo128";
}

def VGPROp_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
let DecoderMethod = "DecodeVGPR_32RegisterClass";
}

//===----------------------------------------------------------------------===//
// ASrc_* Operands with an AccVGPR
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
"True16 Instructions post-RA");
if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
!AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
!llvm::is_contained(
AMDGPU::VGPR_32_Lo128RegClass.getRawAllocationOrder(*MF), Reg))
return false;

if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3632,6 +3632,17 @@ bool isPackedFP32Inst(unsigned Opc) {
}
}

/// Returns true when \p R lies in the closed register range
/// [AMDGPU::VGPR0, AMDGPU::VGPR127].
bool isLo128VGPR32(MCPhysReg R) {
  if (R < AMDGPU::VGPR0)
    return false;
  return R <= AMDGPU::VGPR127;
}

/// Returns true when \p R is a 32-bit VGPR in one of the ranges v128-v255,
/// v384-v511, v640-v767 or v896-v1023, i.e. the upper 128 registers of each
/// block of 256.
///
/// The comparisons rely on the VGPRn register enumerators being numbered
/// consecutively (the same assumption the range checks have always made).
bool isHi128VGPR32(MCPhysReg R) {
  // The first three ranges are half-open against the first register of the
  // following block. The last range ends at VGPR1023, which itself belongs to
  // the range, so its upper bound has to be inclusive.
  return ((R >= AMDGPU::VGPR128 && R < AMDGPU::VGPR256) ||
          (R >= AMDGPU::VGPR384 && R < AMDGPU::VGPR512) ||
          (R >= AMDGPU::VGPR640 && R < AMDGPU::VGPR768) ||
          (R >= AMDGPU::VGPR896 && R <= AMDGPU::VGPR1023));
}

const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
assert(isFixedDims() && "expect kind to be FixedDims");
return Dims;
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1828,6 +1828,13 @@ bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);

/// @return true if register \p R belongs to the range [v0-v127].
bool isLo128VGPR32(MCPhysReg R);

/// @return true if register \p R belongs to the range [v128-v255] or is any
/// other VGPR whose register index has bit 7 (mask 0x80) set, for example
/// [v384-v511].
bool isHi128VGPR32(MCPhysReg R);

class ClusterDimsAttr {
public:
enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
define i32 @asm_vgpr_early_clobber() {
; CHECK-LABEL: name: asm_vgpr_early_clobber
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1245195 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1245195 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1376267 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1376267 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's this magic number?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are RC IDs encoded in MIR. I do not like these tests, but every time you touch register info they have to be updated. It does not have anything to do with the patch itself.

; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
Expand Down Expand Up @@ -94,7 +94,7 @@ entry:
define i32 @test_single_vgpr_output() nounwind {
; CHECK-LABEL: name: test_single_vgpr_output
; CHECK: bb.1.entry:
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %8
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %8
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
Expand All @@ -106,7 +106,7 @@ entry:
define i32 @test_single_sgpr_output_s32() nounwind {
; CHECK-LABEL: name: test_single_sgpr_output_s32
; CHECK: bb.1.entry:
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %8
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
Expand All @@ -119,7 +119,7 @@ entry:
define float @test_multiple_register_outputs_same() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_same
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %8, 1245194 /* regdef:VGPR_32 */, def %9
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %8, 1376266 /* regdef:VGPR_32 */, def %9
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
Expand All @@ -136,7 +136,7 @@ define float @test_multiple_register_outputs_same() #0 {
define double @test_multiple_register_outputs_mixed() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_mixed
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %8, 2818058 /* regdef:VReg_64 */, def %9
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %8, 2883594 /* regdef:VReg_64 */, def %9
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
Expand Down Expand Up @@ -171,7 +171,7 @@ define amdgpu_kernel void @test_input_vgpr_imm() {
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32)
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY1]]
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1376265 /* reguse:VGPR_32 */, [[COPY1]]
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42)
ret void
Expand All @@ -185,7 +185,7 @@ define amdgpu_kernel void @test_input_sgpr_imm() {
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32)
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:SReg_32 */, [[COPY1]]
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:SReg_32 */, [[COPY1]]
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42)
ret void
Expand All @@ -212,7 +212,7 @@ define float @test_input_vgpr(i32 %src) nounwind {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %9, 1245193 /* reguse:VGPR_32 */, [[COPY1]]
; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %9, 1376265 /* reguse:VGPR_32 */, [[COPY1]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
Expand All @@ -227,7 +227,7 @@ define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind {
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1245194 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3)
; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1376266 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
Expand All @@ -244,7 +244,7 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1245194 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1376266 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
Expand All @@ -256,13 +256,13 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
define i32 @test_sgpr_matching_constraint() nounwind {
; CHECK-LABEL: name: test_sgpr_matching_constraint
; CHECK: bb.1.entry:
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %8
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %10
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %12, 1835017 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %12, 1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12
; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
Expand All @@ -285,7 +285,7 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1245194 /* regdef:VGPR_32 */, def %11, 1245194 /* regdef:VGPR_32 */, def %12, 1245194 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1376266 /* regdef:VGPR_32 */, def %11, 1376266 /* regdef:VGPR_32 */, def %12, 1376266 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13
Expand All @@ -306,10 +306,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint
; CHECK: bb.1.entry:
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %8
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %5(s32)
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %5(s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[FMUL]], %5, [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
Expand All @@ -33,7 +33,7 @@ body: |
%2:vgpr(s32) = COPY %1(s32)
%3:vgpr(s32) = G_FMUL %0, %2
%4:sgpr(s32) = G_FCONSTANT float 1.000000e+00
INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %5:vgpr_32
INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %5:vgpr_32
%6:vgpr(s32) = COPY %4(s32)
%7:vgpr(s32) = nnan G_AMDGPU_FMED3 %3(s32), %5(s32), %6(s32)
$vgpr0 = COPY %7(s32)
Expand Down
Loading