-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[AMDGPU] Allow allocation of lo128 registers from all banks #172614
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -66,7 +66,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { | |
| define i32 @asm_vgpr_early_clobber() { | ||
| ; CHECK-LABEL: name: asm_vgpr_early_clobber | ||
| ; CHECK: bb.1 (%ir-block.0): | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1245195 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1245195 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1 | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1376267 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1376267 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's this magic number?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are register class (RC) IDs encoded in MIR. I do not like these tests, but every time you touch register info they have to be updated. It does not have anything to do with the patch itself. |
||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 | ||
| ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] | ||
|
|
@@ -94,7 +94,7 @@ entry: | |
| define i32 @test_single_vgpr_output() nounwind { | ||
| ; CHECK-LABEL: name: test_single_vgpr_output | ||
| ; CHECK: bb.1.entry: | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %8 | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %8 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 | ||
| ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) | ||
| ; CHECK-NEXT: SI_RETURN implicit $vgpr0 | ||
|
|
@@ -106,7 +106,7 @@ entry: | |
| define i32 @test_single_sgpr_output_s32() nounwind { | ||
| ; CHECK-LABEL: name: test_single_sgpr_output_s32 | ||
| ; CHECK: bb.1.entry: | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %8 | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 | ||
| ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) | ||
| ; CHECK-NEXT: SI_RETURN implicit $vgpr0 | ||
|
|
@@ -119,7 +119,7 @@ entry: | |
| define float @test_multiple_register_outputs_same() #0 { | ||
| ; CHECK-LABEL: name: test_multiple_register_outputs_same | ||
| ; CHECK: bb.1 (%ir-block.0): | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %8, 1245194 /* regdef:VGPR_32 */, def %9 | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %8, 1376266 /* regdef:VGPR_32 */, def %9 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 | ||
| ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] | ||
|
|
@@ -136,7 +136,7 @@ define float @test_multiple_register_outputs_same() #0 { | |
| define double @test_multiple_register_outputs_mixed() #0 { | ||
| ; CHECK-LABEL: name: test_multiple_register_outputs_mixed | ||
| ; CHECK: bb.1 (%ir-block.0): | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %8, 2818058 /* regdef:VReg_64 */, def %9 | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %8, 2883594 /* regdef:VReg_64 */, def %9 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9 | ||
| ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) | ||
|
|
@@ -171,7 +171,7 @@ define amdgpu_kernel void @test_input_vgpr_imm() { | |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 | ||
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY1]] | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1376265 /* reguse:VGPR_32 */, [[COPY1]] | ||
| ; CHECK-NEXT: S_ENDPGM 0 | ||
| call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42) | ||
| ret void | ||
|
|
@@ -185,7 +185,7 @@ define amdgpu_kernel void @test_input_sgpr_imm() { | |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 | ||
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32) | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:SReg_32 */, [[COPY1]] | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:SReg_32 */, [[COPY1]] | ||
| ; CHECK-NEXT: S_ENDPGM 0 | ||
| call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42) | ||
| ret void | ||
|
|
@@ -212,7 +212,7 @@ define float @test_input_vgpr(i32 %src) nounwind { | |
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) | ||
| ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %9, 1245193 /* reguse:VGPR_32 */, [[COPY1]] | ||
| ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %9, 1376265 /* reguse:VGPR_32 */, [[COPY1]] | ||
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9 | ||
| ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) | ||
| ; CHECK-NEXT: SI_RETURN implicit $vgpr0 | ||
|
|
@@ -227,7 +227,7 @@ define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind { | |
| ; CHECK-NEXT: liveins: $vgpr0 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 | ||
| ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1245194 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3) | ||
| ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1376266 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3) | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 | ||
| ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) | ||
| ; CHECK-NEXT: SI_RETURN implicit $vgpr0 | ||
|
|
@@ -244,7 +244,7 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { | |
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 | ||
| ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) | ||
| ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1245194 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) | ||
| ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1376266 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) | ||
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11 | ||
| ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) | ||
| ; CHECK-NEXT: SI_RETURN implicit $vgpr0 | ||
|
|
@@ -256,13 +256,13 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { | |
| define i32 @test_sgpr_matching_constraint() nounwind { | ||
| ; CHECK-LABEL: name: test_sgpr_matching_constraint | ||
| ; CHECK: bb.1.entry: | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %8 | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %10 | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %10 | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10 | ||
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) | ||
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) | ||
| ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %12, 1835017 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) | ||
| ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %12, 1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) | ||
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12 | ||
| ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) | ||
| ; CHECK-NEXT: SI_RETURN implicit $vgpr0 | ||
|
|
@@ -285,7 +285,7 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { | |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) | ||
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) | ||
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) | ||
| ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1245194 /* regdef:VGPR_32 */, def %11, 1245194 /* regdef:VGPR_32 */, def %12, 1245194 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) | ||
| ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1376266 /* regdef:VGPR_32 */, def %11, 1376266 /* regdef:VGPR_32 */, def %12, 1376266 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) | ||
| ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11 | ||
| ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12 | ||
| ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13 | ||
|
|
@@ -306,10 +306,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { | |
| define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { | ||
| ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint | ||
| ; CHECK: bb.1.entry: | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %8 | ||
| ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) | ||
| ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) | ||
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10 | ||
| ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) | ||
| ; CHECK-NEXT: SI_RETURN implicit $vgpr0 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we go with say 64 to leave us some space between 32 and this one?
(Happy to go with 33, just asking)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Frankly, it does not work, whatever number I set. In theory it has to be higher than VGPR_32. In practice it neither works nor matters; I've tried 10000. Thus I have shrunk the RC size to 896 registers instead of 1024, so tablegen will not make it a base class for anything. I have to note that, counterintuitively, if it were 1024 registers, the size of the generated reginfo would be 1/3 smaller, because the class would become indistinguishable from VGPR_32. I wish we could get rid of this RC altogether, though, and pass the operand type to `getRawAllocationOrder()` instead, but its current uses by RA do not seem to collect operands at all. At least some of them.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What would the best solution look like if you had infinite time?
Where I'm going with this is that RA is definitely not flexible enough and may need an overhaul, so I'm trying to gauge what your ideas would be on this side.
That said, here it is more of a TableGen issue, or more precisely an issue of how we can compose subregs, but still.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a good question, but I do not have a good answer. For one, it looks like the subreg mechanism does not work well for our huge register file and huge register tuples. The number of permutations is just insanely large. A separate issue is that we have subreg liveness on the one hand, but have to collect reg/subreg pairs from MOs on the other, which duplicates logic in many places. We might have something simpler, but llvm has to serve all other targets too, with their own register file organization.
This case is specifically tough because it requires different register numbers to be allocated for specific operand types in specific instruction forms, even though it is a GPR. So, theoretically a GPR should be usable for any purpose, but it is not.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we need to review all our uses of the TRI interfaces and see if we even need many of these generated functions matching sub- and super-regs. I have a gut feeling that in many cases we are using them as a substitute for simpler things, like 'do they even alias?'. Then, if we see we can get rid of most such uses, we could probably ask tablegen not to generate the whole permutation matrix. Of course, when you need to extract a subreg from a tuple you need to do it, but do we really need to extract something like `sub3_sub4_sub5` from a 1024-bit wide register?