From 534b6fec9ecbb6bc3d494e7a146b47df80928ef4 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 19 Dec 2025 12:27:16 -0800 Subject: [PATCH] [AMDGPU] Limit allocation of lo128 registers for occupancy The parent change allows allocation of lo128 VGPRs from all 4 banks. That may result in an undesired allocation that leaves a hole of up to 128 registers, for example if v0-v127 are allocated and v128-v255 are free. Limit the available allocation order according to the occupancy. Both hard occupancy limits and the occupancy achieved during scheduling are considered. It is better to spill a register than to drop occupancy in this case. --- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 16 ++- .../AMDGPU/regalloc-vgpr_lo128-gfx1250.mir | 100 +++++++++++++++++- .../AMDGPU/shrink-vgpr_lo128-gfx1250.mir | 29 +++++ 3 files changed, 141 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 838d31df5bacd..b727c4680ad5b 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -639,8 +639,20 @@ def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg1 let AltOrders = [(add (sequence "VGPR%u", 0, 127), (sequence "VGPR%u", 256, 383), (sequence "VGPR%u", 512, 639), - (sequence "VGPR%u", 768, 895))]; - let AltOrderSelect = [{ return 1; }]; + (sequence "VGPR%u", 768, 895)), + (add (sequence "VGPR%u", 0, 127), + (sequence "VGPR%u", 256, 383), + (sequence "VGPR%u", 512, 639)), + (add (sequence "VGPR%u", 0, 127), + (sequence "VGPR%u", 256, 383)), + (add (sequence "VGPR%u", 0, 127))]; + let AltOrderSelect = [{ + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + unsigned N = ST.getMaxNumVGPRs(MF); + unsigned SchedWaves = MF.getInfo<SIMachineFunctionInfo>()->getMinAllowedOccupancy(); + N = std::min(N, ST.getMaxNumVGPRs(SchedWaves, 0)); + return N > 768 ? 1 : N > 512 ? 2 : N > 256 ? 
3 : 4; + }]; let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor)); let GeneratePressureSet = 0; let Size = 32; diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-vgpr_lo128-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/regalloc-vgpr_lo128-gfx1250.mir index ba0bfadb248c1..2f199409a8dc1 100644 --- a/llvm/test/CodeGen/AMDGPU/regalloc-vgpr_lo128-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/regalloc-vgpr_lo128-gfx1250.mir @@ -6,8 +6,14 @@ define amdgpu_kernel void @rcp_f16_above_256_vregs() #0 { ret void } define amdgpu_kernel void @rcp_f16_above_512_vregs() #0 { ret void } define amdgpu_kernel void @rcp_f16_above_768_vregs() #0 { ret void } - - attributes #0 = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-waves-per-eu"="1" } + define amdgpu_kernel void @rcp_f16_above_128_vregs_occ4() #2 { ret void } + define amdgpu_kernel void @rcp_f16_above_256_vregs_occ2() #1 { ret void } + define amdgpu_kernel void @rcp_f16_above_128_vregs_sched_occ4() #0 { ret void } + define amdgpu_kernel void @rcp_f16_above_256_vregs_sched_occ2() #0 { ret void } + + attributes #0 = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-waves-per-eu"="1,1" } + attributes #1 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2" } + attributes #2 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4,4" } ... 
# GCN-LABEL: name: rcp_f16_above_128_vregs @@ -21,6 +27,8 @@ --- name: rcp_f16_above_128_vregs tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: ; Occupy all low 128 VGPRs: @@ -43,6 +51,8 @@ body: | --- name: rcp_f16_above_256_vregs tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, 
$vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383 @@ -64,6 +74,8 @@ body: | --- name: rcp_f16_above_512_vregs tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, 
$vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383, $vgpr512_vgpr513_vgpr514_vgpr515_vgpr516_vgpr517_vgpr518_vgpr519_vgpr520_vgpr521_vgpr522_vgpr523_vgpr524_vgpr525_vgpr526_vgpr527, $vgpr528_vgpr529_vgpr530_vgpr531_vgpr532_vgpr533_vgpr534_vgpr535_vgpr536_vgpr537_vgpr538_vgpr539_vgpr540_vgpr541_vgpr542_vgpr543, $vgpr544_vgpr545_vgpr546_vgpr547_vgpr548_vgpr549_vgpr550_vgpr551_vgpr552_vgpr553_vgpr554_vgpr555_vgpr556_vgpr557_vgpr558_vgpr559, $vgpr560_vgpr561_vgpr562_vgpr563_vgpr564_vgpr565_vgpr566_vgpr567_vgpr568_vgpr569_vgpr570_vgpr571_vgpr572_vgpr573_vgpr574_vgpr575, $vgpr576_vgpr577_vgpr578_vgpr579_vgpr580_vgpr581_vgpr582_vgpr583_vgpr584_vgpr585_vgpr586_vgpr587_vgpr588_vgpr589_vgpr590_vgpr591, $vgpr592_vgpr593_vgpr594_vgpr595_vgpr596_vgpr597_vgpr598_vgpr599_vgpr600_vgpr601_vgpr602_vgpr603_vgpr604_vgpr605_vgpr606_vgpr607, $vgpr608_vgpr609_vgpr610_vgpr611_vgpr612_vgpr613_vgpr614_vgpr615_vgpr616_vgpr617_vgpr618_vgpr619_vgpr620_vgpr621_vgpr622_vgpr623, $vgpr624_vgpr625_vgpr626_vgpr627_vgpr628_vgpr629_vgpr630_vgpr631_vgpr632_vgpr633_vgpr634_vgpr635_vgpr636_vgpr637_vgpr638_vgpr639 @@ -83,6 +95,8 @@ body: | --- name: rcp_f16_above_768_vregs tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383, $vgpr512_vgpr513_vgpr514_vgpr515_vgpr516_vgpr517_vgpr518_vgpr519_vgpr520_vgpr521_vgpr522_vgpr523_vgpr524_vgpr525_vgpr526_vgpr527, 
$vgpr528_vgpr529_vgpr530_vgpr531_vgpr532_vgpr533_vgpr534_vgpr535_vgpr536_vgpr537_vgpr538_vgpr539_vgpr540_vgpr541_vgpr542_vgpr543, $vgpr544_vgpr545_vgpr546_vgpr547_vgpr548_vgpr549_vgpr550_vgpr551_vgpr552_vgpr553_vgpr554_vgpr555_vgpr556_vgpr557_vgpr558_vgpr559, $vgpr560_vgpr561_vgpr562_vgpr563_vgpr564_vgpr565_vgpr566_vgpr567_vgpr568_vgpr569_vgpr570_vgpr571_vgpr572_vgpr573_vgpr574_vgpr575, $vgpr576_vgpr577_vgpr578_vgpr579_vgpr580_vgpr581_vgpr582_vgpr583_vgpr584_vgpr585_vgpr586_vgpr587_vgpr588_vgpr589_vgpr590_vgpr591, $vgpr592_vgpr593_vgpr594_vgpr595_vgpr596_vgpr597_vgpr598_vgpr599_vgpr600_vgpr601_vgpr602_vgpr603_vgpr604_vgpr605_vgpr606_vgpr607, $vgpr608_vgpr609_vgpr610_vgpr611_vgpr612_vgpr613_vgpr614_vgpr615_vgpr616_vgpr617_vgpr618_vgpr619_vgpr620_vgpr621_vgpr622_vgpr623, $vgpr624_vgpr625_vgpr626_vgpr627_vgpr628_vgpr629_vgpr630_vgpr631_vgpr632_vgpr633_vgpr634_vgpr635_vgpr636_vgpr637_vgpr638_vgpr639, $vgpr768_vgpr769_vgpr770_vgpr771_vgpr772_vgpr773_vgpr774_vgpr775_vgpr776_vgpr777_vgpr778_vgpr779_vgpr780_vgpr781_vgpr782_vgpr783, $vgpr784_vgpr785_vgpr786_vgpr787_vgpr788_vgpr789_vgpr790_vgpr791_vgpr792_vgpr793_vgpr794_vgpr795_vgpr796_vgpr797_vgpr798_vgpr799, $vgpr800_vgpr801_vgpr802_vgpr803_vgpr804_vgpr805_vgpr806_vgpr807_vgpr808_vgpr809_vgpr810_vgpr811_vgpr812_vgpr813_vgpr814_vgpr815, $vgpr816_vgpr817_vgpr818_vgpr819_vgpr820_vgpr821_vgpr822_vgpr823_vgpr824_vgpr825_vgpr826_vgpr827_vgpr828_vgpr829_vgpr830_vgpr831, $vgpr832_vgpr833_vgpr834_vgpr835_vgpr836_vgpr837_vgpr838_vgpr839_vgpr840_vgpr841_vgpr842_vgpr843_vgpr844_vgpr845_vgpr846_vgpr847, $vgpr848_vgpr849_vgpr850_vgpr851_vgpr852_vgpr853_vgpr854_vgpr855_vgpr856_vgpr857_vgpr858_vgpr859_vgpr860_vgpr861_vgpr862_vgpr863, $vgpr864_vgpr865_vgpr866_vgpr867_vgpr868_vgpr869_vgpr870_vgpr871_vgpr872_vgpr873_vgpr874_vgpr875_vgpr876_vgpr877_vgpr878_vgpr879, $vgpr880_vgpr881_vgpr882_vgpr883_vgpr884_vgpr885_vgpr886_vgpr887_vgpr888_vgpr889_vgpr890_vgpr891_vgpr892_vgpr893_vgpr894_vgpr895 @@ -92,3 +106,85 @@ body: | S_ENDPGM 0, implicit 
%1, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, implicit $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, implicit $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, implicit $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, implicit $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, implicit $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, implicit $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, implicit 
$vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383, implicit $vgpr512_vgpr513_vgpr514_vgpr515_vgpr516_vgpr517_vgpr518_vgpr519_vgpr520_vgpr521_vgpr522_vgpr523_vgpr524_vgpr525_vgpr526_vgpr527, implicit $vgpr528_vgpr529_vgpr530_vgpr531_vgpr532_vgpr533_vgpr534_vgpr535_vgpr536_vgpr537_vgpr538_vgpr539_vgpr540_vgpr541_vgpr542_vgpr543, implicit $vgpr544_vgpr545_vgpr546_vgpr547_vgpr548_vgpr549_vgpr550_vgpr551_vgpr552_vgpr553_vgpr554_vgpr555_vgpr556_vgpr557_vgpr558_vgpr559, implicit $vgpr560_vgpr561_vgpr562_vgpr563_vgpr564_vgpr565_vgpr566_vgpr567_vgpr568_vgpr569_vgpr570_vgpr571_vgpr572_vgpr573_vgpr574_vgpr575, implicit $vgpr576_vgpr577_vgpr578_vgpr579_vgpr580_vgpr581_vgpr582_vgpr583_vgpr584_vgpr585_vgpr586_vgpr587_vgpr588_vgpr589_vgpr590_vgpr591, implicit $vgpr592_vgpr593_vgpr594_vgpr595_vgpr596_vgpr597_vgpr598_vgpr599_vgpr600_vgpr601_vgpr602_vgpr603_vgpr604_vgpr605_vgpr606_vgpr607, implicit $vgpr608_vgpr609_vgpr610_vgpr611_vgpr612_vgpr613_vgpr614_vgpr615_vgpr616_vgpr617_vgpr618_vgpr619_vgpr620_vgpr621_vgpr622_vgpr623, implicit $vgpr624_vgpr625_vgpr626_vgpr627_vgpr628_vgpr629_vgpr630_vgpr631_vgpr632_vgpr633_vgpr634_vgpr635_vgpr636_vgpr637_vgpr638_vgpr639, implicit $vgpr768_vgpr769_vgpr770_vgpr771_vgpr772_vgpr773_vgpr774_vgpr775_vgpr776_vgpr777_vgpr778_vgpr779_vgpr780_vgpr781_vgpr782_vgpr783, implicit $vgpr784_vgpr785_vgpr786_vgpr787_vgpr788_vgpr789_vgpr790_vgpr791_vgpr792_vgpr793_vgpr794_vgpr795_vgpr796_vgpr797_vgpr798_vgpr799, implicit $vgpr800_vgpr801_vgpr802_vgpr803_vgpr804_vgpr805_vgpr806_vgpr807_vgpr808_vgpr809_vgpr810_vgpr811_vgpr812_vgpr813_vgpr814_vgpr815, implicit $vgpr816_vgpr817_vgpr818_vgpr819_vgpr820_vgpr821_vgpr822_vgpr823_vgpr824_vgpr825_vgpr826_vgpr827_vgpr828_vgpr829_vgpr830_vgpr831, implicit $vgpr832_vgpr833_vgpr834_vgpr835_vgpr836_vgpr837_vgpr838_vgpr839_vgpr840_vgpr841_vgpr842_vgpr843_vgpr844_vgpr845_vgpr846_vgpr847, implicit 
$vgpr848_vgpr849_vgpr850_vgpr851_vgpr852_vgpr853_vgpr854_vgpr855_vgpr856_vgpr857_vgpr858_vgpr859_vgpr860_vgpr861_vgpr862_vgpr863, implicit $vgpr864_vgpr865_vgpr866_vgpr867_vgpr868_vgpr869_vgpr870_vgpr871_vgpr872_vgpr873_vgpr874_vgpr875_vgpr876_vgpr877_vgpr878_vgpr879, implicit $vgpr880_vgpr881_vgpr882_vgpr883_vgpr884_vgpr885_vgpr886_vgpr887_vgpr888_vgpr889_vgpr890_vgpr891_vgpr892_vgpr893_vgpr894_vgpr895 ... + +# GCN-LABEL: name: rcp_f16_above_128_vregs_occ4 + +# VGPR budget is 256 registers, do not use v256. + +# GCN: $vgpr0 = IMPLICIT_DEF +# GCN: $vgpr0 = V_RCP_F16_fake16_e32 undef $vgpr0 +--- +name: rcp_f16_above_128_vregs_occ4 +tracksRegLiveness: true +body: | + bb.0: + ; Occupy all low 128 VGPRs: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127 + + %0:vgpr_32_lo128 = IMPLICIT_DEF + %1:vgpr_32_lo128 = V_RCP_F16_fake16_e32 killed %0, implicit $mode, implicit $exec + + S_ENDPGM 0, implicit %1, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127 +... + +# GCN-LABEL: name: rcp_f16_above_256_vregs_occ2 + +# VGPR budget is 512 registers, do not use v512. + +# GCN: $vgpr0 = IMPLICIT_DEF +# GCN: $vgpr0 = V_RCP_F16_fake16_e32 undef $vgpr0 +--- +name: rcp_f16_above_256_vregs_occ2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383 + + %0:vgpr_32_lo128 = IMPLICIT_DEF + %1:vgpr_32_lo128 = V_RCP_F16_fake16_e32 killed %0, implicit $mode, implicit $exec + + S_ENDPGM 0, implicit %1, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, implicit $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, implicit $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, implicit $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, implicit $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, implicit $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, implicit $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, implicit $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383 +... + +# GCN-LABEL: name: rcp_f16_above_128_vregs_sched_occ4 + +# VGPR budget is 256 registers, do not use v256. 
+ +# GCN: $vgpr0 = IMPLICIT_DEF +# GCN: $vgpr0 = V_RCP_F16_fake16_e32 undef $vgpr0 +--- +name: rcp_f16_above_128_vregs_sched_occ4 +tracksRegLiveness: true +machineFunctionInfo: + occupancy: 4 +body: | + bb.0: + ; Occupy all low 128 VGPRs: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127 + + %0:vgpr_32_lo128 = IMPLICIT_DEF + %1:vgpr_32_lo128 = V_RCP_F16_fake16_e32 killed %0, implicit $mode, implicit $exec + + S_ENDPGM 0, implicit %1, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127 +... + +# GCN-LABEL: name: rcp_f16_above_256_vregs_sched_occ2 + +# VGPR budget is 512 registers, do not use v512. + +# GCN: $vgpr0 = IMPLICIT_DEF +# GCN: $vgpr0 = V_RCP_F16_fake16_e32 undef $vgpr0 +--- +name: rcp_f16_above_256_vregs_sched_occ2 +tracksRegLiveness: true +machineFunctionInfo: + occupancy: 2 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, 
$vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383 + + %0:vgpr_32_lo128 = IMPLICIT_DEF + %1:vgpr_32_lo128 = V_RCP_F16_fake16_e32 killed %0, implicit $mode, implicit $exec + + S_ENDPGM 0, implicit %1, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit 
$vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, implicit $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, implicit $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, implicit $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, implicit $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, implicit $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, implicit $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, implicit $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-vgpr_lo128-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/shrink-vgpr_lo128-gfx1250.mir index 8a2447d31297b..1bc759d3e4ec9 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-vgpr_lo128-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/shrink-vgpr_lo128-gfx1250.mir @@ -1,10 +1,25 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=si-shrink-instructions -o - %s | FileCheck --check-prefix=GCN %s +--- | + define amdgpu_kernel void @rcp_f16_v127() #0 { ret void } + define amdgpu_kernel void @rcp_f16_v128() #0 { ret void } + define amdgpu_kernel void @rcp_f16_v256() #0 { ret void } + define amdgpu_kernel void @rcp_f16_v384() #0 { ret void } + define amdgpu_kernel void @rcp_f16_v512() #0 { ret void } + define amdgpu_kernel void @rcp_f16_v767() #0 { ret void } + define amdgpu_kernel void @rcp_f16_v768() #0 { ret void } + define amdgpu_kernel void @rcp_f16_v1023() #0 { ret void } + + attributes #0 = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-waves-per-eu"="1,1" } +... 
+ # GCN-LABEL: name: rcp_f16_v127 # GCN: $vgpr127 = V_RCP_F16_fake16_e32 killed $vgpr127 --- name: rcp_f16_v127 tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: $vgpr127 = IMPLICIT_DEF @@ -16,6 +31,8 @@ body: | --- name: rcp_f16_v128 tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: $vgpr128 = IMPLICIT_DEF @@ -27,6 +44,8 @@ body: | --- name: rcp_f16_v256 tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: $vgpr256 = IMPLICIT_DEF @@ -38,6 +57,8 @@ body: | --- name: rcp_f16_v384 tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: $vgpr384 = IMPLICIT_DEF @@ -49,6 +70,8 @@ body: | --- name: rcp_f16_v512 tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: $vgpr512 = IMPLICIT_DEF @@ -60,6 +83,8 @@ body: | --- name: rcp_f16_v767 tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: $vgpr767 = IMPLICIT_DEF @@ -71,6 +96,8 @@ body: | --- name: rcp_f16_v768 tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: $vgpr768 = IMPLICIT_DEF @@ -82,6 +109,8 @@ body: | --- name: rcp_f16_v1023 tracksRegLiveness: true +machineFunctionInfo: + occupancy: 1 body: | bb.0: $vgpr1023 = IMPLICIT_DEF