; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck %s

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
  ; CHECK:   BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; CHECK:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; CHECK:   [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK:   [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK:   successors: %bb.4(0x80000000)
  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 16, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 16
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4095
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK:   %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4096
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}


; Check what happens with offset add inside a waterfall loop
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; CHECK:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK:   %15:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec
  ; CHECK:   [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK:   [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK:   successors: %bb.4(0x80000000)
  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4096
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)