1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s 4 5define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(half %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 6 ; UNPACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 7 ; UNPACKED: bb.1 (%ir-block.0): 8 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 9 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 10 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 11 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 12 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 13 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 14 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 15 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 16 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 17 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 18 ; UNPACKED: S_ENDPGM 0 19 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 20 ; PACKED: bb.1 (%ir-block.0): 21 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 22 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 23 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 24 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 25 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 26 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 27 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 28 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 29 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 30 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 31 ; PACKED: S_ENDPGM 0 32 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 33 ret void 34} 35 36define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 37 ; UNPACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 38 ; UNPACKED: bb.1 (%ir-block.0): 39 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 40 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 41 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 42 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 43 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 44 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 45 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 46 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 47 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 48 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 49 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 50 ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec 51 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 52 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) 53 ; UNPACKED: S_ENDPGM 0 54 ; PACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 55 ; PACKED: bb.1 (%ir-block.0): 56 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 57 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 58 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 59 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 60 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 61 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 62 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 63 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 64 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 65 ; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) 66 ; PACKED: S_ENDPGM 0 67 call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 68 ret void 69} 70 71; FIXME: Crashes 72; define amdgpu_ps void @raw_tbuffer_store_v3f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<3 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 73; call void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 74; ret void 75; } 76 77define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 78 ; UNPACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 79 ; UNPACKED: bb.1 (%ir-block.0): 80 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 81 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 82 ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 83 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 84 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 85 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 86 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 87 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 88 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 89 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 90 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 91 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 92 ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec 93 ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 94 ; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY1]], implicit $exec 95 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 96 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4) 97 ; UNPACKED: S_ENDPGM 0 98 ; PACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 99 ; PACKED: bb.1 (%ir-block.0): 100 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 101 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 102 ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 103 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 104 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 105 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 106 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 107 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 108 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 109 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 110 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 111 ; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4) 112 ; PACKED: S_ENDPGM 0 113 call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 114 ret void 115} 116 117; Waterfall for rsrc 118define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffset(half %val, <4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset) { 119 ; UNPACKED-LABEL: name: raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffset 120 ; UNPACKED: bb.1 (%ir-block.0): 121 ; UNPACKED: successors: %bb.2(0x80000000) 122 ; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 123 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 124 ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 125 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 126 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 127 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 128 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 129 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 130 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 131 ; UNPACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 132 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 133 ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 134 ; UNPACKED: bb.2: 135 ; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) 136 ; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 137 ; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 138 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 139 ; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec 140 ; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec 141 ; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec 142 ; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 143 ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec 144 ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 145 ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 146 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 147 ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 148 ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 149 ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec 150 ; UNPACKED: bb.3: 151 ; UNPACKED: successors: %bb.4(0x80000000) 152 ; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 153 ; UNPACKED: bb.4: 154 ; UNPACKED: S_ENDPGM 0 155 ; PACKED-LABEL: name: raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffset 156 ; PACKED: bb.1 (%ir-block.0): 157 ; PACKED: successors: %bb.2(0x80000000) 158 ; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 159 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 160 ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 161 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 162 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 163 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 164 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 165 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 166 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 167 ; PACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 168 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 169 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 170 ; PACKED: bb.2: 171 ; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) 172 ; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 173 ; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 174 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 175 ; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec 176 ; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec 177 ; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec 178 ; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 179 ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec 180 ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 181 ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 182 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 183 ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 184 ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 185 ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec 186 ; PACKED: bb.3: 187 ; PACKED: successors: %bb.4(0x80000000) 188 ; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 189 ; PACKED: bb.4: 190 ; PACKED: S_ENDPGM 0 191 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 192 ret void 193} 194 195; Waterfall for rsrc and soffset 196define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soffset(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 197 ; UNPACKED-LABEL: name: raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soffset 198 ; UNPACKED: bb.1 (%ir-block.0): 199 ; UNPACKED: successors: %bb.2(0x80000000) 200 ; UNPACKED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 201 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 202 ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 203 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 204 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 205 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 206 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 207 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 208 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 209 ; UNPACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 210 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 211 ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 212 ; UNPACKED: bb.2: 213 ; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) 214 ; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 215 ; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 216 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 217 ; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec 218 ; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec 219 ; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec 220 ; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 221 ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec 222 ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 223 ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 224 ; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec 225 ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec 226 ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc 227 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 228 ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 229 ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 230 ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec 231 ; UNPACKED: bb.3: 232 ; UNPACKED: successors: %bb.4(0x80000000) 233 ; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 234 ; UNPACKED: bb.4: 235 ; UNPACKED: S_ENDPGM 0 236 ; PACKED-LABEL: name: raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soffset 237 ; PACKED: bb.1 (%ir-block.0): 238 ; PACKED: successors: %bb.2(0x80000000) 239 ; PACKED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 240 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 241 ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 242 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 243 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 244 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 245 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 246 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 247 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 248 ; PACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 249 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 250 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 251 ; PACKED: bb.2: 252 ; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) 253 ; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 254 ; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 255 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 256 ; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec 257 ; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec 258 ; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec 259 ; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 260 ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec 261 ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 262 ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 263 ; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec 264 ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec 265 ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc 266 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 267 ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 268 ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 269 ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec 270 ; PACKED: bb.3: 271 ; PACKED: successors: %bb.4(0x80000000) 272 ; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 273 ; PACKED: bb.4: 274 ; PACKED: S_ENDPGM 0 275 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 276 ret void 277} 278 279; Waterfall for rsrc and soffset, copy for voffset 280define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset(half %val, <4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) { 281 ; UNPACKED-LABEL: name: raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset 282 ; UNPACKED: bb.1 (%ir-block.0): 283 ; UNPACKED: successors: %bb.2(0x80000000) 284 ; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 285 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 286 ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 287 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 288 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 289 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 290 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 291 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 292 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 293 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 294 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 295 ; UNPACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 296 ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 297 ; UNPACKED: bb.2: 298 ; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) 299 ; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec 300 ; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec 301 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 302 ; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec 303 ; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec 304 ; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec 305 ; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 306 ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec 307 ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 308 ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 309 ; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec 310 ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec 311 ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc 312 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 313 ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 314 ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 315 ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec 316 ; UNPACKED: bb.3: 317 ; UNPACKED: successors: %bb.4(0x80000000) 318 ; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 319 ; UNPACKED: bb.4: 320 ; UNPACKED: S_ENDPGM 0 321 ; PACKED-LABEL: name: raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset 322 ; PACKED: bb.1 (%ir-block.0): 323 ; PACKED: successors: %bb.2(0x80000000) 324 ; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 325 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 326 ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 327 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 328 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 329 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 330 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 331 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 332 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 333 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 334 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 335 ; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 336 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 337 ; PACKED: bb.2: 338 ; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) 339 ; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec 340 ; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec 341 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 342 ; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec 343 ; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec 344 ; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec 345 ; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 346 ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec 347 ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 348 ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 349 ; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec 350 ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec 351 ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc 352 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 353 ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 354 ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 355 ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec 356 ; PACKED: bb.3: 357 ; PACKED: successors: %bb.4(0x80000000) 358 ; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 359 ; PACKED: bb.4: 360 ; PACKED: S_ENDPGM 0 361 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 362 ret void 363} 364 365define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(half %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 366 ; UNPACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc 367 ; UNPACKED: bb.1 (%ir-block.0): 368 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 369 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 370 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 371 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 372 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 373 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 374 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 375 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 376 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 377 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 378 ; UNPACKED: S_ENDPGM 0 379 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc 380 ; PACKED: bb.1 (%ir-block.0): 381 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 382 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 383 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 384 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 385 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 386 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 387 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 388 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 389 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 390 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 391 ; PACKED: S_ENDPGM 0 392 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) 393 ret void 394} 395 396define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(half %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 397 ; UNPACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc 398 ; UNPACKED: bb.1 (%ir-block.0): 399 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 400 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 401 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 402 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 403 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 404 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 405 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 406 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 407 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 408 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 409 ; UNPACKED: S_ENDPGM 0 410 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc 411 ; PACKED: bb.1 (%ir-block.0): 412 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 413 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 414 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 415 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 416 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 417 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 418 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 419 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 420 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 421 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 422 ; PACKED: S_ENDPGM 0 423 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) 424 ret void 425} 426 427define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc(half %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 428 ; UNPACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc 429 ; UNPACKED: bb.1 (%ir-block.0): 430 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 431 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 432 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 433 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 434 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 435 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 436 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 437 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 438 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 439 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 440 ; UNPACKED: S_ENDPGM 0 441 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc 442 ; PACKED: bb.1 (%ir-block.0): 443 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 444 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 445 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 446 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 447 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 448 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 449 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 450 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 451 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 452 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 453 ; PACKED: S_ENDPGM 0 454 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) 455 ret void 456} 457 458define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(half %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 459 ; UNPACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc 460 ; UNPACKED: bb.1 (%ir-block.0): 461 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 462 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 463 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 464 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 465 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 466 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 467 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 468 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 469 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 470 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 471 ; UNPACKED: S_ENDPGM 0 472 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc 473 ; PACKED: bb.1 (%ir-block.0): 474 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 475 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 476 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 477 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 478 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 479 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 480 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 481 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 482 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 483 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) 484 ; PACKED: S_ENDPGM 0 485 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) 486 ret void 487} 488 489declare void @llvm.amdgcn.raw.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg, i32 immarg) 490declare void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg, i32 immarg) 491declare void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32 immarg, i32 immarg) 492declare void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg, i32 immarg) 493