# RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s

# If there's a base offset, check that SILoadStoreOptimizer creates
# V_ADD_{I|U}32_e64 for that offset; _e64 uses a vreg for the carry (rather than
# $vcc, which is used in _e32); this ensures that $vcc is not inadvertently
# clobbered.
#
# In every body below, $vcc is defined by V_CMP_NE_U32_e32 before the DS
# instructions and read by S_AND_B64 / S_CBRANCH_VCCNZ after them, so an
# _e32 add (which writes $vcc) inserted for the new base would corrupt the
# branch condition.

# GCN-LABEL: name: ds_combine_base_offset{{$}}

# The two 4-byte stores at byte offsets 1024/1056 and the two loads at
# 1088/1120 are each 32 bytes (8 dwords) apart: each pair merges into one
# DS_WRITE2/DS_READ2 with dword offsets 0 and 8 from a newly added base.
# VI: V_ADD_CO_U32_e64 %6, %0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
# VI: V_ADD_CO_U32_e64 %10, %3,
# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,

# GFX9: V_ADD_U32_e64 %6, %0,
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8,
# GFX9: V_ADD_U32_e64 %9, %3,
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,

# The embedded IR module exists only to provide the %ir.tmp* values named by
# the memory operands in the MIR bodies below.
--- |
  @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4

  define amdgpu_kernel void @ds_combine_base_offset() {
  bb.0:
    br label %bb2

  bb1:
    ret void

  bb2:
    %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
    %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
    %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
    %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
    br label %bb1
  }

  define amdgpu_kernel void @ds_combine_base_offset_subreg() {
  bb.0:
    br label %bb2

  bb1:
    ret void

  bb2:
    %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
    %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
    %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
    %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
    br label %bb1
  }

  define amdgpu_kernel void @ds_combine_subreg() {
  bb.0:
    br label %bb2

  bb1:
    ret void

  bb2:
    %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
    %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
    %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
    %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
    br label %bb1
  }
---
name: ds_combine_base_offset
body: |
  bb.0:
    %0:vgpr_32 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
    ; $vcc is defined here and must survive until S_CBRANCH_VCCNZ below.
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...
# GCN-LABEL: name: ds_combine_base_offset_subreg{{$}}

# Same scenario as ds_combine_base_offset, but the DS address operands are
# sub0 subregisters of 64-bit vregs; the merge must still emit _e64 adds so
# the live $vcc is preserved.
# VI: V_ADD_CO_U32_e64 %6, %0.sub0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0.sub0, %3.sub0, 0, 8,
# VI: V_ADD_CO_U32_e64 %10, %3.sub0,
# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,

# GFX9: V_ADD_U32_e64 %6, %0.sub0,
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0.sub0, %3.sub0, 0, 8,
# GFX9: V_ADD_U32_e64 %9, %3.sub0,
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,
---
name: ds_combine_base_offset_subreg
body: |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
    ; $vcc is defined here and must survive until S_CBRANCH_VCCNZ below.
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    %3:vreg_64 = V_LSHLREV_B64 0, 0, implicit $exec
    DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    %4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...
# GCN-LABEL: name: ds_combine_subreg{{$}}

# Here the byte offsets are 0/32 (dwords 0 and 8), so the checks expect the
# pairs to merge using the subreg base directly, with no V_ADD for a new base.
# VI: DS_WRITE2_B32 %0.sub0, %0.sub0, %3.sub0, 0, 8,
# VI: DS_READ2_B32 %3.sub0, 0, 8,

# GFX9: DS_WRITE2_B32_gfx9 %0.sub0, %0.sub0, %3.sub0, 0, 8,
# GFX9: DS_READ2_B32_gfx9 %3.sub0, 0, 8,
---
name: ds_combine_subreg
body: |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
    ; $vcc is defined here and must survive until S_CBRANCH_VCCNZ below.
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    %3:vreg_64 = V_LSHLREV_B64 0, 0, implicit $exec
    DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    %4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...