# Exercises the instruction-select pass on G_LOAD instructions whose pointer
# and result are assigned to the sgpr bank, once per subtarget (tahiti, hawaii,
# fiji). The expected immediate / register / 64-bit-add forms of the selected
# scalar loads are listed per subtarget prefix below.
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI

--- |
  define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
  define amdgpu_kernel void @smrd_wide() { ret void }
  define amdgpu_kernel void @constant_address_positive() { ret void }
...
---

name: smrd_imm
legalized: true
regBankSelected: true

# The check groups below appear in the same order as the loads in the body.
# GCN-LABEL: name: smrd_imm{{$}}
# GCN: body:
# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1

# Immediate offset:
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0, 0

# Max immediate offset for SI
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0, 0

# Immediate overflow for SI
# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0

# Max immediate offset for VI
# SI: [[K1048572:%[0-9]+]]:sreg_32 = S_MOV_B32 1048572
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572

#
# Immediate overflow for VI
# SIVI: [[K1048576:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0

# Max immediate for CI
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0, 0

# Immediate overflow for CI
# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4
# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0

# Max 32-bit byte offset
# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0

# Overflow 32-bit byte offset
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0, 0

# Pointer loads
# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
# GCN: $sgpr0_sgpr1 = COPY [[AS0]]
# GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
# GCN: $sgpr0_sgpr1 = COPY [[AS1]]
# GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
# GCN: $sgpr0_sgpr1 = COPY [[AS4]]

body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    %0:sgpr(p4) = COPY $sgpr0_sgpr1

    ; Immediate offset (byte offset 4)
    %1:sgpr(s64) = G_CONSTANT i64 4
    %2:sgpr(p4) = G_PTR_ADD %0, %1
    %3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %3

    ; Max immediate offset for SI (byte offset 1020)
    %4:sgpr(s64) = G_CONSTANT i64 1020
    %5:sgpr(p4) = G_PTR_ADD %0, %4
    %6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %6

    ; Immediate overflow for SI (byte offset 1024)
    %7:sgpr(s64) = G_CONSTANT i64 1024
    %8:sgpr(p4) = G_PTR_ADD %0, %7
    %9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %9

    ; Max immediate offset for VI (byte offset 1048572)
    %10:sgpr(s64) = G_CONSTANT i64 1048572
    %11:sgpr(p4) = G_PTR_ADD %0, %10
    %12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %12

    ; Immediate overflow for VI (byte offset 1048576)
    %13:sgpr(s64) = G_CONSTANT i64 1048576
    %14:sgpr(p4) = G_PTR_ADD %0, %13
    %15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %15

    ; Max immediate for CI (byte offset 17179869180)
    %16:sgpr(s64) = G_CONSTANT i64 17179869180
    %17:sgpr(p4) = G_PTR_ADD %0, %16
    %18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %18

    ; Immediate overflow for CI (byte offset 17179869184)
    %19:sgpr(s64) = G_CONSTANT i64 17179869184
    %20:sgpr(p4) = G_PTR_ADD %0, %19
    %21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %21

    ; Max 32-bit byte offset (4294967292)
    %22:sgpr(s64) = G_CONSTANT i64 4294967292
    %23:sgpr(p4) = G_PTR_ADD %0, %22
    %24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %24

    ; Overflow 32-bit byte offset (4294967296)
    %25:sgpr(s64) = G_CONSTANT i64 4294967296
    %26:sgpr(p4) = G_PTR_ADD %0, %25
    %27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %27

    ; Pointer loads: the same 8-byte load with p0, p1 and p4 result types
    %28:sgpr(p0) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %28

    %29:sgpr(p1) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %29

    %30:sgpr(p4) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %30

...
---

name: smrd_wide
legalized: true
regBankSelected: true

# Loads of <8 x s32> and <16 x s32> through a p4 pointer and a p1 pointer.
# NOTE(review): none of the RUN lines enables the CHECK prefix, so FileCheck
# does not evaluate the '; CHECK' lines in this body, and they spell the loads
# as lower-case mnemonics. Move them to an enabled prefix once the expected
# selected opcodes have been confirmed against real llc output.

body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
    %0:sgpr(p4) = COPY $sgpr0_sgpr1
    %1:sgpr(p1) = COPY $sgpr2_sgpr3

    ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
    ; CHECK: s_load_dwordx8 [[CONSTANT_PTR]]
    %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2

    ; CHECK: s_load_dwordx16 [[CONSTANT_PTR]]
    %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 4)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3

    ; CHECK: s_load_dwordx8 [[GLOBAL_PTR]]
    %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load 32, addrspace 1)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4

    ; CHECK: s_load_dwordx16 [[GLOBAL_PTR]]
    %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load 64, addrspace 1)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %5
...
# Test a load of an offset from a constant base address
# The base pointer is the constant 44 (not a live-in register) and the load
# address adds the constant 64 to it. The checks expect the base to be
# materialized with S_MOV_B64 and the 64 to be folded into the load's
# immediate operand: 64 under the VI prefix (fiji) and 16 under the SICI
# prefix (tahiti, hawaii).
# GCN-LABEL: name: constant_address_positive{{$}}
# GCN: %0:sreg_64 = S_MOV_B64 44

# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)

---

name: constant_address_positive
legalized: true
regBankSelected: true

body: |
  bb.0:
    ; NOTE(review): neither listed live-in register is read in this block.
    liveins: $sgpr0_sgpr1, $vgpr2_vgpr3
    %0:sgpr(p4) = G_CONSTANT i64 44
    %1:sgpr(s64) = G_CONSTANT i64 64
    %2:sgpr(p4) = G_PTR_ADD %0, %1
    %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load 4, align 4, addrspace 4)
    ; Use the loaded value as an implicit operand of the terminator.
    S_ENDPGM 0, implicit %3
...