; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s

; Statically sized LDS (addrspace(3), i.e. shared-memory) arrays.  The
; dynamically sized arrays below must be laid out after these, so the
; expected offsets in the CHECK lines are derived from their sizes.
@lds0 = addrspace(3) global [512 x float] undef
@lds1 = addrspace(3) global [256 x float] undef
@lds2 = addrspace(3) global [4096 x float] undef
@lds3 = addrspace(3) global [67 x i8] undef

; Dynamically sized ([0 x ...]) LDS arrays; _2/_3 carry explicit alignment
; to test that the chosen offset honors it.
@dynamic_shared0 = external addrspace(3) global [0 x float]
@dynamic_shared1 = external addrspace(3) global [0 x double]
@dynamic_shared2 = external addrspace(3) global [0 x double], align 4
@dynamic_shared3 = external addrspace(3) global [0 x double], align 16

; The dynamic array is placed after @lds0: offset 0x800 = 512 floats * 4 bytes.
; CHECK-LABEL: {{^}}dynamic_shared_array_0:
; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %tid.x
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val0, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Both @lds0 and @lds1 are referenced (on different branches), so the dynamic
; array starts after both: 0xc00 = (512 + 256) floats * 4 bytes.
; CHECK-LABEL: {{^}}dynamic_shared_array_1:
; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}}
; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}}
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) {
entry:
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %idx.0 = add nsw i32 %tid.x, 64
  %tmp = icmp eq i32 %cond, 0
  br i1 %tmp, label %if, label %else

if:                                               ; preds = %entry
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  br label %endif

else:                                             ; preds = %entry
  %arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  br label %endif

endif:                                            ; preds = %else, %if
  %val = phi float [ %val0, %if ], [ %val1, %else ]
  %arrayidx = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val, float addrspace(3)* %arrayidx, align 4
  ret void
}

; Dynamic array placed after @lds2: 0x4000 = 4096 floats * 4 bytes.
; CHECK-LABEL: {{^}}dynamic_shared_array_2:
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x4000, [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [4096 x float], [4096 x float] addrspace(3)* @lds2, i32 0, i32 %vidx
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val0, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The offset to the dynamic shared memory array should be aligned on the type
; specified: 0x44 = 68 = 67 bytes of @lds3 rounded up to the 4-byte alignment
; of the float element type.
; CHECK-LABEL: {{^}}dynamic_shared_array_3:
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x44, [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The offset to the dynamic shared memory array should be aligned on the
; maximal one.
; Two dynamic arrays (float and double) are used, so the start offset is
; rounded to the larger (double, 8-byte) alignment: 0x48 = 72 = align(67, 8).
; CHECK-LABEL: {{^}}dynamic_shared_array_4:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x48
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared1, i32 0, i32 %tid.x
  store double %val2, double addrspace(3)* %arrayidx2, align 4
  ret void
}

; Honor the explicit alignment from the specified variable.
; @dynamic_shared2 carries an explicit `align 4`, which overrides the double
; type's natural 8-byte alignment: 0x44 = 68 = align(67, 4).
; CHECK-LABEL: {{^}}dynamic_shared_array_5:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared2, i32 0, i32 %tid.x
  store double %val2, double addrspace(3)* %arrayidx2, align 4
  ret void
}

; Honor the explicit alignment from the specified variable.
; @dynamic_shared3 carries an explicit `align 16`, raising the start offset
; above the natural alignment: 0x50 = 80 = align(67, 16).
; CHECK-LABEL: {{^}}dynamic_shared_array_6:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x50
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared3, i32 0, i32 %tid.x
  store double %val2, double addrspace(3)* %arrayidx2, align 4
  ret void
}

; Per-workitem ID intrinsic used by all kernels above to form the LDS index.
declare i32 @llvm.amdgcn.workitem.id.x()