; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s

; Codegen test for passing %struct.ByValStruct arguments byval in
; addrspace(5) on amdgcn-amd-amdhsa. The two llc invocations above compile
; this file for fiji and hawaii; every check line visible here uses the
; shared GCN prefix, so both targets are held to the same expectations.
; FileCheck is invoked with -enable-var-scope, so pattern variables such as
; LOAD0 are local to each labelled block and are redefined per function.
;
; NOTE(review): the expectations read as if s32 is the stack pointer and
; s5 / s33 the frame or scratch-wave-offset register - confirm against the
; AMDGPU calling convention of the LLVM revision this test belongs to.

; A single array of four i32 (16 bytes).
%struct.ByValStruct = type { [4 x i32] }

; Leaf callee taking two byval structs. Expected code: s32 is copied into s5
; once, the first element of each argument is loaded and stored back through
; s5 at offsets 4 and 20, and s32 is not mentioned again between or after
; those accesses.
; GCN-LABEL: {{^}}void_func_byval_struct:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5 offset:4{{$}}
; GCN-NOT: s32

; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}}
; GCN-NOT: s32
define void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
  ; Volatile read-modify-write of element 0 of the first argument (+1).
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
  %add = add nsw i32 %tmp, 1
  store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
  ; Volatile read-modify-write of element 0 of the second argument (+2).
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
  %add3 = add nsw i32 %tmp1, 2
  store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
  ; Volatile store of 9 through a null addrspace(1) pointer.
  store volatile i32 9, i32 addrspace(1)* null, align 4
  ret void
}

; Same body as above, but with a call to @external_void_func_void placed
; between computing %add3 and storing it. Expected code: v32 and v33 are
; stored before the call and reloaded after it (presumably callee-saved
; register spills - confirm), s32 is advanced by 0xc00 and restored by the
; same amount before s_setpc_b64, s32 itself is never written to or read
; from a lane, and the store of the second sum appears after s_swappc_b64.
; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
; GCN: s_mov_b32 s5, s32
; GCN-DAG: buffer_store_dword v32
; GCN-DAG: buffer_store_dword v33
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
; GCN-DAG: v_writelane_b32
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}

; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]

; GCN: s_swappc_b64

; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}

; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32
; GCN: buffer_load_dword v32,
; GCN: buffer_load_dword v33,
; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN: s_setpc_b64
define void @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
  %add = add nsw i32 %tmp, 1
  store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
  %add3 = add nsw i32 %tmp1, 2
  ; %add3 is computed before the call and stored after it, so the value is
  ; live across the call.
  call void @external_void_func_void()
  store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
  store volatile i32 9, i32 addrspace(1)* null, align 4
  ret void
}

; Non-kernel caller: allocates two structs, writes 9 and 13 to their first
; elements and passes both byval to @void_func_byval_struct. Expected code:
; the locals are addressed through s5 (offsets 8..20 and 24..36) and copied
; dword by dword to s32-relative offsets 4..16 and 20..32 before
; s_swappc_b64. s32 is adjusted by 0xc00 only; the negative checks reject an
; additional add or sub of 0x800 on s32.
; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
; GCN: s_mov_b32 s5, s32
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: v_writelane_b32

; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13

; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20

; GCN-NOT: s_add_u32 s32, s32, 0x800

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16

; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32

; GCN: s_swappc_b64
; GCN-NOT: v_readlane_b32 s32
; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32

; GCN-NOT: s_sub_u32 s32, s32, 0x800

; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @call_void_func_byval_struct_func() #0 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
  ; NOTE(review): the lifetime markers below pass a size of 32 although
  ; %struct.ByValStruct is four i32 (16 bytes) - confirm this is intentional.
  %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
  %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  ; Distinct marker values (9 and 13) in element 0 of each struct.
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
  call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
  ret void
}

; Kernel caller with the same body as the function above. Expected code:
; s33 is initialised from s7 and s32 is set to s33 + 0xc00; the locals are
; addressed through s33 and copied to the same s32-relative offsets (4..32)
; as in the non-kernel caller. The kernel ends in s_endpgm, and no s_sub_u32
; on s32 may appear between the call and that instruction.
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel:
; GCN: s_mov_b32 s33, s7
; GCN: s_add_u32 s32, s33, 0xc00{{$}}

; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24

; GCN-NOT: s_add_u32 s32, s32, 0x800

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16

; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32


; GCN: s_swappc_b64
; GCN-NOT: s_sub_u32 s32
; GCN: s_endpgm
define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
  %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
  call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
  ret void
}

; Same kernel body again, but using attribute group #2, which sets
; "no-frame-pointer-elim"="true". Only the label is matched here; no
; instruction-level expectations follow it, so the function is covered by
; the label match and by -verify-machineinstrs on the llc command line.
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
  %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
  call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
  ret void
}

; External callee used by @void_func_byval_struct_non_leaf; only declared
; here, so its body is not visible to the compiler in this module.
declare void @external_void_func_void() #0

; NOTE(review): attribute group #3 is referenced by the two lifetime
; intrinsic declarations but no definition of it is visible in this part of
; the file - confirm whether it is defined elsewhere or intentionally absent.
declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3
declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3

; #0: callers and the external declaration; #1: the two byval callees
; (noinline keeps them as real calls); #2: the kernel variant that disables
; frame pointer elimination.
attributes #0 = { nounwind }
attributes #1 = { noinline norecurse nounwind }
attributes #2 = { nounwind norecurse "no-frame-pointer-elim"="true" }