; Code generation test for functions that receive and pass byval struct arguments in addrspace(5).
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s

; The module is compiled twice by llc for the amdgcn-amd-amdhsa triple (once
; for fiji, once for hawaii) with -enable-ipra=0 and the machine verifier on.
; Both invocations are matched against the shared GCN check lines.
; NOTE(review): the VI and CI prefixes are declared above, but no check line
; visible in this file uses either of them - confirm they are still needed.

; Aggregate used as the byval argument type by every function in this file:
; a struct wrapping an array of four i32 values.
%struct.ByValStruct = type { [4 x i32] }
5
; Callee with two byval struct arguments and no calls of its own.
;
; The body increments the first i32 of %arg0 by 1 and the first i32 of %arg1
; by 2 (all four memory accesses are volatile), then performs a volatile store
; of 9 through a null addrspace(1) pointer.
;
; The checks below expect s32 to be copied into s5, and the first dword of
; each argument to be loaded and stored relative to s5 (offset 4 for the first
; argument, offset 20 for the second) with no reference to s32 in between.
; GCN-LABEL: {{^}}void_func_byval_struct:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5 offset:4{{$}}
; GCN-NOT: s32

; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}}
; GCN-NOT: s32
define void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
  ; %arg0: read-modify-write of element 0 (+1).
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
  %add = add nsw i32 %tmp, 1
  store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
  ; %arg1: read-modify-write of element 0 (+2).
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
  %add3 = add nsw i32 %tmp1, 2
  store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
  ; Volatile store through a null addrspace(1) pointer.
  ; NOTE(review): presumably only here as an extra observable side effect - confirm.
  store volatile i32 9, i32 addrspace(1)* null, align 4
  ret void
}
30
; Same argument handling as @void_func_byval_struct, but the function also
; calls @external_void_func_void. The call sits between the computation of
; %add3 and the store that writes it back to %arg1.
;
; The checks below expect, in addition to the s5-relative argument accesses:
;   - v32 and v33 to be stored before the call and reloaded after it,
;   - a v_writelane_b32 / v_readlane_b32 pair that does not involve s32,
;   - s32 to be advanced by 0xc00 and later reduced by the same amount before
;     s_setpc_b64,
;   - the store of the second argument's updated value to appear after
;     s_swappc_b64.
; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
; GCN: s_mov_b32 s5, s32
; GCN-DAG: buffer_store_dword v32
; GCN-DAG: buffer_store_dword v33
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
; GCN-DAG: v_writelane_b32
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}

; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]

; GCN: s_swappc_b64

; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}

; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32
; GCN: buffer_load_dword v32,
; GCN: buffer_load_dword v33,
; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN: s_setpc_b64
define void @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
  ; %arg0: read-modify-write of element 0 (+1), completed before the call.
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
  %add = add nsw i32 %tmp, 1
  store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
  ; %arg1: the load and add happen before the call ...
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
  %add3 = add nsw i32 %tmp1, 2
  call void @external_void_func_void()
  ; ... and the store happens after it, so %add3 is live across the call.
  store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
  store volatile i32 9, i32 addrspace(1)* null, align 4
  ret void
}
69
; Non-kernel caller: allocates two %struct.ByValStruct objects in
; addrspace(5), writes 9 and 13 into their first elements (volatile), and
; passes both by value to @void_func_byval_struct.
;
; The checks below expect:
;   - s32 copied into s5 and s32 advanced by 0xc00,
;   - the constants 9 and 13 stored at s5 offsets 8 and 24,
;   - the four dwords of each struct loaded relative to s5 (offsets 8..20 and
;     24..36) and stored relative to s32 (offsets 4..16 and 20..32),
;   - no separate adjustment of s32 by 0x800 around the call,
;   - s32 reduced by 0xc00, immediately followed by s_waitcnt and s_setpc_b64.
; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
; GCN: s_mov_b32 s5, s32
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: v_writelane_b32

; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13

; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20

; GCN-NOT: s_add_u32 s32, s32, 0x800

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16

; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32

; GCN: s_swappc_b64
; GCN-NOT: v_readlane_b32 s32
; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32

; GCN-NOT: s_sub_u32 s32, s32, 0x800

; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @call_void_func_byval_struct_func() #0 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
  ; NOTE(review): the lifetime markers below pass a size of 32, while
  ; %struct.ByValStruct is declared in this file as { [4 x i32] } - confirm
  ; whether the mismatch is intentional.
  %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
  %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  ; Initialise element 0 of each struct with a distinct constant so the two
  ; objects can be told apart in the generated code.
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
  call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
  ; Lifetimes are ended in the reverse order of their start.
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
  ret void
}
130
; Kernel (amdgpu_kernel) caller with the same body as
; @call_void_func_byval_struct_func: two addrspace(5) structs are allocated,
; initialised with 9 and 13, and passed by value to @void_func_byval_struct.
;
; The checks below expect:
;   - s33 to be set from s7 and s32 to be computed as s33 + 0xc00,
;   - the constants 9 and 13 stored at s33 offsets 8 and 24,
;   - the four dwords of each struct loaded relative to s33 and stored
;     relative to s32 (offsets 4..16 and 20..32),
;   - no adjustment of s32 by 0x800 before the copies,
;   - no s_sub_u32 on s32 between s_swappc_b64 and s_endpgm.
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel:
; GCN: s_mov_b32 s33, s7
; GCN: s_add_u32 s32, s33, 0xc00{{$}}

; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24

; GCN-NOT: s_add_u32 s32, s32, 0x800

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16

; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32


; GCN: s_swappc_b64
; GCN-NOT: s_sub_u32 s32
; GCN: s_endpgm
define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
  %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  ; Distinct constants (9 / 13) mark the first element of each struct.
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
  call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
  ; Lifetimes are ended in the reverse order of their start.
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
  ret void
}
183
; Kernel caller with the same body as @call_void_func_byval_struct_kernel,
; but using attribute group #2, which sets "no-frame-pointer-elim"="true".
;
; Only the function label is checked; no instruction patterns follow it. The
; function is therefore covered by successful compilation (the run lines
; enable the machine verifier) and by the presence of its label.
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
  %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  ; Distinct constants (9 / 13) mark the first element of each struct.
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
  call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
  ; Lifetimes are ended in the reverse order of their start.
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
  ret void
}
202
; External function with no visible definition; called by
; @void_func_byval_struct_non_leaf.
declare void @external_void_func_void() #0

; Lifetime intrinsics for i8 pointers in addrspace(5).
; NOTE(review): both declarations reference attribute group #3, which is not
; defined anywhere in the visible file (only #0, #1 and #2 are) - confirm
; whether a definition is missing or the reference should be dropped.
declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3
declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3

; #0: callers and the external declaration.
; #1: the two byval callees (additionally noinline and norecurse).
; #2: the kernel variant that sets "no-frame-pointer-elim".
attributes #0 = { nounwind }
attributes #1 = { noinline norecurse nounwind }
attributes #2 = { nounwind norecurse "no-frame-pointer-elim"="true" }
211