; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s


; Copies one i32 from %in to %out (both addrspace(1) pointers).
; Every run configuration is expected to use a single 32-bit load.
; FUNC-LABEL: {{^}}global_load_i32:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(1)* %in
  store i32 %ld, i32 addrspace(1)* %out
  ret void
}

; Copies a <2 x i32> from %in to %out (both addrspace(1) pointers).
; Every run configuration is expected to use a single 64-bit (x2) load.
; FUNC-LABEL: {{^}}global_load_v2i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64
define amdgpu_kernel void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
  ret void
}

; Copies a <3 x i32> from %in to %out (both addrspace(1) pointers).
; The expected output uses a 128-bit (x4) load for the three-element vector.
; FUNC-LABEL: {{^}}global_load_v3i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
  ret void
}

; Copies a <4 x i32> from %in to %out (both addrspace(1) pointers).
; Every run configuration is expected to use a single 128-bit (x4) load.
; FUNC-LABEL: {{^}}global_load_v4i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
  ret void
}

; Copies an <8 x i32> from %in to %out (both addrspace(1) pointers).
; The expected output splits the access into two 128-bit (x4) loads.
; FUNC-LABEL: {{^}}global_load_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
  ret void
}

; Copies a <16 x i32> from %in to %out (both addrspace(1) pointers).
; The expected output splits the access into four 128-bit (x4) loads.
; FUNC-LABEL: {{^}}global_load_v16i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
  ret void
}

; Loads an i32 from %in, zero-extends it to i64 and stores it to %out.
; The amdgcn checks expect the loaded dword as the low half, a register set to
; 0 as the high half, and a single 64-bit store of that register pair.
; FUNC-LABEL: {{^}}global_zextload_i32_to_i64:
; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_dword v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
define amdgpu_kernel void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Loads an i32 from %in, sign-extends it to i64 and stores it to %out.
; The amdgcn checks expect the high half to come from an arithmetic shift
; right by 31 of the loaded dword, then a 64-bit store of the register pair.
; FUNC-LABEL: {{^}}global_sextload_i32_to_i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}


; EG: MEM_RAT
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.
; EG: 31
define amdgpu_kernel void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Loads a <1 x i32> from %in, zero-extends it to <1 x i64> and stores it to
; %out. The amdgcn checks expect one 32-bit load and one 64-bit store.
; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword
; GCN-NOHSA: buffer_store_dwordx2

; GCN-HSA: flat_load_dword
; GCN-HSA: flat_store_dwordx2
define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Loads a <1 x i32> from %in, sign-extends it to <1 x i64> and stores it to
; %out. The amdgcn checks expect the high half to come from an arithmetic
; shift right by 31 of the loaded dword, then one 64-bit store.
; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Loads a <2 x i32> from %in, zero-extends it to <2 x i64> and stores it to
; %out. The amdgcn checks expect one 64-bit load and one 128-bit store.
; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: flat_load_dwordx2
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Loads a <2 x i32> from %in, sign-extends it to <2 x i64> and stores it to
; %out. The amdgcn checks expect one 64-bit load, two arithmetic right shifts
; (one per element) and one 128-bit store, with shifts and store in any order.
; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Loads a <4 x i32> from %in, zero-extends it to <4 x i64> and stores it to
; %out. The amdgcn checks expect one 128-bit load and two 128-bit stores.
; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Loads a <4 x i32> from %in, sign-extends it to <4 x i64> and stores it to
; %out. The amdgcn checks expect one 128-bit load, four arithmetic right
; shifts (one per element) and two 128-bit stores.
; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Loads an <8 x i32> from %in, zero-extends it to <8 x i64> and stores it to
; %out. The amdgcn checks expect two 128-bit loads and four 128-bit stores;
; the stores are matched in any order.
; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Loads an <8 x i32> from %in, sign-extends it to <8 x i64> and stores it to
; %out. The amdgcn checks expect two 128-bit loads, eight arithmetic right
; shifts (one per element) and four 128-bit stores.
; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Loads a <16 x i32> from %in, sign-extends it to <16 x i64> and stores it to
; %out. The amdgcn checks expect four 128-bit loads, followed by four groups
; of four arithmetic right shifts plus one 128-bit store each.
; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4


; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Loads a <16 x i32> from %in, zero-extends it to <16 x i64> and stores it to
; %out. The amdgcn checks expect four 128-bit loads and eight 128-bit stores.
; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Loads a <32 x i32> from %in, sign-extends it to <32 x i64> and stores it to
; %out. The amdgcn checks expect eight 128-bit loads, 32 arithmetic right
; shifts (one per element) and sixteen 128-bit stores.
; NOTE(review): the eighth buffer-load check below uses the unordered (DAG)
; form while the seven before it are ordered; kept as found - confirm whether
; that is intentional.
; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64:

; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA-DAG: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Loads a <32 x i32> from %in, zero-extends it to <32 x i64> and stores it to
; %out. The amdgcn checks expect eight 128-bit loads and sixteen 128-bit
; stores; the stores are matched in any order.
; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4


; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4


; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by every kernel in this file.
attributes #0 = { nounwind }