; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s

; Load an i16 from addrspace(3) and insert it into element 0 of an undef
; <2 x i16>, returning the vector. The gfx900 run expects the d16 form of
; the DS read; the fiji run only expects a plain ds_read_u16.
; GCN-LABEL: {{^}}load_local_lo_v2i16_undeflo:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define <2 x i16> @load_local_lo_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> undef, i16 %load, i32 0
  ret <2 x i16> %build
}

; Load an i16 from addrspace(3) into element 0 of a <2 x i16> and return it.
; %reg is first inserted at element 0 and then overwritten by the loaded
; value, so the returned vector does not depend on %reg.
; NOTE(review): the "reglo" name suggests %reg was meant to occupy one half
; of the vector -- confirm the index of the first insertelement is intended.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define <2 x i16> @load_local_lo_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  ret <2 x i16> %build1
}

; Show that we get reasonable regalloc without physreg constraints.
; The built vector is stored to an undef addrspace(1) pointer instead of
; being returned. Both insertelements target element 0, so %reg is
; overwritten by the loaded value.
; NOTE(review): confirm the index of the first insertelement is intended.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v0, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define void @load_local_lo_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Insert an i16 loaded from addrspace(3) into element 0 of a zeroinitializer
; <2 x i16>. The gfx900 run expects the zero to be moved into the load's
; destination register before the d16 read.
; GCN-LABEL: {{^}}load_local_lo_v2i16_zerolo:
; GCN: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define <2 x i16> @load_local_lo_v2i16_zerolo(i16 addrspace(3)* %in) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0
  ret <2 x i16> %build
}

; Insert a half loaded from addrspace(3) into element 0 of the constant
; vector <half 0.0, half 2.0>. The gfx900 run expects the constant to be
; materialized with a single v_mov of 2.0 into the load's destination
; register before the d16 read.
; GCN-LABEL: {{^}}load_local_lo_v2f16_fpimm:
; GCN: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define <2 x half> @load_local_lo_v2f16_fpimm(half addrspace(3)* %in) #0 {
entry:
  %load = load half, half addrspace(3)* %in
  %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0
  ret <2 x half> %build
}

; Bitcast the i32 %reg to <2 x half>, replace element 0 with a half loaded
; from addrspace(3), and store the result to an undef addrspace(1) pointer.
; The gfx900 run expects the d16 read to target the register holding %reg.
; GCN-LABEL: {{^}}load_local_lo_v2f16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define void @load_local_lo_v2f16_reghi_vreg(half addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load half, half addrspace(3)* %in
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; %reg is a scalar half placed in element 1 of an undef vector; the loaded
; half goes in element 0. The gfx900 run expects a plain (non-d16)
; ds_read_u16 followed by an and / lshl_or pack rather than a d16 load.
; GCN-LABEL: {{^}}load_local_lo_v2f16_reglo_vreg:

; GFX9: ds_read_u16 v
; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}
; GFX9: global_store_dword

; VI: ds_read_u16 v
define void @load_local_lo_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 {
entry:
  %load = load half, half addrspace(3)* %in
  %build0 = insertelement <2 x half> undef, half %reg, i32 1
  %build1 = insertelement <2 x half> %build0, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Load an i8 from addrspace(3), zero-extend it to i16 and insert it into
; element 0 of %reg (bitcast from i32 to <2 x i16>). The gfx900 run expects
; the extending d16 read ds_read_u8_d16.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u8_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u8 v
define void @load_local_lo_v2i16_reghi_vreg_zexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; %reg is a scalar i16 placed in element 1 of an undef vector; the
; zero-extended i8 load goes in element 0. The gfx900 run expects a plain
; (non-d16) ds_read_u8 here.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9: ds_read_u8 v
; GFX9: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u8 v
define void @load_local_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Load an i8 from addrspace(3), sign-extend it to i16 and insert it into
; element 0 of %reg (bitcast from i32 to <2 x i16>). The gfx900 run expects
; the extending d16 read ds_read_i8_d16.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_i8_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_i8 v
define void @load_local_lo_v2i16_reghi_vreg_sexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; %reg is a scalar i16 placed in element 1 of an undef vector; the
; sign-extended i8 load goes in element 0. The gfx900 run expects a plain
; (non-d16) ds_read_i8 followed by an and / lshl_or pack.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9: ds_read_i8 v
; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}

; VI: ds_read_i8 v
define void @load_local_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Load an i16 from addrspace(1) at %in - 2047 elements (-4094 bytes) and
; insert it into element 0 of %reg (bitcast from i32 to <2 x i16>). The
; gfx900 run expects the d16 global load with the byte offset folded into
; the instruction. No fiji-specific checks are given.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg(i16 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
  %load = load i16, i16 addrspace(1)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Load a half from addrspace(1) at %in - 2047 elements (-4094 bytes) and
; insert it into element 0 of %reg (bitcast from i32 to <2 x half>). The
; gfx900 run expects the d16 global load with the byte offset folded into
; the instruction. No fiji-specific checks are given.
; GCN-LABEL: {{^}}load_global_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2f16_reglo_vreg(half addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047
  %load = load half, half addrspace(1)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Load an i8 from addrspace(1) at %in - 4095 bytes, zero-extend it to i16
; and insert it into element 0 of %reg (bitcast from i32 to <2 x i16>). The
; gfx900 run expects global_load_ubyte_d16 with the offset folded in.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Load an i8 from addrspace(1) at %in - 4095 bytes, sign-extend it to i16
; and insert it into element 0 of %reg (bitcast from i32 to <2 x i16>). The
; gfx900 run expects global_load_sbyte_d16 with the offset folded in.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Load an i16 through a default-address-space (flat) pointer and insert it
; into element 0 of %reg (bitcast from i32 to <2 x i16>). The gfx900 run
; expects flat_load_short_d16; the fiji run expects a flat_load_ushort
; combined with a v_or.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2i16_reghi_vreg(i16* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i16, i16* %in
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Load a half through a default-address-space (flat) pointer and insert it
; into element 0 of %reg (bitcast from i32 to <2 x half>). The gfx900 run
; expects flat_load_short_d16; the fiji run expects a flat_load_ushort
; combined with a v_or.
; GCN-LABEL: {{^}}load_flat_lo_v2f16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2f16_reghi_vreg(half* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load half, half* %in
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Load an i8 through a default-address-space (flat) pointer, zero-extend it
; to i16 and insert it into element 0 of %reg (bitcast from i32 to
; <2 x i16>). The gfx900 run expects flat_load_ubyte_d16; the fiji run
; expects a flat_load_ubyte combined with a v_or.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_ubyte_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ubyte v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8* %in
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Load an i8 through a default-address-space (flat) pointer, sign-extend it
; to i16 and insert it into element 0 of %reg (bitcast from i32 to
; <2 x i16>). The gfx900 run expects flat_load_sbyte_d16; the fiji run
; expects a flat_load_sbyte combined with an SDWA v_or that selects WORD_0
; of its first source.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_sbyte_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_sbyte v{{[0-9]+}}
; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD

define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8* %in
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; %in is a byval addrspace(5) pointer. Load the i16 located 2045 elements
; past it and insert it into element 0 of %reg (bitcast from i32 to
; <2 x i16>). The checks expect the access to be addressed as offset:4094
; relative to s5, using buffer_load_short_d16 on gfx900 and
; buffer_load_ushort on fiji.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
  %load = load i16, i16 addrspace(5)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; %reg is a scalar i16 placed in element 1 of an undef vector; the i16
; loaded from the byval addrspace(5) pointer (2045 elements past %in) goes
; in element 0. The gfx900 run expects a plain (non-d16) buffer_load_ushort
; followed by an and / lshl_or pack.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg:
; GCN: s_waitcnt
; GFX9: buffer_load_ushort v1, off, s[0:3], s5 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9: v_and_b32
; GFX9: v_lshl_or_b32

; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
  %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
  %load = load i16, i16 addrspace(5)* %gep
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; %in is a byval addrspace(5) pointer. Load the half located 2045 elements
; past it and insert it into element 0 of %reg (bitcast from i32 to
; <2 x half>). The checks expect the access to be addressed as offset:4094
; relative to s5, using buffer_load_short_d16 on gfx900 and
; buffer_load_ushort on fiji.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2045
  %load = load half, half addrspace(5)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Volatile i16 load from the constant addrspace(5) address 4094 (%in is
; unused), inserted into element 0 of %reg (bitcast from i32 to <2 x i16>).
; The checks expect the address to appear as offset:4094 relative to s4.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Volatile i16 load from the constant addrspace(5) address 4094 (%in is
; unused), inserted into element 0 of %reg (bitcast from i32 to <2 x i16>).
; NOTE(review): apart from its name, this function's body and checks match
; load_private_lo_v2i16_reglo_vreg_nooff -- confirm the duplication is
; intended.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Volatile half load from the constant addrspace(5) address 4094 (%in is
; unused), inserted into element 0 of %reg (bitcast from i32 to <2 x half>).
; The checks expect the address to appear as offset:4094 relative to s4.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; %in is a byval addrspace(5) pointer. Load the i8 located 4091 bytes past
; it, zero-extend it to i16 and insert it into element 0 of %reg (bitcast
; from i32 to <2 x i16>). The checks expect the access to be addressed as
; offset:4095 relative to s5, using buffer_load_ubyte_d16 on gfx900.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
  %load = load i8, i8 addrspace(5)* %gep
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; %in is a byval addrspace(5) pointer. Load the i8 located 4091 bytes past
; it, sign-extend it to i16 and insert it into element 0 of %reg (bitcast
; from i32 to <2 x i16>). The checks expect the access to be addressed as
; offset:4095 relative to s5, using buffer_load_sbyte_d16 on gfx900.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
  %load = load i8, i8 addrspace(5)* %gep
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Volatile i8 load from the constant addrspace(5) address 4094 (%in is
; unused), zero-extended to i16 and inserted into element 0 of %reg (bitcast
; from i32 to <2 x i16>). The checks expect the address to appear as
; offset:4094 relative to s4.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Volatile i8 load from the constant addrspace(5) address 4094 (%in is
; unused), sign-extended to i16 and inserted into element 0 of %reg (bitcast
; from i32 to <2 x i16>). The checks expect the address to appear as
; offset:4094 relative to s4.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Volatile i8 load from the constant addrspace(5) address 4094 (%in is
; unused), zero-extended to i16, bitcast to half and inserted into element 0
; of %reg (bitcast from i32 to <2 x half>). The gfx900 run still expects
; buffer_load_ubyte_d16 despite the intervening bitcast.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
  %ext = zext i8 %load to i16
  %bc.ext = bitcast i16 %ext to half
  %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Load an i16 from addrspace(4) at %in - 2047 elements (-4094 bytes) and
; insert it into element 0 of %reg (bitcast from i32 to <2 x i16>). The
; gfx900 run expects global_load_short_d16 with the offset folded in; the
; fiji run expects a flat_load_ushort.
; GCN-LABEL: {{^}}load_constant_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort
define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
  %load = load i16, i16 addrspace(4)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Load a half from addrspace(4) at %in - 2047 elements (-4094 bytes) and
; insert it into element 0 of %reg (bitcast from i32 to <2 x half>). The
; gfx900 run expects global_load_short_d16 with the offset folded in; the
; fiji run expects a flat_load_ushort.
; The label below is anchored with {{^}} and a trailing colon, like the
; other labels in this file, so it can only match the function's own label.
; GCN-LABEL: {{^}}load_constant_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort
define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
  %load = load half, half addrspace(4)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Two addrspace(5) allocas: a volatile store of 123 goes to the start of
; %obj0, then a volatile i16 load reads element 2025 of %obj1 and is
; inserted into element 0 of %reg (bitcast from i32 to <2 x i16>). The
; gfx900 run expects the store to be followed immediately by a d16 load
; addressed as offset:4094 relative to s5.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094

; VI: buffer_load_ushort v
define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i16], align 2, addrspace(5)
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
  %load = load volatile i16, i16 addrspace(5)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Two addrspace(5) allocas: a volatile store of 123 goes to the start of
; %obj0, then a volatile i8 load reads element 4051 of %obj1, is
; sign-extended to i16 and inserted into element 0 of %reg (bitcast from i32
; to <2 x i16>). The gfx900 run expects the store to be followed immediately
; by buffer_load_sbyte_d16 addressed as offset:4095 relative to s5.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095

; VI: buffer_load_sbyte v
define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i8], align 2, addrspace(5)
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
  %load = load volatile i8, i8 addrspace(5)* %gep
  %load.ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Two addrspace(5) allocas: a volatile store of 123 goes to the start of
; %obj0, then a volatile i8 load reads element 4051 of %obj1, is
; zero-extended to i16 and inserted into element 0 of %reg (bitcast from i32
; to <2 x i16>). The gfx900 run expects the store to be followed immediately
; by buffer_load_ubyte_d16 addressed as offset:4095 relative to s5.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095

; VI: buffer_load_ubyte v
define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i8], align 2, addrspace(5)
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
  %load = load volatile i8, i8 addrspace(5)* %gep
  %load.ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Attribute group referenced as #0 by the function definitions above.
attributes #0 = { nounwind }