; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s

;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Zero-extending i8 load through a global (addrspace 1) pointer; the widened
; value is written back to global memory as an i32.
; FUNC-LABEL: {{^}}load_i8:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}

; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %byte = load i8, i8 addrspace(1)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i8 load from the global address space.
; The r600 runs expect a byte read followed by a BFE_INT on the loaded
; register and then a match for the value 8; the amdgcn runs expect a signed
; byte load.
; FUNC-LABEL: {{^}}load_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 8
; SI: buffer_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %byte = load i8, i8 addrspace(1)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Zero-extending <2 x i8> load from the global address space; both check
; prefixes look for two unsigned byte reads.
; FUNC-LABEL: {{^}}load_v2i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
  %bytes = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %wide = zext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <2 x i8> load from the global address space.
; The r600 checks are order-independent: two byte reads, one BFE_INT per
; loaded register, and two matches for the value 8. The amdgcn runs expect
; two signed byte loads.
; FUNC-LABEL: {{^}}load_v2i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: 8
; R600-DAG: 8

; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
  %bytes = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %wide = sext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending <4 x i8> load from the global address space; both check
; prefixes look for four unsigned byte reads.
; FUNC-LABEL: {{^}}load_v4i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
  %bytes = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %wide = zext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <4 x i8> load from the global address space.
; The r600 checks are order-independent: four byte reads, one BFE_INT per
; loaded register, and four matches for the value 8. The amdgcn runs expect
; four signed byte loads.
; FUNC-LABEL: {{^}}load_v4i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]+\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]+\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_W]], 0.0, literal
; R600-DAG: 8
; R600-DAG: 8
; R600-DAG: 8
; R600-DAG: 8
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
  %bytes = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %wide = sext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending i16 load through a global (addrspace 1) pointer; the widened
; value is written back to global memory as an i32.
; FUNC-LABEL: {{^}}load_i16:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %half = load i16, i16 addrspace(1)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i16 load from the global address space.
; The r600 runs expect a 16-bit read followed by a BFE_INT on the loaded
; register and then a match for the value 16; the amdgcn runs expect a signed
; short load.
; FUNC-LABEL: {{^}}load_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI: buffer_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %half = load i16, i16 addrspace(1)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Zero-extending <2 x i16> load from the global address space; both check
; prefixes look for two unsigned 16-bit reads.
; FUNC-LABEL: {{^}}load_v2i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
  %halves = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = zext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <2 x i16> load from the global address space.
; The r600 checks are order-independent: two 16-bit reads, one BFE_INT per
; loaded register, and two matches for the value 16. The amdgcn runs expect
; two signed short loads.
; FUNC-LABEL: {{^}}load_v2i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: 16
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
  %halves = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = sext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending <4 x i16> load from the global address space; both check
; prefixes look for four unsigned 16-bit reads.
; FUNC-LABEL: {{^}}load_v4i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
  %halves = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = zext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <4 x i16> load from the global address space.
; The r600 checks are order-independent: four 16-bit reads, one BFE_INT per
; loaded register, and four matches for the value 16. The amdgcn runs expect
; four signed short loads.
; FUNC-LABEL: {{^}}load_v4i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]+\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]+\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_W]], 0.0, literal
; R600-DAG: 16
; R600-DAG: 16
; R600-DAG: 16
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
; SI: buffer_load_sshort
; SI: buffer_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
  %halves = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = sext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Plain i32 copy: read from one global (addrspace 1) pointer and write the
; value unchanged to another.
; FUNC-LABEL: {{^}}load_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: buffer_load_dword v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %word = load i32, i32 addrspace(1)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}

; Plain float copy between two global (addrspace 1) pointers; the expected
; instructions are the same as for the i32 case.
; FUNC-LABEL: {{^}}load_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: buffer_load_dword v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %fval = load float, float addrspace(1)* %in
  store float %fval, float addrspace(1)* %out
  ret void
}

; <2 x float> copy between two global (addrspace 1) pointers; each target is
; expected to use a 64-bit wide read.
; FUNC-LABEL: {{^}}load_v2f32:
; R600: MEM_RAT
; R600: VTX_READ_64
; SI: buffer_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
  %pair = load <2 x float>, <2 x float> addrspace(1)* %in
  store <2 x float> %pair, <2 x float> addrspace(1)* %out
  ret void
}

; i64 copy between two global (addrspace 1) pointers; each target is expected
; to use a 64-bit wide read.
; FUNC-LABEL: {{^}}load_i64:
; R600: VTX_READ_64
; SI: buffer_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  %quad = load i64, i64 addrspace(1)* %in
  store i64 %quad, i64 addrspace(1)* %out
  ret void
}

; Reads an i32 from global memory, sign-extends it to i64 and stores the
; 64-bit result. The r600 runs look for an ASHR using literal.x followed by a
; match for 31; the amdgcn runs look for a dword load.
; FUNC-LABEL: {{^}}load_i64_sext:
; R600: MEM_RAT
; R600: MEM_RAT
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
; R600: 31
; SI: buffer_load_dword

define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %word = load i32, i32 addrspace(1)* %in
  %wide = sext i32 %word to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}

; Reads an i32 from global memory, zero-extends it to i64 and stores the
; 64-bit result. The amdgcn check mirrors the sign-extending variant, which
; performs the same i32 load.
; FUNC-LABEL: {{^}}load_i64_zext:
; R600: MEM_RAT
; R600: MEM_RAT
; SI: buffer_load_dword
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %word = load i32, i32 addrspace(1)* %in
  %wide = zext i32 %word to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}

; <8 x i32> copy between two global (addrspace 1) pointers; both check
; prefixes look for two 128-bit wide reads.
; FUNC-LABEL: {{^}}load_v8i32:
; R600: VTX_READ_128
; R600: VTX_READ_128

; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
  %vec = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %vec, <8 x i32> addrspace(1)* %out
  ret void
}

; <16 x i32> copy between two global (addrspace 1) pointers; both check
; prefixes look for four 128-bit wide reads.
; FUNC-LABEL: {{^}}load_v16i32:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128

; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
  %vec = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %vec, <16 x i32> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; CONSTANT ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Sign-extending i8 load from the constant address space (addrspace 2).
; The r600 runs expect a byte read followed by a BFE_INT on the loaded
; register and then a match for the value 8; the amdgcn runs expect a signed
; byte load into a vector register.
; FUNC-LABEL: {{^}}load_const_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 8
; SI: buffer_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
  %byte = load i8, i8 addrspace(2)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Zero-extending i8 load taken directly from the incoming constant-space
; (addrspace 2) pointer, with no offset applied.
; FUNC-LABEL: {{^}}load_const_i8_aligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
  %byte = load i8, i8 addrspace(2)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Zero-extending i8 load from the constant address space, taken one i8
; element past the incoming pointer.
; FUNC-LABEL: {{^}}load_const_i8_unaligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
  %next = getelementptr i8, i8 addrspace(2)* %in, i32 1
  %byte = load i8, i8 addrspace(2)* %next
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i16 load from the constant address space (addrspace 2).
; The r600 runs expect a 16-bit read followed by a BFE_INT on the loaded
; register and then a match for the value 16; the amdgcn runs expect a signed
; short load.
; FUNC-LABEL: {{^}}load_const_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI: buffer_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
  %half = load i16, i16 addrspace(2)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Zero-extending i16 load taken directly from the incoming constant-space
; (addrspace 2) pointer, with no offset applied.
; FUNC-LABEL: {{^}}load_const_i16_aligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
  %half = load i16, i16 addrspace(2)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Zero-extending i16 load from the constant address space, taken one i16
; element past the incoming pointer.
; FUNC-LABEL: {{^}}load_const_i16_unaligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
  %next = getelementptr i16, i16 addrspace(2)* %in, i32 1
  %half = load i16, i16 addrspace(2)* %next
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; i32 read from the constant address space (addrspace 2), stored to global
; memory. The amdgcn runs expect a scalar load into an s-register.
; FUNC-LABEL: {{^}}load_const_addrspace_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
  %word = load i32, i32 addrspace(2)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}

; float read from the constant address space (addrspace 2), stored to global
; memory. The amdgcn runs expect a scalar load into an s-register.
; FUNC-LABEL: {{^}}load_const_addrspace_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %fval = load float, float addrspace(2)* %in
  store float %fval, float addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; LOCAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Zero-extending i8 load through a local (addrspace 3) pointer. The amdgcn
; runs require that no s_wqm_b64 appears before the m0 setup and the
; unsigned byte DS read.
; FUNC-LABEL: {{^}}load_i8_local:
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %byte = load i8, i8 addrspace(3)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i8 load from the local address space. The r600 runs expect
; an unsigned byte LDS read followed by BFE_INT; the amdgcn runs expect a
; signed byte DS read.
; FUNC-LABEL: {{^}}load_i8_sext_local:
; R600: LDS_UBYTE_READ_RET
; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %byte = load i8, i8 addrspace(3)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Zero-extending <2 x i8> load from the local address space; both check
; prefixes look for two unsigned byte reads.
; FUNC-LABEL: {{^}}load_v2i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
  %bytes = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = zext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <2 x i8> load from the local address space. The r600 checks
; are order-independent (two byte reads, two BFE_INT); the amdgcn runs expect
; two signed byte DS reads.
; FUNC-LABEL: {{^}}load_v2i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
  %bytes = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = sext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending <4 x i8> load from the local address space; both check
; prefixes look for four unsigned byte reads.
; FUNC-LABEL: {{^}}load_v4i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
  %bytes = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = zext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <4 x i8> load from the local address space. The r600 checks
; are order-independent (four byte reads, four BFE_INT); the amdgcn runs
; expect four signed byte DS reads.
; FUNC-LABEL: {{^}}load_v4i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
  %bytes = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = sext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending i16 load through a local (addrspace 3) pointer. The amdgcn
; runs require that no s_wqm_b64 appears before the m0 setup and the
; unsigned 16-bit DS read.
; FUNC-LABEL: {{^}}load_i16_local:
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
  %half = load i16, i16 addrspace(3)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i16 load from the local address space. The r600 runs expect
; an unsigned short LDS read followed by BFE_INT; the amdgcn runs expect a
; signed 16-bit DS read.
; FUNC-LABEL: {{^}}load_i16_sext_local:
; R600: LDS_USHORT_READ_RET
; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
  %half = load i16, i16 addrspace(3)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Zero-extending <2 x i16> load from the local address space; both check
; prefixes look for two unsigned 16-bit reads.
; FUNC-LABEL: {{^}}load_v2i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
  %halves = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %wide = zext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <2 x i16> load from the local address space. The r600 checks
; are order-independent (two short reads, two BFE_INT); the amdgcn runs
; expect two signed 16-bit DS reads.
; FUNC-LABEL: {{^}}load_v2i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
  %halves = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %wide = sext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending <4 x i16> load from the local address space; both check
; prefixes look for four unsigned 16-bit reads.
; FUNC-LABEL: {{^}}load_v4i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
  %halves = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %wide = zext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <4 x i16> load from the local address space. The r600 checks
; are order-independent (four short reads, four BFE_INT); the amdgcn runs
; expect four signed 16-bit DS reads.
; FUNC-LABEL: {{^}}load_v4i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
  %halves = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %wide = sext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; i32 read through a local (addrspace 3) pointer, stored unchanged to global
; memory. The amdgcn runs require that no s_wqm_b64 appears before the m0
; setup and the 32-bit DS read.
; FUNC-LABEL: {{^}}load_i32_local:
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
  %word = load i32, i32 addrspace(3)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}

; float read through a local (addrspace 3) pointer, stored unchanged to
; global memory; the amdgcn runs expect m0 setup and a 32-bit DS read.
; FUNC-LABEL: {{^}}load_f32_local:
; R600: LDS_READ_RET
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
  %fval = load float, float addrspace(3)* %in
  store float %fval, float addrspace(1)* %out
  ret void
}

; <2 x float> read through a local (addrspace 3) pointer. The r600 runs
; expect two LDS reads; the amdgcn runs expect one 64-bit DS read.
; FUNC-LABEL: {{^}}load_v2f32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI: s_mov_b32 m0
; SI: ds_read_b64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
  %pair = load <2 x float>, <2 x float> addrspace(3)* %in
  store <2 x float> %pair, <2 x float> addrspace(1)* %out
  ret void
}

; Reads a scalar i32 and a <2 x i32> vector that share one local-memory base
; pointer, then stores their sum. The vector is taken at element index 2 of
; the <2 x i32> view of the pointer and is loaded with 4-byte alignment.
; FUNC-LABEL: {{^}}load_i32_v2i32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI-DAG: ds_read_b32
; SI-DAG: ds_read2_b32
define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
  %first = load i32, i32 addrspace(3)* %in
  %as_vec = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
  %vec_addr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %as_vec, i32 2
  %pair = load <2 x i32>, <2 x i32> addrspace(3)* %vec_addr, align 4
  ; Place the scalar in lane 0 of an otherwise zero vector so it only affects
  ; the first element of the sum.
  %lane0 = insertelement <2 x i32> <i32 0, i32 0>, i32 %first, i32 0
  %sum = add <2 x i32> %pair, %lane0
  store <2 x i32> %sum, <2 x i32> addrspace(1)* %out
  ret void
}


; Local-memory array used as a compile-time-constant load address below.
@lds = addrspace(3) global [512 x i32] undef, align 4

; On SI we need to make sure that the base offset is a register and not
; an immediate.
; The load reads element 1 of @lds, so the address is a constant; the amdgcn
; checks expect a zero materialised in a VGPR and used as the base with
; offset:4. The destination register of the read is left unconstrained so the
; test does not depend on register allocation.
; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
; R600: LDS_READ_RET
define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  ; %in is unused; the address comes from @lds.
  %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
  %tmp1 = load i32, i32 addrspace(3)* %tmp0
  ; The result is stored one i32 element past %out.
  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  store i32 %tmp1, i32 addrspace(1)* %tmp2
  ret void
}
