• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s

;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;
8
; Zero-extending i8 load from the global address space (addrspace 1); the
; widened i32 is stored back to global memory.
; Expected: a byte-wide VTX_READ_8 between X channels on R600, and a
; BUFFER_LOAD_UBYTE into a v register on SI.
; FUNC-LABEL: @load_i8
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}

; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %byte = load i8 addrspace(1)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
20
; Sign-extending i8 load from the global address space (addrspace 1).
; R600 is expected to fetch with VTX_READ_8 (destination == source register)
; and then sign-extend with an LSHL / ASHR pair using the literal 24; the ASHR
; must consume the LSHL result through PV on the same channel.
; SI is expected to use BUFFER_LOAD_SBYTE.
; Register numbers are matched with [0-9]+ so the checks do not depend on the
; allocator choosing a single-digit T register.
; FUNC-LABEL: @load_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %byte = load i8 addrspace(1)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
35
; Zero-extending <2 x i8> load from the global address space (addrspace 1).
; Expected: one byte load per element (two VTX_READ_8 on R600, two
; BUFFER_LOAD_UBYTE on SI).
; FUNC-LABEL: @load_v2i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %vec = load <2 x i8> addrspace(1)* %in
  %wide = zext <2 x i8> %vec to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
48
; Sign-extending <2 x i8> load from the global address space (addrspace 1).
; R600 is expected to emit, per element, a VTX_READ_8 followed by an
; LSHL / ASHR pair with the literal 24 (the ASHR reads the LSHL result via PV).
; The DAG form lets the two element sequences interleave in any order.
; SI is expected to emit two BUFFER_LOAD_SBYTE.
; Register numbers are matched with [0-9]+ so the checks do not depend on the
; allocator choosing single-digit T registers.
; FUNC-LABEL: @load_v2i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %vec = load <2 x i8> addrspace(1)* %in
  %wide = sext <2 x i8> %vec to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
69
; Zero-extending <4 x i8> load from the global address space (addrspace 1).
; Expected: one byte load per element (four VTX_READ_8 on R600, four
; BUFFER_LOAD_UBYTE on SI).
; FUNC-LABEL: @load_v4i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %vec = load <4 x i8> addrspace(1)* %in
  %wide = zext <4 x i8> %vec to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
86
; Sign-extending <4 x i8> load from the global address space (addrspace 1).
; R600 is expected to emit, per element, a VTX_READ_8 followed by an
; LSHL / ASHR pair with the literal 24 (the ASHR reads the LSHL result via PV).
; The DAG form lets the four element sequences interleave in any order.
; SI is expected to emit four BUFFER_LOAD_SBYTE.
; Register numbers are matched with [0-9]+ so the checks do not depend on the
; allocator choosing single-digit T registers.
; FUNC-LABEL: @load_v4i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]+\.[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]+\.[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 24
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %vec = load <4 x i8> addrspace(1)* %in
  %wide = sext <4 x i8> %vec to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
119
; Zero-extending i16 load from the global address space (addrspace 1).
; Expected: a VTX_READ_16 between X channels on R600, and a
; BUFFER_LOAD_USHORT on SI.
; FUNC-LABEL: @load_i16
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %half = load i16 addrspace(1)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
131
; Sign-extending i16 load from the global address space (addrspace 1).
; R600 is expected to fetch with VTX_READ_16 (destination == source register)
; and then sign-extend with an LSHL / ASHR pair using the literal 16; the ASHR
; must consume the LSHL result through PV on the same channel.
; SI is expected to use BUFFER_LOAD_SSHORT.
; Register numbers are matched with [0-9]+ so the checks do not depend on the
; allocator choosing a single-digit T register.
; FUNC-LABEL: @load_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %half = load i16 addrspace(1)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
146
; Zero-extending <2 x i16> load from the global address space (addrspace 1).
; Expected: one 16-bit load per element (two VTX_READ_16 on R600, two
; BUFFER_LOAD_USHORT on SI).
; FUNC-LABEL: @load_v2i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %vec = load <2 x i16> addrspace(1)* %in
  %wide = zext <2 x i16> %vec to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
159
; Sign-extending <2 x i16> load from the global address space (addrspace 1).
; R600 is expected to emit, per element, a VTX_READ_16 followed by an
; LSHL / ASHR pair with the literal 16 (the ASHR reads the LSHL result via PV).
; The DAG form lets the two element sequences interleave in any order.
; SI is expected to emit two BUFFER_LOAD_SSHORT.
; Register numbers are matched with [0-9]+ so the checks do not depend on the
; allocator choosing single-digit T registers.
; FUNC-LABEL: @load_v2i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %vec = load <2 x i16> addrspace(1)* %in
  %wide = sext <2 x i16> %vec to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
180
; Zero-extending <4 x i16> load from the global address space (addrspace 1).
; Expected: one 16-bit load per element (four VTX_READ_16 on R600, four
; BUFFER_LOAD_USHORT on SI).
; FUNC-LABEL: @load_v4i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %vec = load <4 x i16> addrspace(1)* %in
  %wide = zext <4 x i16> %vec to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
197
; Sign-extending <4 x i16> load from the global address space (addrspace 1).
; R600 is expected to emit, per element, a VTX_READ_16 followed by an
; LSHL / ASHR pair with the literal 16 (the ASHR reads the LSHL result via PV).
; The DAG form lets the four element sequences interleave in any order.
; SI is expected to emit four BUFFER_LOAD_SSHORT.
; Register numbers are matched with [0-9]+ so the checks do not depend on the
; allocator choosing single-digit T registers.
; FUNC-LABEL: @load_v4i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]+\.[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]+\.[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %vec = load <4 x i16> addrspace(1)* %in
  %wide = sext <4 x i16> %vec to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
230
; Plain i32 load from the global address space (addrspace 1), copied to %out.
; Expected: VTX_READ_32 between X channels with a trailing 0 operand on R600,
; and a BUFFER_LOAD_DWORD into a v register on SI.
; FUNC-LABEL: @load_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %word = load i32 addrspace(1)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}
242
; Plain float load from the global address space (addrspace 1), copied to %out.
; Expected: the same instructions as the i32 case, i.e. VTX_READ_32 on R600
; and BUFFER_LOAD_DWORD on SI.
; FUNC-LABEL: @load_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %fval = load float addrspace(1)* %in
  store float %fval, float addrspace(1)* %out
  ret void
}
254
; <2 x float> load from the global address space (addrspace 1), copied to %out.
; Expected: a single 64-bit load (VTX_READ_64 on R600, BUFFER_LOAD_DWORDX2 on
; SI).
; FUNC-LABEL: @load_v2f32
; R600-CHECK: VTX_READ_64

; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
  %pair = load <2 x float> addrspace(1)* %in
  store <2 x float> %pair, <2 x float> addrspace(1)* %out
  ret void
}
266
; i64 load from the global address space (addrspace 1), copied to %out.
; The R600 checks only look for two MEM_RAT instructions; SI is expected to
; load with BUFFER_LOAD_DWORDX2.
; FUNC-LABEL: @load_i64
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT

; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
  %quad = load i64 addrspace(1)* %in
  store i64 %quad, i64 addrspace(1)* %out
  ret void
}
278
; i32 load from the global address space (addrspace 1), sign-extended to i64
; before being stored to %out.
; The R600 checks expect two MEM_RAT instructions and an ASHR by the literal
; 31; SI is expected to load with BUFFER_LOAD_DWORD.
; Register numbers in the ASHR are matched with [0-9]+ so the check does not
; depend on the allocator choosing single-digit T registers.
; FUNC-LABEL: @load_i64_sext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}},  literal.x
; R600-CHECK: 31
; SI-CHECK: BUFFER_LOAD_DWORD

define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %word = load i32 addrspace(1)* %in
  %wide = sext i32 %word to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
293
; i32 load from the global address space (addrspace 1), zero-extended to i64
; before being stored to %out.
; Only R600 output is checked here (two MEM_RAT instructions).
; FUNC-LABEL: @load_i64_zext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %word = load i32 addrspace(1)* %in
  %wide = zext i32 %word to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
304
; <8 x i32> load from the global address space (addrspace 1), copied to %out.
; R600 is expected to use two 128-bit fetches; the SI checks currently accept
; eight BUFFER_LOAD_DWORD matches.
; FUNC-LABEL: @load_v8i32
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
  %vec = load <8 x i32> addrspace(1)* %in
  store <8 x i32> %vec, <8 x i32> addrspace(1)* %out
  ret void
}
323
; <16 x i32> load from the global address space (addrspace 1), copied to %out.
; R600 is expected to use four 128-bit fetches; the SI checks currently accept
; sixteen BUFFER_LOAD_DWORD matches.
; FUNC-LABEL: @load_v16i32
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
  %vec = load <16 x i32> addrspace(1)* %in
  store <16 x i32> %vec, <16 x i32> addrspace(1)* %out
  ret void
}
352
;===------------------------------------------------------------------------===;
; CONSTANT ADDRESS SPACE
;===------------------------------------------------------------------------===;
356
; Sign-extending i8 load from the constant address space (addrspace 2); the
; widened i32 is stored to global memory.
; R600 is expected to fetch with VTX_READ_8 (destination == source register)
; and then sign-extend with an LSHL / ASHR pair using the literal 24; the ASHR
; must consume the LSHL result through PV on the same channel.
; SI is expected to use BUFFER_LOAD_SBYTE into a v register.
; Register numbers are matched with [0-9]+ so the checks do not depend on the
; allocator choosing a single-digit T register.
; FUNC-LABEL: @load_const_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %byte = load i8 addrspace(2)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
372
; Zero-extending i8 load from the constant address space (addrspace 2),
; reading directly through %in (the "aligned" case).
; Expected: VTX_READ_8 between X channels on R600, BUFFER_LOAD_UBYTE into a
; v register on SI.
; FUNC-LABEL: @load_const_i8_aligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %byte = load i8 addrspace(2)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
384
; Zero-extending i8 load from the constant address space (addrspace 2),
; reading one element past %in (the "unaligned" case).
; Expected: the same instructions as the aligned variant, i.e. VTX_READ_8 on
; R600 and BUFFER_LOAD_UBYTE on SI.
; FUNC-LABEL: @load_const_i8_unaligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %ptr = getelementptr i8 addrspace(2)* %in, i32 1
  %byte = load i8 addrspace(2)* %ptr
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
397
; Sign-extending i16 load from the constant address space (addrspace 2); the
; widened i32 is stored to global memory.
; R600 is expected to fetch with VTX_READ_16 (destination == source register)
; and then sign-extend with an LSHL / ASHR pair using the literal 16; the ASHR
; must consume the LSHL result through PV on the same channel.
; SI is expected to use BUFFER_LOAD_SSHORT.
; Register numbers are matched with [0-9]+ so the checks do not depend on the
; allocator choosing a single-digit T register.
; FUNC-LABEL: @load_const_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]+}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %half = load i16 addrspace(2)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
413
; Zero-extending i16 load from the constant address space (addrspace 2),
; reading directly through %in (the "aligned" case).
; Expected: VTX_READ_16 between X channels on R600, BUFFER_LOAD_USHORT on SI.
; FUNC-LABEL: @load_const_i16_aligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %half = load i16 addrspace(2)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
425
; Zero-extending i16 load from the constant address space (addrspace 2),
; reading one element past %in (the "unaligned" case).
; Expected: the same instructions as the aligned variant, i.e. VTX_READ_16 on
; R600 and BUFFER_LOAD_USHORT on SI.
; FUNC-LABEL: @load_const_i16_unaligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %ptr = getelementptr i16 addrspace(2)* %in, i32 1
  %half = load i16 addrspace(2)* %ptr
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
438
; Plain i32 load from the constant address space (addrspace 2), stored to
; global memory.
; Expected: VTX_READ_32 between X channels on R600; on SI a scalar
; S_LOAD_DWORD into an s register.
; FUNC-LABEL: @load_const_addrspace_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %word = load i32 addrspace(2)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}
450
; Plain float load from the constant address space (addrspace 2), stored to
; global memory.
; Expected: the same instructions as the i32 case, i.e. VTX_READ_32 on R600
; and S_LOAD_DWORD into an s register on SI.
; FUNC-LABEL: @load_const_addrspace_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %fval = load float addrspace(2)* %in
  store float %fval, float addrspace(1)* %out
  ret void
}
461
;===------------------------------------------------------------------------===;
; LOCAL ADDRESS SPACE
;===------------------------------------------------------------------------===;
465
; Zero-extending i8 load from the local address space (addrspace 3); the
; widened i32 is stored to global memory.
; Expected: LDS_UBYTE_READ_RET on R600. On SI, an S_MOV_B32 to m0 followed by
; DS_READ_U8, with no S_WQM_B64 between the function label and the m0 write.
; FUNC-LABEL: @load_i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %byte = load i8 addrspace(3)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
478
; Sign-extending i8 load from the local address space (addrspace 3).
; Expected: LDS_UBYTE_READ_RET followed by an ASHR on R600. On SI, an
; S_MOV_B32 to m0 followed by DS_READ_I8, with no S_WQM_B64 between the
; function label and the m0 write.
; FUNC-LABEL: @load_i8_sext_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
  %byte = load i8 addrspace(3)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
492
; Zero-extending <2 x i8> load from the local address space (addrspace 3).
; Expected: one byte read per element (two LDS_UBYTE_READ_RET on R600; on SI
; an S_MOV_B32 to m0 followed by two DS_READ_U8). No S_WQM_B64 may appear
; between the function label and the m0 write.
; FUNC-LABEL: @load_v2i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %vec = load <2 x i8> addrspace(3)* %in
  %wide = zext <2 x i8> %vec to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
507
; Sign-extending <2 x i8> load from the local address space (addrspace 3).
; Expected on R600: two LDS_UBYTE_READ_RET and two ASHR, in any order.
; Expected on SI: an S_MOV_B32 to m0 followed by two DS_READ_I8, with no
; S_WQM_B64 between the function label and the m0 write.
; FUNC-LABEL: @load_v2i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %vec = load <2 x i8> addrspace(3)* %in
  %wide = sext <2 x i8> %vec to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
524
; Zero-extending <4 x i8> load from the local address space (addrspace 3).
; Expected: one byte read per element (four LDS_UBYTE_READ_RET on R600; on SI
; an S_MOV_B32 to m0 followed by four DS_READ_U8). No S_WQM_B64 may appear
; between the function label and the m0 write.
; FUNC-LABEL: @load_v4i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %vec = load <4 x i8> addrspace(3)* %in
  %wide = zext <4 x i8> %vec to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
543
; Sign-extending <4 x i8> load from the local address space (addrspace 3).
; Expected on R600: four LDS_UBYTE_READ_RET and four ASHR, in any order.
; Expected on SI: an S_MOV_B32 to m0 followed by four DS_READ_I8, with no
; S_WQM_B64 between the function label and the m0 write.
; FUNC-LABEL: @load_v4i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %vec = load <4 x i8> addrspace(3)* %in
  %wide = sext <4 x i8> %vec to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
566
; Zero-extending i16 load from the local address space (addrspace 3); the
; widened i32 is stored to global memory.
; Expected: LDS_USHORT_READ_RET on R600. On SI, an S_MOV_B32 to m0 followed by
; DS_READ_U16, with no S_WQM_B64 between the function label and the m0 write.
; FUNC-LABEL: @load_i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %half = load i16 addrspace(3)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
580
; Sign-extending i16 load from the local address space (addrspace 3).
; Expected: LDS_USHORT_READ_RET followed by an ASHR on R600. On SI, an
; S_MOV_B32 to m0 followed by DS_READ_I16, with no S_WQM_B64 between the
; function label and the m0 write.
; FUNC-LABEL: @load_i16_sext_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %half = load i16 addrspace(3)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
594
; Zero-extending <2 x i16> load from the local address space (addrspace 3).
; Expected: one 16-bit read per element (two LDS_USHORT_READ_RET on R600; on
; SI an S_MOV_B32 to m0 followed by two DS_READ_U16). No S_WQM_B64 may appear
; between the function label and the m0 write.
; FUNC-LABEL: @load_v2i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %vec = load <2 x i16> addrspace(3)* %in
  %wide = zext <2 x i16> %vec to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
609
; Sign-extending <2 x i16> load from the local address space (addrspace 3).
; Expected on R600: two LDS_USHORT_READ_RET and two ASHR, in any order.
; Expected on SI: an S_MOV_B32 to m0 followed by two DS_READ_I16, with no
; S_WQM_B64 between the function label and the m0 write.
; FUNC-LABEL: @load_v2i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %vec = load <2 x i16> addrspace(3)* %in
  %wide = sext <2 x i16> %vec to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
626
; Zero-extending <4 x i16> load from the local address space (addrspace 3).
; Expected: one 16-bit read per element (four LDS_USHORT_READ_RET on R600; on
; SI an S_MOV_B32 to m0 followed by four DS_READ_U16). No S_WQM_B64 may appear
; between the function label and the m0 write.
; FUNC-LABEL: @load_v4i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %vec = load <4 x i16> addrspace(3)* %in
  %wide = zext <4 x i16> %vec to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
645
; Sign-extending <4 x i16> load from the local address space (addrspace 3).
; Expected on R600: four LDS_USHORT_READ_RET and four ASHR, in any order.
; Expected on SI: an S_MOV_B32 to m0 followed by four DS_READ_I16, with no
; S_WQM_B64 between the function label and the m0 write.
; FUNC-LABEL: @load_v4i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %vec = load <4 x i16> addrspace(3)* %in
  %wide = sext <4 x i16> %vec to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
668
; Plain i32 load from the local address space (addrspace 3), stored to global
; memory.
; Expected: LDS_READ_RET on R600. On SI, an S_MOV_B32 to m0 followed by
; DS_READ_B32, with no S_WQM_B64 between the function label and the m0 write.
; FUNC-LABEL: @load_i32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %word = load i32 addrspace(3)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}
681
; Plain float load from the local address space (addrspace 3), stored to
; global memory.
; Expected: LDS_READ_RET on R600; on SI an S_MOV_B32 to m0 followed by
; DS_READ_B32.
; FUNC-LABEL: @load_f32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
  %fval = load float addrspace(3)* %in
  store float %fval, float addrspace(1)* %out
  ret void
}
693
; <2 x float> load from the local address space (addrspace 3), stored to
; global memory.
; Expected: two LDS_READ_RET on R600; on SI an S_MOV_B32 to m0 followed by a
; single DS_READ_B64.
; FUNC-LABEL: @load_v2f32_local
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
  %pair = load <2 x float> addrspace(3)* %in
  store <2 x float> %pair, <2 x float> addrspace(1)* %out
  ret void
}
706