; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s


; Load one i8 through the addrspace(4) pointer %in and store it unchanged
; through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_load_i8:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
entry:
  %ld = load i8, i8 addrspace(4)* %in
  store i8 %ld, i8 addrspace(1)* %out
  ret void
}

; Load <2 x i8> through the addrspace(4) pointer %in and store it unchanged
; through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_load_v2i8:
; GCN-NOHSA: buffer_load_ushort v
; GCN-HSA: flat_load_ushort v

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; Load <3 x i8> through the addrspace(4) pointer %in and store it unchanged
; through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_load_v3i8:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
  ret void
}

; Load <4 x i8> through the addrspace(4) pointer %in and store it unchanged
; through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_load_v4i8:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <4 x i8>, <4 x i8> addrspace(4)* %in
  store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
  ret void
}

; Load <8 x i8> through the addrspace(4) pointer %in and store it unchanged
; through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_load_v8i8:
; GCN: s_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <8 x i8>, <8 x i8> addrspace(4)* %in
  store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
  ret void
}

; Load <16 x i8> through the addrspace(4) pointer %in and store it unchanged
; through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_load_v16i8:
; GCN: s_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <16 x i8>, <16 x i8> addrspace(4)* %in
  store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
  ret void
}

; Load one i8 through the addrspace(4) pointer %in, zero-extend it to i32,
; and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_i8_to_i32:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Load one i8 through the addrspace(4) pointer %in, sign-extend it to i32,
; and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_i8_to_i32:
; GCN-NOHSA: buffer_load_sbyte
; GCN-HSA: flat_load_sbyte

; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %ld = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Load <1 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <1 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; Load <1 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <1 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i32:

; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; Load <2 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <2 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG: 8
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Load <2 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <2 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort

; GCN-HSA: flat_load_ushort

; GCN: v_bfe_i32
; GCN: v_bfe_i32

; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Load <3 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <3 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v3i8_to_v3i32:
; GCN: s_load_dword s

; GCN-DAG: s_bfe_u32
; GCN-DAG: s_bfe_u32
; GCN-DAG: s_and_b32

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  %ext = zext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; Load <3 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <3 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v3i8_to_v3i32:
; GCN: s_load_dword s

; GCN-DAG: s_bfe_i32
; GCN-DAG: s_bfe_i32
; GCN-DAG: s_bfe_i32

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  %ext = sext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; Load <4 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <4 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i32:
; GCN: s_load_dword s
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Load <4 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <4 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i32:
; GCN: s_load_dword s
; GCN-DAG: s_sext_i32_i8
; GCN-DAG: s_ashr_i32

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Load <8 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <8 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; Load <8 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <8 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i8

; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; Load <16 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <16 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i32:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; Load <16 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <16 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i32:

; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = sext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

388; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i32:
389
390; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
391; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
392; TODO: These should use DST, but for some there are redundant MOVs
393; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
394; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
395; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
396; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
397; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
398; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
399; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
400; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
401; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
402; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
403; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
404; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
405; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
406; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
407; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
408; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
409; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
410; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
411; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
412; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
413; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
414; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
415; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
416; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
417; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
418; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
419; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
420; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
421; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
422; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
423; EG-DAG: 8
424; EG-DAG: 8
425; EG-DAG: 8
426; EG-DAG: 8
427; EG-DAG: 8
428; EG-DAG: 8
429; EG-DAG: 8
430; EG-DAG: 8
431; EG-DAG: 8
432; EG-DAG: 8
433; EG-DAG: 8
434; EG-DAG: 8
435; EG-DAG: 8
436; EG-DAG: 8
437; EG-DAG: 8
438; EG-DAG: 8
439; EG-DAG: 8
440; EG-DAG: 8
441; EG-DAG: 8
442; EG-DAG: 8
443; EG-DAG: 8
444; EG-DAG: 8
445; EG-DAG: 8
446; EG-DAG: 8
447; EG-DAG: 8
448; EG-DAG: 8
449; EG-DAG: 8
450; EG-DAG: 8
451; EG-DAG: 8
452; EG-DAG: 8
453define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
454  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
455  %ext = zext <32 x i8> %load to <32 x i32>
456  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
457  ret void
458}
459
460; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i32:
461
462; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
463; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1
464; TODO: These should use DST, but for some there are redundant MOVs
465; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
466; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
467; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
468; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
469; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
470; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
471; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
472; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
473; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
474; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
475; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
476; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
477; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
478; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
479; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
480; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
481; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
482; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
483; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
484; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
485; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
486; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
487; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
488; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
489; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
490; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
491; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
492; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
493; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
494; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
495; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
496; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
497; EG-DAG: 8
498; EG-DAG: 8
499; EG-DAG: 8
500; EG-DAG: 8
501; EG-DAG: 8
502; EG-DAG: 8
503; EG-DAG: 8
504; EG-DAG: 8
505; EG-DAG: 8
506; EG-DAG: 8
507; EG-DAG: 8
508; EG-DAG: 8
509; EG-DAG: 8
510; EG-DAG: 8
511; EG-DAG: 8
512; EG-DAG: 8
513; EG-DAG: 8
514; EG-DAG: 8
515; EG-DAG: 8
516; EG-DAG: 8
517; EG-DAG: 8
518; EG-DAG: 8
519; EG-DAG: 8
520; EG-DAG: 8
521; EG-DAG: 8
522; EG-DAG: 8
523; EG-DAG: 8
524; EG-DAG: 8
525; EG-DAG: 8
526; EG-DAG: 8
527; EG-DAG: 8
528; EG-DAG: 8
529define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
530  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
531  %ext = sext <32 x i8> %load to <32 x i32>
532  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
533  ret void
534}
535
; Load <64 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <64 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
  %ext = zext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; Load <64 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <64 x i32>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
  %ext = sext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; Load one i8 through the addrspace(4) pointer %in, zero-extend it to i64,
; and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_i8_to_i64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]

; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Load one i8 through the addrspace(4) pointer %in, sign-extend it to i64,
; and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_i8_to_i64:
; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]+\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7?
; EG: 31
define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Load <1 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <1 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Load <1 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <1 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]+\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7?
; EG: 31
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Load <2 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <2 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Load <2 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <2 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Load <4 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <4 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Load <4 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <4 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Load <8 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <8 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Load <8 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <8 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Load <16 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <16 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Load <16 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <16 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = sext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Load <32 x i8> through the addrspace(4) pointer %in, zero-extend it to
; <32 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = zext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Load <32 x i8> through the addrspace(4) pointer %in, sign-extend it to
; <32 x i64>, and store the result through the addrspace(1) pointer %out.
; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = sext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = zext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = sext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; Load one i8 through the addrspace(4) pointer %in, zero-extend it to i16,
; and store the result through the addrspace(1) pointer %out.
; NOTE(review): unlike the sign-extending i8-to-i16 case, this one carries no
; expectation for the r600 run; confirm whether a VTX_READ_8 check belongs here.
; FUNC-LABEL: {{^}}constant_zextload_i8_to_i16:
; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
; GCN-NOHSA: buffer_store_short v[[VAL]]

; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

754; FUNC-LABEL: {{^}}constant_sextload_i8_to_i16:
755; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
756; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
757
758; GCN-NOHSA: buffer_store_short v[[VAL]]
759; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
760
761; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test: read a single i8 through %in, sign-extend it to i16,
; and write the result through %out.
762define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
763  %byte = load i8, i8 addrspace(4)* %in
764  %wide = sext i8 %byte to i16
765  store i16 %wide, i16 addrspace(1)* %out
766  ret void
767}
768
769; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
; Kernel under test: read a <1 x i8> vector through %in, zero-extend its
; lane to i16, and write the <1 x i16> result through %out.
770define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
771  %bytes = load <1 x i8>, <1 x i8> addrspace(4)* %in
772  %wide = zext <1 x i8> %bytes to <1 x i16>
773  store <1 x i16> %wide, <1 x i16> addrspace(1)* %out
774  ret void
775}
776
777; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i16:
778
; Sign-extending load of <1 x i8> to <1 x i16>. On the r600 run the checks
; below expect an 8-bit vertex read and one BFE_INT (presumably the
; instruction performing the sign extension).
; The BFE_INT destination is matched as T<digits>.<channel>: the dot is
; escaped and the register number may have more than one digit.
779; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
780; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
781define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
782  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
783  %ext = sext <1 x i8> %load to <1 x i16>
784  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
785  ret void
786}
787
788; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
789
790; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test: read a <2 x i8> vector through %in, zero-extend each
; lane to i16, and write the <2 x i16> result through %out.
791define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
792  %bytes = load <2 x i8>, <2 x i8> addrspace(4)* %in
793  %wide = zext <2 x i8> %bytes to <2 x i16>
794  store <2 x i16> %wide, <2 x i16> addrspace(1)* %out
795  ret void
796}
797
798; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i16:
799
; Sign-extending load of <2 x i8> to <2 x i16>. On the r600 run the checks
; below expect a 16-bit vertex read and BFE_INT instructions (one check line
; per vector lane).
; The BFE_INT destination is matched as T<digits>.<channel>: the dot is
; escaped and the register number may have more than one digit.
800; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
801; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
802; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
803define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
804  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
805  %ext = sext <2 x i8> %load to <2 x i16>
806  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
807  ret void
808}
809
810; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:
811
812; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test: read a <4 x i8> vector through %in, zero-extend each
; lane to i16, and write the <4 x i16> result through %out.
813define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
814  %bytes = load <4 x i8>, <4 x i8> addrspace(4)* %in
815  %wide = zext <4 x i8> %bytes to <4 x i16>
816  store <4 x i16> %wide, <4 x i16> addrspace(1)* %out
817  ret void
818}
819
820; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i16:
821
; Sign-extending load of <4 x i8> to <4 x i16>. On the r600 run the checks
; below expect a 32-bit vertex read and BFE_INT instructions (one check line
; per vector lane).
; The BFE_INT destination is matched as T<digits>.<channel>: the dot is
; escaped and the register number may have more than one digit.
822; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
823; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
824; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
825; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
826; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
827define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
828  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
829  %ext = sext <4 x i8> %load to <4 x i16>
830  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
831  ret void
832}
833
834; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:
835
836; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; Kernel under test: read an <8 x i8> vector through %in, zero-extend each
; lane to i16, and write the <8 x i16> result through %out.
837define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
838  %bytes = load <8 x i8>, <8 x i8> addrspace(4)* %in
839  %wide = zext <8 x i8> %bytes to <8 x i16>
840  store <8 x i16> %wide, <8 x i16> addrspace(1)* %out
841  ret void
842}
843
844; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i16:
845
; Sign-extending load of <8 x i8> to <8 x i16>. On the r600 run the checks
; below expect a 64-bit vertex read and BFE_INT instructions (one check line
; per vector lane).
; The BFE_INT destination is matched as T<digits>.<channel>: the dot is
; escaped and the register number may have more than one digit.
846; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
847; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
848; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
849; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
850; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
851; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
852; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
853; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
854; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
855
856define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
857  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
858  %ext = sext <8 x i8> %load to <8 x i16>
859  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
860  ret void
861}
862
863; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:
864
865; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; Kernel under test: read a <16 x i8> vector through %in, zero-extend each
; lane to i16, and write the <16 x i16> result through %out.
866define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
867  %bytes = load <16 x i8>, <16 x i8> addrspace(4)* %in
868  %wide = zext <16 x i8> %bytes to <16 x i16>
869  store <16 x i16> %wide, <16 x i16> addrspace(1)* %out
870  ret void
871}
872
873; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i16:
874
; Sign-extending load of <16 x i8> to <16 x i16>. On the r600 run the checks
; below expect a 128-bit vertex read and BFE_INT instructions (one check line
; per vector lane).
; The BFE_INT destination is matched as T<digits>.<channel>: the dot is
; escaped and the register number may have more than one digit.
875; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
876; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
877; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
878; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
879; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
880; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
881; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
882; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
883; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
884; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
885; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
886; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
887; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
888; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
889; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
890; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
891; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
892define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
893  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
894  %ext = sext <16 x i8> %load to <16 x i16>
895  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
896  ret void
897}
898
899; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i16:
900
901; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
902; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; Kernel under test: read a <32 x i8> vector through %in, zero-extend each
; lane to i16, and write the <32 x i16> result through %out.
903define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
904  %bytes = load <32 x i8>, <32 x i8> addrspace(4)* %in
905  %wide = zext <32 x i8> %bytes to <32 x i16>
906  store <32 x i16> %wide, <32 x i16> addrspace(1)* %out
907  ret void
908}
909
910; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i16:
911
; Sign-extending load of <32 x i8> to <32 x i16>. On the r600 run the checks
; below expect two 128-bit vertex reads (offsets 0 and 16) and BFE_INT
; instructions (one check line per vector lane).
; The BFE_INT destination is matched as T<digits>.<channel>: the dot is
; escaped and the register number may have more than one digit.
912; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
913; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
914; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
915; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
916; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
917; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
918; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
919; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
920; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
921; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
922; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
923; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
924; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
925; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
926; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
927; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
928; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
929; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
930; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
931; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
932; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
933; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
934; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
935; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
936; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
937; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
938; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
939; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
940; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
941; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
942; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
943; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
944; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
945; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
946define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
947  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
948  %ext = sext <32 x i8> %load to <32 x i16>
949  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
950  ret void
951}
952
953; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
954; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
955;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
956;   %ext = zext <64 x i8> %load to <64 x i16>
957;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
958;   ret void
959; }
960
961; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
962; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
963;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
964;   %ext = sext <64 x i8> %load to <64 x i16>
965;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
966;   ret void
967; }
968
; Attribute group #0 (nounwind), referenced by the kernel definitions above.
969attributes #0 = { nounwind }
970