; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

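; These tests cover the sign_extend_inreg pattern: a value whose low N bits
; hold the payload is shifted left and then arithmetic-shifted right by the
; same amount, sign-extending it in place. On SI this should select to
; S_BFE_I32 / S_SEXT_I32_I8 / S_SEXT_I32_I16 (or V_BFE_I32 when the operand
; is in a VGPR); on Evergreen, to BFE_INT.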
declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone


; FUNC-LABEL: @sext_in_reg_i1_i32
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: BUFFER_STORE_DWORD [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @sext_in_reg_i8_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i16_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}

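; The i64 cases below are legalized to 32-bit operations: the sign-extended
; low half is produced with a 32-bit extract and the result is stored as a
; pair with BUFFER_STORE_DWORDX2.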
; FUNC-LABEL: @sext_in_reg_i1_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_i8_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with variables inside {{}}
;; does not work, and the _LO/_HI order might also differ.
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_i16_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with variables inside {{}}
;; does not work, and the _LO/_HI order might also differ.
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

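; An i32 -> i64 sext_in_reg needs no bitfield extract: the low half is the
; value itself and the high half is just an arithmetic shift right by 31,
; so no BFE should appear on either target.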
; FUNC-LABEL: @sext_in_reg_i32_to_i64
; SI: S_LOAD_DWORD
; SI: S_LOAD_DWORD
; SI: S_ADD_I32 [[ADD:s[0-9]+]],
; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NOT: BFE_INT
; EG: ADD_INT {{\*?}} [[RES_LO]]
; EG: ASHR [[RES_HI]]
; EG: ADD_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with variables inside {{}}
;; does not work, and the _LO/_HI order might also differ.
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: BUFFER_STORE_DWORD
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

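; Mismatched shift amounts (shl by 6, ashr by 7) do not form a sext_in_reg,
; so no BFE may be created and both shifts must survive.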
; FUNC-LABEL: @sext_in_reg_i1_in_i32_other_amount
; SI-NOT: BFE
; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @sext_in_reg_v2i1_in_v2i32_other_amount
; SI: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
  ret void
}


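; Vector sext_in_reg is scalarized into one extract per element, followed by
; a single wide store (DWORDX2 for <2 x i32>, DWORDX4 for <4 x i32>).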
; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

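; The next two functions have no CHECK lines; presumably they are regression
; tests that only need to compile without crashing. The i8 select/xor pattern
; exercises sign-bit handling on an illegal type.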
; FUNC-LABEL: @testcase
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @testcase_3
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

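; When the operands come from loads they live in VGPRs, so the extract must
; use V_BFE_I32 rather than the scalar S_SEXT_* / S_BFE_I32 forms.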
; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FIXME: The BFE should really be eliminated. I think it should happen
; when computeKnownBitsForTargetNode is implemented for imax.

; FUNC-LABEL: @sext_in_reg_to_illegal_type
; SI: BUFFER_LOAD_SBYTE
; SI: V_MAX_I32
; SI: V_BFE_I32
; SI: BUFFER_STORE_SHORT
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}

declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

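; The remaining tests exercise the llvm.AMDGPU.bfe.i32 intrinsic directly.
; A zero-width extract should fold away completely.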
; FUNC-LABEL: @bfe_0_width
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

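; Nested sign-extending extracts: when the inner extract is no wider than the
; outer one, only a single V_BFE_I32 should remain.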
; FUNC-LABEL: @bfe_8_bfe_8
; SI: V_BFE_I32
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_8_bfe_16
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: S_ENDPGM
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; This really should be folded into a single BFE.
; FUNC-LABEL: @bfe_16_bfe_8
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE
; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

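; The extract here has its arguments reversed (offset 8, width 0), so it does
; not describe the same field as the following shl/ashr; presumably this
; guards against the sext_in_reg combine firing on a non-matching BFE. There
; are no output checks.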
; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

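; BUFFER_LOAD_SBYTE already sign-extends the loaded byte, so both the 8-bit
; BFE and the shl/ashr pair are redundant and should be eliminated.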
; FUNC-LABEL: @sextload_i8_to_i32_bfe
; SI: BUFFER_LOAD_SBYTE
; SI-NOT: BFE
; SI: S_ENDPGM
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sextload_i8_to_i32_bfe_0:
; SI-NOT: BFE
; SI: S_ENDPGM
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

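; In the next two tests the shl/ashr pair is redundant with the following
; BFE, so only the single V_BFE_I32 should remain.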
; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
; SI-NOT: SHR
; SI-NOT: SHL
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: S_ENDPGM
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
; SI: BUFFER_LOAD_DWORD
; SI-NOT: SHL
; SI-NOT: SHR
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: S_ENDPGM
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

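; Here the 2-bit extract at offset 1 straddles the boundary of the field the
; shl/ashr pair defines, so neither the shifts nor the BFE can be removed.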
; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: S_ENDPGM
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}