1; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3
4declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
5
; FUNC-LABEL: @bfe_i32_arg_arg_arg
; SI: V_BFE_I32
; EG: BFE_INT
; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
; All three operands come from kernel arguments. Previously %src1 was
; accidentally passed twice (as both offset and width), leaving %src2 unused;
; pass %src2 as the width operand as the function signature intends.
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
15
; FUNC-LABEL: @bfe_i32_arg_arg_imm
; SI: V_BFE_I32
; EG: BFE_INT
; Immediate width operand (123); source and offset come from arguments.
define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
24
; FUNC-LABEL: @bfe_i32_arg_imm_arg
; SI: V_BFE_I32
; EG: BFE_INT
; Immediate offset operand (123); source and width come from arguments.
define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
33
; FUNC-LABEL: @bfe_i32_imm_arg_arg
; SI: V_BFE_I32
; EG: BFE_INT
; Immediate source operand (123); offset and width come from arguments.
define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
42
; FUNC-LABEL: @v_bfe_print_arg
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
; Source is loaded from memory (a VGPR); constant offset/width must be
; printed as plain immediates on the VALU instruction.
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
  %load = load i32 addrspace(1)* %src0, align 4
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
51
; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset
; SI-NOT: BFE
; SI: S_ENDPGM
; EG-NOT: BFE
; A zero-width extract must be folded away even though the offset is not a
; constant; no BFE instruction may be emitted.
define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
61
; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset
; SI-NOT: BFE
; SI: S_ENDPGM
; EG-NOT: BFE
; Same as above but with an immediate offset; the zero width still forces
; the BFE to be folded away.
define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
71
; FUNC-LABEL: @bfe_i32_test_6
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: S_ENDPGM
; bfe of a value already shifted left by 31 reduces to shl 31 + ashr 1;
; the intrinsic itself disappears.
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
83
; FUNC-LABEL: @bfe_i32_test_7
; SI-NOT: SHL
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; After shl by 31, bits [0,31) are all zero, so extracting them folds the
; whole computation to storing constant 0 (no shift, no BFE).
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
97
; FIXME: The shifts should be 1 BFE
; FUNC-LABEL: @bfe_i32_test_8
; SI: BUFFER_LOAD_DWORD
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: S_ENDPGM
; shl 31 followed by a 1-bit extract at offset 31 is a sign-extend of bit 0;
; currently emitted as a load plus a single bfe of (src, 0, 1).
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
110
; FUNC-LABEL: @bfe_i32_test_9
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; Extracting the top bit with sign extension is just an arithmetic shift
; right by 31.
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
122
; FUNC-LABEL: @bfe_i32_test_10
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; offset + width == 32, so the extract reaches the sign bit and is
; equivalent to an arithmetic shift right by the offset (1).
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
134
; FUNC-LABEL: @bfe_i32_test_11
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; offset + width == 32 again; folds to an arithmetic shift right by 8.
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
146
; FUNC-LABEL: @bfe_i32_test_12
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; offset + width == 32; folds to an arithmetic shift right by 24.
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
158
; FUNC-LABEL: @bfe_i32_test_13
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  ; Note: despite the name, %shl is an arithmetic shift right. It already
  ; replicates the sign bit, so bfe(%shl, 31, 1) folds away and only the
  ; single ashr survives.
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  ; Split the store and the return onto separate lines (they were fused
  ; onto one line).
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
169
; FUNC-LABEL: @bfe_i32_test_14
; SI-NOT: LSHR
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  ; Note: despite the name, %shl is a logical shift right. Bit 31 of %shl is
  ; known zero, so the 1-bit sign extract at offset 31 and the shift itself
  ; must both fold away (no LSHR, no BFE in the output).
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  ; Split the store and the return onto separate lines (they were fused
  ; onto one line).
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
180
; FUNC-LABEL: @bfe_i32_constant_fold_test_0
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(0, 0, 0) -> 0.
define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
192
; FUNC-LABEL: @bfe_i32_constant_fold_test_1
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(12334, 0, 0) -> 0 (zero width).
define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
204
; FUNC-LABEL: @bfe_i32_constant_fold_test_2
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(0, 0, 1) -> 0.
define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
216
; FUNC-LABEL: @bfe_i32_constant_fold_test_3
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(1, 0, 1) -> -1 (bit 0 set, sign-extended).
define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
228
; FUNC-LABEL: @bfe_i32_constant_fold_test_4
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(0xffffffff, 0, 1) -> -1.
define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
240
; FUNC-LABEL: @bfe_i32_constant_fold_test_5
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(128, 7, 1) -> -1 (bit 7 set, sign-extended).
define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
252
; FUNC-LABEL: @bfe_i32_constant_fold_test_6
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(128, 0, 8) -> 0xffffff80 (0x80 sign-extended from 8 bits).
define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
264
; FUNC-LABEL: @bfe_i32_constant_fold_test_7
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(127, 0, 8) -> 127 (sign bit of the 8-bit field is clear).
define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
276
; FUNC-LABEL: @bfe_i32_constant_fold_test_8
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(127, 6, 8) -> 1 (127 >> 6).
define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
288
; FUNC-LABEL: @bfe_i32_constant_fold_test_9
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(65536, 16, 8) -> 1 (0x10000 >> 16).
define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
300
; FUNC-LABEL: @bfe_i32_constant_fold_test_10
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(65535, 16, 16) -> 0 (0xffff has no bits at or above bit 16).
define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
312
; FUNC-LABEL: @bfe_i32_constant_fold_test_11
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(160, 4, 4) -> -6 (0xa0 >> 4 = 0b1010, sign-extended from 4 bits).
define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
324
; FUNC-LABEL: @bfe_i32_constant_fold_test_12
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(160, 31, 1) -> 0 (bit 31 of 160 is clear).
define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
336
; FUNC-LABEL: @bfe_i32_constant_fold_test_13
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(131070, 16, 16) -> 1 (0x1fffe >> 16).
define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
348
; FUNC-LABEL: @bfe_i32_constant_fold_test_14
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(160, 2, 30) -> 40 (160 >> 2; offset + width covers the sign bit).
define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
360
; FUNC-LABEL: @bfe_i32_constant_fold_test_15
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(160, 4, 28) -> 10 (160 >> 4; offset + width covers the sign bit).
define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
372
; FUNC-LABEL: @bfe_i32_constant_fold_test_16
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(0xffffffff, 1, 7) -> -1 (all extracted bits set, sign-extended).
define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
384
; FUNC-LABEL: @bfe_i32_constant_fold_test_17
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(255, 1, 31) -> 127 (255 >> 1; field sign bit is clear).
define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
396
; FUNC-LABEL: @bfe_i32_constant_fold_test_18
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
; bfe_i32(255, 31, 1) -> 0 (bit 31 of 255 is clear).
define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
408
; XXX - This should really be a single BFE, but the sext_inreg of the
; extended type i24 is never custom lowered.
; FUNC-LABEL: @bfe_sext_in_reg_i24
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
; XSI-NOT: SHL
; XSI-NOT: SHR
; XSI: BUFFER_STORE_DWORD [[BFE]],
; The XSI lines above are disabled checks describing the desired output once
; this is lowered to a single BFE; until then the shl/ashr pair is expected.
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  ; bfe of the low 24 bits followed by shl 8 / ashr 8 is a sext_inreg of an
  ; 8-bit field of the bfe result.
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
  %shl = shl i32 %bfe, 8
  %ashr = ashr i32 %shl, 8
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
427