; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON

; Funnel-shift intrinsic declarations exercised by the tests below.
declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshl_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT:    and r1, r2, #15
; CHECK-NEXT:    lsl r0, r0, r1
; CHECK-NEXT:    lsr r0, r0, #16
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %f
}

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r3, #31
; CHECK-NEXT:    lsr r1, r1, #1
; CHECK-NEXT:    bic r3, r3, r2
; CHECK-NEXT:    and r2, r2, #31
; CHECK-NEXT:    lsl r0, r0, r2
; CHECK-NEXT:    orr r0, r0, r1, lsr r3
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov r8, r1
; CHECK-NEXT:    mov r4, r0
; CHECK-NEXT:    ldr r0, [sp, #24]
; CHECK-NEXT:    mov r6, r3
; CHECK-NEXT:    ldr r1, [sp, #28]
; CHECK-NEXT:    mov r7, r2
; CHECK-NEXT:    mov r2, #37
; CHECK-NEXT:    mov r3, #0
; CHECK-NEXT:    bl __aeabi_uldivmod
; CHECK-NEXT:    mov r0, #63
; CHECK-NEXT:    bic r1, r0, r2
; CHECK-NEXT:    lsl r0, r6, #27
; CHECK-NEXT:    lsl r3, r7, #27
; CHECK-NEXT:    orr r0, r0, r7, lsr #5
; CHECK-NEXT:    and r2, r2, #63
; CHECK-NEXT:    lsrs r7, r0, #1
; CHECK-NEXT:    rrx r0, r3
; CHECK-NEXT:    rsb r3, r1, #32
; CHECK-NEXT:    lsr r0, r0, r1
; CHECK-NEXT:    lsl r6, r4, r2
; CHECK-NEXT:    orr r0, r0, r7, lsl r3
; CHECK-NEXT:    subs r3, r1, #32
; CHECK-NEXT:    lsr r1, r7, r1
; CHECK-NEXT:    lsrpl r0, r7, r3
; CHECK-NEXT:    subs r5, r2, #32
; CHECK-NEXT:    movwpl r6, #0
; CHECK-NEXT:    orr r0, r6, r0
; CHECK-NEXT:    rsb r6, r2, #32
; CHECK-NEXT:    cmp r5, #0
; CHECK-NEXT:    lsr r6, r4, r6
; CHECK-NEXT:    orr r2, r6, r8, lsl r2
; CHECK-NEXT:    lslpl r2, r4, r5
; CHECK-NEXT:    cmp r3, #0
; CHECK-NEXT:    movwpl r1, #0
; CHECK-NEXT:    orr r1, r2, r1
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, pc}
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #67
; CHECK-NEXT:    bx lr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

define i8 @fshl_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #128
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #120
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #9
; CHECK-NEXT:    orr r0, r0, r1, lsr #23
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #9
; CHECK-NEXT:    orr r0, r0, r1, lsr #23
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsr r1, r2, #23
; CHECK-NEXT:    orr r2, r1, r3, lsl #9
; CHECK-NEXT:    lsl r0, r0, #9
; CHECK-NEXT:    orr r1, r0, r3, lsr #23
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #128
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshr_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT:    and r1, r2, #15
; CHECK-NEXT:    lsr r0, r0, r1
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %f
}

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r3, #31
; CHECK-NEXT:    lsl r0, r0, #1
; CHECK-NEXT:    bic r3, r3, r2
; CHECK-NEXT:    and r2, r2, #31
; CHECK-NEXT:    lsl r0, r0, r3
; CHECK-NEXT:    orr r0, r0, r1, lsr r2
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT:    mov r8, r1
; CHECK-NEXT:    mov r9, r0
; CHECK-NEXT:    ldr r0, [sp, #32]
; CHECK-NEXT:    mov r6, r3
; CHECK-NEXT:    ldr r1, [sp, #36]
; CHECK-NEXT:    mov r7, r2
; CHECK-NEXT:    mov r2, #37
; CHECK-NEXT:    mov r3, #0
; CHECK-NEXT:    bl __aeabi_uldivmod
; CHECK-NEXT:    add r0, r2, #27
; CHECK-NEXT:    lsl r6, r6, #27
; CHECK-NEXT:    and r1, r0, #63
; CHECK-NEXT:    lsl r2, r7, #27
; CHECK-NEXT:    orr r7, r6, r7, lsr #5
; CHECK-NEXT:    mov r6, #63
; CHECK-NEXT:    rsb r3, r1, #32
; CHECK-NEXT:    lsr r2, r2, r1
; CHECK-NEXT:    subs r12, r1, #32
; CHECK-NEXT:    bic r6, r6, r0
; CHECK-NEXT:    orr r2, r2, r7, lsl r3
; CHECK-NEXT:    lsl r5, r9, #1
; CHECK-NEXT:    lsrpl r2, r7, r12
; CHECK-NEXT:    lsl r0, r5, r6
; CHECK-NEXT:    subs r4, r6, #32
; CHECK-NEXT:    lsl r3, r8, #1
; CHECK-NEXT:    movwpl r0, #0
; CHECK-NEXT:    orr r3, r3, r9, lsr #31
; CHECK-NEXT:    orr r0, r0, r2
; CHECK-NEXT:    rsb r2, r6, #32
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    lsr r1, r7, r1
; CHECK-NEXT:    lsr r2, r5, r2
; CHECK-NEXT:    orr r2, r2, r3, lsl r6
; CHECK-NEXT:    lslpl r2, r5, r4
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    movwpl r1, #0
; CHECK-NEXT:    orr r1, r2, r1
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #31
; CHECK-NEXT:    bx lr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

define i8 @fshr_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #254
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #225
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #255
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #23
; CHECK-NEXT:    orr r0, r0, r1, lsr #9
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #23
; CHECK-NEXT:    orr r0, r0, r1, lsr #9
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r2, r0, #23
; CHECK-NEXT:    lsl r1, r1, #23
; CHECK-NEXT:    orr r2, r2, r3, lsr #9
; CHECK-NEXT:    orr r1, r1, r0, lsr #9
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #254
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; SCALAR-LABEL: fshr_v4i32_shift_by_bitwidth:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ldm sp, {r0, r1, r2, r3}
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: fshr_v4i32_shift_by_bitwidth:
; NEON:       @ %bb.0:
; NEON-NEXT:    mov r0, sp
; NEON-NEXT:    vld1.64 {d16, d17}, [r0]
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
