; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    mvn w9, w2
; CHECK-NEXT:    lsr w10, w1, #1
; CHECK-NEXT:    lsl w8, w0, w2
; CHECK-NEXT:    lsr w9, w10, w9
; CHECK-NEXT:    orr w0, w8, w9
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK-LABEL: fshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn w9, w2
; CHECK-NEXT:    lsr x10, x1, #1
; CHECK-NEXT:    lsl x8, x0, x2
; CHECK-NEXT:    lsr x9, x10, x9
; CHECK-NEXT:    orr x0, x8, x9
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #31883
; CHECK-NEXT:    movk x8, #3542, lsl #16
; CHECK-NEXT:    movk x8, #51366, lsl #32
; CHECK-NEXT:    movk x8, #56679, lsl #48
; CHECK-NEXT:    umulh x8, x2, x8
; CHECK-NEXT:    mov w9, #37
; CHECK-NEXT:    ubfx x8, x8, #5, #27
; CHECK-NEXT:    msub w8, w8, w9, w2
; CHECK-NEXT:    lsl x9, x0, x8
; CHECK-NEXT:    mvn w8, w8
; CHECK-NEXT:    ubfiz x10, x1, #26, #37
; CHECK-NEXT:    lsr x8, x10, x8
; CHECK-NEXT:    orr x0, x9, x8
; CHECK-NEXT:    ret
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #67
; CHECK-NEXT:    ret
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}
define i8 @fshl_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #128
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}
define i8 @fshl_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #120
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}
define i8 @fshl_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr w0, w0, w1, #23
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr w0, w0, w1, #23
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr x0, x0, x1, #23
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #128
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    mvn w9, w2
; CHECK-NEXT:    lsl w10, w0, #1
; CHECK-NEXT:    lsr w8, w1, w2
; CHECK-NEXT:    lsl w9, w10, w9
; CHECK-NEXT:    orr w0, w9, w8
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK-LABEL: fshr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn w9, w2
; CHECK-NEXT:    lsl x10, x0, #1
; CHECK-NEXT:    lsr x8, x1, x2
; CHECK-NEXT:    lsl x9, x10, x9
; CHECK-NEXT:    orr x0, x9, x8
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #31883
; CHECK-NEXT:    movk x8, #3542, lsl #16
; CHECK-NEXT:    movk x8, #51366, lsl #32
; CHECK-NEXT:    movk x8, #56679, lsl #48
; CHECK-NEXT:    umulh x8, x2, x8
; CHECK-NEXT:    mov w9, #37
; CHECK-NEXT:    lsr x8, x8, #5
; CHECK-NEXT:    msub w8, w8, w9, w2
; CHECK-NEXT:    lsl x10, x1, #27
; CHECK-NEXT:    add w8, w8, #27 // =27
; CHECK-NEXT:    lsr x9, x10, x8
; CHECK-NEXT:    mvn w8, w8
; CHECK-NEXT:    lsl x10, x0, #1
; CHECK-NEXT:    lsl x8, x10, x8
; CHECK-NEXT:    orr x0, x8, x9
; CHECK-NEXT:    ret
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #31
; CHECK-NEXT:    ret
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}
define i8 @fshr_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #254
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}
define i8 @fshr_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #225
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}
define i8 @fshr_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #255
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr w0, w0, w1, #9
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr w0, w0, w1, #9
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr x0, x0, x1, #41
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #254
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}
define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}
define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, w1
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}
define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}