• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s
3
; Declarations of the funnel-shift-left (fshl) and funnel-shift-right (fshr)
; intrinsics at i8, i16, i32, i64 and <4 x i32>. Each takes three operands of
; the same type: two data values and the shift amount.
; NOTE(review): the i16 variants are declared but not called in the tests
; visible in this file.
4declare i8 @llvm.fshl.i8(i8, i8, i8)
5declare i16 @llvm.fshl.i16(i16, i16, i16)
6declare i32 @llvm.fshl.i32(i32, i32, i32)
7declare i64 @llvm.fshl.i64(i64, i64, i64)
8declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
9
10declare i8 @llvm.fshr.i8(i8, i8, i8)
11declare i16 @llvm.fshr.i16(i16, i16, i16)
12declare i32 @llvm.fshr.i32(i32, i32, i32)
13declare i64 @llvm.fshr.i64(i64, i64, i64)
14declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
15
16; General case - all operands can be variables.
17
; fshl on i32 with a variable shift amount %z. The expected code masks the
; amount to 5 bits (andi. with 31), computes the complementary amount
; (subfic from 32, then clrlwi), ORs a left shift of register 3 with a right
; shift of register 4, and finishes with an isel.
; NOTE(review): the isel presumably returns the first operand untouched when
; the masked amount is zero - confirm against the Power ISA.
18define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
19; CHECK-LABEL: fshl_i32:
20; CHECK:       # %bb.0:
21; CHECK-NEXT:    subfic 6, 5, 32
22; CHECK-NEXT:    andi. 5, 5, 31
23; CHECK-NEXT:    clrlwi 6, 6, 27
24; CHECK-NEXT:    slw 5, 3, 5
25; CHECK-NEXT:    srw 4, 4, 6
26; CHECK-NEXT:    or 4, 5, 4
27; CHECK-NEXT:    isel 3, 3, 4, 2
28; CHECK-NEXT:    blr
29  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
30  ret i32 %f
31}
32
33; Verify that weird types are minimally supported.
; fshl on the non-power-of-two type i37 with a variable shift amount. Because
; 37 is not a power of two, the shift amount cannot be reduced with a simple
; mask: the expected code builds a 64-bit constant (lis/ori/sldi/oris/ori) and
; uses a multiply-high, multiply-by-37 and subtract sequence on both the
; amount and on (37 - amount) before doing the 64-bit shifts.
34declare i37 @llvm.fshl.i37(i37, i37, i37)
35define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
36; CHECK-LABEL: fshl_i37:
37; CHECK:       # %bb.0:
38; CHECK-NEXT:    lis 6, -8857
39; CHECK-NEXT:    subfic 7, 5, 37
40; CHECK-NEXT:    clrldi 5, 5, 27
41; CHECK-NEXT:    clrldi 4, 4, 27
42; CHECK-NEXT:    ori 6, 6, 51366
43; CHECK-NEXT:    clrldi 7, 7, 27
44; CHECK-NEXT:    sldi 6, 6, 32
45; CHECK-NEXT:    oris 6, 6, 3542
46; CHECK-NEXT:    ori 6, 6, 31883
47; CHECK-NEXT:    mulhdu 8, 7, 6
48; CHECK-NEXT:    mulhdu 6, 5, 6
49; CHECK-NEXT:    rldicl 8, 8, 59, 5
50; CHECK-NEXT:    rldicl 6, 6, 59, 5
51; CHECK-NEXT:    mulli 8, 8, 37
52; CHECK-NEXT:    mulli 6, 6, 37
53; CHECK-NEXT:    sub 7, 7, 8
54; CHECK-NEXT:    subf. 5, 6, 5
55; CHECK-NEXT:    srd 4, 4, 7
56; CHECK-NEXT:    sld 5, 3, 5
57; CHECK-NEXT:    or 4, 5, 4
58; CHECK-NEXT:    isel 3, 3, 4, 2
59; CHECK-NEXT:    blr
60  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
61  ret i37 %f
62}
63
64; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
65
; fshl on i7 with all-constant operands: 112 = 0b1110000, 127 = 0b1111111,
; shift amount 2. The call is expected to fold completely, leaving only a
; load of the immediate 67 (= 0b1000011) and the return.
66declare i7 @llvm.fshl.i7(i7, i7, i7)
67define i7 @fshl_i7_const_fold() {
68; CHECK-LABEL: fshl_i7_const_fold:
69; CHECK:       # %bb.0:
70; CHECK-NEXT:    li 3, 67
71; CHECK-NEXT:    blr
72  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
73  ret i7 %f
74}
75
76; With constant shift amount, this is rotate + insert (missing extended mnemonics).
77
; fshl on i32 with the constant shift amount 9. The expected code rotates
; register 4 left by 9 (rlwinm), inserts register 3 rotated by 9 under the
; mask 0..22 (rlwimi), then moves the result into register 3.
78define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
79; CHECK-LABEL: fshl_i32_const_shift:
80; CHECK:       # %bb.0:
81; CHECK-NEXT:    rlwinm 4, 4, 9, 0, 31
82; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
83; CHECK-NEXT:    mr 3, 4
84; CHECK-NEXT:    blr
85  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
86  ret i32 %f
87}
88
89; Check modulo math on shift amount. 41-32=9.
90
; fshl on i32 with the constant shift amount 41, which is larger than the bit
; width. The expected code is identical to the shift-by-9 case
; (41 - 32 = 9), showing the amount is taken modulo 32.
91define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
92; CHECK-LABEL: fshl_i32_const_overshift:
93; CHECK:       # %bb.0:
94; CHECK-NEXT:    rlwinm 4, 4, 9, 0, 31
95; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
96; CHECK-NEXT:    mr 3, 4
97; CHECK-NEXT:    blr
98  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
99  ret i32 %f
100}
101
102; 64-bit should also work. 105-64 = 41.
103
; fshl on i64 with the constant shift amount 105, which is larger than the
; bit width. The expected rotate and insert (rotldi, rldimi) both use 41,
; i.e. 105 - 64.
104define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
105; CHECK-LABEL: fshl_i64_const_overshift:
106; CHECK:       # %bb.0:
107; CHECK-NEXT:    rotldi 4, 4, 41
108; CHECK-NEXT:    rldimi 4, 3, 41, 0
109; CHECK-NEXT:    mr 3, 4
110; CHECK-NEXT:    blr
111  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
112  ret i64 %f
113}
114
115; This should work without any node-specific logic.
116
; fshl on i8 with all-constant operands (255, 0, shift amount 7). The call is
; expected to fold to the immediate 128 (= 0b10000000, i.e. 255 << 7
; truncated to 8 bits), with no shift instructions emitted.
117define i8 @fshl_i8_const_fold() {
118; CHECK-LABEL: fshl_i8_const_fold:
119; CHECK:       # %bb.0:
120; CHECK-NEXT:    li 3, 128
121; CHECK-NEXT:    blr
122  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
123  ret i8 %f
124}
125
126; Repeat everything for funnel shift right.
127
128; General case - all operands can be variables.
129
; fshr on i32 with a variable shift amount %z. Mirror image of the fshl case:
; the amount is masked to 5 bits (andi. with 31), register 4 is shifted right
; by it, register 3 is shifted left by the complementary amount, and the two
; are ORed. The final isel has register 4 as its first source.
; NOTE(review): the isel presumably returns the second operand untouched when
; the masked amount is zero - confirm against the Power ISA.
130define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
131; CHECK-LABEL: fshr_i32:
132; CHECK:       # %bb.0:
133; CHECK-NEXT:    subfic 6, 5, 32
134; CHECK-NEXT:    andi. 5, 5, 31
135; CHECK-NEXT:    clrlwi 6, 6, 27
136; CHECK-NEXT:    srw 5, 4, 5
137; CHECK-NEXT:    slw 3, 3, 6
138; CHECK-NEXT:    or 3, 3, 5
139; CHECK-NEXT:    isel 3, 4, 3, 2
140; CHECK-NEXT:    blr
141  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
142  ret i32 %f
143}
144
145; Verify that weird types are minimally supported.
; fshr on the non-power-of-two type i37 with a variable shift amount. As in
; the fshl i37 case, the expected code reduces both the amount and
; (37 - amount) with a multiply-high, multiply-by-37 and subtract sequence
; rather than a mask, then ORs a right shift of the second operand's low
; 37 bits (clrldi into register 9) with a left shift of register 3.
146declare i37 @llvm.fshr.i37(i37, i37, i37)
147define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
148; CHECK-LABEL: fshr_i37:
149; CHECK:       # %bb.0:
150; CHECK-NEXT:    lis 6, -8857
151; CHECK-NEXT:    subfic 7, 5, 37
152; CHECK-NEXT:    clrldi 5, 5, 27
153; CHECK-NEXT:    clrldi 9, 4, 27
154; CHECK-NEXT:    ori 6, 6, 51366
155; CHECK-NEXT:    clrldi 7, 7, 27
156; CHECK-NEXT:    sldi 6, 6, 32
157; CHECK-NEXT:    oris 6, 6, 3542
158; CHECK-NEXT:    ori 6, 6, 31883
159; CHECK-NEXT:    mulhdu 8, 5, 6
160; CHECK-NEXT:    mulhdu 6, 7, 6
161; CHECK-NEXT:    rldicl 8, 8, 59, 5
162; CHECK-NEXT:    rldicl 6, 6, 59, 5
163; CHECK-NEXT:    mulli 8, 8, 37
164; CHECK-NEXT:    mulli 6, 6, 37
165; CHECK-NEXT:    subf. 5, 8, 5
166; CHECK-NEXT:    sub 6, 7, 6
167; CHECK-NEXT:    srd 5, 9, 5
168; CHECK-NEXT:    sld 3, 3, 6
169; CHECK-NEXT:    or 3, 3, 5
170; CHECK-NEXT:    isel 3, 4, 3, 2
171; CHECK-NEXT:    blr
172  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
173  ret i37 %f
174}
175
176; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
177
; fshr on i7 with all-constant operands: 112 = 0b1110000, 127 = 0b1111111,
; shift amount 2. The call is expected to fold completely, leaving only a
; load of the immediate 31 (= 0b0011111) and the return.
178declare i7 @llvm.fshr.i7(i7, i7, i7)
179define i7 @fshr_i7_const_fold() {
180; CHECK-LABEL: fshr_i7_const_fold:
181; CHECK:       # %bb.0:
182; CHECK-NEXT:    li 3, 31
183; CHECK-NEXT:    blr
184  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
185  ret i7 %f
186}
187
188; With constant shift amount, this is rotate + insert (missing extended mnemonics).
189
; fshr on i32 with the constant shift amount 9. The expected code expresses
; the right funnel shift as a left rotate and insert by 23 (= 32 - 9): rlwinm
; rotates register 4, rlwimi inserts register 3 under the mask 0..8, and the
; result is moved into register 3.
190define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
191; CHECK-LABEL: fshr_i32_const_shift:
192; CHECK:       # %bb.0:
193; CHECK-NEXT:    rlwinm 4, 4, 23, 0, 31
194; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
195; CHECK-NEXT:    mr 3, 4
196; CHECK-NEXT:    blr
197  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
198  ret i32 %f
199}
200
201; Check modulo math on shift amount. 41-32=9.
202
; fshr on i32 with the constant shift amount 41, which is larger than the bit
; width. The expected code is identical to the shift-by-9 case (rotate and
; insert by 23), showing the amount is taken modulo 32.
203define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
204; CHECK-LABEL: fshr_i32_const_overshift:
205; CHECK:       # %bb.0:
206; CHECK-NEXT:    rlwinm 4, 4, 23, 0, 31
207; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
208; CHECK-NEXT:    mr 3, 4
209; CHECK-NEXT:    blr
210  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
211  ret i32 %f
212}
213
214; 64-bit should also work. 105-64 = 41.
215
; fshr on i64 with the constant shift amount 105, which is larger than the
; bit width. The effective right shift is 41 (105 - 64), which the expected
; code expresses as a left rotate and insert by 23 (= 64 - 41).
216define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
217; CHECK-LABEL: fshr_i64_const_overshift:
218; CHECK:       # %bb.0:
219; CHECK-NEXT:    rotldi 4, 4, 23
220; CHECK-NEXT:    rldimi 4, 3, 23, 0
221; CHECK-NEXT:    mr 3, 4
222; CHECK-NEXT:    blr
223  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
224  ret i64 %f
225}
226
227; This should work without any node-specific logic.
228
; fshr on i8 with all-constant operands (255, 0, shift amount 7). The call is
; expected to fold to the immediate 254 (= 0b11111110, i.e. 255 << 1
; truncated to 8 bits), with no shift instructions emitted.
229define i8 @fshr_i8_const_fold() {
230; CHECK-LABEL: fshr_i8_const_fold:
231; CHECK:       # %bb.0:
232; CHECK-NEXT:    li 3, 254
233; CHECK-NEXT:    blr
234  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
235  ret i8 %f
236}
237
; fshl on i32 with a constant shift amount equal to the bit width (32). The
; expected code is a bare return with no other instructions, so the value
; already in the return register is passed through unchanged.
; NOTE(review): presumably that register holds %x on entry - confirm against
; the ABI.
238define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
239; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
240; CHECK:       # %bb.0:
241; CHECK-NEXT:    blr
242  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
243  ret i32 %f
244}
245
; fshr on i32 with a constant shift amount equal to the bit width (32). The
; expected code is a single register move (mr 3, 4) followed by the return;
; no shift or rotate is emitted.
; NOTE(review): presumably register 4 holds %y on entry, making the result
; the second operand - confirm against the ABI.
246define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
247; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
248; CHECK:       # %bb.0:
249; CHECK-NEXT:    mr 3, 4
250; CHECK-NEXT:    blr
251  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
252  ret i32 %f
253}
254
; Vector form of the fshl shift-by-bit-width test: every lane of the
; <4 x i32> shift amount is 32. The expected code is a bare return with no
; vector instructions.
255define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
256; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
257; CHECK:       # %bb.0:
258; CHECK-NEXT:    blr
259  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
260  ret <4 x i32> %f
261}
262
; Vector form of the fshr shift-by-bit-width test: every lane of the
; <4 x i32> shift amount is 32. The expected code is a single vector register
; move (vmr 2, 3) followed by the return; no shift or rotate is emitted.
; NOTE(review): presumably vector register 3 holds %y on entry - confirm
; against the ABI.
263define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
264; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
265; CHECK:       # %bb.0:
266; CHECK-NEXT:    vmr 2, 3
267; CHECK-NEXT:    blr
268  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
269  ret <4 x i32> %f
270}
271
272