• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+popcntd < %s      | FileCheck %s --check-prefixes=ANY,FAST
3; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefixes=ANY,SLOW
4
; zpop_i8_i16: zero-extend the i8 argument to i16 first, then take the i16
; population count and return it.
; With -mattr=+popcntd the expected code clears the upper 56 bits of r3 and
; issues a single popcntd. With -mattr=+slow-popcntd the expected code is a
; shift/mask/add sequence using the constants 85 (0x55), 0x33333333 and
; 0x0F0F0F0F, followed by a multiply by 0x01010101 and a right shift by 24.
5define i16 @zpop_i8_i16(i8 %x) {
6; FAST-LABEL: zpop_i8_i16:
7; FAST:       # %bb.0:
8; FAST-NEXT:    clrldi 3, 3, 56
9; FAST-NEXT:    popcntd 3, 3
10; FAST-NEXT:    blr
11;
12; SLOW-LABEL: zpop_i8_i16:
13; SLOW:       # %bb.0:
14; SLOW-NEXT:    clrlwi 5, 3, 24
15; SLOW-NEXT:    rotlwi 3, 3, 31
16; SLOW-NEXT:    andi. 3, 3, 85
17; SLOW-NEXT:    lis 4, 13107
18; SLOW-NEXT:    sub 3, 5, 3
19; SLOW-NEXT:    ori 4, 4, 13107
20; SLOW-NEXT:    rotlwi 5, 3, 30
21; SLOW-NEXT:    and 3, 3, 4
22; SLOW-NEXT:    andis. 4, 5, 13107
23; SLOW-NEXT:    andi. 5, 5, 13107
24; SLOW-NEXT:    or 4, 5, 4
25; SLOW-NEXT:    add 3, 3, 4
26; SLOW-NEXT:    lis 5, 3855
27; SLOW-NEXT:    srwi 4, 3, 4
28; SLOW-NEXT:    add 3, 3, 4
29; SLOW-NEXT:    lis 4, 257
30; SLOW-NEXT:    ori 5, 5, 3855
31; SLOW-NEXT:    and 3, 3, 5
32; SLOW-NEXT:    ori 4, 4, 257
33; SLOW-NEXT:    mullw 3, 3, 4
34; SLOW-NEXT:    srwi 3, 3, 24
35; SLOW-NEXT:    blr
36  %z = zext i8 %x to i16
37  %pop = tail call i16 @llvm.ctpop.i16(i16 %z)
38  ret i16 %pop
39}
40
; popz_i8_i16: take the i8 population count first, then zero-extend the
; result to i16 (the reverse operation order of zpop_i8_i16).
; With -mattr=+popcntd the expected code is the same clear-then-popcntd pair
; as for zpop_i8_i16. With -mattr=+slow-popcntd the expected expansion differs
; only in its last step: the top byte of the product is extracted with
; rlwinm 3, 3, 8, 24, 31 instead of srwi 3, 3, 24.
41define i16 @popz_i8_i16(i8 %x) {
42; FAST-LABEL: popz_i8_i16:
43; FAST:       # %bb.0:
44; FAST-NEXT:    clrldi 3, 3, 56
45; FAST-NEXT:    popcntd 3, 3
46; FAST-NEXT:    blr
47;
48; SLOW-LABEL: popz_i8_i16:
49; SLOW:       # %bb.0:
50; SLOW-NEXT:    clrlwi 5, 3, 24
51; SLOW-NEXT:    rotlwi 3, 3, 31
52; SLOW-NEXT:    andi. 3, 3, 85
53; SLOW-NEXT:    lis 4, 13107
54; SLOW-NEXT:    sub 3, 5, 3
55; SLOW-NEXT:    ori 4, 4, 13107
56; SLOW-NEXT:    rotlwi 5, 3, 30
57; SLOW-NEXT:    and 3, 3, 4
58; SLOW-NEXT:    andis. 4, 5, 13107
59; SLOW-NEXT:    andi. 5, 5, 13107
60; SLOW-NEXT:    or 4, 5, 4
61; SLOW-NEXT:    add 3, 3, 4
62; SLOW-NEXT:    lis 5, 3855
63; SLOW-NEXT:    srwi 4, 3, 4
64; SLOW-NEXT:    add 3, 3, 4
65; SLOW-NEXT:    lis 4, 257
66; SLOW-NEXT:    ori 5, 5, 3855
67; SLOW-NEXT:    and 3, 3, 5
68; SLOW-NEXT:    ori 4, 4, 257
69; SLOW-NEXT:    mullw 3, 3, 4
70; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
71; SLOW-NEXT:    blr
72  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
73  %z = zext i8 %pop to i16
74  ret i16 %z
75}
76
; zpop_i8_i32: zero-extend the i8 argument to i32, then take the i32
; population count and return it.
; With -mattr=+popcntd the expected code clears the upper 24 bits of the word
; (clrlwi) and uses the 32-bit popcntw. With -mattr=+slow-popcntd the expected
; code is the 32-bit shift/mask/add/multiply expansion ending in srwi by 24.
77define i32 @zpop_i8_i32(i8 %x) {
78; FAST-LABEL: zpop_i8_i32:
79; FAST:       # %bb.0:
80; FAST-NEXT:    clrlwi 3, 3, 24
81; FAST-NEXT:    popcntw 3, 3
82; FAST-NEXT:    blr
83;
84; SLOW-LABEL: zpop_i8_i32:
85; SLOW:       # %bb.0:
86; SLOW-NEXT:    clrlwi 5, 3, 24
87; SLOW-NEXT:    rotlwi 3, 3, 31
88; SLOW-NEXT:    andi. 3, 3, 85
89; SLOW-NEXT:    lis 4, 13107
90; SLOW-NEXT:    sub 3, 5, 3
91; SLOW-NEXT:    ori 4, 4, 13107
92; SLOW-NEXT:    rotlwi 5, 3, 30
93; SLOW-NEXT:    and 3, 3, 4
94; SLOW-NEXT:    andis. 4, 5, 13107
95; SLOW-NEXT:    andi. 5, 5, 13107
96; SLOW-NEXT:    or 4, 5, 4
97; SLOW-NEXT:    add 3, 3, 4
98; SLOW-NEXT:    lis 5, 3855
99; SLOW-NEXT:    srwi 4, 3, 4
100; SLOW-NEXT:    add 3, 3, 4
101; SLOW-NEXT:    lis 4, 257
102; SLOW-NEXT:    ori 5, 5, 3855
103; SLOW-NEXT:    and 3, 3, 5
104; SLOW-NEXT:    ori 4, 4, 257
105; SLOW-NEXT:    mullw 3, 3, 4
106; SLOW-NEXT:    srwi 3, 3, 24
107; SLOW-NEXT:    blr
108  %z = zext i8 %x to i32
109  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
110  ret i32 %pop
111}
112
; popz_i8_32: take the i8 population count first, then zero-extend the result
; to i32 (the reverse operation order of zpop_i8_i32).
; With -mattr=+popcntd the expected code clears the upper 56 bits and uses the
; 64-bit popcntd, unlike zpop_i8_i32 which is expected to use popcntw.
; With -mattr=+slow-popcntd the expected expansion ends in
; rlwinm 3, 3, 8, 24, 31 rather than srwi 3, 3, 24.
; NOTE(review): the name is spelled `_32` while the zext-first counterpart
; uses `_i32`; renaming would also require updating the label checks below.
113define i32 @popz_i8_32(i8 %x) {
114; FAST-LABEL: popz_i8_32:
115; FAST:       # %bb.0:
116; FAST-NEXT:    clrldi 3, 3, 56
117; FAST-NEXT:    popcntd 3, 3
118; FAST-NEXT:    blr
119;
120; SLOW-LABEL: popz_i8_32:
121; SLOW:       # %bb.0:
122; SLOW-NEXT:    clrlwi 5, 3, 24
123; SLOW-NEXT:    rotlwi 3, 3, 31
124; SLOW-NEXT:    andi. 3, 3, 85
125; SLOW-NEXT:    lis 4, 13107
126; SLOW-NEXT:    sub 3, 5, 3
127; SLOW-NEXT:    ori 4, 4, 13107
128; SLOW-NEXT:    rotlwi 5, 3, 30
129; SLOW-NEXT:    and 3, 3, 4
130; SLOW-NEXT:    andis. 4, 5, 13107
131; SLOW-NEXT:    andi. 5, 5, 13107
132; SLOW-NEXT:    or 4, 5, 4
133; SLOW-NEXT:    add 3, 3, 4
134; SLOW-NEXT:    lis 5, 3855
135; SLOW-NEXT:    srwi 4, 3, 4
136; SLOW-NEXT:    add 3, 3, 4
137; SLOW-NEXT:    lis 4, 257
138; SLOW-NEXT:    ori 5, 5, 3855
139; SLOW-NEXT:    and 3, 3, 5
140; SLOW-NEXT:    ori 4, 4, 257
141; SLOW-NEXT:    mullw 3, 3, 4
142; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
143; SLOW-NEXT:    blr
144  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
145  %z = zext i8 %pop to i32
146  ret i32 %z
147}
148
; zpop_i16_i32: zero-extend the i16 argument to i32, then take the i32
; population count and return it.
; With -mattr=+popcntd the expected code clears the upper 16 bits of the word
; (clrlwi) and uses popcntw. With -mattr=+slow-popcntd the expected code is
; the 32-bit expansion; its first mask is 21845 (0x5555), matching the 16-bit
; input width, and it ends in srwi by 24.
149define i32 @zpop_i16_i32(i16 %x) {
150; FAST-LABEL: zpop_i16_i32:
151; FAST:       # %bb.0:
152; FAST-NEXT:    clrlwi 3, 3, 16
153; FAST-NEXT:    popcntw 3, 3
154; FAST-NEXT:    blr
155;
156; SLOW-LABEL: zpop_i16_i32:
157; SLOW:       # %bb.0:
158; SLOW-NEXT:    clrlwi 5, 3, 16
159; SLOW-NEXT:    rotlwi 3, 3, 31
160; SLOW-NEXT:    andi. 3, 3, 21845
161; SLOW-NEXT:    lis 4, 13107
162; SLOW-NEXT:    sub 3, 5, 3
163; SLOW-NEXT:    ori 4, 4, 13107
164; SLOW-NEXT:    rotlwi 5, 3, 30
165; SLOW-NEXT:    and 3, 3, 4
166; SLOW-NEXT:    andis. 4, 5, 13107
167; SLOW-NEXT:    andi. 5, 5, 13107
168; SLOW-NEXT:    or 4, 5, 4
169; SLOW-NEXT:    add 3, 3, 4
170; SLOW-NEXT:    lis 5, 3855
171; SLOW-NEXT:    srwi 4, 3, 4
172; SLOW-NEXT:    add 3, 3, 4
173; SLOW-NEXT:    lis 4, 257
174; SLOW-NEXT:    ori 5, 5, 3855
175; SLOW-NEXT:    and 3, 3, 5
176; SLOW-NEXT:    ori 4, 4, 257
177; SLOW-NEXT:    mullw 3, 3, 4
178; SLOW-NEXT:    srwi 3, 3, 24
179; SLOW-NEXT:    blr
180  %z = zext i16 %x to i32
181  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
182  ret i32 %pop
183}
184
; popz_i16_32: take the i16 population count first, then zero-extend the
; result to i32 (the reverse operation order of zpop_i16_i32).
; With -mattr=+popcntd the expected code clears the upper 48 bits and uses the
; 64-bit popcntd. With -mattr=+slow-popcntd the expected expansion matches
; zpop_i16_i32 except that it ends in rlwinm 3, 3, 8, 24, 31.
; NOTE(review): the name is spelled `_32` while the zext-first counterpart
; uses `_i32`; renaming would also require updating the label checks below.
185define i32 @popz_i16_32(i16 %x) {
186; FAST-LABEL: popz_i16_32:
187; FAST:       # %bb.0:
188; FAST-NEXT:    clrldi 3, 3, 48
189; FAST-NEXT:    popcntd 3, 3
190; FAST-NEXT:    blr
191;
192; SLOW-LABEL: popz_i16_32:
193; SLOW:       # %bb.0:
194; SLOW-NEXT:    clrlwi 5, 3, 16
195; SLOW-NEXT:    rotlwi 3, 3, 31
196; SLOW-NEXT:    andi. 3, 3, 21845
197; SLOW-NEXT:    lis 4, 13107
198; SLOW-NEXT:    sub 3, 5, 3
199; SLOW-NEXT:    ori 4, 4, 13107
200; SLOW-NEXT:    rotlwi 5, 3, 30
201; SLOW-NEXT:    and 3, 3, 4
202; SLOW-NEXT:    andis. 4, 5, 13107
203; SLOW-NEXT:    andi. 5, 5, 13107
204; SLOW-NEXT:    or 4, 5, 4
205; SLOW-NEXT:    add 3, 3, 4
206; SLOW-NEXT:    lis 5, 3855
207; SLOW-NEXT:    srwi 4, 3, 4
208; SLOW-NEXT:    add 3, 3, 4
209; SLOW-NEXT:    lis 4, 257
210; SLOW-NEXT:    ori 5, 5, 3855
211; SLOW-NEXT:    and 3, 3, 5
212; SLOW-NEXT:    ori 4, 4, 257
213; SLOW-NEXT:    mullw 3, 3, 4
214; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
215; SLOW-NEXT:    blr
216  %pop = tail call i16 @llvm.ctpop.i16(i16 %x)
217  %z = zext i16 %pop to i32
218  ret i32 %z
219}
220
; zpop_i32_i64: zero-extend the i32 argument to i64, then take the i64
; population count and return it.
; With -mattr=+popcntd the expected code clears the upper 32 bits and uses
; popcntd. With -mattr=+slow-popcntd the expected code is a 64-bit expansion:
; the 32-bit mask constants are replicated into both halves of a doubleword
; with rldimi, the byte sums are combined with mulld, and the top byte is
; extracted with rldicl 3, 3, 8, 56.
221define i64 @zpop_i32_i64(i32 %x) {
222; FAST-LABEL: zpop_i32_i64:
223; FAST:       # %bb.0:
224; FAST-NEXT:    clrldi 3, 3, 32
225; FAST-NEXT:    popcntd 3, 3
226; FAST-NEXT:    blr
227;
228; SLOW-LABEL: zpop_i32_i64:
229; SLOW:       # %bb.0:
230; SLOW-NEXT:    rlwinm 5, 3, 31, 1, 0
231; SLOW-NEXT:    lis 4, 13107
232; SLOW-NEXT:    andis. 6, 5, 21845
233; SLOW-NEXT:    andi. 5, 5, 21845
234; SLOW-NEXT:    ori 4, 4, 13107
235; SLOW-NEXT:    or 5, 5, 6
236; SLOW-NEXT:    clrldi 3, 3, 32
237; SLOW-NEXT:    rldimi 4, 4, 32, 0
238; SLOW-NEXT:    sub 3, 3, 5
239; SLOW-NEXT:    and 5, 3, 4
240; SLOW-NEXT:    rotldi 3, 3, 62
241; SLOW-NEXT:    and 3, 3, 4
242; SLOW-NEXT:    add 3, 5, 3
243; SLOW-NEXT:    lis 4, 3855
244; SLOW-NEXT:    rldicl 5, 3, 60, 4
245; SLOW-NEXT:    ori 4, 4, 3855
246; SLOW-NEXT:    add 3, 3, 5
247; SLOW-NEXT:    lis 5, 257
248; SLOW-NEXT:    rldimi 4, 4, 32, 0
249; SLOW-NEXT:    ori 5, 5, 257
250; SLOW-NEXT:    and 3, 3, 4
251; SLOW-NEXT:    rldimi 5, 5, 32, 0
252; SLOW-NEXT:    mulld 3, 3, 5
253; SLOW-NEXT:    rldicl 3, 3, 8, 56
254; SLOW-NEXT:    blr
255  %z = zext i32 %x to i64
256  %pop = tail call i64 @llvm.ctpop.i64(i64 %z)
257  ret i64 %pop
258}
259
; popz_i32_i64: take the i32 population count first, then zero-extend the
; result to i64 (the reverse operation order of zpop_i32_i64).
; With -mattr=+popcntd the expected code is popcntw followed by an explicit
; clear of the upper 32 bits, i.e. the extension is performed after the count.
; With -mattr=+slow-popcntd the expected code is the 32-bit expansion (mullw),
; not the 64-bit one expected for zpop_i32_i64, ending in
; rlwinm 3, 3, 8, 24, 31.
260define i64 @popz_i32_i64(i32 %x) {
261; FAST-LABEL: popz_i32_i64:
262; FAST:       # %bb.0:
263; FAST-NEXT:    popcntw 3, 3
264; FAST-NEXT:    clrldi 3, 3, 32
265; FAST-NEXT:    blr
266;
267; SLOW-LABEL: popz_i32_i64:
268; SLOW:       # %bb.0:
269; SLOW-NEXT:    rotlwi 5, 3, 31
270; SLOW-NEXT:    andis. 6, 5, 21845
271; SLOW-NEXT:    andi. 5, 5, 21845
272; SLOW-NEXT:    or 5, 5, 6
273; SLOW-NEXT:    lis 4, 13107
274; SLOW-NEXT:    sub 3, 3, 5
275; SLOW-NEXT:    ori 4, 4, 13107
276; SLOW-NEXT:    rotlwi 5, 3, 30
277; SLOW-NEXT:    and 3, 3, 4
278; SLOW-NEXT:    andis. 4, 5, 13107
279; SLOW-NEXT:    andi. 5, 5, 13107
280; SLOW-NEXT:    or 4, 5, 4
281; SLOW-NEXT:    add 3, 3, 4
282; SLOW-NEXT:    lis 5, 3855
283; SLOW-NEXT:    srwi 4, 3, 4
284; SLOW-NEXT:    add 3, 3, 4
285; SLOW-NEXT:    lis 4, 257
286; SLOW-NEXT:    ori 5, 5, 3855
287; SLOW-NEXT:    and 3, 3, 5
288; SLOW-NEXT:    ori 4, 4, 257
289; SLOW-NEXT:    mullw 3, 3, 4
290; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
291; SLOW-NEXT:    blr
292  %pop = tail call i32 @llvm.ctpop.i32(i32 %x)
293  %z = zext i32 %pop to i64
294  ret i64 %z
295}
296
; popa_i16_i64: take the i16 population count, zero-extend it to i64, and
; return only the bit with value 16 of the result (`and i64 %z, 16`).
; Because only one bit of the extended value is used, the extension kind is
; free to change during selection (see the inline comment on the zext).
; In both configurations the expected code ends with rlwinm 3, 3, 0, 27, 27,
; which keeps just that single bit; before it comes either clrldi + popcntd
; (+popcntd) or the 32-bit shift/mask/multiply expansion (+slow-popcntd).
297define i64 @popa_i16_i64(i16 %x) {
298; FAST-LABEL: popa_i16_i64:
299; FAST:       # %bb.0:
300; FAST-NEXT:    clrldi 3, 3, 48
301; FAST-NEXT:    popcntd 3, 3
302; FAST-NEXT:    rlwinm 3, 3, 0, 27, 27
303; FAST-NEXT:    blr
304;
305; SLOW-LABEL: popa_i16_i64:
306; SLOW:       # %bb.0:
307; SLOW-NEXT:    clrlwi 5, 3, 16
308; SLOW-NEXT:    rotlwi 3, 3, 31
309; SLOW-NEXT:    andi. 3, 3, 21845
310; SLOW-NEXT:    lis 4, 13107
311; SLOW-NEXT:    sub 3, 5, 3
312; SLOW-NEXT:    ori 4, 4, 13107
313; SLOW-NEXT:    rotlwi 5, 3, 30
314; SLOW-NEXT:    and 3, 3, 4
315; SLOW-NEXT:    andis. 4, 5, 13107
316; SLOW-NEXT:    andi. 5, 5, 13107
317; SLOW-NEXT:    or 4, 5, 4
318; SLOW-NEXT:    add 3, 3, 4
319; SLOW-NEXT:    lis 5, 3855
320; SLOW-NEXT:    srwi 4, 3, 4
321; SLOW-NEXT:    add 3, 3, 4
322; SLOW-NEXT:    lis 4, 257
323; SLOW-NEXT:    ori 5, 5, 3855
324; SLOW-NEXT:    and 3, 3, 5
325; SLOW-NEXT:    ori 4, 4, 257
326; SLOW-NEXT:    mullw 3, 3, 4
327; SLOW-NEXT:    srwi 3, 3, 24
328; SLOW-NEXT:    rlwinm 3, 3, 0, 27, 27
329; SLOW-NEXT:    blr
330  %pop = call i16 @llvm.ctpop.i16(i16 %x)
331  %z = zext i16 %pop to i64 ; SimplifyDemandedBits may turn zext (or sext) into aext
332  %a = and i64 %z, 16
333  ret i64 %a
334}
335
; Declarations of the population-count intrinsics called by the test
; functions in this file, one per integer width used (i8, i16, i32, i64).
336declare i8 @llvm.ctpop.i8(i8) nounwind readnone
337declare i16 @llvm.ctpop.i16(i16) nounwind readnone
338declare i32 @llvm.ctpop.i32(i32) nounwind readnone
339declare i64 @llvm.ctpop.i64(i64) nounwind readnone
340