; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+popcntd < %s      | FileCheck %s --check-prefix=FAST
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOW

; This file exercises llvm.ctpop combined with zext on powerpc64, once with
; -mattr=+popcntd (assertions under the FAST prefix) and once with
; -mattr=+slow-popcntd (assertions under the SLOW prefix).
;
; Function naming used below:
;   zpop_* : the input is zero-extended first, then ctpop runs on the wide type.
;   popz_* : ctpop runs on the narrow type, then the result is zero-extended.
;   popa_* : ctpop on the narrow type, zext, then a mask with 'and'.
;
; Each llc invocation is checked with exactly one prefix. Every assertion in
; this file belongs to one of those two prefixes; a prefix that is requested on
; the FileCheck command line but never used by any assertion is rejected by
; FileCheck unless --allow-unused-prefixes is passed.
;
; The assertion lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; zext i8 -> i16, then ctpop.i16.
define i16 @zpop_i8_i16(i8 %x) {
; FAST-LABEL: zpop_i8_i16:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 56
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i8_i16:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 24
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 85
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    srwi 3, 3, 24
; SLOW-NEXT:    blr
  %z = zext i8 %x to i16
  %pop = tail call i16 @llvm.ctpop.i16(i16 %z)
  ret i16 %pop
}

; ctpop.i8, then zext i8 -> i16.
define i16 @popz_i8_i16(i8 %x) {
; FAST-LABEL: popz_i8_i16:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 56
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i8_i16:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 24
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 85
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
; SLOW-NEXT:    blr
  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
  %z = zext i8 %pop to i16
  ret i16 %z
}

; zext i8 -> i32, then ctpop.i32.
define i32 @zpop_i8_i32(i8 %x) {
; FAST-LABEL: zpop_i8_i32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrlwi 3, 3, 24
; FAST-NEXT:    popcntw 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i8_i32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 24
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 85
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    srwi 3, 3, 24
; SLOW-NEXT:    blr
  %z = zext i8 %x to i32
  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
  ret i32 %pop
}

; ctpop.i8, then zext i8 -> i32.
define i32 @popz_i8_32(i8 %x) {
; FAST-LABEL: popz_i8_32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 56
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i8_32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 24
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 85
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
; SLOW-NEXT:    blr
  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
  %z = zext i8 %pop to i32
  ret i32 %z
}

; zext i16 -> i32, then ctpop.i32.
define i32 @zpop_i16_i32(i16 %x) {
; FAST-LABEL: zpop_i16_i32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrlwi 3, 3, 16
; FAST-NEXT:    popcntw 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i16_i32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 16
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 21845
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    srwi 3, 3, 24
; SLOW-NEXT:    blr
  %z = zext i16 %x to i32
  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
  ret i32 %pop
}

; ctpop.i16, then zext i16 -> i32.
define i32 @popz_i16_32(i16 %x) {
; FAST-LABEL: popz_i16_32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 48
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i16_32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 16
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 21845
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
; SLOW-NEXT:    blr
  %pop = tail call i16 @llvm.ctpop.i16(i16 %x)
  %z = zext i16 %pop to i32
  ret i32 %z
}

; zext i32 -> i64, then ctpop.i64.
define i64 @zpop_i32_i64(i32 %x) {
; FAST-LABEL: zpop_i32_i64:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 32
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i32_i64:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rlwinm 5, 3, 31, 1, 0
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    andis. 6, 5, 21845
; SLOW-NEXT:    andi. 5, 5, 21845
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    or 5, 5, 6
; SLOW-NEXT:    clrldi 3, 3, 32
; SLOW-NEXT:    rldimi 4, 4, 32, 0
; SLOW-NEXT:    sub 3, 3, 5
; SLOW-NEXT:    and 5, 3, 4
; SLOW-NEXT:    rotldi 3, 3, 62
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    add 3, 5, 3
; SLOW-NEXT:    lis 4, 3855
; SLOW-NEXT:    rldicl 5, 3, 60, 4
; SLOW-NEXT:    ori 4, 4, 3855
; SLOW-NEXT:    add 3, 3, 5
; SLOW-NEXT:    lis 5, 257
; SLOW-NEXT:    rldimi 4, 4, 32, 0
; SLOW-NEXT:    ori 5, 5, 257
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    rldimi 5, 5, 32, 0
; SLOW-NEXT:    mulld 3, 3, 5
; SLOW-NEXT:    rldicl 3, 3, 8, 56
; SLOW-NEXT:    blr
  %z = zext i32 %x to i64
  %pop = tail call i64 @llvm.ctpop.i64(i64 %z)
  ret i64 %pop
}

; ctpop.i32, then zext i32 -> i64.
define i64 @popz_i32_i64(i32 %x) {
; FAST-LABEL: popz_i32_i64:
; FAST:       # %bb.0:
; FAST-NEXT:    popcntw 3, 3
; FAST-NEXT:    clrldi 3, 3, 32
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i32_i64:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 5, 3, 31
; SLOW-NEXT:    andis. 6, 5, 21845
; SLOW-NEXT:    andi. 5, 5, 21845
; SLOW-NEXT:    or 5, 5, 6
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
; SLOW-NEXT:    blr
  %pop = tail call i32 @llvm.ctpop.i32(i32 %x)
  %z = zext i32 %pop to i64
  ret i64 %z
}

; ctpop.i16, zext i16 -> i64, then keep only the bit with value 16.
define i64 @popa_i16_i64(i16 %x) {
; FAST-LABEL: popa_i16_i64:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 48
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    rlwinm 3, 3, 0, 27, 27
; FAST-NEXT:    blr
;
; SLOW-LABEL: popa_i16_i64:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 16
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 21845
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    srwi 3, 3, 24
; SLOW-NEXT:    rlwinm 3, 3, 0, 27, 27
; SLOW-NEXT:    blr
  %pop = call i16 @llvm.ctpop.i16(i16 %x)
  %z = zext i16 %pop to i64 ; SimplifyDemandedBits may turn zext (or sext) into aext
  %a = and i64 %z, 16
  ret i64 %a
}

; Population-count intrinsics used by the functions above.
declare i8 @llvm.ctpop.i8(i8) nounwind readnone
declare i16 @llvm.ctpop.i16(i16) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone