; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test patterns which generate lzcnt instructions.
; E.g.: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt))
;
; Each CPU is run twice: once with its default features (FASTLZCNT checks) and
; once with -mattr=-fast-lzcnt (NOFASTLZCNT checks). Checks shared by both
; configurations use the ALL prefix.
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck --check-prefixes=ALL,FASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefixes=ALL,NOFASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefixes=ALL,FASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefixes=ALL,NOFASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefixes=ALL,FASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefixes=ALL,NOFASTLZCNT %s

; Test one 32-bit input, output is 32-bit, no transformations expected.
; A lone compare has no `or` to fold, so every configuration is expected to
; keep the xor/test/sete sequence.
define i32 @test_zext_cmp0(i32 %a) {
; ALL-LABEL: test_zext_cmp0:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    xorl %eax, %eax
; ALL-NEXT:    testl %edi, %edi
; ALL-NEXT:    sete %al
; ALL-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}

; Test two 32-bit inputs, output is 32-bit.
; Basic shape of the pattern: zext(or(a == 0, b == 0)) with i32 operands.
; The FASTLZCNT checks expect lzcntl on each input, one orl, and a final
; shrl $5; the NOFASTLZCNT checks expect the test/sete/orb/movzbl sequence.
define i32 @test_zext_cmp1(i32 %a, i32 %b) {
; FASTLZCNT-LABEL: test_zext_cmp1:
; FASTLZCNT:       # %bb.0:
; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
; FASTLZCNT-NEXT:    lzcntl %esi, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp1:
; NOFASTLZCNT:       # %bb.0:
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %or to i32
  ret i32 %lor.ext
}

; Test two 64-bit inputs, output is 64-bit.
; Same pattern as test_zext_cmp1 with i64 operands: the FASTLZCNT checks
; expect lzcntq on each input and a shift by 6 instead of 5.
define i64 @test_zext_cmp2(i64 %a, i64 %b) {
; FASTLZCNT-LABEL: test_zext_cmp2:
; FASTLZCNT:       # %bb.0:
; FASTLZCNT-NEXT:    lzcntq %rdi, %rcx
; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $6, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp2:
; NOFASTLZCNT:       # %bb.0:
; NOFASTLZCNT-NEXT:    testq %rdi, %rdi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
  %cmp = icmp eq i64 %a, 0
  %cmp1 = icmp eq i64 %b, 0
  %or = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %or to i64
  ret i64 %lor.ext
}

; Test two 16-bit inputs, output is 16-bit.
; The transform is disabled for the 16-bit case, as we still have to clear the
; upper 16 bits, adding one more instruction.
; i16 operands: both configurations share the same checks (testw/sete/orb/
; movzbl), i.e. no lzcnt is expected even when fast-lzcnt is available.
define i16 @test_zext_cmp3(i16 %a, i16 %b) {
; ALL-LABEL: test_zext_cmp3:
; ALL:       # %bb.0:
; ALL-NEXT:    testw %di, %di
; ALL-NEXT:    sete %al
; ALL-NEXT:    testw %si, %si
; ALL-NEXT:    sete %cl
; ALL-NEXT:    orb %al, %cl
; ALL-NEXT:    movzbl %cl, %eax
; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
; ALL-NEXT:    retq
  %cmp = icmp eq i16 %a, 0
  %cmp1 = icmp eq i16 %b, 0
  %or = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %or to i16
  ret i16 %lor.ext
}

; Test two 32-bit inputs, output is 64-bit.
; The result type is wider than the compared operands. The FASTLZCNT checks
; expect the same 32-bit lzcntl/orl/shrl $5 sequence as test_zext_cmp1, with
; no separate extension instruction.
define i64 @test_zext_cmp4(i32 %a, i32 %b) {
; FASTLZCNT-LABEL: test_zext_cmp4:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
; FASTLZCNT-NEXT:    lzcntl %esi, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp4:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %0 = or i1 %cmp, %cmp1
  %conv = zext i1 %0 to i64
  ret i64 %conv
}

; Test two 64-bit inputs, output is 32-bit.
; The result type is narrower than the compared operands. The FASTLZCNT
; checks expect lzcntq on each input, orl, shrl $6, and a kill annotation
; marking that only eax of rax is used for the i32 return value.
define i32 @test_zext_cmp5(i64 %a, i64 %b) {
; FASTLZCNT-LABEL: test_zext_cmp5:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntq %rdi, %rcx
; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $6, %eax
; FASTLZCNT-NEXT:    # kill: def $eax killed $eax killed $rax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp5:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testq %rdi, %rdi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i64 %a, 0
  %cmp1 = icmp eq i64 %b, 0
  %0 = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %0 to i32
  ret i32 %lor.ext
}

; Test three 32-bit inputs, output is 32-bit.
; The IR chains two `or` instructions. The FASTLZCNT checks expect three
; lzcntl, two orl, and a single shrl $5 applied once at the end.
define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) {
; FASTLZCNT-LABEL: test_zext_cmp6:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %eax
; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
; FASTLZCNT-NEXT:    orl %eax, %ecx
; FASTLZCNT-NEXT:    lzcntl %edx, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp6:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    testl %edx, %edx
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    orb %cl, %al
; NOFASTLZCNT-NEXT:    movzbl %al, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp2 = icmp eq i32 %c, 0
  %.cmp2 = or i1 %or.cond, %cmp2
  %lor.ext = zext i1 %.cmp2 to i32
  ret i32 %lor.ext
}

; Test three 32-bit inputs, output is 32-bit, but compared to test_zext_cmp6 test,
; %.cmp2 inputs' order is inverted.
; The expected assembly is the same as for test_zext_cmp6 in both
; configurations, i.e. the operand order of the outer `or` does not matter.
define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) {
; FASTLZCNT-LABEL: test_zext_cmp7:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %eax
; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
; FASTLZCNT-NEXT:    orl %eax, %ecx
; FASTLZCNT-NEXT:    lzcntl %edx, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp7:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    testl %edx, %edx
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    orb %cl, %al
; NOFASTLZCNT-NEXT:    movzbl %al, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp2 = icmp eq i32 %c, 0
  %.cmp2 = or i1 %cmp2, %or.cond
  %lor.ext = zext i1 %.cmp2 to i32
  ret i32 %lor.ext
}

; Test four 32-bit inputs, output is 32-bit.
; The IR chains three `or` instructions over four compares. The FASTLZCNT
; checks expect four lzcntl, three orl, and a single shrl $5 at the end; the
; NOFASTLZCNT checks expect four test/sete pairs combined with orb.
define i32 @test_zext_cmp8(i32 %a, i32 %b, i32 %c, i32 %d) {
; FASTLZCNT-LABEL: test_zext_cmp8:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %eax
; FASTLZCNT-NEXT:    lzcntl %esi, %esi
; FASTLZCNT-NEXT:    lzcntl %edx, %edx
; FASTLZCNT-NEXT:    orl %eax, %esi
; FASTLZCNT-NEXT:    lzcntl %ecx, %eax
; FASTLZCNT-NEXT:    orl %edx, %eax
; FASTLZCNT-NEXT:    orl %esi, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp8:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %dil
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    orb %dil, %al
; NOFASTLZCNT-NEXT:    testl %edx, %edx
; NOFASTLZCNT-NEXT:    sete %dl
; NOFASTLZCNT-NEXT:    testl %ecx, %ecx
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %dl, %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp3 = icmp eq i32 %c, 0
  %or.cond5 = or i1 %or.cond, %cmp3
  %cmp4 = icmp eq i32 %d, 0
  %.cmp4 = or i1 %or.cond5, %cmp4
  %lor.ext = zext i1 %.cmp4 to i32
  ret i32 %lor.ext
}

; Test one 32-bit input, one 64-bit input, output is 32-bit.
; Mixed operand widths: the FASTLZCNT checks expect each lzcnt result to be
; shifted by its own amount (shrl $5 for the i32 input, shrl $6 for the i64
; input) before the orl, rather than one shared shift after it.
define i32 @test_zext_cmp9(i32 %a, i64 %b) {
; FASTLZCNT-LABEL: test_zext_cmp9:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
; FASTLZCNT-NEXT:    shrl $5, %ecx
; FASTLZCNT-NEXT:    shrl $6, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    # kill: def $eax killed $eax killed $rax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp9:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i64 %b, 0
  %0 = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %0 to i32
  ret i32 %lor.ext
}

; Test 2 128-bit inputs, output is 32-bit, no transformations expected.
; Each i128 value is assembled from two i64 halves (zext, shl by 64, or)
; before being compared with zero. Both configurations share the same checks:
; an orq of the two halves followed by sete, with no lzcnt.
define i32 @test_zext_cmp10(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) {
; ALL-LABEL: test_zext_cmp10:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    orq %rsi, %rdi
; ALL-NEXT:    sete %al
; ALL-NEXT:    orq %rcx, %rdx
; ALL-NEXT:    sete %cl
; ALL-NEXT:    orb %al, %cl
; ALL-NEXT:    movzbl %cl, %eax
; ALL-NEXT:    retq
entry:
  %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128
  %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64
  %a.sroa.0.0.insert.ext = zext i64 %a.coerce0 to i128
  %a.sroa.0.0.insert.insert = or i128 %a.sroa.2.0.insert.shift, %a.sroa.0.0.insert.ext
  %b.sroa.2.0.insert.ext = zext i64 %b.coerce1 to i128
  %b.sroa.2.0.insert.shift = shl nuw i128 %b.sroa.2.0.insert.ext, 64
  %b.sroa.0.0.insert.ext = zext i64 %b.coerce0 to i128
  %b.sroa.0.0.insert.insert = or i128 %b.sroa.2.0.insert.shift, %b.sroa.0.0.insert.ext
  %cmp = icmp eq i128 %a.sroa.0.0.insert.insert, 0
  %cmp3 = icmp eq i128 %b.sroa.0.0.insert.insert, 0
  %0 = or i1 %cmp, %cmp3
  %lor.ext = zext i1 %0 to i32
  ret i32 %lor.ext
}

; PR31902 Fix a crash in combineOrCmpEqZeroToCtlzSrl under fast math.
; The compares here are floating-point (`fcmp fast oeq` against 0.0) in a
; function marked "no-nans-fp-math"="true". Both configurations share the
; same checks: vucomisd/sete pairs combined with orb, with no lzcnt.
define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
;
; ALL-LABEL: test_zext_cmp11:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vucomisd %xmm2, %xmm0
; ALL-NEXT:    sete %al
; ALL-NEXT:    vucomisd %xmm2, %xmm1
; ALL-NEXT:    sete %cl
; ALL-NEXT:    orb %al, %cl
; ALL-NEXT:    movzbl %cl, %eax
; ALL-NEXT:    retq
entry:
  %cmp = fcmp fast oeq double %a, 0.000000e+00
  %cmp1 = fcmp fast oeq double %b, 0.000000e+00
  %0 = or i1 %cmp, %cmp1
  %conv = zext i1 %0 to i32
  ret i32 %conv
}