; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse,+sse2,+avx,+avx2 | FileCheck %s

; Pattern under test:
;   icmp eq/ne (urem %x, C), 0
; When C is not a power of two (power-of-two divisors should not reach this
; point anyway) and %x can have at most one bit set, the 'urem' is redundant
; and the comparison can be performed on %x directly:
;   icmp eq/ne %x, 0

;------------------------------------------------------------------------------;
; Basic scalar tests
;------------------------------------------------------------------------------;

; Single-bit dividend (%x & 128), constant divisor 6.
; The expected assembly is only a test of that bit; no remainder is computed.
define i1 @p0_scalar_urem_by_const(i32 %x, i32 %y) {
; CHECK-LABEL: p0_scalar_urem_by_const:
; CHECK:       # %bb.0:
; CHECK-NEXT:    testb $-128, %dil
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    retq
  %pow2 = and i32 %x, 128 ; either 128 or 0
  %rem = urem i32 %pow2, 6 ; 6 has two bits set, so it is not a power of two
  %is_zero = icmp eq i32 %rem, 0
  ret i1 %is_zero
}

; Single-bit dividend (%x & 128), variable divisor (%y | 6).
; The expected assembly is again only a test of that bit.
define i1 @p1_scalar_urem_by_nonconst(i32 %x, i32 %y) {
; CHECK-LABEL: p1_scalar_urem_by_nonconst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    testb $-128, %dil
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    retq
  %pow2 = and i32 %x, 128 ; either 128 or 0
  %divisor = or i32 %y, 6 ; bits 1 and 2 are always set: never a power of two
  %rem = urem i32 %pow2, %divisor
  %is_zero = icmp eq i32 %rem, 0
  ret i1 %is_zero
}

; Dividend is (%x & 1) << %y, constant divisor 3.
; The expected assembly still contains the multiply/compare sequence for the
; remainder-by-3 test, i.e. the 'urem' is not dropped for this input.
; Reviewer note: presumably a not-yet-handled case rather than an intended
; negative test - confirm.
define i1 @p2_scalar_shifted_urem_by_const(i32 %x, i32 %y) {
; CHECK-LABEL: p2_scalar_shifted_urem_by_const:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %ecx
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT:    shll %cl, %edi
; CHECK-NEXT:    imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; CHECK-NEXT:    cmpl $1431655766, %eax # imm = 0x55555556
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    retq
  %bit = and i32 %x, 1 ; either 1 or 0
  %pow2 = shl i32 %bit, %y ; shifting keeps at most one bit set, whatever %y is
  %rem = urem i32 %pow2, 3 ; 3 is not a power of two
  %is_zero = icmp eq i32 %rem, 0
  ret i1 %is_zero
}

; Same shape as the previous test, but the masked bit is bit 1 (%x & 2).
; The expected assembly likewise keeps the multiply/compare sequence.
define i1 @p3_scalar_shifted2_urem_by_const(i32 %x, i32 %y) {
; CHECK-LABEL: p3_scalar_shifted2_urem_by_const:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %ecx
; CHECK-NEXT:    andl $2, %edi
; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT:    shll %cl, %edi
; CHECK-NEXT:    imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; CHECK-NEXT:    cmpl $1431655766, %eax # imm = 0x55555556
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    retq
  %bit = and i32 %x, 2 ; either 2 or 0
  %pow2 = shl i32 %bit, %y ; shifting keeps at most one bit set, whatever %y is
  %rem = urem i32 %pow2, 3 ; 3 is not a power of two
  %is_zero = icmp eq i32 %rem, 0
  ret i1 %is_zero
}

;------------------------------------------------------------------------------;
; Basic vector tests
;------------------------------------------------------------------------------;

; <4 x i32> variant with a splat mask of 128 and a splat divisor of 6.
; The expected assembly still performs the full remainder expansion
; (multiply-high, shift, multiply by 6, subtract) before comparing with zero.
define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: p4_vector_urem_by_const__splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-NEXT:    vpsrld $2, %xmm1, %xmm1
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %pow2 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; each lane is 128 or 0
  %rem = urem <4 x i32> %pow2, <i32 6, i32 6, i32 6, i32 6> ; 6 is not a power of two
  %is_zero = icmp eq <4 x i32> %rem, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i1> %is_zero
}

; Per-lane masks (128, 2, 4, 8) and per-lane divisors (3, 5, 6, 9).
; The expected assembly still performs the full remainder expansion, loading
; its per-lane constants from memory.
define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: p5_vector_urem_by_const__nonsplat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,954437177]
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %pow2 = and <4 x i32> %x, <i32 128, i32 2, i32 4, i32 8> ; every lane mask has exactly one bit set
  %rem = urem <4 x i32> %pow2, <i32 3, i32 5, i32 6, i32 9> ; no lane divisor is a power of two
  %is_zero = icmp eq <4 x i32> %rem, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i1> %is_zero
}

; As the splat test, but lane 2 of the 'and' mask is undef.
; The expected assembly is the same full remainder expansion as the splat test.
define <4 x i1> @p6_vector_urem_by_const__nonsplat_undef0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: p6_vector_urem_by_const__nonsplat_undef0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-NEXT:    vpsrld $2, %xmm1, %xmm1
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %pow2 = and <4 x i32> %x, <i32 128, i32 128, i32 undef, i32 128> ; mask lane 2 is undef
  %rem = urem <4 x i32> %pow2, <i32 6, i32 6, i32 6, i32 6> ; 6 is not a power of two
  %is_zero = icmp eq <4 x i32> %rem, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i1> %is_zero
}

; As the splat test, but lane 2 of the zero constant in the compare is undef.
; The expected assembly is the same full remainder expansion as the splat test.
define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: p7_vector_urem_by_const__nonsplat_undef2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-NEXT:    vpsrld $2, %xmm1, %xmm1
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %pow2 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; each lane is 128 or 0
  %rem = urem <4 x i32> %pow2, <i32 6, i32 6, i32 6, i32 6> ; 6 is not a power of two
  %is_zero = icmp eq <4 x i32> %rem, <i32 0, i32 0, i32 undef, i32 0> ; compare lane 2 is undef
  ret <4 x i1> %is_zero
}

; Combination of the two previous tests: lane 2 is undef both in the 'and'
; mask and in the zero constant of the compare.
; The expected assembly is the same full remainder expansion as the splat test.
define <4 x i1> @p8_vector_urem_by_const__nonsplat_undef3(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: p8_vector_urem_by_const__nonsplat_undef3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-NEXT:    vpsrld $2, %xmm1, %xmm1
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %pow2 = and <4 x i32> %x, <i32 128, i32 128, i32 undef, i32 128> ; mask lane 2 is undef
  %rem = urem <4 x i32> %pow2, <i32 6, i32 6, i32 6, i32 6> ; 6 is not a power of two
  %is_zero = icmp eq <4 x i32> %rem, <i32 0, i32 0, i32 undef, i32 0> ; compare lane 2 is undef
  ret <4 x i1> %is_zero
}

;------------------------------------------------------------------------------;
; Basic negative tests
;------------------------------------------------------------------------------;

; The dividend (%x & 3) may have two bits set, so it is not known to be a
; power of two or zero and the 'urem' has to stay. The expected assembly keeps
; the multiply/compare sequence for the remainder-by-3 test.
define i1 @n0_urem_of_maybe_not_power_of_two(i32 %x, i32 %y) {
; CHECK-LABEL: n0_urem_of_maybe_not_power_of_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andl $3, %edi
; CHECK-NEXT:    imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; CHECK-NEXT:    cmpl $1431655766, %eax # imm = 0x55555556
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    retq
  %masked = and i32 %x, 3 ; may be 0, 1, 2 or 3: not necessarily a single bit
  %rem = urem i32 %masked, 3
  %is_zero = icmp eq i32 %rem, 0
  ret i1 %is_zero
}

; The divisor (%y | 1) may itself be a power of two (for example 1), so the
; 'urem' has to stay. The expected assembly performs a real division and
; tests the remainder.
define i1 @n1_urem_by_maybe_power_of_two(i32 %x, i32 %y) {
; CHECK-LABEL: n1_urem_by_maybe_power_of_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $128, %eax
; CHECK-NEXT:    orl $1, %esi
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    divl %esi
; CHECK-NEXT:    testl %edx, %edx
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    retq
  %pow2 = and i32 %x, 128 ; either 128 or 0
  %divisor = or i32 %y, 1 ; only the low bit is forced on: could be a power of two
  %rem = urem i32 %pow2, %divisor
  %is_zero = icmp eq i32 %rem, 0
  ret i1 %is_zero
}