• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse,+sse2,+avx,+avx2 | FileCheck %s
3
4; Given:
5;   icmp eq/ne (urem %x, C), 0
6; Iff C is not a power of two (those should not get here though),
7; and %x is known to have at most one bit set, omit the 'urem':
8;   icmp eq/ne %x, 0
9
10;------------------------------------------------------------------------------;
11; Basic scalar tests
12;------------------------------------------------------------------------------;
13
; Positive scalar test: a single-bit mask of %x, urem by the constant 6, then a
; compare with zero. The expected asm only tests the 0x80 bit of %dil; no
; remainder computation is left.
14define i1 @p0_scalar_urem_by_const(i32 %x, i32 %y) {
15; CHECK-LABEL: p0_scalar_urem_by_const:
16; CHECK:       # %bb.0:
17; CHECK-NEXT:    testb $-128, %dil
18; CHECK-NEXT:    sete %al
19; CHECK-NEXT:    retq
20  %t0 = and i32 %x, 128 ; clearly a power-of-two or zero
21  %t1 = urem i32 %t0, 6 ; '6' is clearly not a power of two
22  %t2 = icmp eq i32 %t1, 0 ; true iff the remainder is zero
23  ret i1 %t2
24}
25
; Positive scalar test with a non-constant divisor: %y | 6 always has at least
; two bits set, so it can never be a power of two. The expected asm is the same
; single-bit test as in the constant-divisor case; %y is not used at all.
26define i1 @p1_scalar_urem_by_nonconst(i32 %x, i32 %y) {
27; CHECK-LABEL: p1_scalar_urem_by_nonconst:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    testb $-128, %dil
30; CHECK-NEXT:    sete %al
31; CHECK-NEXT:    retq
32  %t0 = and i32 %x, 128 ; clearly a power-of-two or zero
33  %t1 = or i32 %y, 6 ; two bits set, clearly not a power of two
34  %t2 = urem i32 %t0, %t1 ; dividend has <= 1 bit set, divisor has >= 2 bits set
35  %t3 = icmp eq i32 %t2, 0 ; true iff the remainder is zero
36  ret i1 %t3
37}
38
; Scalar test where the single-bit value is produced by shifting (%x & 1) left
; by a variable amount. In the recorded output the remainder check is still
; computed arithmetically (multiply by 0xAAAAAAAB, compare with 0x55555556)
; instead of being reduced to a compare of the shifted value with zero.
39define i1 @p2_scalar_shifted_urem_by_const(i32 %x, i32 %y) {
40; CHECK-LABEL: p2_scalar_shifted_urem_by_const:
41; CHECK:       # %bb.0:
42; CHECK-NEXT:    movl %esi, %ecx
43; CHECK-NEXT:    andl $1, %edi
44; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
45; CHECK-NEXT:    shll %cl, %edi
46; CHECK-NEXT:    imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
47; CHECK-NEXT:    cmpl $1431655766, %eax # imm = 0x55555556
48; CHECK-NEXT:    setb %al
49; CHECK-NEXT:    retq
50  %t0 = and i32 %x, 1 ; clearly a power-of-two or zero
51  %t1 = shl i32 %t0, %y ; will still be a power-of-two or zero with any %y
52  %t2 = urem i32 %t1, 3 ; '3' is clearly not a power of two
53  %t3 = icmp eq i32 %t2, 0 ; true iff the remainder is zero
54  ret i1 %t3
55}
56
; Same shape as the shifted (%x & 1) test, but the bit being shifted is bit 1
; (%x & 2) rather than bit 0. The recorded output again keeps the
; multiply-and-compare sequence for the remainder check.
57define i1 @p3_scalar_shifted2_urem_by_const(i32 %x, i32 %y) {
58; CHECK-LABEL: p3_scalar_shifted2_urem_by_const:
59; CHECK:       # %bb.0:
60; CHECK-NEXT:    movl %esi, %ecx
61; CHECK-NEXT:    andl $2, %edi
62; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
63; CHECK-NEXT:    shll %cl, %edi
64; CHECK-NEXT:    imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
65; CHECK-NEXT:    cmpl $1431655766, %eax # imm = 0x55555556
66; CHECK-NEXT:    setb %al
67; CHECK-NEXT:    retq
68  %t0 = and i32 %x, 2 ; clearly a power-of-two or zero
69  %t1 = shl i32 %t0, %y ; will still be a power-of-two or zero with any %y
70  %t2 = urem i32 %t1, 3 ; '3' is clearly not a power of two
71  %t3 = icmp eq i32 %t2, 0 ; true iff the remainder is zero
72  ret i1 %t3
73}
74
75;------------------------------------------------------------------------------;
76; Basic vector tests
77;------------------------------------------------------------------------------;
78
; Vector version of the constant-divisor test with splat operands: every lane
; is masked with 128 and divided by 6. The recorded output still computes the
; remainder (multiply by 2863311531, shift right by 2, multiply by 6, subtract)
; before comparing each lane with zero.
79define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
80; CHECK-LABEL: p4_vector_urem_by_const__splat:
81; CHECK:       # %bb.0:
82; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
83; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
84; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
85; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
86; CHECK-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
87; CHECK-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
88; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
89; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
90; CHECK-NEXT:    vpsrld $2, %xmm1, %xmm1
91; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
92; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
93; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
94; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
95; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
96; CHECK-NEXT:    retq
97  %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
98  %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
99  %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0> ; per-lane: remainder is zero
100  ret <4 x i1> %t2
101}
102
; Vector test with non-splat operands: each lane uses a different single-bit
; mask and a different non-power-of-two divisor. The recorded output still
; computes the remainder per lane (per-lane multiplier constants, variable
; shift, multiply, subtract) before comparing with zero.
103define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
104; CHECK-LABEL: p5_vector_urem_by_const__nonsplat:
105; CHECK:       # %bb.0:
106; CHECK-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
107; CHECK-NEXT:    vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,954437177]
108; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
109; CHECK-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
110; CHECK-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
111; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
112; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
113; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
114; CHECK-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
115; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
116; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
117; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
118; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
119; CHECK-NEXT:    retq
120  %t0 = and <4 x i32> %x, <i32 128, i32 2, i32 4, i32 8> ; every lane is a power-of-two or zero
121  %t1 = urem <4 x i32> %t0, <i32 3, i32 5, i32 6, i32 9> ; no divisor lane is a power of two
122  %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0> ; per-lane: remainder is zero
123  ret <4 x i1> %t2
124}
125
; Undef variant 0: the 'and' mask has an undef element in lane 2; the divisor
; and the zero vector are fully defined. The recorded output materializes the
; mask as a splat of 128 and still computes the remainder before the compare.
126define <4 x i1> @p6_vector_urem_by_const__nonsplat_undef0(<4 x i32> %x, <4 x i32> %y) {
127; CHECK-LABEL: p6_vector_urem_by_const__nonsplat_undef0:
128; CHECK:       # %bb.0:
129; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
130; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
131; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
132; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
133; CHECK-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
134; CHECK-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
135; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
136; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
137; CHECK-NEXT:    vpsrld $2, %xmm1, %xmm1
138; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
139; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
140; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
141; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
142; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
143; CHECK-NEXT:    retq
144  %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 undef, i32 128> ; mask lane 2 is undef, other lanes are 128
145  %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
146  %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0> ; per-lane: remainder is zero
147  ret <4 x i1> %t2
148}
149
; Undef variant 2: the mask and the divisor are fully defined splats; the zero
; vector on the compare has an undef element in lane 2. The recorded output
; compares against an all-zero register and still computes the remainder.
150define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32> %y) {
151; CHECK-LABEL: p7_vector_urem_by_const__nonsplat_undef2:
152; CHECK:       # %bb.0:
153; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
154; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
155; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
156; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
157; CHECK-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
158; CHECK-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
159; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
160; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
161; CHECK-NEXT:    vpsrld $2, %xmm1, %xmm1
162; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
163; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
164; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
165; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
166; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
167; CHECK-NEXT:    retq
168  %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
169  %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
170  %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0> ; compare constant has undef in lane 2
171  ret <4 x i1> %t2
172}
173
; Undef variant 3: both the 'and' mask and the zero vector on the compare have
; an undef element in lane 2; the divisor is a fully defined splat of 6. The
; recorded output matches the other undef variants instruction for instruction.
174define <4 x i1> @p8_vector_urem_by_const__nonsplat_undef3(<4 x i32> %x, <4 x i32> %y) {
175; CHECK-LABEL: p8_vector_urem_by_const__nonsplat_undef3:
176; CHECK:       # %bb.0:
177; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
178; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
179; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
180; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
181; CHECK-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
182; CHECK-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
183; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
184; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
185; CHECK-NEXT:    vpsrld $2, %xmm1, %xmm1
186; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
187; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
188; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
189; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
190; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
191; CHECK-NEXT:    retq
192  %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 undef, i32 128> ; mask lane 2 is undef, other lanes are 128
193  %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
194  %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0> ; compare constant has undef in lane 2
195  ret <4 x i1> %t2
196}
197
198;------------------------------------------------------------------------------;
199; Basic negative tests
200;------------------------------------------------------------------------------;
201
; Negative test: the dividend is %x & 3, which can be 3 (two bits set), so it
; is not known to be a power-of-two or zero and the 'urem' must stay. The
; expected asm keeps the multiply-and-compare remainder check.
202define i1 @n0_urem_of_maybe_not_power_of_two(i32 %x, i32 %y) {
203; CHECK-LABEL: n0_urem_of_maybe_not_power_of_two:
204; CHECK:       # %bb.0:
205; CHECK-NEXT:    andl $3, %edi
206; CHECK-NEXT:    imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
207; CHECK-NEXT:    cmpl $1431655766, %eax # imm = 0x55555556
208; CHECK-NEXT:    setb %al
209; CHECK-NEXT:    retq
210  %t0 = and i32 %x, 3 ; up to two bits set, not power-of-two
211  %t1 = urem i32 %t0, 3 ; divisor equals the largest possible dividend value
212  %t2 = icmp eq i32 %t1, 0 ; true iff the remainder is zero
213  ret i1 %t2
214}
215
; Negative test: the dividend is a single masked bit, but the divisor %y | 1
; may itself be a power of two (it is 1 when %y is 0), so the 'urem' must stay.
; The expected asm performs a real unsigned division and tests the remainder.
216define i1 @n1_urem_by_maybe_power_of_two(i32 %x, i32 %y) {
217; CHECK-LABEL: n1_urem_by_maybe_power_of_two:
218; CHECK:       # %bb.0:
219; CHECK-NEXT:    movl %edi, %eax
220; CHECK-NEXT:    andl $128, %eax
221; CHECK-NEXT:    orl $1, %esi
222; CHECK-NEXT:    xorl %edx, %edx
223; CHECK-NEXT:    divl %esi
224; CHECK-NEXT:    testl %edx, %edx
225; CHECK-NEXT:    sete %al
226; CHECK-NEXT:    retq
227  %t0 = and i32 %x, 128 ; clearly a power-of-two or zero
228  %t1 = or i32 %y, 1 ; one low bit set, may be a power of two
229  %t2 = urem i32 %t0, %t1 ; divisor is never zero because bit 0 is forced on
230  %t3 = icmp eq i32 %t2, 0 ; true iff the remainder is zero
231  ret i1 %t3
232}
233