; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL

; Odd divisor
define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_25:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_25:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_25:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_25:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691]
; CHECK-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_25:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Even divisors
define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_100:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psrld $5, %xmm2
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [100,100,100,100]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_100:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $5, %xmm2
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_100:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_100:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [100,100,100,100]
; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_100:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprord $2, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Negative divisors should be negated, and thus this is still splat vectors.

; Odd divisor
define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_neg25:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3264175145,1030792151,1030792151,3264175145]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_neg25:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [171798691,1,1,171798691]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_odd_neg25:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Even divisors
define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_neg100:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    psrld $5, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    psrld $2, %xmm2
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    psrld $27, %xmm2
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_neg100:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    psrld $5, %xmm1
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE41-NEXT:    psrld $2, %xmm2
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    psrld $27, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_neg100:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpsrld $27, %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_neg100:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [536870925,536870925,536870925,536870925]
; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_neg100:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprord $2, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;

define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_undef1:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psrld $3, %xmm2
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [25,25,25,25]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_undef1:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $3, %xmm2
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_undef1:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $3, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_undef1:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpsrld $3, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [25,25,25,25]
; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_undef1:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT:    vpsrld $3, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_undef1:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psrld $5, %xmm2
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [100,100,100,100]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_undef1:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $5, %xmm2
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_undef1:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_undef1:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [100,100,100,100]
; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_undef1:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;

define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_one_eq:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_one_eq:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_one_eq:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_one_eq:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_one_ne:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_one_ne:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We can lower remainder of division by powers of two much better elsewhere.
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_pow2:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT:    psrld $31, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_pow2:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_pow2:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; CHECK-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_pow2:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We could lower remainder of division by INT_MIN much better elsewhere.
define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_int_min:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT:    psrld $31, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_int_min:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_int_min:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_int_min:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We could lower remainder of division by all-ones much better elsewhere.
define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_allones:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_allones:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE41-NEXT:    pminud %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_allones:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_allones:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_allones:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}