• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
3; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=CHECK-SSE41
4; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
5; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
6; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL
7
8; Odd+Even divisors
; test_urem_odd_even: lane-wise `(X urem <5, 14, 25, 100>) == 0`, with the
; <4 x i1> compare result zero-extended to <4 x i32>.
;
; The CHECK lines pin the llc output for every RUN-line prefix. They are
; generated by utils/update_llc_test_checks.py; regenerate them instead of
; editing by hand.
;
; What the expectations show:
;   SSE2 / SSE4.1 / AVX1 / AVX2 - the remainder is computed explicitly:
;       pmuludq by a constant vector, shifts, a multiply back (the divisor
;       vector [5,14,25,100] is visible in the SSE2 output), psubd from the
;       input, then pcmpeqd against zero and psrld $31 to produce 0/1 lanes.
;   AVX512VL - a shorter vpmulld + vprorvd + vpminud + vpcmpeqd + vpsrld $31
;       sequence with no subtraction.
9define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
10; CHECK-SSE2-LABEL: test_urem_odd_even:
11; CHECK-SSE2:       # %bb.0:
12; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2454267027,1374389535,1374389535]
13; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
14; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
15; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
16; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
17; CHECK-SSE2-NEXT:    psrld $1, %xmm3
18; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
19; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
20; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
21; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
22; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
23; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
24; CHECK-SSE2-NEXT:    psrld $2, %xmm3
25; CHECK-SSE2-NEXT:    psrld $3, %xmm2
26; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
27; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [5,14,25,100]
28; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
29; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
30; CHECK-SSE2-NEXT:    psrld $5, %xmm1
31; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
32; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
33; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
34; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
35; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
36; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
37; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
38; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
39; CHECK-SSE2-NEXT:    psrld $31, %xmm0
40; CHECK-SSE2-NEXT:    retq
41;
42; CHECK-SSE41-LABEL: test_urem_odd_even:
43; CHECK-SSE41:       # %bb.0:
44; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
45; CHECK-SSE41-NEXT:    psrld $1, %xmm1
46; CHECK-SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
47; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,2454267027,1374389535,1374389535]
48; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
49; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
50; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
51; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
52; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
53; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
54; CHECK-SSE41-NEXT:    psrld $2, %xmm2
55; CHECK-SSE41-NEXT:    psrld $5, %xmm3
56; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7]
57; CHECK-SSE41-NEXT:    psrld $3, %xmm1
58; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
59; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
60; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
61; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
62; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
63; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
64; CHECK-SSE41-NEXT:    psrld $31, %xmm0
65; CHECK-SSE41-NEXT:    retq
66;
67; CHECK-AVX1-LABEL: test_urem_odd_even:
68; CHECK-AVX1:       # %bb.0:
69; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
70; CHECK-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
71; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,1374389535,1374389535]
72; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
73; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
74; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
75; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
76; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
77; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
78; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
79; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
80; CHECK-AVX1-NEXT:    vpsrld $3, %xmm2, %xmm2
81; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
82; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
83; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
84; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
85; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
86; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
87; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
88; CHECK-AVX1-NEXT:    retq
89;
90; CHECK-AVX2-LABEL: test_urem_odd_even:
91; CHECK-AVX2:       # %bb.0:
92; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,1374389535,1374389535]
93; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
94; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
95; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
96; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
97; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
98; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
99; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
100; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
101; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
102; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
103; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
104; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
105; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
106; CHECK-AVX2-NEXT:    retq
107;
108; CHECK-AVX512VL-LABEL: test_urem_odd_even:
109; CHECK-AVX512VL:       # %bb.0:
110; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
111; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
112; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
113; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
114; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
115; CHECK-AVX512VL-NEXT:    retq
116  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
117  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
118  %ret = zext <4 x i1> %cmp to <4 x i32>
119  ret <4 x i32> %ret
120}
121
122;==============================================================================;
123
124; One all-ones divisor in odd divisor
; test_urem_odd_allones_eq: lane-wise `(X urem <5, 5, 4294967295, 5>) == 0`,
; zero-extended to <4 x i32>. 4294967295 is the all-ones i32 value (2^32 - 1).
;
; The CHECK lines are generated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand. CHECK-AVX is the prefix shared by the avx,
; avx2 and avx512f+avx512vl RUN lines, so one expectation covers all three.
;
; What the expectations show (no psubd on any checked target):
;   SSE2         - two pmuludq halves recombined with pshufd/punpckldq, then
;                  pxor / pcmpgtd / pandn against constants in memory.
;   SSE4.1 / AVX - pmulld, unsigned min (pminud) against a constant vector,
;                  pcmpeqd, and psrld $31 to produce 0/1 lanes.
125define <4 x i32> @test_urem_odd_allones_eq(<4 x i32> %X) nounwind {
126; CHECK-SSE2-LABEL: test_urem_odd_allones_eq:
127; CHECK-SSE2:       # %bb.0:
128; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
129; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm0
130; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
131; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
132; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
133; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
134; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
135; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
136; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
137; CHECK-SSE2-NEXT:    retq
138;
139; CHECK-SSE41-LABEL: test_urem_odd_allones_eq:
140; CHECK-SSE41:       # %bb.0:
141; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
142; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,1,858993459]
143; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
144; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
145; CHECK-SSE41-NEXT:    psrld $31, %xmm0
146; CHECK-SSE41-NEXT:    retq
147;
148; CHECK-AVX-LABEL: test_urem_odd_allones_eq:
149; CHECK-AVX:       # %bb.0:
150; CHECK-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
151; CHECK-AVX-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
152; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
153; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
154; CHECK-AVX-NEXT:    retq
155  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
156  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
157  %ret = zext <4 x i1> %cmp to <4 x i32>
158  ret <4 x i32> %ret
159}
; test_urem_odd_allones_ne: lane-wise `(X urem <5, 5, 4294967295, 5>) != 0`,
; zero-extended to <4 x i32>. 4294967295 is the all-ones i32 value (2^32 - 1).
;
; The CHECK lines are generated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand. CHECK-AVX is the prefix shared by the avx,
; avx2 and avx512f+avx512vl RUN lines.
;
; What the expectations show (no psubd on any checked target):
;   SSE2         - two pmuludq halves recombined, then pxor / pcmpgtd and a
;                  final psrld $31.
;   SSE4.1 / AVX - pmulld, unsigned max (pmaxud) against a constant vector,
;                  pcmpeqd, and psrld $31 to produce 0/1 lanes.
160define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
161; CHECK-SSE2-LABEL: test_urem_odd_allones_ne:
162; CHECK-SSE2:       # %bb.0:
163; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
164; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm0
165; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
166; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
167; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
168; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
169; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
170; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
171; CHECK-SSE2-NEXT:    psrld $31, %xmm0
172; CHECK-SSE2-NEXT:    retq
173;
174; CHECK-SSE41-LABEL: test_urem_odd_allones_ne:
175; CHECK-SSE41:       # %bb.0:
176; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
177; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993460,858993460,2,858993460]
178; CHECK-SSE41-NEXT:    pmaxud %xmm0, %xmm1
179; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
180; CHECK-SSE41-NEXT:    psrld $31, %xmm0
181; CHECK-SSE41-NEXT:    retq
182;
183; CHECK-AVX-LABEL: test_urem_odd_allones_ne:
184; CHECK-AVX:       # %bb.0:
185; CHECK-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
186; CHECK-AVX-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm1
187; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
188; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
189; CHECK-AVX-NEXT:    retq
190  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
191  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
192  %ret = zext <4 x i1> %cmp to <4 x i32>
193  ret <4 x i32> %ret
194}
195
196; One all-ones divisor in even divisor
; test_urem_even_allones_eq: lane-wise `(X urem <14, 14, 4294967295, 14>) == 0`,
; zero-extended to <4 x i32>. 4294967295 is the all-ones i32 value (2^32 - 1).
;
; The CHECK lines are generated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
;
; What the expectations show:
;   SSE2 / SSE4.1 / AVX1 / AVX2 - the remainder is computed explicitly
;       (pmuludq by constants, shifts, multiply back, psubd from the input),
;       then pcmpeqd against zero and psrld $31 to produce 0/1 lanes.
;   AVX512VL - vpmulld + vprorvd + vpminud + vpcmpeqd + vpsrld $31, with no
;       subtraction.
197define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
198; CHECK-SSE2-LABEL: test_urem_even_allones_eq:
199; CHECK-SSE2:       # %bb.0:
200; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
201; CHECK-SSE2-NEXT:    psrld $1, %xmm1
202; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
203; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
204; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
205; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
206; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
207; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
208; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
209; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
210; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
211; CHECK-SSE2-NEXT:    psrld $2, %xmm1
212; CHECK-SSE2-NEXT:    psrld $31, %xmm2
213; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
214; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
215; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
216; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
217; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
218; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
219; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
220; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
221; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
222; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
223; CHECK-SSE2-NEXT:    psrld $31, %xmm0
224; CHECK-SSE2-NEXT:    retq
225;
226; CHECK-SSE41-LABEL: test_urem_even_allones_eq:
227; CHECK-SSE41:       # %bb.0:
228; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
229; CHECK-SSE41-NEXT:    psrld $1, %xmm1
230; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
231; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
232; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
233; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
234; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
235; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
236; CHECK-SSE41-NEXT:    psrld $31, %xmm1
237; CHECK-SSE41-NEXT:    psrld $2, %xmm2
238; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
239; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
240; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
241; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
242; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
243; CHECK-SSE41-NEXT:    psrld $31, %xmm0
244; CHECK-SSE41-NEXT:    retq
245;
246; CHECK-AVX1-LABEL: test_urem_even_allones_eq:
247; CHECK-AVX1:       # %bb.0:
248; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
249; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
250; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm2, %xmm2
251; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
252; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm3
253; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
254; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
255; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
256; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
257; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
258; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
259; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
260; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
261; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
262; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
263; CHECK-AVX1-NEXT:    retq
264;
265; CHECK-AVX2-LABEL: test_urem_even_allones_eq:
266; CHECK-AVX2:       # %bb.0:
267; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
268; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
269; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
270; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
271; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
272; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
273; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
274; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
275; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
276; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
277; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
278; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
279; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
280; CHECK-AVX2-NEXT:    retq
281;
282; CHECK-AVX512VL-LABEL: test_urem_even_allones_eq:
283; CHECK-AVX512VL:       # %bb.0:
284; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
285; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
286; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
287; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
288; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
289; CHECK-AVX512VL-NEXT:    retq
290  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
291  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
292  %ret = zext <4 x i1> %cmp to <4 x i32>
293  ret <4 x i32> %ret
294}
; test_urem_even_allones_ne: lane-wise `(X urem <14, 14, 4294967295, 14>) != 0`,
; zero-extended to <4 x i32>. 4294967295 is the all-ones i32 value (2^32 - 1).
;
; The CHECK lines are generated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
;
; What the expectations show:
;   SSE2 / SSE4.1 / AVX1 - the remainder is computed explicitly and compared
;       with zero via pcmpeqd; the final pandn against a constant in memory
;       inverts that equality mask.
;   AVX2 - same expansion, but the pandn operand is a vpbroadcastd of
;       [1,1,1,1] held in a register.
;   AVX512VL - vpmulld + vprorvd + vpmaxud + vpcmpeqd + vpsrld $31, with no
;       subtraction.
295define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
296; CHECK-SSE2-LABEL: test_urem_even_allones_ne:
297; CHECK-SSE2:       # %bb.0:
298; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
299; CHECK-SSE2-NEXT:    psrld $1, %xmm1
300; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
301; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
302; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
303; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
304; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
305; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
306; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
307; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
308; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
309; CHECK-SSE2-NEXT:    psrld $2, %xmm1
310; CHECK-SSE2-NEXT:    psrld $31, %xmm2
311; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
312; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
313; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
314; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
315; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
316; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
317; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
318; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
319; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
320; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
321; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
322; CHECK-SSE2-NEXT:    retq
323;
324; CHECK-SSE41-LABEL: test_urem_even_allones_ne:
325; CHECK-SSE41:       # %bb.0:
326; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
327; CHECK-SSE41-NEXT:    psrld $1, %xmm1
328; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
329; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
330; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
331; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
332; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
333; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
334; CHECK-SSE41-NEXT:    psrld $31, %xmm1
335; CHECK-SSE41-NEXT:    psrld $2, %xmm2
336; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
337; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
338; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
339; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
340; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
341; CHECK-SSE41-NEXT:    pandn {{.*}}(%rip), %xmm0
342; CHECK-SSE41-NEXT:    retq
343;
344; CHECK-AVX1-LABEL: test_urem_even_allones_ne:
345; CHECK-AVX1:       # %bb.0:
346; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
347; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
348; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm2, %xmm2
349; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
350; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm3
351; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
352; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
353; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
354; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
355; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
356; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
357; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
358; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
359; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
360; CHECK-AVX1-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
361; CHECK-AVX1-NEXT:    retq
362;
363; CHECK-AVX2-LABEL: test_urem_even_allones_ne:
364; CHECK-AVX2:       # %bb.0:
365; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
366; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
367; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
368; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
369; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
370; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
371; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
372; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
373; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
374; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
375; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
376; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
377; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
378; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
379; CHECK-AVX2-NEXT:    retq
380;
381; CHECK-AVX512VL-LABEL: test_urem_even_allones_ne:
382; CHECK-AVX512VL:       # %bb.0:
383; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
384; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
385; CHECK-AVX512VL-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm1
386; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
387; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
388; CHECK-AVX512VL-NEXT:    retq
389  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
390  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
391  %ret = zext <4 x i1> %cmp to <4 x i32>
392  ret <4 x i32> %ret
393}
394
395; One all-ones divisor in odd+even divisor
; test_urem_odd_even_allones_eq: lane-wise
; `(X urem <5, 14, 4294967295, 100>) == 0`, zero-extended to <4 x i32>.
; 4294967295 is the all-ones i32 value (2^32 - 1).
;
; The CHECK lines are generated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
;
; What the expectations show:
;   SSE2 / SSE4.1 / AVX1 / AVX2 - the remainder is computed explicitly:
;       pmuludq by a constant vector, shifts, a multiply back (the divisor
;       vector [5,14,4294967295,100] is visible in the SSE2 output), psubd
;       from the input, then pcmpeqd against zero and psrld $31.
;   AVX512VL - vpmulld + vprorvd + vpminud + vpcmpeqd + vpsrld $31, with no
;       subtraction.
396define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
397; CHECK-SSE2-LABEL: test_urem_odd_even_allones_eq:
398; CHECK-SSE2:       # %bb.0:
399; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2454267027,2147483649,1374389535]
400; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
401; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
402; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
403; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
404; CHECK-SSE2-NEXT:    psrld $1, %xmm3
405; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
406; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
407; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
408; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
409; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
410; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
411; CHECK-SSE2-NEXT:    psrld $2, %xmm3
412; CHECK-SSE2-NEXT:    psrld $31, %xmm2
413; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
414; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [5,14,4294967295,100]
415; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
416; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
417; CHECK-SSE2-NEXT:    psrld $5, %xmm1
418; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
419; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
420; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
421; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
422; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
423; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
424; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
425; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
426; CHECK-SSE2-NEXT:    psrld $31, %xmm0
427; CHECK-SSE2-NEXT:    retq
428;
429; CHECK-SSE41-LABEL: test_urem_odd_even_allones_eq:
430; CHECK-SSE41:       # %bb.0:
431; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
432; CHECK-SSE41-NEXT:    psrld $1, %xmm1
433; CHECK-SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
434; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,2454267027,2147483649,1374389535]
435; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
436; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
437; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
438; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
439; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
440; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
441; CHECK-SSE41-NEXT:    psrld $2, %xmm2
442; CHECK-SSE41-NEXT:    psrld $5, %xmm3
443; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7]
444; CHECK-SSE41-NEXT:    psrld $31, %xmm1
445; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
446; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
447; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
448; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
449; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
450; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
451; CHECK-SSE41-NEXT:    psrld $31, %xmm0
452; CHECK-SSE41-NEXT:    retq
453;
454; CHECK-AVX1-LABEL: test_urem_odd_even_allones_eq:
455; CHECK-AVX1:       # %bb.0:
456; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
457; CHECK-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
458; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,2147483649,1374389535]
459; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
460; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
461; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
462; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
463; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
464; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
465; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
466; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
467; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
468; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
469; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
470; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
471; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
472; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
473; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
474; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
475; CHECK-AVX1-NEXT:    retq
476;
477; CHECK-AVX2-LABEL: test_urem_odd_even_allones_eq:
478; CHECK-AVX2:       # %bb.0:
479; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,2147483649,1374389535]
480; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
481; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
482; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
483; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
484; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
485; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
486; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
487; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
488; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
489; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
490; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
491; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
492; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
493; CHECK-AVX2-NEXT:    retq
494;
495; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_eq:
496; CHECK-AVX512VL:       # %bb.0:
497; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
498; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
499; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
500; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
501; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
502; CHECK-AVX512VL-NEXT:    retq
503  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
504  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
505  %ret = zext <4 x i1> %cmp to <4 x i32>
506  ret <4 x i32> %ret
507}
; test_urem_odd_even_allones_ne: lane-wise
; `(X urem <5, 14, 4294967295, 100>) != 0`, zero-extended to <4 x i32>.
; 4294967295 is the all-ones i32 value (2^32 - 1).
;
; The CHECK lines are generated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
;
; What the expectations show:
;   SSE2 / SSE4.1 / AVX1 - the remainder is computed explicitly and compared
;       with zero via pcmpeqd; the final pandn against a constant in memory
;       inverts that equality mask.
;   AVX2 - same expansion, but the pandn operand is a vpbroadcastd of
;       [1,1,1,1] held in a register.
;   AVX512VL - vpmulld + vprorvd + vpmaxud + vpcmpeqd + vpsrld $31, with no
;       subtraction.
508define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
509; CHECK-SSE2-LABEL: test_urem_odd_even_allones_ne:
510; CHECK-SSE2:       # %bb.0:
511; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2454267027,2147483649,1374389535]
512; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
513; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
514; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
515; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
516; CHECK-SSE2-NEXT:    psrld $1, %xmm3
517; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
518; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
519; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
520; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
521; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
522; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
523; CHECK-SSE2-NEXT:    psrld $2, %xmm3
524; CHECK-SSE2-NEXT:    psrld $31, %xmm2
525; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
526; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [5,14,4294967295,100]
527; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
528; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
529; CHECK-SSE2-NEXT:    psrld $5, %xmm1
530; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
531; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
532; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
533; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
534; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
535; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
536; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
537; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
538; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
539; CHECK-SSE2-NEXT:    retq
540;
541; CHECK-SSE41-LABEL: test_urem_odd_even_allones_ne:
542; CHECK-SSE41:       # %bb.0:
543; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
544; CHECK-SSE41-NEXT:    psrld $1, %xmm1
545; CHECK-SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
546; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,2454267027,2147483649,1374389535]
547; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
548; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
549; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
550; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
551; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
552; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
553; CHECK-SSE41-NEXT:    psrld $2, %xmm2
554; CHECK-SSE41-NEXT:    psrld $5, %xmm3
555; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7]
556; CHECK-SSE41-NEXT:    psrld $31, %xmm1
557; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
558; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
559; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
560; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
561; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
562; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
563; CHECK-SSE41-NEXT:    pandn {{.*}}(%rip), %xmm0
564; CHECK-SSE41-NEXT:    retq
565;
566; CHECK-AVX1-LABEL: test_urem_odd_even_allones_ne:
567; CHECK-AVX1:       # %bb.0:
568; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
569; CHECK-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
570; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,2147483649,1374389535]
571; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
572; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
573; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
574; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
575; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
576; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
577; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
578; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
579; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
580; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
581; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
582; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
583; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
584; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
585; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
586; CHECK-AVX1-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
587; CHECK-AVX1-NEXT:    retq
588;
589; CHECK-AVX2-LABEL: test_urem_odd_even_allones_ne:
590; CHECK-AVX2:       # %bb.0:
591; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,2147483649,1374389535]
592; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
593; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
594; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
595; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
596; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
597; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
598; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
599; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
600; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
601; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
602; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
603; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
604; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
605; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
606; CHECK-AVX2-NEXT:    retq
607;
608; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_ne:
609; CHECK-AVX512VL:       # %bb.0:
610; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
611; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
612; CHECK-AVX512VL-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm1
613; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
614; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
615; CHECK-AVX512VL-NEXT:    retq
616  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
617  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
618  %ret = zext <4 x i1> %cmp to <4 x i32>
619  ret <4 x i32> %ret
620}
621
622;------------------------------------------------------------------------------;
623
624; Odd divisors (5) with a single power-of-two divisor (16) in lane 2; per-lane test of (X urem C) == 0, zero-extended to i32.
625define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
626; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo:
627; CHECK-SSE2:       # %bb.0:
628; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = <3435973837,u,268435456,u>
629; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
630; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
631; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
632; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
633; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
634; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
635; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm2
636; CHECK-SSE2-NEXT:    psrld $2, %xmm2
637; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
638; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
639; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
640; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
641; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
642; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
643; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
644; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
645; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
646; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
647; CHECK-SSE2-NEXT:    psrld $31, %xmm0
648; CHECK-SSE2-NEXT:    retq
649;
650; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo:
651; CHECK-SSE41:       # %bb.0:
652; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
653; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
654; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <3435973837,u,268435456,u>
655; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
656; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
657; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
658; CHECK-SSE41-NEXT:    psrld $2, %xmm1
659; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
660; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
661; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
662; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
663; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
664; CHECK-SSE41-NEXT:    psrld $31, %xmm0
665; CHECK-SSE41-NEXT:    retq
666;
667; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo:
668; CHECK-AVX1:       # %bb.0:
669; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
670; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
671; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm2
672; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
673; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
674; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
675; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
676; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
677; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
678; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
679; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
680; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
681; CHECK-AVX1-NEXT:    retq
682;
683; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo:
684; CHECK-AVX2:       # %bb.0:
685; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
686; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
687; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
688; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm2
689; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
690; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
691; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
692; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
693; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
694; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
695; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
696; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
697; CHECK-AVX2-NEXT:    retq
698;
699; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo:
700; CHECK-AVX512VL:       # %bb.0:
701; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
702; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
703; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
704; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
705; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
706; CHECK-AVX512VL-NEXT:    retq
707  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5> ; lanes 0, 1, 3 divide by 5 (odd); lane 2 divides by 16 (power of two)
708  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane: is the remainder zero?
709  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 to i32: 1 where the remainder is zero, 0 otherwise
710  ret <4 x i32> %ret
711}
712
713; Even divisors (14) with a single power-of-two divisor (16) in lane 2; per-lane test of (X urem C) == 0, zero-extended to i32.
714define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
715; CHECK-SSE2-LABEL: test_urem_even_poweroftwo:
716; CHECK-SSE2:       # %bb.0:
717; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
718; CHECK-SSE2-NEXT:    psrld $1, %xmm1
719; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
720; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
721; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
722; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
723; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
724; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
725; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
726; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
727; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
728; CHECK-SSE2-NEXT:    psrld $2, %xmm1
729; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
730; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
731; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
732; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
733; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
734; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
735; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
736; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
737; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
738; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
739; CHECK-SSE2-NEXT:    psrld $31, %xmm0
740; CHECK-SSE2-NEXT:    retq
741;
742; CHECK-SSE41-LABEL: test_urem_even_poweroftwo:
743; CHECK-SSE41:       # %bb.0:
744; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
745; CHECK-SSE41-NEXT:    psrld $1, %xmm1
746; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
747; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
748; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
749; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
750; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
751; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
752; CHECK-SSE41-NEXT:    psrld $2, %xmm2
753; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
754; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
755; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
756; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
757; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
758; CHECK-SSE41-NEXT:    psrld $31, %xmm0
759; CHECK-SSE41-NEXT:    retq
760;
761; CHECK-AVX1-LABEL: test_urem_even_poweroftwo:
762; CHECK-AVX1:       # %bb.0:
763; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
764; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
765; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm2, %xmm2
766; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
767; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
768; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
769; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
770; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
771; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
772; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
773; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
774; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
775; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
776; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
777; CHECK-AVX1-NEXT:    retq
778;
779; CHECK-AVX2-LABEL: test_urem_even_poweroftwo:
780; CHECK-AVX2:       # %bb.0:
781; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
782; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
783; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
784; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
785; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
786; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
787; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
788; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
789; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
790; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
791; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
792; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
793; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
794; CHECK-AVX2-NEXT:    retq
795;
796; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo:
797; CHECK-AVX512VL:       # %bb.0:
798; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
799; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
800; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
801; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
802; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
803; CHECK-AVX512VL-NEXT:    retq
804  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14> ; lanes 0, 1, 3 divide by 14 (even); lane 2 divides by 16 (power of two)
805  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane: is the remainder zero?
806  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 to i32: 1 where the remainder is zero, 0 otherwise
807  ret <4 x i32> %ret
808}
809
810; Mixed odd and even divisors (5, 14, 100) with a single power-of-two divisor (16) in lane 2; per-lane test of (X urem C) == 0, zero-extended to i32.
811define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
812; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo:
813; CHECK-SSE2:       # %bb.0:
814; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2454267027,268435456,1374389535]
815; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
816; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
817; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
818; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
819; CHECK-SSE2-NEXT:    psrld $1, %xmm3
820; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
821; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
822; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
823; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
824; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
825; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
826; CHECK-SSE2-NEXT:    psrld $2, %xmm3
827; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
828; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [5,14,16,100]
829; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
830; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
831; CHECK-SSE2-NEXT:    psrld $5, %xmm1
832; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
833; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
834; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
835; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
836; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
837; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
838; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
839; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
840; CHECK-SSE2-NEXT:    psrld $31, %xmm0
841; CHECK-SSE2-NEXT:    retq
842;
843; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo:
844; CHECK-SSE41:       # %bb.0:
845; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
846; CHECK-SSE41-NEXT:    psrld $1, %xmm1
847; CHECK-SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
848; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,2454267027,268435456,1374389535]
849; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
850; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
851; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
852; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
853; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
854; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
855; CHECK-SSE41-NEXT:    psrld $2, %xmm2
856; CHECK-SSE41-NEXT:    psrld $5, %xmm3
857; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7]
858; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
859; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
860; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
861; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
862; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
863; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
864; CHECK-SSE41-NEXT:    psrld $31, %xmm0
865; CHECK-SSE41-NEXT:    retq
866;
867; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo:
868; CHECK-AVX1:       # %bb.0:
869; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
870; CHECK-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
871; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,268435456,1374389535]
872; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
873; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
874; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
875; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
876; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
877; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
878; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
879; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
880; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
881; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
882; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
883; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
884; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
885; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
886; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
887; CHECK-AVX1-NEXT:    retq
888;
889; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo:
890; CHECK-AVX2:       # %bb.0:
891; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,268435456,1374389535]
892; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
893; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
894; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
895; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
896; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
897; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
898; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
899; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
900; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
901; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
902; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
903; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
904; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
905; CHECK-AVX2-NEXT:    retq
906;
907; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo:
908; CHECK-AVX512VL:       # %bb.0:
909; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
910; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
911; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
912; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
913; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
914; CHECK-AVX512VL-NEXT:    retq
915  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100> ; lane 0: 5 (odd); lanes 1, 3: 14 and 100 (even); lane 2: 16 (power of two)
916  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane: is the remainder zero?
917  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 to i32: 1 where the remainder is zero, 0 otherwise
918  ret <4 x i32> %ret
919}
920
921;------------------------------------------------------------------------------;
922
923; Odd divisors (5) with a single divisor equal to one in lane 2; per-lane test of (X urem C) == 0, zero-extended to i32.
924define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
925; CHECK-SSE2-LABEL: test_urem_odd_one:
926; CHECK-SSE2:       # %bb.0:
927; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
928; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
929; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
930; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
931; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
932; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
933; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
934; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
935; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
936; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
937; CHECK-SSE2-NEXT:    retq
938;
939; CHECK-SSE41-LABEL: test_urem_odd_one:
940; CHECK-SSE41:       # %bb.0:
941; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
942; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,4294967295,858993459]
943; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
944; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
945; CHECK-SSE41-NEXT:    psrld $31, %xmm0
946; CHECK-SSE41-NEXT:    retq
947;
948; CHECK-AVX1-LABEL: test_urem_odd_one:
949; CHECK-AVX1:       # %bb.0:
950; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
951; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
952; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
953; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
954; CHECK-AVX1-NEXT:    retq
955;
956; CHECK-AVX2-LABEL: test_urem_odd_one:
957; CHECK-AVX2:       # %bb.0:
958; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
959; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
960; CHECK-AVX2-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
961; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
962; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
963; CHECK-AVX2-NEXT:    retq
964;
965; CHECK-AVX512VL-LABEL: test_urem_odd_one:
966; CHECK-AVX512VL:       # %bb.0:
967; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
968; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
969; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
970; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
971; CHECK-AVX512VL-NEXT:    retq
972  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5> ; lanes 0, 1, 3 divide by 5 (odd); lane 2 divides by 1, so its remainder is always 0
973  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane: is the remainder zero?
974  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 to i32: 1 where the remainder is zero, 0 otherwise
975  ret <4 x i32> %ret
976}
977
978; Even divisors (14) with a single divisor equal to one in lane 2; per-lane test of (X urem C) == 0, zero-extended to i32.
979define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
980; CHECK-SSE2-LABEL: test_urem_even_one:
981; CHECK-SSE2:       # %bb.0:
982; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
983; CHECK-SSE2-NEXT:    psrld $1, %xmm1
984; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
985; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
986; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
987; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
988; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
989; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
990; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
991; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
992; CHECK-SSE2-NEXT:    psrld $2, %xmm2
993; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
994; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
995; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
996; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3]
997; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
998; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
999; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1000; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
1001; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1002; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1003; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1004; CHECK-SSE2-NEXT:    retq
1005;
1006; CHECK-SSE41-LABEL: test_urem_even_one:
1007; CHECK-SSE41:       # %bb.0:
1008; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
1009; CHECK-SSE41-NEXT:    psrld $1, %xmm1
1010; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1011; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
1012; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
1013; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1014; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
1015; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1016; CHECK-SSE41-NEXT:    psrld $2, %xmm2
1017; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
1018; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
1019; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
1020; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1021; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1022; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1023; CHECK-SSE41-NEXT:    retq
1024;
1025; CHECK-AVX1-LABEL: test_urem_even_one:
1026; CHECK-AVX1:       # %bb.0:
1027; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
1028; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
1029; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm2, %xmm2
1030; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1031; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1032; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
1033; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1034; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
1035; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
1036; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1037; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1038; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1039; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1040; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1041; CHECK-AVX1-NEXT:    retq
1042;
1043; CHECK-AVX2-LABEL: test_urem_even_one:
1044; CHECK-AVX2:       # %bb.0:
1045; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
1046; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1047; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
1048; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
1049; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
1050; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1051; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1052; CHECK-AVX2-NEXT:    vpsrld $2, %xmm1, %xmm1
1053; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3]
1054; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1055; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1056; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1057; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1058; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1059; CHECK-AVX2-NEXT:    retq
1060;
1061; CHECK-AVX512VL-LABEL: test_urem_even_one:
1062; CHECK-AVX512VL:       # %bb.0:
1063; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
1064; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
1065; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1066; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1067; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1068; CHECK-AVX512VL-NEXT:    retq
1069  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14> ; lanes 0, 1, 3 divide by 14 (even); lane 2 divides by 1, so its remainder is always 0
1070  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane: is the remainder zero?
1071  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 to i32: 1 where the remainder is zero, 0 otherwise
1072  ret <4 x i32> %ret
1073}
1074
1075; Mixed odd and even divisors (5, 14, 100) with a single divisor equal to one in lane 2; per-lane test of (X urem C) == 0, zero-extended to i32.
1076define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
1077; CHECK-SSE2-LABEL: test_urem_odd_even_one:
1078; CHECK-SSE2:       # %bb.0:
1079; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2454267027,0,1374389535]
1080; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
1081; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
1082; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
1083; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
1084; CHECK-SSE2-NEXT:    psrld $1, %xmm3
1085; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
1086; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1087; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
1088; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
1089; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1090; CHECK-SSE2-NEXT:    psrld $2, %xmm2
1091; CHECK-SSE2-NEXT:    psrld $5, %xmm1
1092; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm3
1093; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
1094; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[3,3]
1095; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [5,14,1,100]
1096; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
1097; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm4
1098; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
1099; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
1100; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
1101; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1102; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
1103; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1104; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1105; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1106; CHECK-SSE2-NEXT:    retq
1107;
1108; CHECK-SSE41-LABEL: test_urem_odd_even_one:
1109; CHECK-SSE41:       # %bb.0:
1110; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
1111; CHECK-SSE41-NEXT:    psrld $1, %xmm1
1112; CHECK-SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
1113; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,2454267027,0,1374389535]
1114; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1115; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
1116; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
1117; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1118; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1119; CHECK-SSE41-NEXT:    psrld $2, %xmm1
1120; CHECK-SSE41-NEXT:    psrld $5, %xmm3
1121; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
1122; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm0[4,5],xmm3[6,7]
1123; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm3
1124; CHECK-SSE41-NEXT:    psubd %xmm3, %xmm0
1125; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1126; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1127; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1128; CHECK-SSE41-NEXT:    retq
1129;
1130; CHECK-AVX1-LABEL: test_urem_odd_even_one:
1131; CHECK-AVX1:       # %bb.0:
1132; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
1133; CHECK-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
1134; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,0,1374389535]
1135; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1136; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
1137; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
1138; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1139; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1140; CHECK-AVX1-NEXT:    vpsrld $2, %xmm2, %xmm2
1141; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
1142; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
1143; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
1144; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1145; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1146; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1147; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1148; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1149; CHECK-AVX1-NEXT:    retq
1150;
1151; CHECK-AVX2-LABEL: test_urem_odd_even_one:
1152; CHECK-AVX2:       # %bb.0:
1153; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,0,1374389535]
1154; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1155; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
1156; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1157; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
1158; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
1159; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1160; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1161; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
1162; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3]
1163; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1164; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1165; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1166; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1167; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1168; CHECK-AVX2-NEXT:    retq
1169;
1170; CHECK-AVX512VL-LABEL: test_urem_odd_even_one:
1171; CHECK-AVX512VL:       # %bb.0:
1172; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1173; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1174; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1175; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1176; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1177; CHECK-AVX512VL-NEXT:    retq
1178  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100> ; lane 0: 5 (odd); lanes 1, 3: 14 and 100 (even); lane 2: 1, so its remainder is always 0
1179  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane: is the remainder zero?
1180  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 to i32: 1 where the remainder is zero, 0 otherwise
1181  ret <4 x i32> %ret
1182}
1183
1184;------------------------------------------------------------------------------;
1185
1186; One INT_MIN divisor in odd divisor
1187define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
1188; CHECK-SSE2-LABEL: test_urem_odd_INT_MIN:
1189; CHECK-SSE2:       # %bb.0:
1190; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = <3435973837,u,2,u>
1191; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
1192; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1193; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1194; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
1195; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
1196; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1197; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm2
1198; CHECK-SSE2-NEXT:    psrld $2, %xmm2
1199; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
1200; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
1201; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1202; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1203; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
1204; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1205; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1206; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
1207; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1208; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1209; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1210; CHECK-SSE2-NEXT:    retq
1211;
1212; CHECK-SSE41-LABEL: test_urem_odd_INT_MIN:
1213; CHECK-SSE41:       # %bb.0:
1214; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1215; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
1216; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <3435973837,u,2,u>
1217; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
1218; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1219; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1220; CHECK-SSE41-NEXT:    psrld $2, %xmm1
1221; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
1222; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
1223; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
1224; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1225; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1226; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1227; CHECK-SSE41-NEXT:    retq
1228;
1229; CHECK-AVX1-LABEL: test_urem_odd_INT_MIN:
1230; CHECK-AVX1:       # %bb.0:
1231; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1232; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
1233; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm2
1234; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1235; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1236; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
1237; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
1238; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1239; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1240; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1241; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1242; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1243; CHECK-AVX1-NEXT:    retq
1244;
1245; CHECK-AVX2-LABEL: test_urem_odd_INT_MIN:
1246; CHECK-AVX2:       # %bb.0:
1247; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1248; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
1249; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
1250; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm2
1251; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1252; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
1253; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
1254; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1255; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1256; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1257; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1258; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1259; CHECK-AVX2-NEXT:    retq
1260;
1261; CHECK-AVX512VL-LABEL: test_urem_odd_INT_MIN:
1262; CHECK-AVX512VL:       # %bb.0:
1263; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1264; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1265; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1266; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1267; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1268; CHECK-AVX512VL-NEXT:    retq
1269  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5>
1270  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1271  %ret = zext <4 x i1> %cmp to <4 x i32>
1272  ret <4 x i32> %ret
1273}
1274
1275; One INT_MIN divisor in even divisor
; Codegen test: per-lane "is %X evenly divisible by this constant?" check.
; The IR computes urem of %X by <14, 14, 2147483648, 14>, compares every lane
; against 0 and zero-extends the i1 result, so each returned lane is 0 or 1.
; The CHECK blocks are autogenerated, one per FileCheck prefix, and pin the
; exact instruction sequence expected for each RUN configuration. Do not edit
; them by hand; regenerate with utils/update_llc_test_checks.py.
; Note: the AVX512VL expectation contains no subtract; it is a
; vpmulld/vprorvd/vpminud/vpcmpeqd/vpsrld sequence, unlike the other prefixes,
; which end in psubd + compare-with-zero + shift-right-by-31.
1276define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
1277; CHECK-SSE2-LABEL: test_urem_even_INT_MIN:
1278; CHECK-SSE2:       # %bb.0:
1279; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
1280; CHECK-SSE2-NEXT:    psrld $1, %xmm1
1281; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
1282; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
1283; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
1284; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
1285; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1286; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
1287; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1288; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1289; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
1290; CHECK-SSE2-NEXT:    psrld $2, %xmm1
1291; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
1292; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
1293; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1294; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1295; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
1296; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1297; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1298; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
1299; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1300; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1301; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1302; CHECK-SSE2-NEXT:    retq
1303;
1304; CHECK-SSE41-LABEL: test_urem_even_INT_MIN:
1305; CHECK-SSE41:       # %bb.0:
1306; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
1307; CHECK-SSE41-NEXT:    psrld $1, %xmm1
1308; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1309; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
1310; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
1311; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1312; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
1313; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1314; CHECK-SSE41-NEXT:    psrld $2, %xmm2
1315; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
1316; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
1317; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
1318; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1319; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1320; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1321; CHECK-SSE41-NEXT:    retq
1322;
1323; CHECK-AVX1-LABEL: test_urem_even_INT_MIN:
1324; CHECK-AVX1:       # %bb.0:
1325; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
1326; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
1327; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm2, %xmm2
1328; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1329; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1330; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
1331; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1332; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
1333; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
1334; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1335; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1336; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1337; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1338; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1339; CHECK-AVX1-NEXT:    retq
1340;
1341; CHECK-AVX2-LABEL: test_urem_even_INT_MIN:
1342; CHECK-AVX2:       # %bb.0:
1343; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
1344; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1345; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
1346; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
1347; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
1348; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1349; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1350; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
1351; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1352; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1353; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1354; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1355; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1356; CHECK-AVX2-NEXT:    retq
1357;
1358; CHECK-AVX512VL-LABEL: test_urem_even_INT_MIN:
1359; CHECK-AVX512VL:       # %bb.0:
1360; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1361; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1362; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1363; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1364; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1365; CHECK-AVX512VL-NEXT:    retq
; IR under test: remainder by the constant vector, compare with zero, widen i1 -> i32.
1366  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
1367  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1368  %ret = zext <4 x i1> %cmp to <4 x i32>
1369  ret <4 x i32> %ret
1370}
1371
1372; One INT_MIN divisor in odd+even divisor
; Codegen test: per-lane "is %X evenly divisible by this constant?" check with
; a mixed divisor vector. The IR computes urem of %X by
; <5, 14, 2147483648, 100>, compares every lane against 0 and zero-extends the
; i1 result, so each returned lane is 0 or 1.
; The CHECK blocks are autogenerated, one per FileCheck prefix, and pin the
; exact instruction sequence expected for each RUN configuration. Do not edit
; them by hand; regenerate with utils/update_llc_test_checks.py.
; Note: the AVX512VL expectation contains no subtract; it is a
; vpmulld/vprorvd/vpminud/vpcmpeqd/vpsrld sequence, unlike the other prefixes,
; which end in psubd + compare-with-zero + shift-right-by-31.
1373define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
1374; CHECK-SSE2-LABEL: test_urem_odd_even_INT_MIN:
1375; CHECK-SSE2:       # %bb.0:
1376; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2454267027,2,1374389535]
1377; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
1378; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
1379; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
1380; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
1381; CHECK-SSE2-NEXT:    psrld $1, %xmm3
1382; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
1383; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1384; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
1385; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
1386; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1387; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
1388; CHECK-SSE2-NEXT:    psrld $2, %xmm3
1389; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
1390; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [5,14,2147483648,100]
1391; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
1392; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1393; CHECK-SSE2-NEXT:    psrld $5, %xmm1
1394; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
1395; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
1396; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
1397; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1398; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1399; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
1400; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1401; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1402; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1403; CHECK-SSE2-NEXT:    retq
1404;
1405; CHECK-SSE41-LABEL: test_urem_odd_even_INT_MIN:
1406; CHECK-SSE41:       # %bb.0:
1407; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
1408; CHECK-SSE41-NEXT:    psrld $1, %xmm1
1409; CHECK-SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
1410; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,2454267027,2,1374389535]
1411; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1412; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
1413; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
1414; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1415; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
1416; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1417; CHECK-SSE41-NEXT:    psrld $2, %xmm2
1418; CHECK-SSE41-NEXT:    psrld $5, %xmm3
1419; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7]
1420; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
1421; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1422; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
1423; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
1424; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1425; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1426; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1427; CHECK-SSE41-NEXT:    retq
1428;
1429; CHECK-AVX1-LABEL: test_urem_odd_even_INT_MIN:
1430; CHECK-AVX1:       # %bb.0:
1431; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
1432; CHECK-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
1433; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,2,1374389535]
1434; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1435; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
1436; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
1437; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1438; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1439; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
1440; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
1441; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
1442; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1443; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1444; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1445; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1446; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1447; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1448; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1449; CHECK-AVX1-NEXT:    retq
1450;
1451; CHECK-AVX2-LABEL: test_urem_odd_even_INT_MIN:
1452; CHECK-AVX2:       # %bb.0:
1453; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,2,1374389535]
1454; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1455; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
1456; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1457; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
1458; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
1459; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1460; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1461; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
1462; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1463; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1464; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1465; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1466; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1467; CHECK-AVX2-NEXT:    retq
1468;
1469; CHECK-AVX512VL-LABEL: test_urem_odd_even_INT_MIN:
1470; CHECK-AVX512VL:       # %bb.0:
1471; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1472; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1473; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1474; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1475; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1476; CHECK-AVX512VL-NEXT:    retq
; IR under test: remainder by the constant vector, compare with zero, widen i1 -> i32.
1477  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100>
1478  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1479  %ret = zext <4 x i1> %cmp to <4 x i32>
1480  ret <4 x i32> %ret
1481}
1482
1483;==============================================================================;
1484
1485; One all-ones divisor and one power-of-two divisor in odd divisor
; Codegen test: per-lane "is %X evenly divisible by this constant?" check.
; The IR computes urem of %X by <5, 4294967295, 16, 5> (lane 1 is the all-ones
; value, lane 2 is a power of two), compares every lane against 0 and
; zero-extends the i1 result, so each returned lane is 0 or 1.
; The CHECK blocks are autogenerated, one per FileCheck prefix, and pin the
; exact instruction sequence expected for each RUN configuration. Do not edit
; them by hand; regenerate with utils/update_llc_test_checks.py.
; Note: the AVX512VL expectation contains no subtract; it is a
; vpmulld/vprorvd/vpminud/vpcmpeqd/vpsrld sequence, unlike the other prefixes,
; which end in psubd + compare-with-zero + shift-right-by-31.
1486define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
1487; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo:
1488; CHECK-SSE2:       # %bb.0:
1489; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,3435973837]
1490; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
1491; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
1492; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
1493; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1494; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1495; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
1496; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
1497; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1498; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
1499; CHECK-SSE2-NEXT:    psrld $2, %xmm1
1500; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
1501; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [5,4294967295,16,5]
1502; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
1503; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1504; CHECK-SSE2-NEXT:    psrld $31, %xmm3
1505; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
1506; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
1507; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
1508; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1509; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1510; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
1511; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1512; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1513; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1514; CHECK-SSE2-NEXT:    retq
1515;
1516; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo:
1517; CHECK-SSE41:       # %bb.0:
1518; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,3435973837]
1519; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1520; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1521; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm3
1522; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
1523; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1524; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
1525; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1526; CHECK-SSE41-NEXT:    psrld $2, %xmm2
1527; CHECK-SSE41-NEXT:    psrld $31, %xmm3
1528; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1529; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
1530; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1531; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
1532; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
1533; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1534; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1535; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1536; CHECK-SSE41-NEXT:    retq
1537;
1538; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo:
1539; CHECK-AVX1:       # %bb.0:
1540; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,3435973837]
1541; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1542; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1543; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
1544; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
1545; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1546; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1547; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
1548; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
1549; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
1550; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
1551; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1552; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1553; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1554; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1555; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1556; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1557; CHECK-AVX1-NEXT:    retq
1558;
1559; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo:
1560; CHECK-AVX2:       # %bb.0:
1561; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,3435973837]
1562; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1563; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1564; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
1565; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
1566; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1567; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1568; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
1569; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1570; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1571; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1572; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1573; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1574; CHECK-AVX2-NEXT:    retq
1575;
1576; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo:
1577; CHECK-AVX512VL:       # %bb.0:
1578; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1579; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1580; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1581; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1582; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1583; CHECK-AVX512VL-NEXT:    retq
; IR under test: remainder by the constant vector, compare with zero, widen i1 -> i32.
1584  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
1585  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1586  %ret = zext <4 x i1> %cmp to <4 x i32>
1587  ret <4 x i32> %ret
1588}
1589
1590; One all-ones divisor and one power-of-two divisor in even divisor
; Codegen test: per-lane "is %X evenly divisible by this constant?" check.
; The IR computes urem of %X by <14, 4294967295, 16, 14> (lane 1 is the
; all-ones value, lane 2 is a power of two), compares every lane against 0 and
; zero-extends the i1 result, so each returned lane is 0 or 1.
; The CHECK blocks are autogenerated, one per FileCheck prefix, and pin the
; exact instruction sequence expected for each RUN configuration. Do not edit
; them by hand; regenerate with utils/update_llc_test_checks.py.
; Note: the AVX512VL expectation contains no subtract; it is a
; vpmulld/vprorvd/vpminud/vpcmpeqd/vpsrld sequence, unlike the other prefixes,
; which end in psubd + compare-with-zero + shift-right-by-31.
1591define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
1592; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo:
1593; CHECK-SSE2:       # %bb.0:
1594; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
1595; CHECK-SSE2-NEXT:    psrld $1, %xmm1
1596; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
1597; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[3,3]
1598; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,2147483649,268435456,2454267027]
1599; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1600; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm4
1601; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,3,2,3]
1602; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
1603; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
1604; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1605; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1606; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm2
1607; CHECK-SSE2-NEXT:    psrld $2, %xmm2
1608; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
1609; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [14,4294967295,16,14]
1610; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
1611; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1612; CHECK-SSE2-NEXT:    psrld $31, %xmm4
1613; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,1],xmm2[3,3]
1614; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1615; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
1616; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1617; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1618; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
1619; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1620; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1621; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1622; CHECK-SSE2-NEXT:    retq
1623;
1624; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo:
1625; CHECK-SSE41:       # %bb.0:
1626; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
1627; CHECK-SSE41-NEXT:    psrld $1, %xmm1
1628; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
1629; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1630; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,2147483649,268435456,2454267027]
1631; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1632; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm4
1633; CHECK-SSE41-NEXT:    pmuludq %xmm3, %xmm1
1634; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1635; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
1636; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
1637; CHECK-SSE41-NEXT:    psrld $2, %xmm2
1638; CHECK-SSE41-NEXT:    psrld $31, %xmm4
1639; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm2[4,5,6,7]
1640; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
1641; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
1642; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
1643; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
1644; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1645; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1646; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1647; CHECK-SSE41-NEXT:    retq
1648;
1649; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo:
1650; CHECK-AVX1:       # %bb.0:
1651; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
1652; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
1653; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1654; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2454267027,2147483649,268435456,2454267027]
1655; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1656; CHECK-AVX1-NEXT:    vpmuludq %xmm4, %xmm2, %xmm2
1657; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
1658; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1659; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1660; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
1661; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
1662; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
1663; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
1664; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1665; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1666; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1667; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1668; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1669; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1670; CHECK-AVX1-NEXT:    retq
1671;
1672; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo:
1673; CHECK-AVX2:       # %bb.0:
1674; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,268435456,2454267027]
1675; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1676; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
1677; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1678; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
1679; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
1680; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1681; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1682; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
1683; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1684; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1685; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1686; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1687; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1688; CHECK-AVX2-NEXT:    retq
1689;
1690; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo:
1691; CHECK-AVX512VL:       # %bb.0:
1692; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1693; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1694; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1695; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1696; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1697; CHECK-AVX512VL-NEXT:    retq
; IR under test: remainder by the constant vector, compare with zero, widen i1 -> i32.
1698  %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
1699  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1700  %ret = zext <4 x i1> %cmp to <4 x i32>
1701  ret <4 x i32> %ret
1702}
1703
1704; One all-ones divisor and one power-of-two divisor in odd+even divisor
; Codegen test: per-lane "is %X evenly divisible by this constant?" check with
; a mixed divisor vector. The IR computes urem of %X by
; <5, 4294967295, 16, 100> (lane 1 is the all-ones value, lane 2 is a power of
; two), compares every lane against 0 and zero-extends the i1 result, so each
; returned lane is 0 or 1.
; The CHECK blocks are autogenerated, one per FileCheck prefix, and pin the
; exact instruction sequence expected for each RUN configuration. Do not edit
; them by hand; regenerate with utils/update_llc_test_checks.py.
; Note: the AVX512VL expectation contains no subtract; it is a
; vpmulld/vprorvd/vpminud/vpcmpeqd/vpsrld sequence, unlike the other prefixes,
; which end in psubd + compare-with-zero + shift-right-by-31.
1705define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
1706; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1707; CHECK-SSE2:       # %bb.0:
1708; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,1374389535]
1709; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
1710; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
1711; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
1712; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1713; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1714; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
1715; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
1716; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1717; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
1718; CHECK-SSE2-NEXT:    psrld $2, %xmm1
1719; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
1720; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [5,4294967295,16,100]
1721; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
1722; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1723; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm4
1724; CHECK-SSE2-NEXT:    psrld $5, %xmm4
1725; CHECK-SSE2-NEXT:    psrld $31, %xmm3
1726; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm4[3,3]
1727; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1728; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
1729; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1730; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1731; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
1732; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1733; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1734; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1735; CHECK-SSE2-NEXT:    retq
1736;
1737; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1738; CHECK-SSE41:       # %bb.0:
1739; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,1374389535]
1740; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1741; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1742; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm3
1743; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm2
1744; CHECK-SSE41-NEXT:    psrld $5, %xmm2
1745; CHECK-SSE41-NEXT:    psrld $31, %xmm3
1746; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1747; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
1748; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
1749; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1750; CHECK-SSE41-NEXT:    psrld $2, %xmm1
1751; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
1752; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1753; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
1754; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
1755; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1756; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1757; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1758; CHECK-SSE41-NEXT:    retq
1759;
1760; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1761; CHECK-AVX1:       # %bb.0:
1762; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,1374389535]
1763; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1764; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1765; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
1766; CHECK-AVX1-NEXT:    vpsrld $5, %xmm2, %xmm3
1767; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
1768; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
1769; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
1770; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
1771; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1772; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
1773; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
1774; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1775; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1776; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1777; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1778; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1779; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1780; CHECK-AVX1-NEXT:    retq
1781;
1782; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1783; CHECK-AVX2:       # %bb.0:
1784; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,1374389535]
1785; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1786; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1787; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
1788; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
1789; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1790; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1791; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
1792; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1793; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1794; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1795; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1796; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1797; CHECK-AVX2-NEXT:    retq
1798;
1799; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1800; CHECK-AVX512VL:       # %bb.0:
1801; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1802; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1803; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1804; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1805; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1806; CHECK-AVX512VL-NEXT:    retq
; IR under test: remainder by the constant vector, compare with zero, widen i1 -> i32.
1807  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
1808  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1809  %ret = zext <4 x i1> %cmp to <4 x i32>
1810  ret <4 x i32> %ret
1811}
1812
1813;------------------------------------------------------------------------------;
1814
1815; One all-ones divisor and one one divisor in odd divisor. Checks the per-target lowering of zext((X urem C) == 0) for a non-splat constant C.
1816define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind {
1817; CHECK-SSE2-LABEL: test_urem_odd_allones_and_one:
1818; CHECK-SSE2:       # %bb.0:
1819; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,4294967295,0,3435973837]
1820; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1821; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
1822; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1823; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1824; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
1825; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1826; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1827; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
1828; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
1829; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
1830; CHECK-SSE2-NEXT:    retq
1831;
1832; CHECK-SSE41-LABEL: test_urem_odd_allones_and_one:
1833; CHECK-SSE41:       # %bb.0:
1834; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
1835; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,1,4294967295,858993459]
1836; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1837; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1838; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1839; CHECK-SSE41-NEXT:    retq
1840;
1841; CHECK-AVX-LABEL: test_urem_odd_allones_and_one:
1842; CHECK-AVX:       # %bb.0:
1843; CHECK-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1844; CHECK-AVX-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1845; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1846; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
1847; CHECK-AVX-NEXT:    retq
1848  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5> ; lanes: odd (5), all-ones, one, odd (5)
1849  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1850  %ret = zext <4 x i1> %cmp to <4 x i32>
1851  ret <4 x i32> %ret
1852}
1853
1854; One all-ones divisor and one one divisor in even divisor. Checks the per-target lowering of zext((X urem C) == 0) for a non-splat constant C.
1855define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
1856; CHECK-SSE2-LABEL: test_urem_even_allones_and_one:
1857; CHECK-SSE2:       # %bb.0:
1858; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
1859; CHECK-SSE2-NEXT:    psrld $1, %xmm1
1860; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
1861; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[3,3]
1862; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,2147483649,0,2454267027]
1863; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1864; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm4
1865; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,3,2,3]
1866; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
1867; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
1868; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1869; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1870; CHECK-SSE2-NEXT:    psrld $2, %xmm1
1871; CHECK-SSE2-NEXT:    psrld $31, %xmm4
1872; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,0],xmm1[3,3]
1873; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [14,4294967295,1,14]
1874; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1875; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
1876; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1877; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
1878; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
1879; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1880; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1881; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
1882; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1883; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1884; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1885; CHECK-SSE2-NEXT:    retq
1886;
1887; CHECK-SSE41-LABEL: test_urem_even_allones_and_one:
1888; CHECK-SSE41:       # %bb.0:
1889; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
1890; CHECK-SSE41-NEXT:    psrld $1, %xmm1
1891; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
1892; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1893; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,2147483649,0,2454267027]
1894; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1895; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm4
1896; CHECK-SSE41-NEXT:    pmuludq %xmm3, %xmm1
1897; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1898; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
1899; CHECK-SSE41-NEXT:    psrld $2, %xmm1
1900; CHECK-SSE41-NEXT:    psrld $31, %xmm4
1901; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm1[0,1],xmm4[2,3],xmm1[4,5,6,7]
1902; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm0[4,5],xmm4[6,7]
1903; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm4
1904; CHECK-SSE41-NEXT:    psubd %xmm4, %xmm0
1905; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1906; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1907; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1908; CHECK-SSE41-NEXT:    retq
1909;
1910; CHECK-AVX1-LABEL: test_urem_even_allones_and_one:
1911; CHECK-AVX1:       # %bb.0:
1912; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
1913; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
1914; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1915; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2454267027,2147483649,0,2454267027]
1916; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1917; CHECK-AVX1-NEXT:    vpmuludq %xmm4, %xmm2, %xmm2
1918; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
1919; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1920; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1921; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
1922; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
1923; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7]
1924; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
1925; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1926; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1927; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1928; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1929; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1930; CHECK-AVX1-NEXT:    retq
1931;
1932; CHECK-AVX2-LABEL: test_urem_even_allones_and_one:
1933; CHECK-AVX2:       # %bb.0:
1934; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027]
1935; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1936; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
1937; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1938; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
1939; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
1940; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1941; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1942; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
1943; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3]
1944; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1945; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1946; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1947; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1948; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1949; CHECK-AVX2-NEXT:    retq
1950;
1951; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_one:
1952; CHECK-AVX512VL:       # %bb.0:
1953; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1954; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1955; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1956; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1957; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1958; CHECK-AVX512VL-NEXT:    retq
1959  %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14> ; lanes: even (14), all-ones, one, even (14)
1960  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1961  %ret = zext <4 x i1> %cmp to <4 x i32>
1962  ret <4 x i32> %ret
1963}
1964
1965; One all-ones divisor and one one divisor in odd+even divisor. Checks the per-target lowering of zext((X urem C) == 0) for a non-splat constant C.
1966define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
1967; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_one:
1968; CHECK-SSE2:       # %bb.0:
1969; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,1374389535]
1970; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1971; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1972; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm3
1973; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm2
1974; CHECK-SSE2-NEXT:    psrld $5, %xmm2
1975; CHECK-SSE2-NEXT:    psrld $31, %xmm3
1976; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3]
1977; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [5,4294967295,1,100]
1978; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1979; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm4
1980; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
1981; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
1982; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1983; CHECK-SSE2-NEXT:    psrld $2, %xmm1
1984; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
1985; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
1986; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1987; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1988; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
1989; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1990; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1991; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1992; CHECK-SSE2-NEXT:    retq
1993;
1994; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_one:
1995; CHECK-SSE41:       # %bb.0:
1996; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,1374389535]
1997; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1998; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1999; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm3
2000; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm2
2001; CHECK-SSE41-NEXT:    psrld $5, %xmm2
2002; CHECK-SSE41-NEXT:    psrld $31, %xmm3
2003; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2004; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
2005; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2006; CHECK-SSE41-NEXT:    psrld $2, %xmm1
2007; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2008; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2009; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
2010; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
2011; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2012; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2013; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2014; CHECK-SSE41-NEXT:    retq
2015;
2016; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_one:
2017; CHECK-AVX1:       # %bb.0:
2018; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,1374389535]
2019; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2020; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2021; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
2022; CHECK-AVX1-NEXT:    vpsrld $5, %xmm2, %xmm3
2023; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
2024; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
2025; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
2026; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2027; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
2028; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2029; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2030; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2031; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2032; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2033; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2034; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2035; CHECK-AVX1-NEXT:    retq
2036;
2037; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_one:
2038; CHECK-AVX2:       # %bb.0:
2039; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,1374389535]
2040; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2041; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2042; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
2043; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
2044; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2045; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2046; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
2047; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3]
2048; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2049; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2050; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2051; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2052; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2053; CHECK-AVX2-NEXT:    retq
2054;
2055; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_one:
2056; CHECK-AVX512VL:       # %bb.0:
2057; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2058; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2059; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2060; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2061; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2062; CHECK-AVX512VL-NEXT:    retq
2063  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100> ; lanes: odd (5), all-ones, one, even (100)
2064  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
2065  %ret = zext <4 x i1> %cmp to <4 x i32>
2066  ret <4 x i32> %ret
2067}
2068
2069;------------------------------------------------------------------------------;
2070
2071; One power-of-two divisor and one one divisor in odd divisor. Checks the per-target lowering of zext((X urem C) == 0) for a non-splat constant C.
2072define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
2073; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo_and_one:
2074; CHECK-SSE2:       # %bb.0:
2075; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,268435456,0,3435973837]
2076; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
2077; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
2078; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
2079; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2080; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2081; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
2082; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
2083; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2084; CHECK-SSE2-NEXT:    psrld $2, %xmm2
2085; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3]
2086; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [5,16,1,5]
2087; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
2088; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm4
2089; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
2090; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3]
2091; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
2092; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
2093; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
2094; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
2095; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2096; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2097; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2098; CHECK-SSE2-NEXT:    retq
2099;
2100; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo_and_one:
2101; CHECK-SSE41:       # %bb.0:
2102; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,268435456,0,3435973837]
2103; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2104; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2105; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm3
2106; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
2107; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2108; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2109; CHECK-SSE41-NEXT:    psrld $2, %xmm1
2110; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7]
2111; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2112; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
2113; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
2114; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2115; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2116; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2117; CHECK-SSE41-NEXT:    retq
2118;
2119; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo_and_one:
2120; CHECK-AVX1:       # %bb.0:
2121; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,3435973837]
2122; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2123; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2124; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
2125; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
2126; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2127; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2128; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
2129; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7]
2130; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2131; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2132; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2133; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2134; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2135; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2136; CHECK-AVX1-NEXT:    retq
2137;
2138; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo_and_one:
2139; CHECK-AVX2:       # %bb.0:
2140; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,3435973837]
2141; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2142; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2143; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
2144; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
2145; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2146; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2147; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
2148; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3]
2149; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2150; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2151; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2152; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2153; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2154; CHECK-AVX2-NEXT:    retq
2155;
2156; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo_and_one:
2157; CHECK-AVX512VL:       # %bb.0:
2158; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2159; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2160; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2161; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2162; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2163; CHECK-AVX512VL-NEXT:    retq
2164  %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5> ; lanes: odd (5), power-of-two (16), one, odd (5)
2165  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
2166  %ret = zext <4 x i1> %cmp to <4 x i32>
2167  ret <4 x i32> %ret
2168}
2169
2170; One power-of-two divisor and one one divisor in even divisor. Checks the per-target lowering of zext((X urem C) == 0) for a non-splat constant C.
2171define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
2172; CHECK-SSE2-LABEL: test_urem_even_poweroftwo_and_one:
2173; CHECK-SSE2:       # %bb.0:
2174; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
2175; CHECK-SSE2-NEXT:    psrld $1, %xmm1
2176; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
2177; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[3,3]
2178; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,268435456,0,2454267027]
2179; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2180; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm4
2181; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,3,2,3]
2182; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
2183; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
2184; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
2185; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2186; CHECK-SSE2-NEXT:    psrld $2, %xmm1
2187; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[3,3]
2188; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [14,16,1,14]
2189; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
2190; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
2191; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2192; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
2193; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
2194; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2195; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
2196; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
2197; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2198; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2199; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2200; CHECK-SSE2-NEXT:    retq
2201;
2202; CHECK-SSE41-LABEL: test_urem_even_poweroftwo_and_one:
2203; CHECK-SSE41:       # %bb.0:
2204; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
2205; CHECK-SSE41-NEXT:    psrld $1, %xmm1
2206; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
2207; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2208; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,268435456,0,2454267027]
2209; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2210; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm4
2211; CHECK-SSE41-NEXT:    pmuludq %xmm3, %xmm1
2212; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2213; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
2214; CHECK-SSE41-NEXT:    psrld $2, %xmm1
2215; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5,6,7]
2216; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2217; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
2218; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
2219; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2220; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2221; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2222; CHECK-SSE41-NEXT:    retq
2223;
2224; CHECK-AVX1-LABEL: test_urem_even_poweroftwo_and_one:
2225; CHECK-AVX1:       # %bb.0:
2226; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
2227; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
2228; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2229; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2454267027,268435456,0,2454267027]
2230; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2231; CHECK-AVX1-NEXT:    vpmuludq %xmm4, %xmm2, %xmm2
2232; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
2233; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2234; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2235; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
2236; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7]
2237; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2238; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2239; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2240; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2241; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2242; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2243; CHECK-AVX1-NEXT:    retq
2244;
2245; CHECK-AVX2-LABEL: test_urem_even_poweroftwo_and_one:
2246; CHECK-AVX2:       # %bb.0:
2247; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,268435456,0,2454267027]
2248; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2249; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
2250; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2251; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
2252; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
2253; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2254; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2255; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
2256; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3]
2257; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2258; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2259; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2260; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2261; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2262; CHECK-AVX2-NEXT:    retq
2263;
2264; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo_and_one:
2265; CHECK-AVX512VL:       # %bb.0:
2266; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2267; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2268; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2269; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2270; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2271; CHECK-AVX512VL-NEXT:    retq
2272  %urem = urem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14> ; lanes: even (14), power-of-two (16), one, even (14)
2273  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
2274  %ret = zext <4 x i1> %cmp to <4 x i32>
2275  ret <4 x i32> %ret
2276}
2277
2278; One power-of-two divisor and one one divisor in odd+even divisor. Checks the per-target lowering of zext((X urem C) == 0) for a non-splat constant C.
2279define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
2280; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo_and_one:
2281; CHECK-SSE2:       # %bb.0:
2282; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,268435456,0,1374389535]
2283; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2284; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2285; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm3
2286; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm2
2287; CHECK-SSE2-NEXT:    psrld $5, %xmm2
2288; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3]
2289; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [5,16,1,100]
2290; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2291; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm4
2292; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
2293; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
2294; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2295; CHECK-SSE2-NEXT:    psrld $2, %xmm1
2296; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
2297; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
2298; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2299; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
2300; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
2301; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2302; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2303; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2304; CHECK-SSE2-NEXT:    retq
2305;
2306; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo_and_one:
2307; CHECK-SSE41:       # %bb.0:
2308; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,268435456,0,1374389535]
2309; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2310; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2311; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm3
2312; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm2
2313; CHECK-SSE41-NEXT:    psrld $5, %xmm2
2314; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2315; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
2316; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2317; CHECK-SSE41-NEXT:    psrld $2, %xmm1
2318; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2319; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2320; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
2321; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
2322; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2323; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2324; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2325; CHECK-SSE41-NEXT:    retq
2326;
2327; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo_and_one:
2328; CHECK-AVX1:       # %bb.0:
2329; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,1374389535]
2330; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2331; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2332; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
2333; CHECK-AVX1-NEXT:    vpsrld $5, %xmm2, %xmm3
2334; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
2335; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
2336; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2337; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
2338; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2339; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2340; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2341; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2342; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2343; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2344; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2345; CHECK-AVX1-NEXT:    retq
2346;
2347; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo_and_one:
2348; CHECK-AVX2:       # %bb.0:
2349; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,1374389535]
2350; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2351; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2352; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
2353; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
2354; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2355; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2356; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
2357; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3]
2358; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2359; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2360; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2361; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2362; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2363; CHECK-AVX2-NEXT:    retq
2364;
2365; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo_and_one:
2366; CHECK-AVX512VL:       # %bb.0:
2367; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2368; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2369; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2370; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2371; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2372; CHECK-AVX512VL-NEXT:    retq
2373  %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100> ; lanes: odd (5), power-of-two (16), one, even (100)
2374  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
2375  %ret = zext <4 x i1> %cmp to <4 x i32>
2376  ret <4 x i32> %ret
2377}
2378
2379;------------------------------------------------------------------------------;
2380
; Odd + all-ones + power-of-two + one divisors.
; Per-lane divisors are <5, 4294967295, 16, 1>. The function computes
; X urem divisor, compares each lane against zero (icmp eq), and zero-extends
; the <4 x i1> result, so each output lane is 1 when the remainder is 0, else 0.
; The CHECK lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script rather than editing them by hand.
; NOTE(review): the AVX512VL expectations are a short
; vpmulld/vprorvd/vpminud/vpcmpeqd sequence, while the other prefixes expect a
; longer pmuludq / shift / multiply / psubd sequence followed by a compare
; against zero -- presumably the short form depends on a per-lane vector
; rotate being available; confirm against the lowering code.
2381define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
2382; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
2383; CHECK-SSE2:       # %bb.0:
2384; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,0]
2385; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
2386; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
2387; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
2388; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2389; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2390; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
2391; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
2392; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2393; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
2394; CHECK-SSE2-NEXT:    psrld $2, %xmm1
2395; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
2396; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [5,4294967295,16,1]
2397; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
2398; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2399; CHECK-SSE2-NEXT:    psrld $31, %xmm3
2400; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
2401; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2402; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
2403; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2404; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2405; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
2406; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2407; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2408; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2409; CHECK-SSE2-NEXT:    retq
2410;
2411; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
2412; CHECK-SSE41:       # %bb.0:
2413; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,0]
2414; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm2
2415; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm2
2416; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[3,3,3,3]
2417; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2418; CHECK-SSE41-NEXT:    psrld $2, %xmm2
2419; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
2420; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2421; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2422; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
2423; CHECK-SSE41-NEXT:    psrld $31, %xmm3
2424; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
2425; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5],xmm0[6,7]
2426; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm3
2427; CHECK-SSE41-NEXT:    psubd %xmm3, %xmm0
2428; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2429; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2430; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2431; CHECK-SSE41-NEXT:    retq
2432;
2433; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
2434; CHECK-AVX1:       # %bb.0:
2435; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,0]
2436; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
2437; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[3,3,3,3]
2438; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2439; CHECK-AVX1-NEXT:    vpsrld $2, %xmm2, %xmm2
2440; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
2441; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2442; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2443; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
2444; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
2445; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
2446; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm0[6,7]
2447; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2448; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2449; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2450; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2451; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2452; CHECK-AVX1-NEXT:    retq
2453;
2454; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
2455; CHECK-AVX2:       # %bb.0:
2456; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,0]
2457; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2458; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2459; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
2460; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
2461; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2462; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2463; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
2464; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3]
2465; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2466; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2467; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2468; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2469; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2470; CHECK-AVX2-NEXT:    retq
2471;
2472; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
2473; CHECK-AVX512VL:       # %bb.0:
2474; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2475; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2476; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2477; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2478; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2479; CHECK-AVX512VL-NEXT:    retq
2480  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1>
2481  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
2482  %ret = zext <4 x i1> %cmp to <4 x i32>
2483  ret <4 x i32> %ret
2484}
2485
; Even + all-ones + power-of-two + one divisors.
; Per-lane divisors are <14, 4294967295, 16, 1>. The function computes
; X urem divisor, compares each lane against zero (icmp eq), and zero-extends
; the <4 x i1> result, so each output lane is 1 when the remainder is 0, else 0.
; The CHECK lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script rather than editing them by hand.
; NOTE(review): the SSE2/SSE4.1/AVX1/AVX2 expectations shift X right before the
; first multiply (psrld $1 or vpsrlvd) -- presumably a pre-shift for the even
; divisor 14; the AVX512VL expectations are instead a short
; vpmulld/vprorvd/vpminud/vpcmpeqd sequence. Confirm against the lowering code.
2486define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
2487; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
2488; CHECK-SSE2:       # %bb.0:
2489; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2147483649,268435456,0]
2490; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2491; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2492; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm3
2493; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
2494; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm4
2495; CHECK-SSE2-NEXT:    psrld $1, %xmm4
2496; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,1],xmm0[2,3]
2497; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm4
2498; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3]
2499; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2500; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm2
2501; CHECK-SSE2-NEXT:    psrld $2, %xmm2
2502; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
2503; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [14,4294967295,16,1]
2504; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
2505; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2506; CHECK-SSE2-NEXT:    psrld $31, %xmm3
2507; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
2508; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2509; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
2510; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2511; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2512; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
2513; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2514; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2515; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2516; CHECK-SSE2-NEXT:    retq
2517;
2518; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
2519; CHECK-SSE41:       # %bb.0:
2520; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
2521; CHECK-SSE41-NEXT:    psrld $1, %xmm1
2522; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5,6,7]
2523; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2147483649,268435456,0]
2524; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
2525; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
2526; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2527; CHECK-SSE41-NEXT:    psrld $2, %xmm1
2528; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
2529; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2530; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2531; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm3
2532; CHECK-SSE41-NEXT:    psrld $31, %xmm3
2533; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2534; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5],xmm0[6,7]
2535; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm3
2536; CHECK-SSE41-NEXT:    psubd %xmm3, %xmm0
2537; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2538; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2539; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2540; CHECK-SSE41-NEXT:    retq
2541;
2542; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
2543; CHECK-AVX1:       # %bb.0:
2544; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
2545; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5,6,7]
2546; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2454267027,2147483649,268435456,0]
2547; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
2548; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
2549; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2550; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
2551; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
2552; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2553; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2554; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
2555; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
2556; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2557; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm0[6,7]
2558; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2559; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2560; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2561; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2562; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2563; CHECK-AVX1-NEXT:    retq
2564;
2565; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
2566; CHECK-AVX2:       # %bb.0:
2567; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,268435456,0]
2568; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2569; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
2570; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2571; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
2572; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
2573; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2574; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2575; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
2576; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3]
2577; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2578; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2579; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2580; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2581; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2582; CHECK-AVX2-NEXT:    retq
2583;
2584; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
2585; CHECK-AVX512VL:       # %bb.0:
2586; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2587; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2588; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2589; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2590; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2591; CHECK-AVX512VL-NEXT:    retq
2592  %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1>
2593  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
2594  %ret = zext <4 x i1> %cmp to <4 x i32>
2595  ret <4 x i32> %ret
2596}
2597