; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=CHECK-SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefix=CHECK-AVX1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefix=CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefix=CHECK-AVX512VL

; Odd+Even divisors
;
; Per lane, tests whether %X is evenly divisible by the non-splat constant
; divisor vector <5, 14, 25, 100> (a mix of odd and even divisors): the signed
; remainder is compared for equality with zero and the <4 x i1> mask is
; zero-extended, so each result lane is 1 when the remainder is zero and 0
; otherwise.
;
; The assertions inside the function body are autogenerated from llc output,
; one group per target-feature configuration listed in the file header.
; Regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1717986919,2454267027,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm4
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
; CHECK-SSE2-NEXT:    pand %xmm3, %xmm4
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [0,4294967295,0,0]
; CHECK-SSE2-NEXT:    pand %xmm0, %xmm3
; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm2
; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
; CHECK-SSE2-NEXT:    psrad $5, %xmm3
; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
; CHECK-SSE2-NEXT:    psrad $3, %xmm4
; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm5
; CHECK-SSE2-NEXT:    psrad $1, %xmm5
; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
; CHECK-SSE2-NEXT:    psrld $31, %xmm2
; CHECK-SSE2-NEXT:    paddd %xmm5, %xmm2
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,14,25,100]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_srem_odd_even:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,2454267027,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm0[2,3],xmm3[4,5,6,7]
; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm3
; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm1
; CHECK-SSE41-NEXT:    psrad $5, %xmm1
; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm4
; CHECK-SSE41-NEXT:    psrad $3, %xmm4
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm1[4,5,6,7]
; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm5
; CHECK-SSE41-NEXT:    psrad $1, %xmm5
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm1[2,3],xmm5[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $31, %xmm3
; CHECK-SSE41-NEXT:    paddd %xmm5, %xmm3
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm3
; CHECK-SSE41-NEXT:    psubd %xmm3, %xmm0
; CHECK-SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_srem_odd_even:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2,3],xmm2[4,5,6,7]
; CHECK-AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsrad $5, %xmm1, %xmm3
; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm4
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm5
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_srem_odd_even:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1],xmm2[2,3]
; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm3
; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;==============================================================================;

; One all-ones divisor in odd divisor
;
; Per lane, tests whether %X is evenly divisible by <5, 5, 4294967295, 5>.
; Lane 2 holds the all-ones i32 bit pattern, which is -1 when read as the
; signed divisor of srem; the other lanes use the odd divisor 5. The remainder
; is compared for equality with zero and the <4 x i1> mask is zero-extended, so
; each result lane is 1 when the remainder is zero and 0 otherwise.
;
; The assertions inside the function body are autogenerated from llc output;
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_allones_eq:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_eq:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    paddd {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_eq:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_srem_odd_allones_eq:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_srem_odd_allones_eq:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; One all-ones divisor in odd divisor, inequality predicate
;
; Uses the divisor vector <5, 5, 4294967295, 5> (lane 2 is the all-ones i32 bit
; pattern, i.e. -1 as the signed divisor of srem) and compares the remainder
; with `icmp ne`. After the zero-extension each result lane is 1 when the
; remainder is non-zero and 0 when the lane divides evenly.
;
; The assertions inside the function body are autogenerated from llc output;
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_allones_ne:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_ne:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    paddd {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_ne:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_srem_odd_allones_ne:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_srem_odd_allones_ne:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpandnd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
  %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One all-ones divisor in even divisor
;
; Per lane, tests whether %X is evenly divisible by <14, 14, 4294967295, 14>.
; Lane 2 holds the all-ones i32 bit pattern, which is -1 when read as the
; signed divisor of srem; the other lanes use the even divisor 14. The
; remainder is compared for equality with zero and the <4 x i1> mask is
; zero-extended, so each result lane is 1 when the remainder is zero and 0
; otherwise.
;
; The assertions inside the function body are autogenerated from llc output;
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_allones_eq:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT:    pand %xmm2, %xmm3
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2454267027,2454267027,0,2454267027]
; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm5
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm6
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1]
; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm3
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = <1,u,4294967295,u>
; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm4
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm5
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
; CHECK-SSE2-NEXT:    movdqa %xmm4, %xmm3
; CHECK-SSE2-NEXT:    psrad $3, %xmm3
; CHECK-SSE2-NEXT:    movdqa %xmm4, %xmm5
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[2,0],xmm3[3,0]
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0,2]
; CHECK-SSE2-NEXT:    psrld $31, %xmm4
; CHECK-SSE2-NEXT:    pand %xmm2, %xmm4
; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm4
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; CHECK-SSE2-NEXT:    psubd %xmm3, %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_srem_even_allones_eq:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuldq {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,0,0,0]
; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,4294967295,1]
; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm1
; CHECK-SSE41-NEXT:    paddd %xmm2, %xmm1
; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
; CHECK-SSE41-NEXT:    psrad $3, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
; CHECK-SSE41-NEXT:    psrld $31, %xmm1
; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    paddd %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_srem_even_allones_eq:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_srem_even_allones_eq:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_srem_even_allones_eq:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; One all-ones divisor in even divisor, inequality predicate
;
; Uses the divisor vector <14, 14, 4294967295, 14> (lane 2 is the all-ones i32
; bit pattern, i.e. -1 as the signed divisor of srem) and compares the
; remainder with `icmp ne`. After the zero-extension each result lane is 1 when
; the remainder is non-zero and 0 when the lane divides evenly.
;
; The assertions inside the function body are autogenerated from llc output;
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_allones_ne:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT:    pand %xmm2, %xmm3
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2454267027,2454267027,0,2454267027]
; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm5
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-SSE2-NEXT:    pmuludq %xmm6, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm5
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1]
; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm6
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm6[0,2,2,3]
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm6 = <1,u,4294967295,u>
; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm6
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1]
; CHECK-SSE2-NEXT:    paddd %xmm5, %xmm6
; CHECK-SSE2-NEXT:    movdqa %xmm6, %xmm4
; CHECK-SSE2-NEXT:    psrad $3, %xmm4
; CHECK-SSE2-NEXT:    movdqa %xmm6, %xmm5
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[2,0],xmm4[3,0]
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0,2]
; CHECK-SSE2-NEXT:    psrld $31, %xmm6
; CHECK-SSE2-NEXT:    pand %xmm2, %xmm6
; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm6
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm6
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm6[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pandn %xmm3, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_srem_even_allones_ne:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuldq {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,0,0,0]
; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,4294967295,1]
; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm1
; CHECK-SSE41-NEXT:    paddd %xmm2, %xmm1
; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
; CHECK-SSE41-NEXT:    psrad $3, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
; CHECK-SSE41-NEXT:    psrld $31, %xmm1
; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    paddd %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
; CHECK-SSE41-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_srem_even_allones_ne:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_srem_even_allones_ne:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_srem_even_allones_ne:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpandnd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
  %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

525; One all-ones divisor in odd+even divisor
526define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
527; CHECK-SSE2-LABEL: test_srem_odd_even_allones_eq:
528; CHECK-SSE2:       # %bb.0:
529; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,4294967295,0]
530; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
531; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
532; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
533; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
534; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
535; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
536; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
537; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
538; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [1717986919,2454267027,0,1374389535]
539; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
540; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm1
541; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
542; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
543; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
544; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
545; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
546; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
547; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
548; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
549; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
550; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
551; CHECK-SSE2-NEXT:    pand %xmm0, %xmm5
552; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm5
553; CHECK-SSE2-NEXT:    psubd %xmm5, %xmm2
554; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
555; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
556; CHECK-SSE2-NEXT:    psrad $5, %xmm3
557; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
558; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
559; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
560; CHECK-SSE2-NEXT:    psrad $3, %xmm3
561; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm5
562; CHECK-SSE2-NEXT:    psrad $1, %xmm5
563; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
564; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
565; CHECK-SSE2-NEXT:    psrld $31, %xmm2
566; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
567; CHECK-SSE2-NEXT:    paddd %xmm5, %xmm2
568; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,14,4294967295,100]
569; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
570; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
571; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
572; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
573; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
574; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
575; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
576; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
577; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
578; CHECK-SSE2-NEXT:    psrld $31, %xmm0
579; CHECK-SSE2-NEXT:    retq
580;
581; CHECK-SSE41-LABEL: test_srem_odd_even_allones_eq:
582; CHECK-SSE41:       # %bb.0:
583; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
584; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
585; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
586; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
587; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
588; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
589; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
590; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4294967295,0]
591; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
592; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
593; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
594; CHECK-SSE41-NEXT:    psrad $5, %xmm1
595; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm3
596; CHECK-SSE41-NEXT:    psrad $3, %xmm3
597; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
598; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
599; CHECK-SSE41-NEXT:    psrad $1, %xmm1
600; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
601; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
602; CHECK-SSE41-NEXT:    psrld $31, %xmm2
603; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
604; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
605; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
606; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
607; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
608; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
609; CHECK-SSE41-NEXT:    psrld $31, %xmm0
610; CHECK-SSE41-NEXT:    retq
611;
612; CHECK-AVX1-LABEL: test_srem_odd_even_allones_eq:
613; CHECK-AVX1:       # %bb.0:
614; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
615; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
616; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
617; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
618; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
619; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
620; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
621; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
622; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
623; CHECK-AVX1-NEXT:    vpsrad $5, %xmm1, %xmm2
624; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm3
625; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
626; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
627; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
628; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
629; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
630; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
631; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
632; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
633; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
634; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
635; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
636; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
637; CHECK-AVX1-NEXT:    retq
638;
639; CHECK-AVX2-LABEL: test_srem_odd_even_allones_eq:
640; CHECK-AVX2:       # %bb.0:
641; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
642; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
643; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
644; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
645; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
646; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
647; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
648; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
649; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
650; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
651; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
652; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
653; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
654; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
655; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
656; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
657; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
658; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
659; CHECK-AVX2-NEXT:    retq
660;
661; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_eq:
662; CHECK-AVX512VL:       # %bb.0:
663; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
664; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
665; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
666; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
667; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
668; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
669; CHECK-AVX512VL-NEXT:    retq
670  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
671  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
672  %ret = zext <4 x i1> %cmp to <4 x i32>
673  ret <4 x i32> %ret
674}
; test_srem_odd_even_allones_ne: returns, per lane, 1 if
; (X srem <5, 14, 4294967295, 100>) is non-zero and 0 otherwise (the i1 compare
; result is zero-extended to i32). 4294967295 is the all-ones i32 bit pattern.
; The IR matches test_srem_odd_even_allones_eq except for the `ne` predicate.
; Expected codegen: with SSE2/SSE4.1/AVX/AVX2 the remainder is rebuilt
; (multiplies by constant vectors, shifts, a multiply-back step -- the SSE2
; sequence shows the vector [5,14,4294967295,100] there -- and a subtract from
; the input) and then compared against zero; the +avx512f,+avx512vl
; configuration instead expects vpmulld/vpaddd/vprorvd/vpminud/vpcmpeqd.
; Every variant finishes with an and-not against a constant operand, where the
; eq test finishes with a shift right by 31.
675define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
676; CHECK-SSE2-LABEL: test_srem_odd_even_allones_ne:
677; CHECK-SSE2:       # %bb.0:
678; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,4294967295,0]
679; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
680; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
681; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
682; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
683; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
684; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
685; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
686; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
687; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [1717986919,2454267027,0,1374389535]
688; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
689; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm1
690; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
691; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
692; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
693; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
694; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
695; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
696; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
697; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
698; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
699; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
700; CHECK-SSE2-NEXT:    pand %xmm0, %xmm5
701; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm5
702; CHECK-SSE2-NEXT:    psubd %xmm5, %xmm2
703; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
704; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
705; CHECK-SSE2-NEXT:    psrad $5, %xmm3
706; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
707; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
708; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
709; CHECK-SSE2-NEXT:    psrad $3, %xmm3
710; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm5
711; CHECK-SSE2-NEXT:    psrad $1, %xmm5
712; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
713; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
714; CHECK-SSE2-NEXT:    psrld $31, %xmm2
715; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
716; CHECK-SSE2-NEXT:    paddd %xmm5, %xmm2
717; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,14,4294967295,100]
718; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
719; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
720; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
721; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
722; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
723; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
724; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
725; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
726; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
727; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
728; CHECK-SSE2-NEXT:    retq
729;
730; CHECK-SSE41-LABEL: test_srem_odd_even_allones_ne:
731; CHECK-SSE41:       # %bb.0:
732; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
733; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
734; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
735; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
736; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
737; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
738; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
739; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4294967295,0]
740; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
741; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
742; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
743; CHECK-SSE41-NEXT:    psrad $5, %xmm1
744; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm3
745; CHECK-SSE41-NEXT:    psrad $3, %xmm3
746; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
747; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
748; CHECK-SSE41-NEXT:    psrad $1, %xmm1
749; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
750; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
751; CHECK-SSE41-NEXT:    psrld $31, %xmm2
752; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
753; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
754; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
755; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
756; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
757; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
758; CHECK-SSE41-NEXT:    pandn {{.*}}(%rip), %xmm0
759; CHECK-SSE41-NEXT:    retq
760;
761; CHECK-AVX1-LABEL: test_srem_odd_even_allones_ne:
762; CHECK-AVX1:       # %bb.0:
763; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
764; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
765; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
766; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
767; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
768; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
769; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
770; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
771; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
772; CHECK-AVX1-NEXT:    vpsrad $5, %xmm1, %xmm2
773; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm3
774; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
775; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
776; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
777; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
778; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
779; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
780; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
781; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
782; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
783; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
784; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
785; CHECK-AVX1-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
786; CHECK-AVX1-NEXT:    retq
787;
788; CHECK-AVX2-LABEL: test_srem_odd_even_allones_ne:
789; CHECK-AVX2:       # %bb.0:
790; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
791; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
792; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
793; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
794; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
795; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
796; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
797; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
798; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
799; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
800; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
801; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
802; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
803; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
804; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
805; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
806; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
807; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
808; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
809; CHECK-AVX2-NEXT:    retq
810;
811; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_ne:
812; CHECK-AVX512VL:       # %bb.0:
813; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
814; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
815; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
816; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
817; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
818; CHECK-AVX512VL-NEXT:    vpandnd {{.*}}(%rip){1to4}, %xmm0, %xmm0
819; CHECK-AVX512VL-NEXT:    retq
; Input IR: srem by the constant vector, icmp ne against zero, zext to <4 x i32>.
820  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
821  %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
822  %ret = zext <4 x i1> %cmp to <4 x i32>
823  ret <4 x i32> %ret
824}
825
826;------------------------------------------------------------------------------;
827
828; One power-of-two divisor in odd divisor
; test_srem_odd_poweroftwo: returns, per lane, 1 if (X srem <5, 5, 16, 5>) == 0
; and 0 otherwise (the icmp eq result is zero-extended to i32). Lane 2 holds the
; power-of-two divisor 16; the other lanes use the odd divisor 5.
; Expected codegen: SSE2/SSE4.1/AVX/AVX2 compute the remainder explicitly
; (multiply, per-lane shifts of 1 and 3, multiply back, subtract from the input)
; and compare it with zero; the +avx512f,+avx512vl configuration expects
; vpmulld/vpaddd/vprorvd/vpminud/vpcmpeqd instead. All variants end with a
; logical shift right by 31 applied to the compare result.
829define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
830; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo:
831; CHECK-SSE2:       # %bb.0:
832; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
833; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm2
834; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
835; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1717986919,1717986919,2147483649,1717986919]
836; CHECK-SSE2-NEXT:    pand %xmm3, %xmm2
837; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [0,0,4294967295,0]
838; CHECK-SSE2-NEXT:    pand %xmm0, %xmm4
839; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm2
840; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm3
841; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
842; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
843; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm5
844; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
845; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
846; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm3
847; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm3
848; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm2
849; CHECK-SSE2-NEXT:    psrad $1, %xmm2
850; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm4
851; CHECK-SSE2-NEXT:    psrad $3, %xmm4
852; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[2,0],xmm2[3,0]
853; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0,2]
854; CHECK-SSE2-NEXT:    psrld $31, %xmm3
855; CHECK-SSE2-NEXT:    paddd %xmm2, %xmm3
856; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
857; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm3
858; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
859; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
860; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
861; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
862; CHECK-SSE2-NEXT:    psubd %xmm3, %xmm0
863; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
864; CHECK-SSE2-NEXT:    psrld $31, %xmm0
865; CHECK-SSE2-NEXT:    retq
866;
867; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo:
868; CHECK-SSE41:       # %bb.0:
869; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
870; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
871; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
872; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
873; CHECK-SSE41-NEXT:    pmuldq {{.*}}(%rip), %xmm3
874; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm4 = <1717986919,u,2147483649,u>
875; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm4
876; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
877; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
878; CHECK-SSE41-NEXT:    paddd %xmm2, %xmm4
879; CHECK-SSE41-NEXT:    movdqa %xmm4, %xmm2
880; CHECK-SSE41-NEXT:    psrad $3, %xmm2
881; CHECK-SSE41-NEXT:    movdqa %xmm4, %xmm3
882; CHECK-SSE41-NEXT:    psrad $1, %xmm3
883; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
884; CHECK-SSE41-NEXT:    psrld $31, %xmm4
885; CHECK-SSE41-NEXT:    paddd %xmm3, %xmm4
886; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm4
887; CHECK-SSE41-NEXT:    psubd %xmm4, %xmm0
888; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
889; CHECK-SSE41-NEXT:    psrld $31, %xmm0
890; CHECK-SSE41-NEXT:    retq
891;
892; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo:
893; CHECK-AVX1:       # %bb.0:
894; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
895; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
896; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
897; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm3, %xmm3
898; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm4
899; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
900; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
901; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
902; CHECK-AVX1-NEXT:    vpsrad $3, %xmm2, %xmm3
903; CHECK-AVX1-NEXT:    vpsrad $1, %xmm2, %xmm4
904; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5],xmm4[6,7]
905; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
906; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
907; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm2, %xmm2
908; CHECK-AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
909; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
910; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
911; CHECK-AVX1-NEXT:    retq
912;
913; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo:
914; CHECK-AVX2:       # %bb.0:
915; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
916; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm1[0,1],xmm0[2],xmm1[3]
917; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
918; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm4 = [1717986919,1717986919,1717986919,1717986919]
919; CHECK-AVX2-NEXT:    vpmuldq %xmm4, %xmm3, %xmm3
920; CHECK-AVX2-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm4
921; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
922; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm4[0],xmm3[1],xmm4[2],xmm3[3]
923; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
924; CHECK-AVX2-NEXT:    vpsrld $31, %xmm2, %xmm3
925; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm2, %xmm2
926; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
927; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm2, %xmm2
928; CHECK-AVX2-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
929; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
930; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
931; CHECK-AVX2-NEXT:    retq
932;
933; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo:
934; CHECK-AVX512VL:       # %bb.0:
935; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
936; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
937; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
938; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
939; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
940; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
941; CHECK-AVX512VL-NEXT:    retq
; Input IR: srem by the constant vector, icmp eq against zero, zext to <4 x i32>.
942  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5>
943  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
944  %ret = zext <4 x i1> %cmp to <4 x i32>
945  ret <4 x i32> %ret
946}
947
948; One power-of-two divisor in even divisor
; test_srem_even_poweroftwo: returns, per lane, 1 if
; (X srem <14, 14, 16, 14>) == 0 and 0 otherwise (the icmp eq result is
; zero-extended to i32). Lane 2 holds the power-of-two divisor 16; the other
; lanes use the even divisor 14.
; Expected codegen: SSE2/SSE4.1/AVX/AVX2 compute the remainder explicitly
; (multiply, a uniform arithmetic shift by 3, multiply back, subtract from the
; input) and compare it with zero; the +avx512f,+avx512vl configuration expects
; vpmulld/vpaddd/vprorvd/vpminud/vpcmpeqd instead. All variants end with a
; logical shift right by 31 applied to the compare result.
949define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
950; CHECK-SSE2-LABEL: test_srem_even_poweroftwo:
951; CHECK-SSE2:       # %bb.0:
952; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
953; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm2
954; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
955; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2147483649,2454267027]
956; CHECK-SSE2-NEXT:    pand %xmm3, %xmm2
957; CHECK-SSE2-NEXT:    paddd %xmm0, %xmm2
958; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm3
959; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
960; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
961; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm4
962; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
963; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
964; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm3
965; CHECK-SSE2-NEXT:    paddd %xmm0, %xmm3
966; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm2
967; CHECK-SSE2-NEXT:    psrld $31, %xmm2
968; CHECK-SSE2-NEXT:    psrad $3, %xmm3
969; CHECK-SSE2-NEXT:    paddd %xmm2, %xmm3
970; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
971; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm3
972; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
973; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
974; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
975; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
976; CHECK-SSE2-NEXT:    psubd %xmm3, %xmm0
977; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
978; CHECK-SSE2-NEXT:    psrld $31, %xmm0
979; CHECK-SSE2-NEXT:    retq
980;
981; CHECK-SSE41-LABEL: test_srem_even_poweroftwo:
982; CHECK-SSE41:       # %bb.0:
983; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
984; CHECK-SSE41-NEXT:    pmuldq {{.*}}(%rip), %xmm1
985; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <2454267027,u,2147483649,u>
986; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm2
987; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
988; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
989; CHECK-SSE41-NEXT:    paddd %xmm0, %xmm2
990; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
991; CHECK-SSE41-NEXT:    psrld $31, %xmm1
992; CHECK-SSE41-NEXT:    psrad $3, %xmm2
993; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
994; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
995; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
996; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
997; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
998; CHECK-SSE41-NEXT:    psrld $31, %xmm0
999; CHECK-SSE41-NEXT:    retq
1000;
1001; CHECK-AVX1-LABEL: test_srem_even_poweroftwo:
1002; CHECK-AVX1:       # %bb.0:
1003; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1004; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm1, %xmm1
1005; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1006; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1007; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1008; CHECK-AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm1
1009; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm2
1010; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm1
1011; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1012; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1013; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1014; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1015; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1016; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1017; CHECK-AVX1-NEXT:    retq
1018;
1019; CHECK-AVX2-LABEL: test_srem_even_poweroftwo:
1020; CHECK-AVX2:       # %bb.0:
1021; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1022; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
1023; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm1, %xmm1
1024; CHECK-AVX2-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1025; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1026; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
1027; CHECK-AVX2-NEXT:    vpaddd %xmm0, %xmm1, %xmm1
1028; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
1029; CHECK-AVX2-NEXT:    vpsrad $3, %xmm1, %xmm1
1030; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1031; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1032; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1033; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1034; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1035; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1036; CHECK-AVX2-NEXT:    retq
1037;
1038; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo:
1039; CHECK-AVX512VL:       # %bb.0:
1040; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1041; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
1042; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1043; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1044; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1045; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1046; CHECK-AVX512VL-NEXT:    retq
; Input IR: srem by the constant vector, icmp eq against zero, zext to <4 x i32>.
1047  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14>
1048  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1049  %ret = zext <4 x i1> %cmp to <4 x i32>
1050  ret <4 x i32> %ret
1051}
1052
1053; One power-of-two divisor in odd+even divisor
; test_srem_odd_even_poweroftwo: returns, per lane, 1 if
; (X srem <5, 14, 16, 100>) == 0 and 0 otherwise (the icmp eq result is
; zero-extended to i32). The divisor vector mixes an odd value (5), even values
; (14, 100) and a power of two (16).
; Expected codegen: SSE2/SSE4.1/AVX/AVX2 compute the remainder explicitly
; (multiply, per-lane arithmetic shifts of 1, 3 and 5 or a variable vpsravd,
; multiply back -- the SSE2 sequence shows the vector [5,14,16,100] there --
; and subtract from the input) and compare it with zero; the
; +avx512f,+avx512vl configuration expects
; vpmulld/vpaddd/vprorvd/vpminud/vpcmpeqd instead. All variants end with a
; logical shift right by 31 applied to the compare result.
1054define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
1055; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo:
1056; CHECK-SSE2:       # %bb.0:
1057; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1717986919,2454267027,2147483649,1374389535]
1058; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
1059; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
1060; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1061; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
1062; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1063; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm4
1064; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3]
1065; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1066; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1067; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
1068; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1069; CHECK-SSE2-NEXT:    pand %xmm3, %xmm4
1070; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [0,4294967295,4294967295,0]
1071; CHECK-SSE2-NEXT:    pand %xmm0, %xmm3
1072; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
1073; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm2
1074; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
1075; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
1076; CHECK-SSE2-NEXT:    psrad $5, %xmm3
1077; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
1078; CHECK-SSE2-NEXT:    psrad $3, %xmm4
1079; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm5
1080; CHECK-SSE2-NEXT:    psrad $1, %xmm5
1081; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
1082; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
1083; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
1084; CHECK-SSE2-NEXT:    psrld $31, %xmm2
1085; CHECK-SSE2-NEXT:    paddd %xmm5, %xmm2
1086; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,14,16,100]
1087; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1088; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
1089; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1090; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1091; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
1092; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1093; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1094; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
1095; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1096; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1097; CHECK-SSE2-NEXT:    retq
1098;
1099; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo:
1100; CHECK-SSE41:       # %bb.0:
1101; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483649,1374389535]
1102; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1103; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1104; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
1105; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
1106; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1107; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1108; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
1109; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm3
1110; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7]
1111; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm3
1112; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm1
1113; CHECK-SSE41-NEXT:    psrad $5, %xmm1
1114; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm4
1115; CHECK-SSE41-NEXT:    psrad $3, %xmm4
1116; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm1[4,5,6,7]
1117; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm5
1118; CHECK-SSE41-NEXT:    psrad $1, %xmm5
1119; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm4[4,5,6,7]
1120; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm1[2,3],xmm5[4,5],xmm1[6,7]
1121; CHECK-SSE41-NEXT:    psrld $31, %xmm3
1122; CHECK-SSE41-NEXT:    paddd %xmm5, %xmm3
1123; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm3
1124; CHECK-SSE41-NEXT:    psubd %xmm3, %xmm0
1125; CHECK-SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
1126; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1127; CHECK-SSE41-NEXT:    retq
1128;
1129; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo:
1130; CHECK-AVX1:       # %bb.0:
1131; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483649,1374389535]
1132; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1133; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1134; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
1135; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
1136; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1137; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1138; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1139; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2,3,4,5],xmm2[6,7]
1140; CHECK-AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
1141; CHECK-AVX1-NEXT:    vpsrad $5, %xmm1, %xmm3
1142; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm4
1143; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
1144; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm5
1145; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
1146; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1147; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
1148; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
1149; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1150; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1151; CHECK-AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1152; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1153; CHECK-AVX1-NEXT:    retq
1154;
1155; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo:
1156; CHECK-AVX2:       # %bb.0:
1157; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483649,1374389535]
1158; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1159; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1160; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
1161; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
1162; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1163; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1164; CHECK-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1165; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1,2],xmm2[3]
1166; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
1167; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm3
1168; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
1169; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
1170; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1171; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1172; CHECK-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1173; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1174; CHECK-AVX2-NEXT:    retq
1175;
1176; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo:
1177; CHECK-AVX512VL:       # %bb.0:
1178; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1179; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
1180; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1181; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1182; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1183; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1184; CHECK-AVX512VL-NEXT:    retq
; Input IR: srem by the constant vector, icmp eq against zero, zext to <4 x i32>.
1185  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100>
1186  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1187  %ret = zext <4 x i1> %cmp to <4 x i32>
1188  ret <4 x i32> %ret
1189}
1190
1191;------------------------------------------------------------------------------;
1192
1193; One divisor of 1 in odd divisor
; test_srem_odd_one: returns, per lane, 1 if (X srem <5, 5, 1, 5>) == 0 and 0
; otherwise (the icmp eq result is zero-extended to i32). Lane 2 uses the
; divisor 1; the other lanes use the odd divisor 5.
; Expected codegen: no configuration rebuilds the remainder here. SSE4.1, AVX,
; AVX2 and AVX512VL all expect multiply, add, unsigned min (pminud/vpminud),
; compare-equal and a logical shift right by 31, with no rotate step. SSE2
; expects the multiply done as two pmuludq halves, then paddd, pxor, pcmpgtd
; and a final pandn, each against a constant operand.
1194define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind {
1195; CHECK-SSE2-LABEL: test_srem_odd_one:
1196; CHECK-SSE2:       # %bb.0:
1197; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
1198; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1199; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
1200; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1201; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
1202; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
1203; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1204; CHECK-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
1205; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
1206; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
1207; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
1208; CHECK-SSE2-NEXT:    retq
1209;
1210; CHECK-SSE41-LABEL: test_srem_odd_one:
1211; CHECK-SSE41:       # %bb.0:
1212; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
1213; CHECK-SSE41-NEXT:    paddd {{.*}}(%rip), %xmm0
1214; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
1215; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1216; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1217; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1218; CHECK-SSE41-NEXT:    retq
1219;
1220; CHECK-AVX1-LABEL: test_srem_odd_one:
1221; CHECK-AVX1:       # %bb.0:
1222; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1223; CHECK-AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
1224; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1225; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1226; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1227; CHECK-AVX1-NEXT:    retq
1228;
1229; CHECK-AVX2-LABEL: test_srem_odd_one:
1230; CHECK-AVX2:       # %bb.0:
1231; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
1232; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
1233; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
1234; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
1235; CHECK-AVX2-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1236; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1237; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1238; CHECK-AVX2-NEXT:    retq
1239;
1240; CHECK-AVX512VL-LABEL: test_srem_odd_one:
1241; CHECK-AVX512VL:       # %bb.0:
1242; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
1243; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
1244; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1245; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1246; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1247; CHECK-AVX512VL-NEXT:    retq
; Input IR: srem by the constant vector, icmp eq against zero, zext to <4 x i32>.
1248  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5>
1249  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1250  %ret = zext <4 x i1> %cmp to <4 x i32>
1251  ret <4 x i32> %ret
1252}
1253
1254; One one divisor in even divisor
; Divisors are <14, 14, 1, 14>: lane 2 divides by 1, the other lanes by the
; even constant 14. The function returns, per lane, zext((X srem divisor) == 0).
; In the expected output below, the SSE2, SSE4.1, AVX1 and AVX2 configurations
; compute the remainder explicitly (high-half multiply, shifts, multiply back,
; subtract) and compare it with zero. Only the AVX512VL configuration uses the
; shorter multiply / add / rotate-right-by-1 / unsigned-min / compare sequence.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1255define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
1256; CHECK-SSE2-LABEL: test_srem_even_one:
1257; CHECK-SSE2:       # %bb.0:
1258; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295]
1259; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
1260; CHECK-SSE2-NEXT:    pand %xmm2, %xmm3
1261; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1262; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
1263; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1264; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2454267027,2454267027,0,2454267027]
1265; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
1266; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
1267; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm5
1268; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,3,2,3]
1269; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1270; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm5
1271; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
1272; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
1273; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm3
1274; CHECK-SSE2-NEXT:    paddd %xmm0, %xmm3
1275; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm4
1276; CHECK-SSE2-NEXT:    psrad $3, %xmm4
1277; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm5
1278; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[2,0],xmm4[3,0]
1279; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0,2]
1280; CHECK-SSE2-NEXT:    psrld $31, %xmm3
1281; CHECK-SSE2-NEXT:    pand %xmm2, %xmm3
1282; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm3
1283; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1284; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm3
1285; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1286; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
1287; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1288; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1289; CHECK-SSE2-NEXT:    psubd %xmm3, %xmm0
1290; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1291; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1292; CHECK-SSE2-NEXT:    retq
1293;
1294; CHECK-SSE41-LABEL: test_srem_even_one:
1295; CHECK-SSE41:       # %bb.0:
1296; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1297; CHECK-SSE41-NEXT:    pmuldq {{.*}}(%rip), %xmm1
1298; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,0,0,0]
1299; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm2
1300; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1301; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1302; CHECK-SSE41-NEXT:    paddd %xmm0, %xmm2
1303; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
1304; CHECK-SSE41-NEXT:    psrad $3, %xmm1
1305; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
1306; CHECK-SSE41-NEXT:    psrld $31, %xmm2
1307; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
1308; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
1309; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
1310; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
1311; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
1312; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
1313; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1314; CHECK-SSE41-NEXT:    retq
1315;
1316; CHECK-AVX1-LABEL: test_srem_even_one:
1317; CHECK-AVX1:       # %bb.0:
1318; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1319; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm1, %xmm1
1320; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1321; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1322; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1323; CHECK-AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm1
1324; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
1325; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
1326; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
1327; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1328; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
1329; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
1330; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1331; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1332; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
1333; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1334; CHECK-AVX1-NEXT:    retq
1335;
1336; CHECK-AVX2-LABEL: test_srem_even_one:
1337; CHECK-AVX2:       # %bb.0:
1338; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1339; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
1340; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm1, %xmm1
1341; CHECK-AVX2-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1342; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1343; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
1344; CHECK-AVX2-NEXT:    vpaddd %xmm0, %xmm1, %xmm1
1345; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
1346; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1347; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
1348; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
1349; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1350; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1351; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1352; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
1353; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1354; CHECK-AVX2-NEXT:    retq
1355;
1356; CHECK-AVX512VL-LABEL: test_srem_even_one:
1357; CHECK-AVX512VL:       # %bb.0:
1358; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
1359; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
1360; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
1361; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1362; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1363; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1364; CHECK-AVX512VL-NEXT:    retq
1365  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14>
1366  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1367  %ret = zext <4 x i1> %cmp to <4 x i32>
1368  ret <4 x i32> %ret
1369}
1370
1371; One one divisor in odd+even divisor
; Divisors are <5, 14, 1, 100>: an odd divisor, two even divisors, and a
; divisor of 1 in lane 2. The function returns, per lane,
; zext((X srem divisor) == 0).
; In the expected output below, the SSE2, SSE4.1, AVX1 and AVX2 configurations
; compute the remainder explicitly (high-half multiply, per-lane shifts,
; multiply back, subtract) and compare it with zero. Only the AVX512VL
; configuration uses the shorter multiply / add / variable-rotate /
; unsigned-min / compare sequence.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1372define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
1373; CHECK-SSE2-LABEL: test_srem_odd_even_one:
1374; CHECK-SSE2:       # %bb.0:
1375; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1717986919,2454267027,0,1374389535]
1376; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
1377; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
1378; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
1379; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1380; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1381; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm4
1382; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3]
1383; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1384; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1385; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
1386; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1387; CHECK-SSE2-NEXT:    pand %xmm2, %xmm4
1388; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,4294967295,0,0]
1389; CHECK-SSE2-NEXT:    pand %xmm0, %xmm2
1390; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm2
1391; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm3
1392; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,4294967295,4294967295,0]
1393; CHECK-SSE2-NEXT:    pand %xmm0, %xmm2
1394; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
1395; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
1396; CHECK-SSE2-NEXT:    psrad $5, %xmm3
1397; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
1398; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
1399; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
1400; CHECK-SSE2-NEXT:    psrad $3, %xmm3
1401; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm5
1402; CHECK-SSE2-NEXT:    psrad $1, %xmm5
1403; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
1404; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
1405; CHECK-SSE2-NEXT:    psrld $31, %xmm2
1406; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
1407; CHECK-SSE2-NEXT:    paddd %xmm5, %xmm2
1408; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,14,1,100]
1409; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1410; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
1411; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1412; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1413; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
1414; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1415; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1416; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
1417; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1418; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1419; CHECK-SSE2-NEXT:    retq
1420;
1421; CHECK-SSE41-LABEL: test_srem_odd_even_one:
1422; CHECK-SSE41:       # %bb.0:
1423; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
1424; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1425; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1426; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
1427; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
1428; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1429; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1430; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
1431; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm3
1432; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7]
1433; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm3
1434; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm1
1435; CHECK-SSE41-NEXT:    psrad $5, %xmm1
1436; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm4
1437; CHECK-SSE41-NEXT:    psrad $3, %xmm4
1438; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7]
1439; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm1
1440; CHECK-SSE41-NEXT:    psrad $1, %xmm1
1441; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
1442; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
1443; CHECK-SSE41-NEXT:    psrld $31, %xmm3
1444; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
1445; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm3
1446; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm3
1447; CHECK-SSE41-NEXT:    psubd %xmm3, %xmm0
1448; CHECK-SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
1449; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1450; CHECK-SSE41-NEXT:    retq
1451;
1452; CHECK-AVX1-LABEL: test_srem_odd_even_one:
1453; CHECK-AVX1:       # %bb.0:
1454; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
1455; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1456; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1457; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
1458; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
1459; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1460; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1461; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1462; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2,3,4,5],xmm2[6,7]
1463; CHECK-AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
1464; CHECK-AVX1-NEXT:    vpsrad $5, %xmm1, %xmm3
1465; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm4
1466; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
1467; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm4
1468; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7]
1469; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1470; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
1471; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
1472; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
1473; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1474; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1475; CHECK-AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1476; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1477; CHECK-AVX1-NEXT:    retq
1478;
1479; CHECK-AVX2-LABEL: test_srem_odd_even_one:
1480; CHECK-AVX2:       # %bb.0:
1481; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
1482; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1483; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1484; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
1485; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
1486; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1487; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1488; CHECK-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1489; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1,2],xmm2[3]
1490; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
1491; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm3
1492; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm3[0,1],xmm2[2],xmm3[3]
1493; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
1494; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
1495; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1496; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1497; CHECK-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1498; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1499; CHECK-AVX2-NEXT:    retq
1500;
1501; CHECK-AVX512VL-LABEL: test_srem_odd_even_one:
1502; CHECK-AVX512VL:       # %bb.0:
1503; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1504; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
1505; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1506; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
1507; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1508; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1509; CHECK-AVX512VL-NEXT:    retq
1510  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100>
1511  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1512  %ret = zext <4 x i1> %cmp to <4 x i32>
1513  ret <4 x i32> %ret
1514}
1515
1516;------------------------------------------------------------------------------;
1517
1518; One INT_MIN divisor in odd divisor
; Divisors are <5, 5, 2147483648, 5>: lane 2 uses the i32 constant written as
; 2147483648 (bit pattern 0x80000000), the other lanes the odd constant 5.
; The function returns, per lane, zext((X srem divisor) == 0).
; In the expected output below, every configuration computes lane 2 separately
; as ((X & 2147483647) == 0) and merges it into the multiply / add / compare
; result of the other lanes (shufps on SSE2, a blend elsewhere). On AVX1 and
; AVX512VL the AND mask is loaded from memory, so its value is not visible in
; the assertions.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1519define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
1520; CHECK-SSE2-LABEL: test_srem_odd_INT_MIN:
1521; CHECK-SSE2:       # %bb.0:
1522; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1523; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
1524; CHECK-SSE2-NEXT:    pand %xmm0, %xmm2
1525; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
1526; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1527; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm0
1528; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
1529; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
1530; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1531; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
1532; CHECK-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm3
1533; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm3
1534; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm3
1535; CHECK-SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
1536; CHECK-SSE2-NEXT:    pxor %xmm3, %xmm0
1537; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[3,0]
1538; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0,2]
1539; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1540; CHECK-SSE2-NEXT:    retq
1541;
1542; CHECK-SSE41-LABEL: test_srem_odd_INT_MIN:
1543; CHECK-SSE41:       # %bb.0:
1544; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1545; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
1546; CHECK-SSE41-NEXT:    pand %xmm0, %xmm2
1547; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm2
1548; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
1549; CHECK-SSE41-NEXT:    paddd {{.*}}(%rip), %xmm0
1550; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,858993458,0,858993458]
1551; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1552; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1553; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
1554; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1555; CHECK-SSE41-NEXT:    retq
1556;
1557; CHECK-AVX1-LABEL: test_srem_odd_INT_MIN:
1558; CHECK-AVX1:       # %bb.0:
1559; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1560; CHECK-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm2
1561; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
1562; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1563; CHECK-AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
1564; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm2
1565; CHECK-AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1566; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1567; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1568; CHECK-AVX1-NEXT:    retq
1569;
1570; CHECK-AVX2-LABEL: test_srem_odd_INT_MIN:
1571; CHECK-AVX2:       # %bb.0:
1572; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1573; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
1574; CHECK-AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm2
1575; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
1576; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1577; CHECK-AVX2-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
1578; CHECK-AVX2-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm2
1579; CHECK-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1580; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1581; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1582; CHECK-AVX2-NEXT:    retq
1583;
1584; CHECK-AVX512VL-LABEL: test_srem_odd_INT_MIN:
1585; CHECK-AVX512VL:       # %bb.0:
1586; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1587; CHECK-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm2
1588; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
1589; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1590; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
1591; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm2
1592; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1593; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1594; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1595; CHECK-AVX512VL-NEXT:    retq
1596  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5>
1597  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1598  %ret = zext <4 x i1> %cmp to <4 x i32>
1599  ret <4 x i32> %ret
1600}
1601
1602; One INT_MIN divisor in even divisor
; Divisors are <14, 14, 2147483648, 14>: lane 2 uses the i32 constant written
; as 2147483648 (bit pattern 0x80000000), the other lanes the even constant 14.
; The function returns, per lane, zext((X srem divisor) == 0).
; In the expected output below, the SSE2, SSE4.1, AVX1 and AVX2 configurations
; compute the remainder explicitly (high-half multiply, per-lane shifts,
; multiply back, subtract) and compare it with zero. Only the AVX512VL
; configuration uses the shorter multiply / add / variable-rotate /
; unsigned-min / compare sequence, with lane 2 blended in from a separate
; "(X & broadcast constant) == 0" test (the constant is loaded from memory, so
; its value is not visible in the assertions).
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1603define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
1604; CHECK-SSE2-LABEL: test_srem_even_INT_MIN:
1605; CHECK-SSE2:       # %bb.0:
1606; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1607; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm2
1608; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
1609; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2147483647,2454267027]
1610; CHECK-SSE2-NEXT:    pand %xmm3, %xmm2
1611; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [4294967295,4294967295,0,4294967295]
1612; CHECK-SSE2-NEXT:    pand %xmm0, %xmm4
1613; CHECK-SSE2-NEXT:    paddd %xmm2, %xmm4
1614; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm3
1615; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
1616; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1617; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2454267027,2454267027,2454267027,2454267027]
1618; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm5
1619; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
1620; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
1621; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm2
1622; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = <1,u,4294967295,u>
1623; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm4
1624; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
1625; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm3
1626; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1627; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
1628; CHECK-SSE2-NEXT:    paddd %xmm2, %xmm4
1629; CHECK-SSE2-NEXT:    movdqa %xmm4, %xmm2
1630; CHECK-SSE2-NEXT:    psrad $3, %xmm2
1631; CHECK-SSE2-NEXT:    movdqa %xmm4, %xmm3
1632; CHECK-SSE2-NEXT:    psrad $30, %xmm3
1633; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[3,0]
1634; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0,2]
1635; CHECK-SSE2-NEXT:    psrld $31, %xmm4
1636; CHECK-SSE2-NEXT:    paddd %xmm2, %xmm4
1637; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1638; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm4
1639; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
1640; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
1641; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1642; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1643; CHECK-SSE2-NEXT:    psubd %xmm3, %xmm0
1644; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1645; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1646; CHECK-SSE2-NEXT:    retq
1647;
1648; CHECK-SSE41-LABEL: test_srem_even_INT_MIN:
1649; CHECK-SSE41:       # %bb.0:
1650; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1651; CHECK-SSE41-NEXT:    pmuldq {{.*}}(%rip), %xmm1
1652; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <2454267027,u,2147483647,u>
1653; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm2
1654; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1655; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1656; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,4294967295,1]
1657; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm1
1658; CHECK-SSE41-NEXT:    paddd %xmm2, %xmm1
1659; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
1660; CHECK-SSE41-NEXT:    psrad $30, %xmm2
1661; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm3
1662; CHECK-SSE41-NEXT:    psrad $3, %xmm3
1663; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
1664; CHECK-SSE41-NEXT:    psrld $31, %xmm1
1665; CHECK-SSE41-NEXT:    paddd %xmm3, %xmm1
1666; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
1667; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
1668; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1669; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1670; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1671; CHECK-SSE41-NEXT:    retq
1672;
1673; CHECK-AVX1-LABEL: test_srem_even_INT_MIN:
1674; CHECK-AVX1:       # %bb.0:
1675; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1676; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm1, %xmm1
1677; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1678; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1679; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1680; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
1681; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1682; CHECK-AVX1-NEXT:    vpsrad $30, %xmm1, %xmm2
1683; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm3
1684; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
1685; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
1686; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
1687; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1688; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1689; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1690; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1691; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1692; CHECK-AVX1-NEXT:    retq
1693;
1694; CHECK-AVX2-LABEL: test_srem_even_INT_MIN:
1695; CHECK-AVX2:       # %bb.0:
1696; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1697; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
1698; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm1, %xmm1
1699; CHECK-AVX2-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1700; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1701; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
1702; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
1703; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1704; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
1705; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
1706; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1707; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1708; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1709; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1710; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1711; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1712; CHECK-AVX2-NEXT:    retq
1713;
1714; CHECK-AVX512VL-LABEL: test_srem_even_INT_MIN:
1715; CHECK-AVX512VL:       # %bb.0:
1716; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1717; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
1718; CHECK-AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [306783378,306783378,0,306783378]
1719; CHECK-AVX512VL-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
1720; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm2, %xmm2
1721; CHECK-AVX512VL-NEXT:    vpminud %xmm3, %xmm2, %xmm3
1722; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
1723; CHECK-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
1724; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1725; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
1726; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1727; CHECK-AVX512VL-NEXT:    retq
1728  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
1729  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1730  %ret = zext <4 x i1> %cmp to <4 x i32>
1731  ret <4 x i32> %ret
1732}
1733
1734; One INT_MIN divisor in odd+even divisor
1735define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
1736; CHECK-SSE2-LABEL: test_srem_odd_even_INT_MIN:
1737; CHECK-SSE2:       # %bb.0:
1738; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,4294967295,0]
1739; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
1740; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
1741; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
1742; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1743; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1744; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
1745; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1746; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1747; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [1717986919,2454267027,2147483647,1374389535]
1748; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
1749; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm1
1750; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1751; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
1752; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
1753; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1754; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1755; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1756; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
1757; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1758; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
1759; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
1760; CHECK-SSE2-NEXT:    pand %xmm0, %xmm5
1761; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm5
1762; CHECK-SSE2-NEXT:    psubd %xmm5, %xmm2
1763; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
1764; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
1765; CHECK-SSE2-NEXT:    psrad $5, %xmm3
1766; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
1767; CHECK-SSE2-NEXT:    psrad $30, %xmm4
1768; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
1769; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
1770; CHECK-SSE2-NEXT:    psrad $3, %xmm3
1771; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm5
1772; CHECK-SSE2-NEXT:    psrad $1, %xmm5
1773; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
1774; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
1775; CHECK-SSE2-NEXT:    psrld $31, %xmm2
1776; CHECK-SSE2-NEXT:    paddd %xmm5, %xmm2
1777; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,14,2147483648,100]
1778; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1779; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
1780; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1781; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1782; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
1783; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1784; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1785; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
1786; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1787; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1788; CHECK-SSE2-NEXT:    retq
1789;
1790; CHECK-SSE41-LABEL: test_srem_odd_even_INT_MIN:
1791; CHECK-SSE41:       # %bb.0:
1792; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
1793; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1794; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1795; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
1796; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
1797; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1798; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1799; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4294967295,0]
1800; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
1801; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
1802; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
1803; CHECK-SSE41-NEXT:    psrad $5, %xmm1
1804; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm3
1805; CHECK-SSE41-NEXT:    psrad $3, %xmm3
1806; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
1807; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
1808; CHECK-SSE41-NEXT:    psrad $30, %xmm1
1809; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm4
1810; CHECK-SSE41-NEXT:    psrad $1, %xmm4
1811; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7]
1812; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1813; CHECK-SSE41-NEXT:    psrld $31, %xmm2
1814; CHECK-SSE41-NEXT:    paddd %xmm4, %xmm2
1815; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
1816; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
1817; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1818; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1819; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1820; CHECK-SSE41-NEXT:    retq
1821;
1822; CHECK-AVX1-LABEL: test_srem_odd_even_INT_MIN:
1823; CHECK-AVX1:       # %bb.0:
1824; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
1825; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1826; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1827; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
1828; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
1829; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1830; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1831; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
1832; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1833; CHECK-AVX1-NEXT:    vpsrad $5, %xmm1, %xmm2
1834; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm3
1835; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1836; CHECK-AVX1-NEXT:    vpsrad $30, %xmm1, %xmm3
1837; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm4
1838; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
1839; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
1840; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
1841; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
1842; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1843; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1844; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1845; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1846; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1847; CHECK-AVX1-NEXT:    retq
1848;
1849; CHECK-AVX2-LABEL: test_srem_odd_even_INT_MIN:
1850; CHECK-AVX2:       # %bb.0:
1851; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
1852; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1853; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1854; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
1855; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
1856; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1857; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1858; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
1859; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1860; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
1861; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
1862; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1863; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1864; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1865; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1866; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1867; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1868; CHECK-AVX2-NEXT:    retq
1869;
1870; CHECK-AVX512VL-LABEL: test_srem_odd_even_INT_MIN:
1871; CHECK-AVX512VL:       # %bb.0:
1872; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1873; CHECK-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm2
1874; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
1875; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
1876; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
1877; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
1878; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm2
1879; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1880; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1881; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1882; CHECK-AVX512VL-NEXT:    retq
1883  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100>
1884  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1885  %ret = zext <4 x i1> %cmp to <4 x i32>
1886  ret <4 x i32> %ret
1887}
1888
1889;==============================================================================;
1890
1891; One all-ones divisor and one power-of-two divisor in odd divisor
1892define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; Per lane, yields 1 when %X srem <5, 4294967295, 16, 5> equals 0, else 0.
; The FileCheck assertions below are autogenerated (see the NOTE at the top of
; this file: utils/update_llc_test_checks.py); regenerate rather than hand-edit.
1893; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo:
1894; CHECK-SSE2:       # %bb.0:
1895; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,4294967295,1,0]
1896; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
1897; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
1898; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
1899; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1900; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1901; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
1902; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1903; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1904; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [1717986919,0,2147483649,1717986919]
1905; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
1906; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm1
1907; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1908; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
1909; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
1910; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1911; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1912; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1913; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
1914; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1915; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
1916; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [0,0,4294967295,0]
1917; CHECK-SSE2-NEXT:    pand %xmm0, %xmm5
1918; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm5
1919; CHECK-SSE2-NEXT:    psubd %xmm5, %xmm2
1920; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
1921; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
1922; CHECK-SSE2-NEXT:    psrad $1, %xmm3
1923; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
1924; CHECK-SSE2-NEXT:    psrad $3, %xmm4
1925; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
1926; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
1927; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3]
1928; CHECK-SSE2-NEXT:    psrld $31, %xmm2
1929; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
1930; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
1931; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,4294967295,16,5]
1932; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1933; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
1934; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1935; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1936; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
1937; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1938; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1939; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
1940; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1941; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1942; CHECK-SSE2-NEXT:    retq
1943;
1944; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo:
1945; CHECK-SSE41:       # %bb.0:
1946; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1717986919]
1947; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1948; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1949; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
1950; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
1951; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1952; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1953; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,4294967295,1,0]
1954; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
1955; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
1956; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
1957; CHECK-SSE41-NEXT:    psrad $1, %xmm1
1958; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm3
1959; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
1960; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm4
1961; CHECK-SSE41-NEXT:    psrad $3, %xmm4
1962; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm1[0,1,2,3],xmm4[4,5,6,7]
1963; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1964; CHECK-SSE41-NEXT:    psrld $31, %xmm2
1965; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1966; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5,6,7]
1967; CHECK-SSE41-NEXT:    paddd %xmm4, %xmm2
1968; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
1969; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
1970; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1971; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1972; CHECK-SSE41-NEXT:    retq
1973;
1974; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo:
1975; CHECK-AVX1:       # %bb.0:
1976; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1717986919]
1977; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1978; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1979; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
1980; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
1981; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1982; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1983; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
1984; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
1985; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm2
1986; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm2[4,5,6,7]
1987; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm4
1988; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7]
1989; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1990; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
1991; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1992; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7]
1993; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
1994; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
1995; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
1996; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
1997; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1998; CHECK-AVX1-NEXT:    retq
1999;
2000; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo:
2001; CHECK-AVX2:       # %bb.0:
2002; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1717986919]
2003; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2004; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2005; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2006; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2007; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2008; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2009; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
2010; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2011; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
2012; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2013; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3]
2014; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
2015; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2016; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2017; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2018; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2019; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2020; CHECK-AVX2-NEXT:    retq
2021;
2022; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo:
2023; CHECK-AVX512VL:       # %bb.0:
2024; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2025; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
2026; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2027; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2028; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2029; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2030; CHECK-AVX512VL-NEXT:    retq
; IR under test. Divisor lanes: 5 (odd), 4294967295 (all 32 bits set),
; 16 (power of two), 5 (odd). The icmp compares each remainder with 0 and the
; zext widens the <4 x i1> result to <4 x i32>.
2031  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
2032  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2033  %ret = zext <4 x i1> %cmp to <4 x i32>
2034  ret <4 x i32> %ret
2035}
2036
2037; One all-ones divisor and one power-of-two divisor in even divisor
2038define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; Per lane, yields 1 when %X srem <14, 4294967295, 16, 14> equals 0, else 0.
; The FileCheck assertions below are autogenerated (see the NOTE at the top of
; this file: utils/update_llc_test_checks.py); regenerate rather than hand-edit.
2039; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo:
2040; CHECK-SSE2:       # %bb.0:
2041; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,4294967295]
2042; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
2043; CHECK-SSE2-NEXT:    pand %xmm2, %xmm3
2044; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2045; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
2046; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
2047; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2454267027,0,2147483649,2454267027]
2048; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
2049; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
2050; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
2051; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm3
2052; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
2053; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2054; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2055; CHECK-SSE2-NEXT:    pmuludq %xmm6, %xmm5
2056; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
2057; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
2058; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm3
2059; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,4294967295,1,1]
2060; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm5
2061; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm5
2062; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
2063; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2064; CHECK-SSE2-NEXT:    pmuludq %xmm6, %xmm4
2065; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
2066; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
2067; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm5
2068; CHECK-SSE2-NEXT:    movdqa %xmm5, %xmm3
2069; CHECK-SSE2-NEXT:    psrad $3, %xmm3
2070; CHECK-SSE2-NEXT:    movdqa %xmm5, %xmm4
2071; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,0],xmm3[0,0]
2072; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[2,0],xmm3[2,3]
2073; CHECK-SSE2-NEXT:    psrld $31, %xmm5
2074; CHECK-SSE2-NEXT:    pand %xmm2, %xmm5
2075; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm5
2076; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [14,4294967295,16,14]
2077; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
2078; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm5
2079; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3]
2080; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2081; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
2082; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2083; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2084; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm0
2085; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2086; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2087; CHECK-SSE2-NEXT:    retq
2088;
2089; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo:
2090; CHECK-SSE41:       # %bb.0:
2091; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,0,2147483649,2454267027]
2092; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2093; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2094; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
2095; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
2096; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2097; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2098; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1,4294967295,1,1]
2099; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
2100; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
2101; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
2102; CHECK-SSE41-NEXT:    psrad $3, %xmm1
2103; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7]
2104; CHECK-SSE41-NEXT:    psrld $31, %xmm2
2105; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
2106; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5,6,7]
2107; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
2108; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
2109; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
2110; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
2111; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2112; CHECK-SSE41-NEXT:    retq
2113;
2114; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo:
2115; CHECK-AVX1:       # %bb.0:
2116; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,0,2147483649,2454267027]
2117; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2118; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2119; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2120; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2121; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2122; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2123; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
2124; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2125; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
2126; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5,6,7]
2127; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
2128; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2129; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7]
2130; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
2131; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2132; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2133; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2134; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2135; CHECK-AVX1-NEXT:    retq
2136;
2137; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo:
2138; CHECK-AVX2:       # %bb.0:
2139; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,0,2147483649,2454267027]
2140; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2141; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2142; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2143; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2144; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2145; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2146; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
2147; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2148; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
2149; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2150; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3]
2151; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
2152; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2153; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2154; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2155; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2156; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2157; CHECK-AVX2-NEXT:    retq
2158;
2159; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo:
2160; CHECK-AVX512VL:       # %bb.0:
2161; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2162; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
2163; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2164; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2165; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2166; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2167; CHECK-AVX512VL-NEXT:    retq
; IR under test. Divisor lanes: 14 (even), 4294967295 (all 32 bits set),
; 16 (power of two), 14 (even). The icmp compares each remainder with 0 and
; the zext widens the <4 x i1> result to <4 x i32>.
2168  %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
2169  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2170  %ret = zext <4 x i1> %cmp to <4 x i32>
2171  ret <4 x i32> %ret
2172}
2173
2174; One all-ones divisor and one power-of-two divisor in odd+even divisor
2175define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; Per lane, yields 1 when %X srem <5, 4294967295, 16, 100> equals 0, else 0.
; The FileCheck assertions below are autogenerated (see the NOTE at the top of
; this file: utils/update_llc_test_checks.py); regenerate rather than hand-edit.
2176; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2177; CHECK-SSE2:       # %bb.0:
2178; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,4294967295,1,0]
2179; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
2180; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
2181; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
2182; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2183; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2184; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
2185; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2186; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2187; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [1717986919,0,2147483649,1374389535]
2188; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
2189; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm1
2190; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
2191; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
2192; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
2193; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
2194; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2195; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2196; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
2197; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
2198; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
2199; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [0,0,4294967295,0]
2200; CHECK-SSE2-NEXT:    pand %xmm0, %xmm5
2201; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm5
2202; CHECK-SSE2-NEXT:    psubd %xmm5, %xmm2
2203; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
2204; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
2205; CHECK-SSE2-NEXT:    psrad $5, %xmm3
2206; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
2207; CHECK-SSE2-NEXT:    psrad $3, %xmm4
2208; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
2209; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
2210; CHECK-SSE2-NEXT:    psrad $1, %xmm3
2211; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
2212; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3]
2213; CHECK-SSE2-NEXT:    psrld $31, %xmm2
2214; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
2215; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
2216; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,4294967295,16,100]
2217; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2218; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
2219; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2220; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2221; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
2222; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2223; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2224; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
2225; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2226; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2227; CHECK-SSE2-NEXT:    retq
2228;
2229; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2230; CHECK-SSE41:       # %bb.0:
2231; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1374389535]
2232; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2233; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2234; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
2235; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
2236; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2237; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2238; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,4294967295,1,0]
2239; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
2240; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
2241; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
2242; CHECK-SSE41-NEXT:    psrad $5, %xmm1
2243; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
2244; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm3
2245; CHECK-SSE41-NEXT:    psrad $3, %xmm3
2246; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm4
2247; CHECK-SSE41-NEXT:    psrad $1, %xmm4
2248; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm3[4,5,6,7]
2249; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm1[2,3],xmm4[4,5],xmm1[6,7]
2250; CHECK-SSE41-NEXT:    psrld $31, %xmm2
2251; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2252; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5,6,7]
2253; CHECK-SSE41-NEXT:    paddd %xmm4, %xmm2
2254; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
2255; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
2256; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2257; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2258; CHECK-SSE41-NEXT:    retq
2259;
2260; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2261; CHECK-AVX1:       # %bb.0:
2262; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1374389535]
2263; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2264; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2265; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2266; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2267; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2268; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2269; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
2270; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2271; CHECK-AVX1-NEXT:    vpsrad $5, %xmm1, %xmm2
2272; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7]
2273; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm3
2274; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm4
2275; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
2276; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
2277; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
2278; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2279; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7]
2280; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
2281; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2282; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2283; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2284; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2285; CHECK-AVX1-NEXT:    retq
2286;
2287; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2288; CHECK-AVX2:       # %bb.0:
2289; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1374389535]
2290; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2291; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2292; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2293; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2294; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2295; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2296; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
2297; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2298; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
2299; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2300; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3]
2301; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
2302; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2303; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2304; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2305; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2306; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2307; CHECK-AVX2-NEXT:    retq
2308;
2309; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2310; CHECK-AVX512VL:       # %bb.0:
2311; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2312; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
2313; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2314; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2315; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2316; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2317; CHECK-AVX512VL-NEXT:    retq
; IR under test. Divisor lanes: 5 (odd), 4294967295 (all 32 bits set),
; 16 (power of two), 100 (even). The icmp compares each remainder with 0 and
; the zext widens the <4 x i1> result to <4 x i32>.
2318  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
2319  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2320  %ret = zext <4 x i1> %cmp to <4 x i32>
2321  ret <4 x i32> %ret
2322}
2323
2324;------------------------------------------------------------------------------;
2325
2326; One all-ones divisor and one one divisor in odd divisor
2327define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
; Per lane, yields 1 when %X srem <5, 4294967295, 1, 5> equals 0, else 0.
; The FileCheck assertions below are autogenerated (see the NOTE at the top of
; this file: utils/update_llc_test_checks.py); regenerate rather than hand-edit.
2328; CHECK-SSE2-LABEL: test_srem_odd_allones_and_one:
2329; CHECK-SSE2:       # %bb.0:
2330; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
2331; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
2332; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
2333; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2334; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
2335; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
2336; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2337; CHECK-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
2338; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
2339; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
2340; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
2341; CHECK-SSE2-NEXT:    retq
2342;
2343; CHECK-SSE41-LABEL: test_srem_odd_allones_and_one:
2344; CHECK-SSE41:       # %bb.0:
2345; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
2346; CHECK-SSE41-NEXT:    paddd {{.*}}(%rip), %xmm0
2347; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,4294967295,4294967295,858993458]
2348; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
2349; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2350; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2351; CHECK-SSE41-NEXT:    retq
2352;
2353; CHECK-AVX1-LABEL: test_srem_odd_allones_and_one:
2354; CHECK-AVX1:       # %bb.0:
2355; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2356; CHECK-AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
2357; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2358; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2359; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2360; CHECK-AVX1-NEXT:    retq
2361;
2362; CHECK-AVX2-LABEL: test_srem_odd_allones_and_one:
2363; CHECK-AVX2:       # %bb.0:
2364; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
2365; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
2366; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
2367; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
2368; CHECK-AVX2-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2369; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2370; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2371; CHECK-AVX2-NEXT:    retq
2372;
2373; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_one:
2374; CHECK-AVX512VL:       # %bb.0:
2375; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
2376; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
2377; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2378; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2379; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2380; CHECK-AVX512VL-NEXT:    retq
; IR under test. Divisor lanes: 5 (odd), 4294967295 (all 32 bits set),
; 1 (one), 5 (odd). The icmp compares each remainder with 0 and the zext
; widens the <4 x i1> result to <4 x i32>.
2381  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5>
2382  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2383  %ret = zext <4 x i1> %cmp to <4 x i32>
2384  ret <4 x i32> %ret
2385}
2386
2387; One all-ones divisor and one one divisor in even divisor
; Computes zext((X srem <14, 4294967295, 1, 14>) == 0) lane by lane: lanes 0
; and 3 use the even divisor 14, lane 1 uses the all-ones divisor (4294967295
; is the all-ones i32 bit pattern), and lane 2 uses the divisor 1.
; The autogenerated assertions below pin the llc output for each -mattr
; configuration named in the RUN lines at the top of this file.
2388define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
2389; CHECK-SSE2-LABEL: test_srem_even_allones_and_one:
2390; CHECK-SSE2:       # %bb.0:
2391; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,0,0,4294967295]
2392; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
2393; CHECK-SSE2-NEXT:    pand %xmm2, %xmm3
2394; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2395; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
2396; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
2397; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2454267027,0,0,2454267027]
2398; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
2399; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
2400; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
2401; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm3
2402; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
2403; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[2,2,3,3]
2404; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2405; CHECK-SSE2-NEXT:    pmuludq %xmm6, %xmm5
2406; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
2407; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
2408; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm3
2409; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,4294967295,1,1]
2410; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm5
2411; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm5
2412; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
2413; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2414; CHECK-SSE2-NEXT:    pmuludq %xmm6, %xmm4
2415; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
2416; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
2417; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm5
2418; CHECK-SSE2-NEXT:    movdqa %xmm5, %xmm3
2419; CHECK-SSE2-NEXT:    psrad $3, %xmm3
2420; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,3],xmm5[1,2]
2421; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2,3,1]
2422; CHECK-SSE2-NEXT:    psrld $31, %xmm5
2423; CHECK-SSE2-NEXT:    pand %xmm2, %xmm5
2424; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm5
2425; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [14,4294967295,1,14]
2426; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
2427; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm5
2428; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3]
2429; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2430; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
2431; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2432; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2433; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm0
2434; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2435; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2436; CHECK-SSE2-NEXT:    retq
2437;
2438; CHECK-SSE41-LABEL: test_srem_even_allones_and_one:
2439; CHECK-SSE41:       # %bb.0:
2440; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,0,0,2454267027]
2441; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2442; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2443; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
2444; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
2445; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2446; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2447; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1,4294967295,1,1]
2448; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
2449; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
2450; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
2451; CHECK-SSE41-NEXT:    psrad $3, %xmm1
2452; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5],xmm1[6,7]
2453; CHECK-SSE41-NEXT:    psrld $31, %xmm2
2454; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
2455; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7]
2456; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
2457; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
2458; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
2459; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
2460; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2461; CHECK-SSE41-NEXT:    retq
2462;
2463; CHECK-AVX1-LABEL: test_srem_even_allones_and_one:
2464; CHECK-AVX1:       # %bb.0:
2465; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,0,0,2454267027]
2466; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2467; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2468; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2469; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2470; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2471; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2472; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
2473; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2474; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
2475; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3,4,5],xmm2[6,7]
2476; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
2477; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2478; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7]
2479; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
2480; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2481; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2482; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2483; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2484; CHECK-AVX1-NEXT:    retq
2485;
2486; CHECK-AVX2-LABEL: test_srem_even_allones_and_one:
2487; CHECK-AVX2:       # %bb.0:
2488; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,0,0,2454267027]
2489; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2490; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2491; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2492; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2493; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2494; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2495; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
2496; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2497; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
2498; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2499; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
2500; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
2501; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2502; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2503; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2504; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2505; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2506; CHECK-AVX2-NEXT:    retq
2507;
2508; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_one:
2509; CHECK-AVX512VL:       # %bb.0:
2510; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
2511; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
2512; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
2513; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2514; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2515; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2516; CHECK-AVX512VL-NEXT:    retq
2517  %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14> ; per-lane signed remainder
2518  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true when its remainder is zero
2519  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 result to an i32 holding 0 or 1
2520  ret <4 x i32> %ret
2521}
2522
2523; One all-ones divisor and one one divisor in odd+even divisor
2524define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
; Computes zext((X srem <5, 4294967295, 1, 100>) == 0) lane by lane: lane 0
; uses the odd divisor 5, lane 3 the even divisor 100, lane 1 the all-ones
; divisor (4294967295 is the all-ones i32 bit pattern), and lane 2 the divisor 1.
; The autogenerated assertions below pin the llc output for each -mattr
; configuration named in the RUN lines at the top of this file.
2525; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_one:
2526; CHECK-SSE2:       # %bb.0:
2527; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,4294967295,1,0]
2528; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
2529; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
2530; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
2531; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2532; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2533; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
2534; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2535; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2536; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [1717986919,0,0,1374389535]
2537; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
2538; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm1
2539; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
2540; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[2,2,3,3]
2541; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm1
2542; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
2543; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2544; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2545; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
2546; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
2547; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
2548; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm2
2549; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
2550; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
2551; CHECK-SSE2-NEXT:    psrad $5, %xmm3
2552; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
2553; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
2554; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
2555; CHECK-SSE2-NEXT:    psrad $1, %xmm3
2556; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
2557; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3]
2558; CHECK-SSE2-NEXT:    psrld $31, %xmm2
2559; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
2560; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
2561; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,4294967295,1,100]
2562; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2563; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
2564; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2565; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2566; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
2567; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2568; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2569; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
2570; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2571; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2572; CHECK-SSE2-NEXT:    retq
2573;
2574; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_one:
2575; CHECK-SSE41:       # %bb.0:
2576; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,0,0,1374389535]
2577; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2578; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2579; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
2580; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
2581; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2582; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2583; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,4294967295,1,0]
2584; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
2585; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
2586; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
2587; CHECK-SSE41-NEXT:    psrad $5, %xmm1
2588; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
2589; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm3
2590; CHECK-SSE41-NEXT:    psrad $1, %xmm3
2591; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2592; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,3],xmm3[4,5],xmm1[6,7]
2593; CHECK-SSE41-NEXT:    psrld $31, %xmm2
2594; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2595; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3,4,5],xmm2[6,7]
2596; CHECK-SSE41-NEXT:    paddd %xmm3, %xmm2
2597; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
2598; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
2599; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2600; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2601; CHECK-SSE41-NEXT:    retq
2602;
2603; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_one:
2604; CHECK-AVX1:       # %bb.0:
2605; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,0,0,1374389535]
2606; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2607; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2608; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2609; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2610; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2611; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2612; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
2613; CHECK-AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2614; CHECK-AVX1-NEXT:    vpsrad $5, %xmm1, %xmm2
2615; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7]
2616; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
2617; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
2618; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
2619; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
2620; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2621; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7]
2622; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
2623; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2624; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2625; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2626; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2627; CHECK-AVX1-NEXT:    retq
2628;
2629; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_one:
2630; CHECK-AVX2:       # %bb.0:
2631; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,0,0,1374389535]
2632; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2633; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2634; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2635; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2636; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2637; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2638; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm2
2639; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2640; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
2641; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2642; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
2643; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
2644; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2645; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2646; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2647; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2648; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2649; CHECK-AVX2-NEXT:    retq
2650;
2651; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_one:
2652; CHECK-AVX512VL:       # %bb.0:
2653; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2654; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
2655; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2656; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2657; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2658; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2659; CHECK-AVX512VL-NEXT:    retq
2660  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100> ; per-lane signed remainder
2661  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true when its remainder is zero
2662  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 result to an i32 holding 0 or 1
2663  ret <4 x i32> %ret
2664}
2665
2666;------------------------------------------------------------------------------;
2667
2668; One power-of-two divisor and one one divisor in odd divisor
; Computes zext((X srem <5, 16, 1, 5>) == 0) lane by lane: lanes 0 and 3 use
; the odd divisor 5, lane 1 the power-of-two divisor 16, and lane 2 the
; divisor 1.
; The autogenerated assertions below pin the llc output for each -mattr
; configuration named in the RUN lines at the top of this file.
2669define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
2670; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo_and_one:
2671; CHECK-SSE2:       # %bb.0:
2672; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1717986919,2147483649,0,1717986919]
2673; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
2674; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
2675; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
2676; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
2677; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2678; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm4
2679; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3]
2680; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2681; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2682; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
2683; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
2684; CHECK-SSE2-NEXT:    pand %xmm2, %xmm4
2685; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,4294967295,0,0]
2686; CHECK-SSE2-NEXT:    pand %xmm0, %xmm2
2687; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm2
2688; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm3
2689; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,4294967295,4294967295,0]
2690; CHECK-SSE2-NEXT:    pand %xmm0, %xmm2
2691; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
2692; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
2693; CHECK-SSE2-NEXT:    psrad $1, %xmm3
2694; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
2695; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
2696; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm5
2697; CHECK-SSE2-NEXT:    psrad $3, %xmm5
2698; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
2699; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3]
2700; CHECK-SSE2-NEXT:    psrld $31, %xmm2
2701; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
2702; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
2703; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,16,1,5]
2704; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2705; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
2706; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2707; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2708; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
2709; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2710; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2711; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
2712; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2713; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2714; CHECK-SSE2-NEXT:    retq
2715;
2716; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo_and_one:
2717; CHECK-SSE41:       # %bb.0:
2718; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1717986919]
2719; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2720; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2721; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
2722; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
2723; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2724; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2725; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
2726; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm3
2727; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7]
2728; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm3
2729; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm1
2730; CHECK-SSE41-NEXT:    psrad $1, %xmm1
2731; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm4
2732; CHECK-SSE41-NEXT:    psrad $3, %xmm4
2733; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7]
2734; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
2735; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
2736; CHECK-SSE41-NEXT:    psrld $31, %xmm3
2737; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
2738; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm3
2739; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm3
2740; CHECK-SSE41-NEXT:    psubd %xmm3, %xmm0
2741; CHECK-SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
2742; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2743; CHECK-SSE41-NEXT:    retq
2744;
2745; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo_and_one:
2746; CHECK-AVX1:       # %bb.0:
2747; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1717986919]
2748; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2749; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2750; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2751; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2752; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2753; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2754; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
2755; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2,3,4,5],xmm2[6,7]
2756; CHECK-AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
2757; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
2758; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm4
2759; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm3[4,5,6,7]
2760; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
2761; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
2762; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
2763; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
2764; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
2765; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2766; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2767; CHECK-AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
2768; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2769; CHECK-AVX1-NEXT:    retq
2770;
2771; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo_and_one:
2772; CHECK-AVX2:       # %bb.0:
2773; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1717986919]
2774; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2775; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2776; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2777; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2778; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2779; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2780; CHECK-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
2781; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1,2],xmm2[3]
2782; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
2783; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm3
2784; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm3[0,1],xmm2[2],xmm3[3]
2785; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
2786; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
2787; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2788; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2789; CHECK-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
2790; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2791; CHECK-AVX2-NEXT:    retq
2792;
2793; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo_and_one:
2794; CHECK-AVX512VL:       # %bb.0:
2795; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2796; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
2797; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2798; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2799; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2800; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2801; CHECK-AVX512VL-NEXT:    retq
2802  %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5> ; per-lane signed remainder
2803  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true when its remainder is zero
2804  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 result to an i32 holding 0 or 1
2805  ret <4 x i32> %ret
2806}
2807
2808; One power-of-two divisor and one one divisor in even divisor
; Computes zext((X srem <14, 16, 1, 14>) == 0) lane by lane: lanes 0 and 3 use
; the even divisor 14, lane 1 the power-of-two divisor 16, and lane 2 the
; divisor 1.
; The autogenerated assertions below pin the llc output for each -mattr
; configuration named in the RUN lines at the top of this file.
2809define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
2810; CHECK-SSE2-LABEL: test_srem_even_poweroftwo_and_one:
2811; CHECK-SSE2:       # %bb.0:
2812; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295]
2813; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
2814; CHECK-SSE2-NEXT:    pand %xmm2, %xmm3
2815; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2816; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
2817; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
2818; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2454267027,2147483649,0,2454267027]
2819; CHECK-SSE2-NEXT:    pand %xmm5, %xmm4
2820; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm4
2821; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
2822; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm3
2823; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
2824; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2825; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2826; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm6
2827; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,3,2,3]
2828; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
2829; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm3
2830; CHECK-SSE2-NEXT:    paddd %xmm0, %xmm3
2831; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm4
2832; CHECK-SSE2-NEXT:    psrad $3, %xmm4
2833; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm5
2834; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[2,0],xmm4[3,0]
2835; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0,2]
2836; CHECK-SSE2-NEXT:    psrld $31, %xmm3
2837; CHECK-SSE2-NEXT:    pand %xmm2, %xmm3
2838; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm3
2839; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [14,16,1,14]
2840; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2841; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm3
2842; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2843; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2844; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
2845; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2846; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
2847; CHECK-SSE2-NEXT:    psubd %xmm3, %xmm0
2848; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2849; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2850; CHECK-SSE2-NEXT:    retq
2851;
2852; CHECK-SSE41-LABEL: test_srem_even_poweroftwo_and_one:
2853; CHECK-SSE41:       # %bb.0:
2854; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027]
2855; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2856; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2857; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
2858; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
2859; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2860; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2861; CHECK-SSE41-NEXT:    paddd %xmm0, %xmm1
2862; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
2863; CHECK-SSE41-NEXT:    psrad $3, %xmm2
2864; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
2865; CHECK-SSE41-NEXT:    psrld $31, %xmm1
2866; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
2867; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
2868; CHECK-SSE41-NEXT:    paddd %xmm2, %xmm1
2869; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
2870; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
2871; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
2872; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2873; CHECK-SSE41-NEXT:    retq
2874;
2875; CHECK-AVX1-LABEL: test_srem_even_poweroftwo_and_one:
2876; CHECK-AVX1:       # %bb.0:
2877; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027]
2878; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2879; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2880; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2881; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2882; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2883; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2884; CHECK-AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm1
2885; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
2886; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
2887; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
2888; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2889; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
2890; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
2891; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2892; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2893; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2894; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2895; CHECK-AVX1-NEXT:    retq
2896;
2897; CHECK-AVX2-LABEL: test_srem_even_poweroftwo_and_one:
2898; CHECK-AVX2:       # %bb.0:
2899; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027]
2900; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2901; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2902; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
2903; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
2904; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2905; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2906; CHECK-AVX2-NEXT:    vpaddd %xmm0, %xmm1, %xmm1
2907; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
2908; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
2909; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
2910; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
2911; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
2912; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
2913; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
2914; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
2915; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2916; CHECK-AVX2-NEXT:    retq
2917;
2918; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo_and_one:
2919; CHECK-AVX512VL:       # %bb.0:
2920; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
2921; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
2922; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
2923; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
2924; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2925; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2926; CHECK-AVX512VL-NEXT:    retq
2927  %srem = srem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14> ; per-lane signed remainder
2928  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true when its remainder is zero
2929  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 result to an i32 holding 0 or 1
2930  ret <4 x i32> %ret
2931}
2932
2933; One power-of-two divisor and one one divisor in odd+even divisor
2934define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
2935; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo_and_one:
2936; CHECK-SSE2:       # %bb.0:
2937; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1717986919,2147483649,0,1374389535]
2938; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
2939; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
2940; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
2941; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
2942; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2943; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm4
2944; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3]
2945; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2946; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
2947; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
2948; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
2949; CHECK-SSE2-NEXT:    pand %xmm2, %xmm4
2950; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,4294967295,0,0]
2951; CHECK-SSE2-NEXT:    pand %xmm0, %xmm2
2952; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm2
2953; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm3
2954; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,4294967295,4294967295,0]
2955; CHECK-SSE2-NEXT:    pand %xmm0, %xmm2
2956; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
2957; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
2958; CHECK-SSE2-NEXT:    psrad $5, %xmm3
2959; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
2960; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
2961; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
2962; CHECK-SSE2-NEXT:    psrad $3, %xmm3
2963; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm5
2964; CHECK-SSE2-NEXT:    psrad $1, %xmm5
2965; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
2966; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
2967; CHECK-SSE2-NEXT:    psrld $31, %xmm2
2968; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
2969; CHECK-SSE2-NEXT:    paddd %xmm5, %xmm2
2970; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,16,1,100]
2971; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2972; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
2973; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2974; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2975; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
2976; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2977; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2978; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
2979; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2980; CHECK-SSE2-NEXT:    psrld $31, %xmm0
2981; CHECK-SSE2-NEXT:    retq
2982;
2983; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo_and_one:
2984; CHECK-SSE41:       # %bb.0:
2985; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1374389535]
2986; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2987; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2988; CHECK-SSE41-NEXT:    pmuldq %xmm2, %xmm3
2989; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm1
2990; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2991; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2992; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
2993; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm3
2994; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7]
2995; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm3
2996; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm1
2997; CHECK-SSE41-NEXT:    psrad $5, %xmm1
2998; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm4
2999; CHECK-SSE41-NEXT:    psrad $3, %xmm4
3000; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7]
3001; CHECK-SSE41-NEXT:    movdqa %xmm3, %xmm1
3002; CHECK-SSE41-NEXT:    psrad $1, %xmm1
3003; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
3004; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
3005; CHECK-SSE41-NEXT:    psrld $31, %xmm3
3006; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
3007; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm3
3008; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm3
3009; CHECK-SSE41-NEXT:    psubd %xmm3, %xmm0
3010; CHECK-SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
3011; CHECK-SSE41-NEXT:    psrld $31, %xmm0
3012; CHECK-SSE41-NEXT:    retq
3013;
3014; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo_and_one:
3015; CHECK-AVX1:       # %bb.0:
3016; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1374389535]
3017; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
3018; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
3019; CHECK-AVX1-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
3020; CHECK-AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
3021; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3022; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
3023; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
3024; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2,3,4,5],xmm2[6,7]
3025; CHECK-AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
3026; CHECK-AVX1-NEXT:    vpsrad $5, %xmm1, %xmm3
3027; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm4
3028; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
3029; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm4
3030; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7]
3031; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
3032; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
3033; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
3034; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
3035; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
3036; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
3037; CHECK-AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
3038; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
3039; CHECK-AVX1-NEXT:    retq
3040;
3041; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo_and_one:
3042; CHECK-AVX2:       # %bb.0:
3043; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1374389535]
3044; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
3045; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
3046; CHECK-AVX2-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
3047; CHECK-AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
3048; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3049; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
3050; CHECK-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
3051; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1,2],xmm2[3]
3052; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
3053; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm3
3054; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm3[0,1],xmm2[2],xmm3[3]
3055; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
3056; CHECK-AVX2-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
3057; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
3058; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
3059; CHECK-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
3060; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
3061; CHECK-AVX2-NEXT:    retq
3062;
3063; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo_and_one:
3064; CHECK-AVX512VL:       # %bb.0:
3065; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
3066; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
3067; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
3068; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
3069; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
3070; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
3071; CHECK-AVX512VL-NEXT:    retq
3072  %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100>
3073  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
3074  %ret = zext <4 x i1> %cmp to <4 x i32>
3075  ret <4 x i32> %ret
3076}
3077
3078;------------------------------------------------------------------------------;
3079
; Codegen test for `zext((X srem <5, 4294967295, 16, 1>) == 0)` on <4 x i32>.
; The non-splat divisor mixes, per lane: an odd value (5), the all-ones bit
; pattern (4294967295), a power of two (16) and one (1).
; The CHECK lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
3080define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
3081; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3082; CHECK-SSE2:       # %bb.0:
3083; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,4294967295,1,1]
3084; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
3085; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
3086; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
3087; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3088; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
3089; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
3090; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
3091; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
3092; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
3093; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm4
3094; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
3095; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1717986919,0,2147483649,0]
3096; CHECK-SSE2-NEXT:    pand %xmm2, %xmm4
3097; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [0,0,4294967295,0]
3098; CHECK-SSE2-NEXT:    pand %xmm0, %xmm5
3099; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm5
3100; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm2
3101; CHECK-SSE2-NEXT:    psrlq $32, %xmm2
3102; CHECK-SSE2-NEXT:    psubd %xmm5, %xmm2
3103; CHECK-SSE2-NEXT:    paddd %xmm3, %xmm2
3104; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
3105; CHECK-SSE2-NEXT:    psrad $3, %xmm3
3106; CHECK-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm2[1]
3107; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm4
3108; CHECK-SSE2-NEXT:    psrad $1, %xmm4
3109; CHECK-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm2[0]
3110; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,3],xmm3[0,3]
3111; CHECK-SSE2-NEXT:    psrld $31, %xmm2
3112; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
3113; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm2
3114; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [5,4294967295,16,1]
3115; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
3116; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
3117; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3118; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3119; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
3120; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
3121; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
3122; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
3123; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
3124; CHECK-SSE2-NEXT:    psrld $31, %xmm0
3125; CHECK-SSE2-NEXT:    retq
3126;
3127; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3128; CHECK-SSE41:       # %bb.0:
3129; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,4294967295,1,1]
3130; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm1
3131; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <1717986919,u,2147483649,u>
3132; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm2
3133; CHECK-SSE41-NEXT:    psrlq $32, %xmm2
3134; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
3135; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
3136; CHECK-SSE41-NEXT:    psrad $3, %xmm1
3137; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm3
3138; CHECK-SSE41-NEXT:    psrad $1, %xmm3
3139; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
3140; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
3141; CHECK-SSE41-NEXT:    psrld $31, %xmm2
3142; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
3143; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
3144; CHECK-SSE41-NEXT:    paddd %xmm3, %xmm2
3145; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
3146; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
3147; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
3148; CHECK-SSE41-NEXT:    psrld $31, %xmm0
3149; CHECK-SSE41-NEXT:    retq
3150;
3151; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3152; CHECK-AVX1:       # %bb.0:
3153; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm1
3154; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
3155; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm2, %xmm2
3156; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
3157; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
3158; CHECK-AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
3159; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
3160; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
3161; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
3162; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
3163; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
3164; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
3165; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
3166; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
3167; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
3168; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
3169; CHECK-AVX1-NEXT:    retq
3170;
3171; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3172; CHECK-AVX2:       # %bb.0:
3173; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm1
3174; CHECK-AVX2-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
3175; CHECK-AVX2-NEXT:    vpsrlq $32, %xmm2, %xmm2
3176; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
3177; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
3178; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
3179; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3]
3180; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
3181; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
3182; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
3183; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
3184; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
3185; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
3186; CHECK-AVX2-NEXT:    retq
3187;
3188; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3189; CHECK-AVX512VL:       # %bb.0:
3190; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
3191; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
3192; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
3193; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
3194; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
3195; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
3196; CHECK-AVX512VL-NEXT:    retq
; IR under test: per-lane signed remainder by the constant vector, equality
; test of each remainder against zero, then zero-extension of the i1 results
; so every output lane is 1 when the remainder is zero and 0 otherwise.
3197  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1>
3198  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
3199  %ret = zext <4 x i1> %cmp to <4 x i32>
3200  ret <4 x i32> %ret
3201}
3202
; Codegen test for `zext((X srem <14, 4294967295, 16, 1>) == 0)` on <4 x i32>.
; The non-splat divisor mixes, per lane: an even non-power-of-two value (14),
; the all-ones bit pattern (4294967295), a power of two (16) and one (1).
; The CHECK lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
3203define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
3204; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3205; CHECK-SSE2:       # %bb.0:
3206; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,4294967295,1,1]
3207; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
3208; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
3209; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3210; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3211; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
3212; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
3213; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
3214; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3215; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,0,4294967295,0]
3216; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm4
3217; CHECK-SSE2-NEXT:    pand %xmm3, %xmm4
3218; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
3219; CHECK-SSE2-NEXT:    pxor %xmm5, %xmm5
3220; CHECK-SSE2-NEXT:    pcmpgtd %xmm0, %xmm5
3221; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [2454267027,0,2147483649,0]
3222; CHECK-SSE2-NEXT:    pand %xmm6, %xmm5
3223; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm5
3224; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm6
3225; CHECK-SSE2-NEXT:    psrlq $32, %xmm6
3226; CHECK-SSE2-NEXT:    psubd %xmm5, %xmm6
3227; CHECK-SSE2-NEXT:    paddd %xmm2, %xmm6
3228; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,3,2,3]
3229; CHECK-SSE2-NEXT:    movdqa %xmm6, %xmm4
3230; CHECK-SSE2-NEXT:    psrad $3, %xmm4
3231; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
3232; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
3233; CHECK-SSE2-NEXT:    psrld $31, %xmm6
3234; CHECK-SSE2-NEXT:    pand %xmm3, %xmm6
3235; CHECK-SSE2-NEXT:    paddd %xmm4, %xmm6
3236; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [14,4294967295,16,1]
3237; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
3238; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm6
3239; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm6[0,2,2,3]
3240; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
3241; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm2
3242; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3243; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
3244; CHECK-SSE2-NEXT:    psubd %xmm4, %xmm0
3245; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
3246; CHECK-SSE2-NEXT:    psrld $31, %xmm0
3247; CHECK-SSE2-NEXT:    retq
3248;
3249; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3250; CHECK-SSE41:       # %bb.0:
3251; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,4294967295,1,1]
3252; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm1
3253; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <2454267027,u,2147483649,u>
3254; CHECK-SSE41-NEXT:    pmuldq %xmm0, %xmm2
3255; CHECK-SSE41-NEXT:    psrlq $32, %xmm2
3256; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
3257; CHECK-SSE41-NEXT:    movdqa %xmm2, %xmm1
3258; CHECK-SSE41-NEXT:    psrad $3, %xmm1
3259; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
3260; CHECK-SSE41-NEXT:    psrld $31, %xmm2
3261; CHECK-SSE41-NEXT:    pxor %xmm3, %xmm3
3262; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
3263; CHECK-SSE41-NEXT:    paddd %xmm1, %xmm2
3264; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
3265; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
3266; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
3267; CHECK-SSE41-NEXT:    psrld $31, %xmm0
3268; CHECK-SSE41-NEXT:    retq
3269;
3270; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3271; CHECK-AVX1:       # %bb.0:
3272; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm1
3273; CHECK-AVX1-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
3274; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm2, %xmm2
3275; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
3276; CHECK-AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
3277; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
3278; CHECK-AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
3279; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
3280; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
3281; CHECK-AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
3282; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
3283; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
3284; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
3285; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
3286; CHECK-AVX1-NEXT:    retq
3287;
3288; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3289; CHECK-AVX2:       # %bb.0:
3290; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm1
3291; CHECK-AVX2-NEXT:    vpmuldq {{.*}}(%rip), %xmm0, %xmm2
3292; CHECK-AVX2-NEXT:    vpsrlq $32, %xmm2, %xmm2
3293; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
3294; CHECK-AVX2-NEXT:    vpsrld $31, %xmm1, %xmm2
3295; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
3296; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3]
3297; CHECK-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
3298; CHECK-AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
3299; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
3300; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
3301; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
3302; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
3303; CHECK-AVX2-NEXT:    retq
3304;
3305; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3306; CHECK-AVX512VL:       # %bb.0:
3307; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
3308; CHECK-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
3309; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
3310; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
3311; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
3312; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
3313; CHECK-AVX512VL-NEXT:    retq
; IR under test: per-lane signed remainder by the constant vector, equality
; test of each remainder against zero, then zero-extension of the i1 results
; so every output lane is 1 when the remainder is zero and 0 otherwise.
3314  %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1>
3315  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
3316  %ret = zext <4 x i1> %cmp to <4 x i32>
3317  ret <4 x i32> %ret
3318}
3319