; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2

; fold (srem x, 1) -> 0
define i32 @combine_srem_by_one(i32 %x) {
; CHECK-LABEL: combine_srem_by_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, 1
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_one(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_one:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

; fold (srem x, -1) -> 0
define i32 @combine_srem_by_negone(i32 %x) {
; CHECK-LABEL: combine_srem_by_negone:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -1
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_negone(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_negone:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_negone:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

; TODO fold (srem x, INT_MIN)
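; For now the tests below expand this case with the generic pow2 scheme at
; c = 31: negative inputs are biased by 2^31-1, the biased value is masked
; down to its sign bit (the truncating multiple of INT_MIN), and that multiple
; is removed from x. The scalar version removes it with an add (lea) rather
; than a sub, which gives the same 32-bit result since the masked value is
; either 0 or 0x80000000 and adding or subtracting 0x80000000 agree modulo
; 2^32.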
define i32 @combine_srem_by_minsigned(i32 %x) {
; CHECK-LABEL: combine_srem_by_minsigned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    sarl $31, %eax
; CHECK-NEXT:    shrl %eax
; CHECK-NEXT:    addl %edi, %eax
; CHECK-NEXT:    andl $-2147483648, %eax # imm = 0x80000000
; CHECK-NEXT:    leal (%rax,%rdi), %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -2147483648
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_minsigned:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_minsigned:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_minsigned:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  ret <4 x i32> %1
}

; TODO fold (srem x, x) -> 0
define i32 @combine_srem_dupe(i32 %x) {
; CHECK-LABEL: combine_srem_dupe:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %edi
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, %x
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_dupe(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_dupe:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrd $1, %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    movl %edx, %ecx
; SSE-NEXT:    movd %xmm0, %esi
; SSE-NEXT:    movl %esi, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %esi
; SSE-NEXT:    movd %edx, %xmm1
; SSE-NEXT:    pinsrd $1, %ecx, %xmm1
; SSE-NEXT:    pextrd $2, %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    pinsrd $2, %edx, %xmm1
; SSE-NEXT:    pextrd $3, %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    pinsrd $3, %edx, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_dupe:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrd $1, %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    movl %edx, %ecx
; AVX-NEXT:    vmovd %xmm0, %esi
; AVX-NEXT:    movl %esi, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %esi
; AVX-NEXT:    vmovd %edx, %xmm1
; AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $2, %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    vpinsrd $2, %edx, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $3, %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    vpinsrd $3, %edx, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, %x
  ret <4 x i32> %1
}

; fold (srem x, y) -> (urem x, y) iff x and y are positive
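; The 'and' with 255 below makes every lane a known non-negative value in
; [0, 255], so the signed remainder equals the unsigned one, and a remainder
; by a power of two reduces to a mask: e.g. 255 srem 4 = 3 = 255 & 3. Hence
; the expected lowering is a single 'and' (<3,3,3,3> for the splat-by-4 case).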
define <4 x i32> @combine_vec_srem_by_pos0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pos0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pos0:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pos0:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [3,3,3,3]
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = srem <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_srem_by_pos1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pos1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_pos1:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = srem <4 x i32> %1, <i32 1, i32 4, i32 8, i32 16>
  ret <4 x i32> %2
}

; fold (srem x, (1 << c)) -> x - (x / (1 << c)) * (1 << c).
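; No division is emitted for this: the expansions below compute
; bias = (x >> 31) >>u (32 - c), then x - ((x + bias) & -(1 << c)).
; For example, with x = -7 and a divisor of 4 (c = 2): bias = 3, x + bias = -4,
; (-4) & -4 = -4, and -7 - (-4) = -3, which matches srem(-7, 4).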
define <4 x i32> @combine_vec_srem_by_pow2a(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2a:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $30, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967292,4294967292,4294967292,4294967292]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %1
}

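; srem by -4: the remainder takes the sign of the dividend, so the value is
; the same as srem by 4, but the expansion below goes through the full
; x - (x sdiv -4) * -4 form: compute x sdiv 4 with the same bias trick,
; negate it, shift left by 2 (multiply by 4), and add the result back to x.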
define <4 x i32> @combine_vec_srem_by_pow2a_neg(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2a_neg:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $30, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $2, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    psubd %xmm1, %xmm2
; SSE-NEXT:    pslld $2, %xmm2
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_pow2a_neg:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpsrad $2, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpslld $2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
  ret <4 x i32> %1
}

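; Non-uniform power-of-two divisors (<1,2,4,8>) use the same bias trick with
; per-lane shift amounts. On AVX2 the quotient and the multiply-back use
; vpsravd/vpsllvd with the log2 vector <0,1,2,3>, and lane 0 (divisor 1) is
; blended back in from x so its remainder comes out as x - x = 0; SSE/AVX1
; emulate the variable shifts with fixed-count shifts plus blends.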
define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2b:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    psrld $29, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE-NEXT:    psrld $30, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $2, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE-NEXT:    psrad $3, %xmm1
; SSE-NEXT:    psrad $1, %xmm2
; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm0[0,1],xmm3[2,3,4,5,6,7]
; SSE-NEXT:    pmulld {{.*}}(%rip), %xmm3
; SSE-NEXT:    psubd %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $29, %xmm2, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpsrld $30, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3]
; AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT:    vpsllvd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 1, i32 2, i32 4, i32 8>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2b_neg:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrld $28, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrld $30, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrld $31, %xmm2
; SSE-NEXT:    psrld $29, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $4, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $2, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $3, %xmm2
; SSE-NEXT:    psrad $1, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    pmulld {{.*}}(%rip), %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $28, %xmm1, %xmm2
; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm3
; AVX1-NEXT:    vpsrld $29, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $4, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrad $3, %xmm1, %xmm3
; AVX1-NEXT:    vpsrad $1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -2, i32 -4, i32 -8, i32 -16>
  ret <4 x i32> %1
}

; OSS-Fuzz #6883
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=6883
define i32 @ossfuzz6883() {
; CHECK-LABEL: ossfuzz6883:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl (%rax), %ecx
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %ecx
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    movl $1, %edi
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %edi
; CHECK-NEXT:    movl %edx, %edi
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    divl %edi
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    retq
  %B17 = or i32 0, 2147483647
  %L6 = load i32, i32* undef
  %B11 = sdiv i32 %L6, %L6
  %B13 = udiv i32 %B17, %B17
  %B14 = srem i32 %B11, %B13
  %B16 = srem i32 %L6, %L6
  %B10 = udiv i32 %L6, %B14
  %B6 = and i32 %B16, %B10
  ret i32 %B6
}