; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2

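; The tests below exercise lowering of vector srem by constants: i16 lanes are
; expanded to multiply-high "magic number" sequences (per-lane scalar code when
; the divisors differ, vector pmulhw/psraw code when the divisor is a splat),
; while power-of-two, 1, and INT16_MIN divisors and i64 elements take the
; other paths checked further down.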
define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
; SSE-LABEL: fold_srem_vec_1:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrw $3, %xmm0, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    imull $32081, %ecx, %ecx # imm = 0x7D51
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    subl %eax, %ecx
; SSE-NEXT:    movzwl %cx, %ecx
; SSE-NEXT:    movswl %cx, %edx
; SSE-NEXT:    shrl $15, %ecx
; SSE-NEXT:    sarl $9, %edx
; SSE-NEXT:    addl %ecx, %edx
; SSE-NEXT:    imull $-1003, %edx, %ecx # imm = 0xFC15
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    movd %xmm0, %ecx
; SSE-NEXT:    movswl %cx, %edx
; SSE-NEXT:    imull $-21385, %edx, %edx # imm = 0xAC77
; SSE-NEXT:    shrl $16, %edx
; SSE-NEXT:    addl %ecx, %edx
; SSE-NEXT:    movzwl %dx, %edx
; SSE-NEXT:    movswl %dx, %esi
; SSE-NEXT:    shrl $15, %edx
; SSE-NEXT:    sarl $6, %esi
; SSE-NEXT:    addl %edx, %esi
; SSE-NEXT:    imull $95, %esi, %edx
; SSE-NEXT:    subl %edx, %ecx
; SSE-NEXT:    movd %ecx, %xmm1
; SSE-NEXT:    pextrw $1, %xmm0, %ecx
; SSE-NEXT:    movswl %cx, %edx
; SSE-NEXT:    imull $-16913, %edx, %edx # imm = 0xBDEF
; SSE-NEXT:    movl %edx, %esi
; SSE-NEXT:    shrl $31, %esi
; SSE-NEXT:    sarl $21, %edx
; SSE-NEXT:    addl %esi, %edx
; SSE-NEXT:    imull $-124, %edx, %edx
; SSE-NEXT:    subl %edx, %ecx
; SSE-NEXT:    pinsrw $1, %ecx, %xmm1
; SSE-NEXT:    pextrw $2, %xmm0, %ecx
; SSE-NEXT:    movswl %cx, %edx
; SSE-NEXT:    imull $2675, %edx, %edx # imm = 0xA73
; SSE-NEXT:    movl %edx, %esi
; SSE-NEXT:    shrl $31, %esi
; SSE-NEXT:    sarl $18, %edx
; SSE-NEXT:    addl %esi, %edx
; SSE-NEXT:    imull $98, %edx, %edx
; SSE-NEXT:    subl %edx, %ecx
; SSE-NEXT:    pinsrw $2, %ecx, %xmm1
; SSE-NEXT:    pinsrw $3, %eax, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fold_srem_vec_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $3, %xmm0, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    imull $32081, %ecx, %ecx # imm = 0x7D51
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    subl %eax, %ecx
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    movswl %cx, %edx
; AVX-NEXT:    shrl $15, %ecx
; AVX-NEXT:    sarl $9, %edx
; AVX-NEXT:    addl %ecx, %edx
; AVX-NEXT:    imull $-1003, %edx, %ecx # imm = 0xFC15
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vmovd %xmm0, %ecx
; AVX-NEXT:    movswl %cx, %edx
; AVX-NEXT:    imull $-21385, %edx, %edx # imm = 0xAC77
; AVX-NEXT:    shrl $16, %edx
; AVX-NEXT:    addl %ecx, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    movswl %dx, %esi
; AVX-NEXT:    shrl $15, %edx
; AVX-NEXT:    sarl $6, %esi
; AVX-NEXT:    addl %edx, %esi
; AVX-NEXT:    imull $95, %esi, %edx
; AVX-NEXT:    subl %edx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpextrw $1, %xmm0, %ecx
; AVX-NEXT:    movswl %cx, %edx
; AVX-NEXT:    imull $-16913, %edx, %edx # imm = 0xBDEF
; AVX-NEXT:    movl %edx, %esi
; AVX-NEXT:    shrl $31, %esi
; AVX-NEXT:    sarl $21, %edx
; AVX-NEXT:    addl %esi, %edx
; AVX-NEXT:    imull $-124, %edx, %edx
; AVX-NEXT:    subl %edx, %ecx
; AVX-NEXT:    vpinsrw $1, %ecx, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $2, %xmm0, %ecx
; AVX-NEXT:    movswl %cx, %edx
; AVX-NEXT:    imull $2675, %edx, %edx # imm = 0xA73
; AVX-NEXT:    movl %edx, %esi
; AVX-NEXT:    shrl $31, %esi
; AVX-NEXT:    sarl $18, %edx
; AVX-NEXT:    addl %esi, %edx
; AVX-NEXT:    imull $98, %edx, %edx
; AVX-NEXT:    subl %edx, %ecx
; AVX-NEXT:    vpinsrw $2, %ecx, %xmm1, %xmm0
; AVX-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
  ret <4 x i16> %1
}
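; With four different divisors the lanes above stay scalar. As a rough sketch
; (constants taken from the checks), the divisor-95 lane computes
;   t   = mulhi_i16(x, -21385) + x
;   q   = (t >> 6) + (t >> 15)        (arithmetic shift plus the sign bit of t)
;   rem = x - 95 * q
; and the other lanes use the analogous multiplier/shift pairs shown in the
; imull/sarl lines.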

define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
; SSE-LABEL: fold_srem_vec_2:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [44151,44151,44151,44151,44151,44151,44151,44151]
; SSE-NEXT:    pmulhw %xmm0, %xmm1
; SSE-NEXT:    paddw %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrlw $15, %xmm2
; SSE-NEXT:    psraw $6, %xmm1
; SSE-NEXT:    paddw %xmm2, %xmm1
; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm1
; SSE-NEXT:    psubw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fold_srem_vec_2:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhw {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vpaddw %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vpsrlw $15, %xmm1, %xmm2
; AVX-NEXT:    vpsraw $6, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  ret <4 x i16> %1
}
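; With a splat divisor the same recipe stays in vector registers in the code
; above: pmulhw produces mulhi(x, -21385) in every lane, paddw adds x back,
; psrlw $15 extracts the sign bits, psraw $6 forms the quotients, and the
; final pmullw/psubw compute x - 95*q.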


; Don't fold if we can combine srem with sdiv.
define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
; SSE-LABEL: combine_srem_sdiv:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [44151,44151,44151,44151,44151,44151,44151,44151]
; SSE-NEXT:    pmulhw %xmm0, %xmm1
; SSE-NEXT:    paddw %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrlw $15, %xmm2
; SSE-NEXT:    psraw $6, %xmm1
; SSE-NEXT:    paddw %xmm2, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [95,95,95,95,95,95,95,95]
; SSE-NEXT:    pmullw %xmm1, %xmm2
; SSE-NEXT:    psubw %xmm2, %xmm0
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_srem_sdiv:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhw {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vpaddw %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vpsrlw $15, %xmm1, %xmm2
; AVX-NEXT:    vpsraw $6, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm1, %xmm2
; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %3 = add <4 x i16> %1, %2
  ret <4 x i16> %3
}
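; The quotient is needed for the sdiv anyway, so the remainder above is formed
; as x - q*95 and the final paddw just adds q back in; no second division
; sequence is emitted.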

; Don't fold for divisors that are a power of two.
define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
; SSE-LABEL: dont_fold_srem_power_of_two:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrw $1, %xmm0, %eax
; SSE-NEXT:    leal 31(%rax), %ecx
; SSE-NEXT:    testw %ax, %ax
; SSE-NEXT:    cmovnsl %eax, %ecx
; SSE-NEXT:    andl $-32, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    movd %xmm0, %ecx
; SSE-NEXT:    leal 63(%rcx), %edx
; SSE-NEXT:    testw %cx, %cx
; SSE-NEXT:    cmovnsl %ecx, %edx
; SSE-NEXT:    andl $-64, %edx
; SSE-NEXT:    subl %edx, %ecx
; SSE-NEXT:    movd %ecx, %xmm1
; SSE-NEXT:    pinsrw $1, %eax, %xmm1
; SSE-NEXT:    pextrw $2, %xmm0, %eax
; SSE-NEXT:    leal 7(%rax), %ecx
; SSE-NEXT:    testw %ax, %ax
; SSE-NEXT:    cmovnsl %eax, %ecx
; SSE-NEXT:    andl $-8, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    pinsrw $2, %eax, %xmm1
; SSE-NEXT:    pextrw $3, %xmm0, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    imull $-21385, %ecx, %ecx # imm = 0xAC77
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movzwl %cx, %ecx
; SSE-NEXT:    movswl %cx, %edx
; SSE-NEXT:    shrl $15, %ecx
; SSE-NEXT:    sarl $6, %edx
; SSE-NEXT:    addl %ecx, %edx
; SSE-NEXT:    imull $95, %edx, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    pinsrw $3, %eax, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: dont_fold_srem_power_of_two:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $1, %xmm0, %eax
; AVX-NEXT:    leal 31(%rax), %ecx
; AVX-NEXT:    testw %ax, %ax
; AVX-NEXT:    cmovnsl %eax, %ecx
; AVX-NEXT:    andl $-32, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vmovd %xmm0, %ecx
; AVX-NEXT:    leal 63(%rcx), %edx
; AVX-NEXT:    testw %cx, %cx
; AVX-NEXT:    cmovnsl %ecx, %edx
; AVX-NEXT:    andl $-64, %edx
; AVX-NEXT:    subl %edx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrw $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $2, %xmm0, %eax
; AVX-NEXT:    leal 7(%rax), %ecx
; AVX-NEXT:    testw %ax, %ax
; AVX-NEXT:    cmovnsl %eax, %ecx
; AVX-NEXT:    andl $-8, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vpinsrw $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $3, %xmm0, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    imull $-21385, %ecx, %ecx # imm = 0xAC77
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    movswl %cx, %edx
; AVX-NEXT:    shrl $15, %ecx
; AVX-NEXT:    sarl $6, %edx
; AVX-NEXT:    addl %ecx, %edx
; AVX-NEXT:    imull $95, %edx, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vpinsrw $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
  ret <4 x i16> %1
}
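; The power-of-two lanes above need no multiplier: each one biases negative
; inputs by divisor-1 (the leal/cmovnsl pair), masks with -divisor to get
; divisor * (x / divisor) truncated toward zero, and subtracts. Only the
; divisor-95 lane still uses the 0xAC77 multiply-high sequence.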

; Don't fold if the divisor is one.
define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; SSE-LABEL: dont_fold_srem_one:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrw $2, %xmm0, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    imull $-19945, %ecx, %ecx # imm = 0xB217
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movzwl %cx, %ecx
; SSE-NEXT:    movswl %cx, %edx
; SSE-NEXT:    shrl $15, %ecx
; SSE-NEXT:    sarl $4, %edx
; SSE-NEXT:    addl %ecx, %edx
; SSE-NEXT:    leal (%rdx,%rdx,2), %ecx
; SSE-NEXT:    shll $3, %ecx
; SSE-NEXT:    subl %ecx, %edx
; SSE-NEXT:    addl %eax, %edx
; SSE-NEXT:    pextrw $1, %xmm0, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    imull $12827, %ecx, %ecx # imm = 0x321B
; SSE-NEXT:    movl %ecx, %esi
; SSE-NEXT:    shrl $31, %esi
; SSE-NEXT:    sarl $23, %ecx
; SSE-NEXT:    addl %esi, %ecx
; SSE-NEXT:    imull $654, %ecx, %ecx # imm = 0x28E
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pinsrw $1, %eax, %xmm1
; SSE-NEXT:    pinsrw $2, %edx, %xmm1
; SSE-NEXT:    pextrw $3, %xmm0, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    imull $12375, %ecx, %ecx # imm = 0x3057
; SSE-NEXT:    movl %ecx, %edx
; SSE-NEXT:    shrl $31, %edx
; SSE-NEXT:    sarl $26, %ecx
; SSE-NEXT:    addl %edx, %ecx
; SSE-NEXT:    imull $5423, %ecx, %ecx # imm = 0x152F
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    pinsrw $3, %eax, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: dont_fold_srem_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $2, %xmm0, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    imull $-19945, %ecx, %ecx # imm = 0xB217
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    movswl %cx, %edx
; AVX-NEXT:    shrl $15, %ecx
; AVX-NEXT:    sarl $4, %edx
; AVX-NEXT:    addl %ecx, %edx
; AVX-NEXT:    leal (%rdx,%rdx,2), %ecx
; AVX-NEXT:    shll $3, %ecx
; AVX-NEXT:    subl %ecx, %edx
; AVX-NEXT:    addl %eax, %edx
; AVX-NEXT:    vpextrw $1, %xmm0, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    imull $12827, %ecx, %ecx # imm = 0x321B
; AVX-NEXT:    movl %ecx, %esi
; AVX-NEXT:    shrl $31, %esi
; AVX-NEXT:    sarl $23, %ecx
; AVX-NEXT:    addl %esi, %ecx
; AVX-NEXT:    imull $654, %ecx, %ecx # imm = 0x28E
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpinsrw $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpinsrw $2, %edx, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $3, %xmm0, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    imull $12375, %ecx, %ecx # imm = 0x3057
; AVX-NEXT:    movl %ecx, %edx
; AVX-NEXT:    shrl $31, %edx
; AVX-NEXT:    sarl $26, %ecx
; AVX-NEXT:    addl %edx, %ecx
; AVX-NEXT:    imull $5423, %ecx, %ecx # imm = 0x152F
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vpinsrw $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
  ret <4 x i16> %1
}
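; A remainder by 1 is always 0, so lane 0 above is never computed: the result
; starts from a zeroed register (pxor/vpxor) and only lanes 1-3 are inserted
; with their multiply-high remainders.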

; Don't fold if the divisor is 2^15.
define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
; SSE-LABEL: dont_fold_urem_i16_smax:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrw $2, %xmm0, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    imull $-19945, %ecx, %ecx # imm = 0xB217
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movzwl %cx, %ecx
; SSE-NEXT:    movswl %cx, %edx
; SSE-NEXT:    shrl $15, %ecx
; SSE-NEXT:    sarl $4, %edx
; SSE-NEXT:    addl %ecx, %edx
; SSE-NEXT:    leal (%rdx,%rdx,2), %ecx
; SSE-NEXT:    shll $3, %ecx
; SSE-NEXT:    subl %ecx, %edx
; SSE-NEXT:    addl %eax, %edx
; SSE-NEXT:    pextrw $1, %xmm0, %eax
; SSE-NEXT:    leal 32767(%rax), %ecx
; SSE-NEXT:    testw %ax, %ax
; SSE-NEXT:    cmovnsl %eax, %ecx
; SSE-NEXT:    andl $-32768, %ecx # imm = 0x8000
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pinsrw $1, %ecx, %xmm1
; SSE-NEXT:    pinsrw $2, %edx, %xmm1
; SSE-NEXT:    pextrw $3, %xmm0, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    imull $12375, %ecx, %ecx # imm = 0x3057
; SSE-NEXT:    movl %ecx, %edx
; SSE-NEXT:    shrl $31, %edx
; SSE-NEXT:    sarl $26, %ecx
; SSE-NEXT:    addl %edx, %ecx
; SSE-NEXT:    imull $5423, %ecx, %ecx # imm = 0x152F
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    pinsrw $3, %eax, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: dont_fold_urem_i16_smax:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $2, %xmm0, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    imull $-19945, %ecx, %ecx # imm = 0xB217
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    movswl %cx, %edx
; AVX-NEXT:    shrl $15, %ecx
; AVX-NEXT:    sarl $4, %edx
; AVX-NEXT:    addl %ecx, %edx
; AVX-NEXT:    leal (%rdx,%rdx,2), %ecx
; AVX-NEXT:    shll $3, %ecx
; AVX-NEXT:    subl %ecx, %edx
; AVX-NEXT:    addl %eax, %edx
; AVX-NEXT:    vpextrw $1, %xmm0, %eax
; AVX-NEXT:    leal 32767(%rax), %ecx
; AVX-NEXT:    testw %ax, %ax
; AVX-NEXT:    cmovnsl %eax, %ecx
; AVX-NEXT:    andl $-32768, %ecx # imm = 0x8000
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpinsrw $1, %ecx, %xmm1, %xmm1
; AVX-NEXT:    vpinsrw $2, %edx, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $3, %xmm0, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    imull $12375, %ecx, %ecx # imm = 0x3057
; AVX-NEXT:    movl %ecx, %edx
; AVX-NEXT:    shrl $31, %edx
; AVX-NEXT:    sarl $26, %ecx
; AVX-NEXT:    addl %edx, %ecx
; AVX-NEXT:    imull $5423, %ecx, %ecx # imm = 0x152F
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vpinsrw $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
  ret <4 x i16> %1
}
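; As an i16 immediate, 32768 is INT16_MIN, so lane 1 above is handled like a
; power-of-two divisor (the leal 32767/cmovnsl/andl $-32768 sequence) rather
; than with a multiplier; lane 0 (divisor 1) again comes straight from the
; zeroed register, and lanes 2 and 3 keep their multiply-high sequences.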

; Don't fold i64 srem.
define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
; SSE-LABEL: dont_fold_srem_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    movq %xmm1, %rcx
; SSE-NEXT:    movabsq $-5614226457215950491, %rdx # imm = 0xB21642C8590B2165
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    imulq %rdx
; SSE-NEXT:    addq %rcx, %rdx
; SSE-NEXT:    movq %rdx, %rax
; SSE-NEXT:    shrq $63, %rax
; SSE-NEXT:    sarq $4, %rdx
; SSE-NEXT:    addq %rax, %rdx
; SSE-NEXT:    leaq (%rdx,%rdx,2), %rax
; SSE-NEXT:    shlq $3, %rax
; SSE-NEXT:    subq %rax, %rdx
; SSE-NEXT:    addq %rcx, %rdx
; SSE-NEXT:    movq %rdx, %xmm1
; SSE-NEXT:    pextrq $1, %xmm2, %rcx
; SSE-NEXT:    movabsq $6966426675817289639, %rdx # imm = 0x60ADB826E5E517A7
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    imulq %rdx
; SSE-NEXT:    movq %rdx, %rax
; SSE-NEXT:    shrq $63, %rax
; SSE-NEXT:    sarq $11, %rdx
; SSE-NEXT:    addq %rax, %rdx
; SSE-NEXT:    imulq $5423, %rdx, %rax # imm = 0x152F
; SSE-NEXT:    subq %rax, %rcx
; SSE-NEXT:    movq %rcx, %xmm2
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    pextrq $1, %xmm0, %rcx
; SSE-NEXT:    movabsq $7220743857598845893, %rdx # imm = 0x64353C48064353C5
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    imulq %rdx
; SSE-NEXT:    movq %rdx, %rax
; SSE-NEXT:    shrq $63, %rax
; SSE-NEXT:    sarq $8, %rdx
; SSE-NEXT:    addq %rax, %rdx
; SSE-NEXT:    imulq $654, %rdx, %rax # imm = 0x28E
; SSE-NEXT:    subq %rax, %rcx
; SSE-NEXT:    movq %rcx, %xmm0
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX1-LABEL: dont_fold_srem_i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovq %xmm1, %rcx
; AVX1-NEXT:    movabsq $-5614226457215950491, %rdx # imm = 0xB21642C8590B2165
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    imulq %rdx
; AVX1-NEXT:    addq %rcx, %rdx
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq $4, %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    leaq (%rdx,%rdx,2), %rax
; AVX1-NEXT:    shlq $3, %rax
; AVX1-NEXT:    subq %rax, %rdx
; AVX1-NEXT:    addq %rcx, %rdx
; AVX1-NEXT:    vmovq %rdx, %xmm2
; AVX1-NEXT:    vpextrq $1, %xmm1, %rcx
; AVX1-NEXT:    movabsq $6966426675817289639, %rdx # imm = 0x60ADB826E5E517A7
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    imulq %rdx
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq $11, %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    imulq $5423, %rdx, %rax # imm = 0x152F
; AVX1-NEXT:    subq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
; AVX1-NEXT:    movabsq $7220743857598845893, %rdx # imm = 0x64353C48064353C5
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    imulq %rdx
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq $8, %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    imulq $654, %rdx, %rax # imm = 0x28E
; AVX1-NEXT:    subq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: dont_fold_srem_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovq %xmm1, %rcx
; AVX2-NEXT:    movabsq $-5614226457215950491, %rdx # imm = 0xB21642C8590B2165
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    imulq %rdx
; AVX2-NEXT:    addq %rcx, %rdx
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq $4, %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    leaq (%rdx,%rdx,2), %rax
; AVX2-NEXT:    shlq $3, %rax
; AVX2-NEXT:    subq %rax, %rdx
; AVX2-NEXT:    addq %rcx, %rdx
; AVX2-NEXT:    vmovq %rdx, %xmm2
; AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
; AVX2-NEXT:    movabsq $6966426675817289639, %rdx # imm = 0x60ADB826E5E517A7
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    imulq %rdx
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq $11, %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    imulq $5423, %rdx, %rax # imm = 0x152F
; AVX2-NEXT:    subq %rax, %rcx
; AVX2-NEXT:    vmovq %rcx, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
; AVX2-NEXT:    movabsq $7220743857598845893, %rdx # imm = 0x64353C48064353C5
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    imulq %rdx
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq $8, %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    imulq $654, %rdx, %rax # imm = 0x28E
; AVX2-NEXT:    subq %rax, %rcx
; AVX2-NEXT:    vmovq %rcx, %xmm0
; AVX2-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
  ret <4 x i64> %1
}
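; There is no vector multiply-high for 64-bit elements here, so each element
; above is extracted and reduced with a scalar magic-number sequence (movabsq
; constant plus a widening imulq), and the divisor-1 element is never computed
; at all.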