; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=SSE41
; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s --check-prefix=AVX

target triple = "x86_64-unknown-unknown"

; udiv <4 x i32> by splat 7: expected to lower to a multiply by the magic
; constant 613566757 (0x24924925) via pmuludq plus a sub/shift/add fixup,
; with no scalar division. (Viewer line-number prefixes stripped.)
define <4 x i32> @test1(<4 x i32> %a) #0 {
; SSE41-LABEL: test1:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    psubd %xmm1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    paddd %xmm1, %xmm0
; SSE41-NEXT:    psrld $2, %xmm0
; SSE41-NEXT:    retq
;
; SSE-LABEL: test1:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pmuludq %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm1, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    psrld $2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # BB#0:
; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %div
}
58
; Same magic-number udiv-by-7 lowering as test1, widened to <8 x i32>:
; SSE targets split the operation into two 128-bit halves, AVX2 keeps it
; as a single 256-bit sequence. (Viewer line-number prefixes stripped.)
define <8 x i32> @test2(<8 x i32> %a) #0 {
; SSE41-LABEL: test2:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm3, %xmm4
; SSE41-NEXT:    movdqa %xmm0, %xmm5
; SSE41-NEXT:    pmuludq %xmm2, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; SSE41-NEXT:    psubd %xmm5, %xmm0
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    paddd %xmm5, %xmm0
; SSE41-NEXT:    psrld $2, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm3, %xmm4
; SSE41-NEXT:    pmuludq %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; SSE41-NEXT:    psubd %xmm2, %xmm1
; SSE41-NEXT:    psrld $1, %xmm1
; SSE41-NEXT:    paddd %xmm2, %xmm1
; SSE41-NEXT:    psrld $2, %xmm1
; SSE41-NEXT:    retq
;
; SSE-LABEL: test2:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    pmuludq %xmm2, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm4, %xmm5
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSE-NEXT:    psubd %xmm3, %xmm0
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    paddd %xmm3, %xmm0
; SSE-NEXT:    psrld $2, %xmm0
; SSE-NEXT:    pmuludq %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm4, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    psubd %xmm2, %xmm1
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    paddd %xmm2, %xmm1
; SSE-NEXT:    psrld $2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # BB#0:
; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpmuludq %ymm2, %ymm3, %ymm2
; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm1
; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpsrld $1, %ymm0, %ymm0
; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpsrld $2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %div
}
129
; udiv <8 x i16> by splat 7: i16 elements can use pmulhuw directly with
; magic constant 9363 (0x2493), then the sub/shift/add fixup.
; (Viewer line-number prefixes stripped.)
define <8 x i16> @test3(<8 x i16> %a) #0 {
; SSE41-LABEL: test3:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE41-NEXT:    pmulhuw %xmm0, %xmm1
; SSE41-NEXT:    psubw %xmm1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    paddw %xmm1, %xmm0
; SSE41-NEXT:    psrlw $2, %xmm0
; SSE41-NEXT:    retq
;
; SSE-LABEL: test3:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE-NEXT:    pmulhuw %xmm0, %xmm1
; SSE-NEXT:    psubw %xmm1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    psrlw $2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # BB#0:
; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %div
}
162
; Same pmulhuw-based udiv-by-7 as test3, widened to <16 x i16>: SSE splits
; into two 128-bit halves, AVX2 uses one 256-bit vpmulhuw sequence.
; (Viewer line-number prefixes stripped.)
define <16 x i16> @test4(<16 x i16> %a) #0 {
; SSE41-LABEL: test4:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pmulhuw %xmm2, %xmm3
; SSE41-NEXT:    psubw %xmm3, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    paddw %xmm3, %xmm0
; SSE41-NEXT:    psrlw $2, %xmm0
; SSE41-NEXT:    pmulhuw %xmm1, %xmm2
; SSE41-NEXT:    psubw %xmm2, %xmm1
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    paddw %xmm2, %xmm1
; SSE41-NEXT:    psrlw $2, %xmm1
; SSE41-NEXT:    retq
;
; SSE-LABEL: test4:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    pmulhuw %xmm2, %xmm3
; SSE-NEXT:    psubw %xmm3, %xmm0
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    paddw %xmm3, %xmm0
; SSE-NEXT:    psrlw $2, %xmm0
; SSE-NEXT:    pmulhuw %xmm1, %xmm2
; SSE-NEXT:    psubw %xmm2, %xmm1
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    paddw %xmm2, %xmm1
; SSE-NEXT:    psrlw $2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # BB#0:
; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
; AVX-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpsrlw $1, %ymm0, %ymm0
; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpsrlw $2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
  ret <16 x i16> %div
}
207
; sdiv <8 x i16> by splat 7: signed variant uses pmulhw, then a psrlw $15
; (sign-correction term) added to the psraw $1 result.
; (Viewer line-number prefixes stripped.)
define <8 x i16> @test5(<8 x i16> %a) #0 {
; SSE41-LABEL: test5:
; SSE41:       # BB#0:
; SSE41-NEXT:    pmulhw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $15, %xmm1
; SSE41-NEXT:    psraw $1, %xmm0
; SSE41-NEXT:    paddw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE-LABEL: test5:
; SSE:       # BB#0:
; SSE-NEXT:    pmulhw {{.*}}(%rip), %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrlw $15, %xmm1
; SSE-NEXT:    psraw $1, %xmm0
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test5:
; AVX:       # BB#0:
; AVX-NEXT:    vpmulhw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsraw $1, %xmm0, %xmm0
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %div
}
237
; Same pmulhw-based sdiv-by-7 as test5, widened to <16 x i16>; the SSE
; variants reuse the magic constant 18725 (0x4925) across both halves.
; (Viewer line-number prefixes stripped.)
define <16 x i16> @test6(<16 x i16> %a) #0 {
; SSE41-LABEL: test6:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
; SSE41-NEXT:    pmulhw %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    psrlw $15, %xmm3
; SSE41-NEXT:    psraw $1, %xmm0
; SSE41-NEXT:    paddw %xmm3, %xmm0
; SSE41-NEXT:    pmulhw %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    psrlw $15, %xmm2
; SSE41-NEXT:    psraw $1, %xmm1
; SSE41-NEXT:    paddw %xmm2, %xmm1
; SSE41-NEXT:    retq
;
; SSE-LABEL: test6:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
; SSE-NEXT:    pmulhw %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    psrlw $15, %xmm3
; SSE-NEXT:    psraw $1, %xmm0
; SSE-NEXT:    paddw %xmm3, %xmm0
; SSE-NEXT:    pmulhw %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrlw $15, %xmm2
; SSE-NEXT:    psraw $1, %xmm1
; SSE-NEXT:    paddw %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test6:
; AVX:       # BB#0:
; AVX-NEXT:    vpmulhw {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vpsrlw $15, %ymm0, %ymm1
; AVX-NEXT:    vpsraw $1, %ymm0, %ymm0
; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
  ret <16 x i16> %div
}
279
; sdiv <16 x i8> by splat 7: there is no 8-bit vector multiply, so the
; lowering captured here fully scalarizes — each byte is extracted,
; sign-extended, multiplied by -109 (the i8 magic constant), fixed up with
; shifts, and re-inserted. (Viewer line-number prefixes stripped.)
define <16 x i8> @test7(<16 x i8> %a) #0 {
; SSE41-LABEL: test7:
; SSE41:       # BB#0:
; SSE41-NEXT:    pextrb $1, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pextrb $0, %xmm0, %ecx
; SSE41-NEXT:    movsbl %cl, %ecx
; SSE41-NEXT:    imull $-109, %ecx, %edx
; SSE41-NEXT:    shrl $8, %edx
; SSE41-NEXT:    addb %dl, %cl
; SSE41-NEXT:    movb %cl, %dl
; SSE41-NEXT:    shrb $7, %dl
; SSE41-NEXT:    sarb $2, %cl
; SSE41-NEXT:    addb %dl, %cl
; SSE41-NEXT:    movzbl %cl, %ecx
; SSE41-NEXT:    movd %ecx, %xmm1
; SSE41-NEXT:    pinsrb $1, %eax, %xmm1
; SSE41-NEXT:    pextrb $2, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $2, %eax, %xmm1
; SSE41-NEXT:    pextrb $3, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $3, %eax, %xmm1
; SSE41-NEXT:    pextrb $4, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $4, %eax, %xmm1
; SSE41-NEXT:    pextrb $5, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $5, %eax, %xmm1
; SSE41-NEXT:    pextrb $6, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $6, %eax, %xmm1
; SSE41-NEXT:    pextrb $7, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $7, %eax, %xmm1
; SSE41-NEXT:    pextrb $8, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $8, %eax, %xmm1
; SSE41-NEXT:    pextrb $9, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $9, %eax, %xmm1
; SSE41-NEXT:    pextrb $10, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $10, %eax, %xmm1
; SSE41-NEXT:    pextrb $11, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $11, %eax, %xmm1
; SSE41-NEXT:    pextrb $12, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $12, %eax, %xmm1
; SSE41-NEXT:    pextrb $13, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $13, %eax, %xmm1
; SSE41-NEXT:    pextrb $14, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
; SSE41-NEXT:    pextrb $15, %xmm0, %eax
; SSE41-NEXT:    movsbl %al, %eax
; SSE41-NEXT:    imull $-109, %eax, %ecx
; SSE41-NEXT:    shrl $8, %ecx
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movb %al, %cl
; SSE41-NEXT:    shrb $7, %cl
; SSE41-NEXT:    sarb $2, %al
; SSE41-NEXT:    addb %cl, %al
; SSE41-NEXT:    movzbl %al, %eax
; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE-LABEL: test7:
; SSE:       # BB#0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    pushq %r14
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    imull $-109, %eax, %ecx
; SSE-NEXT:    shrl $8, %ecx
; SSE-NEXT:    addb %al, %cl
; SSE-NEXT:    movb %cl, %al
; SSE-NEXT:    shrb $7, %al
; SSE-NEXT:    sarb $2, %cl
; SSE-NEXT:    addb %al, %cl
; SSE-NEXT:    movzbl %cl, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %r14d
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %r9d
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %r11d
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %r8d
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT:    imull $-109, %esi, %edi
; SSE-NEXT:    shrl $8, %edi
; SSE-NEXT:    addb %sil, %dil
; SSE-NEXT:    movb %dil, %bl
; SSE-NEXT:    shrb $7, %bl
; SSE-NEXT:    sarb $2, %dil
; SSE-NEXT:    addb %bl, %dil
; SSE-NEXT:    movzbl %dil, %esi
; SSE-NEXT:    movd %esi, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    imull $-109, %eax, %esi
; SSE-NEXT:    shrl $8, %esi
; SSE-NEXT:    addb %al, %sil
; SSE-NEXT:    movb %sil, %al
; SSE-NEXT:    shrb $7, %al
; SSE-NEXT:    sarb $2, %sil
; SSE-NEXT:    addb %al, %sil
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %ebp
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %r10d
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %edi
; SSE-NEXT:    imull $-109, %edi, %ebx
; SSE-NEXT:    shrl $8, %ebx
; SSE-NEXT:    addb %dil, %bl
; SSE-NEXT:    movb %bl, %al
; SSE-NEXT:    shrb $7, %al
; SSE-NEXT:    sarb $2, %bl
; SSE-NEXT:    addb %al, %bl
; SSE-NEXT:    movzbl %bl, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    imull $-109, %edx, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %dl, %al
; SSE-NEXT:    movb %al, %dl
; SSE-NEXT:    shrb $7, %dl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %dl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    imull $-109, %esi, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %sil, %al
; SSE-NEXT:    movb %al, %dl
; SSE-NEXT:    shrb $7, %dl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %dl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE-NEXT:    imull $-109, %ecx, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    movb %al, %cl
; SSE-NEXT:    shrb $7, %cl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    imull $-109, %eax, %edx
; SSE-NEXT:    shrl $8, %edx
; SSE-NEXT:    addb %al, %dl
; SSE-NEXT:    movb %dl, %al
; SSE-NEXT:    shrb $7, %al
; SSE-NEXT:    sarb $2, %dl
; SSE-NEXT:    addb %al, %dl
; SSE-NEXT:    movzbl %dl, %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    imull $-109, %r14d, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %r14b, %al
; SSE-NEXT:    movb %al, %dl
; SSE-NEXT:    shrb $7, %dl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %dl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    imull $-109, %ebp, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %bpl, %al
; SSE-NEXT:    movb %al, %dl
; SSE-NEXT:    shrb $7, %dl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %dl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT:    imull $-109, %r11d, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %r11b, %al
; SSE-NEXT:    movb %al, %dl
; SSE-NEXT:    shrb $7, %dl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %dl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    imull $-109, %ecx, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    movb %al, %cl
; SSE-NEXT:    shrb $7, %cl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT:    imull $-109, %r9d, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %r9b, %al
; SSE-NEXT:    movb %al, %cl
; SSE-NEXT:    shrb $7, %cl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    imull $-109, %r10d, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %r10b, %al
; SSE-NEXT:    movb %al, %cl
; SSE-NEXT:    shrb $7, %cl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT:    imull $-109, %r8d, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %r8b, %al
; SSE-NEXT:    movb %al, %cl
; SSE-NEXT:    shrb $7, %cl
; SSE-NEXT:    sarb $2, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm4
; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    imull $-109, %eax, %ecx
; SSE-NEXT:    shrl $8, %ecx
; SSE-NEXT:    addb %al, %cl
; SSE-NEXT:    movb %cl, %al
; SSE-NEXT:    shrb $7, %al
; SSE-NEXT:    sarb $2, %cl
; SSE-NEXT:    addb %al, %cl
; SSE-NEXT:    movzbl %cl, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %r14
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: test7:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrb $1, %xmm0, %eax
; AVX-NEXT:    movsbl %al, %eax
; AVX-NEXT:    imull $-109, %eax, %ecx
; AVX-NEXT:    shrl $8, %ecx
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    movb %al, %cl
; AVX-NEXT:    shrb $7, %cl
; AVX-NEXT:    sarb $2, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    movzbl %al, %eax
; AVX-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %dl
; AVX-NEXT:    shrb $7, %dl
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movzbl %cl, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpextrb $2, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $3, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $4, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $5, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $6, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $7, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $8, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $9, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $10, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $11, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $12, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $13, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $14, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpextrb $15, %xmm0, %ecx
; AVX-NEXT:    movsbl %cl, %ecx
; AVX-NEXT:    imull $-109, %ecx, %edx
; AVX-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm0
; AVX-NEXT:    shrl $8, %edx
; AVX-NEXT:    addb %dl, %cl
; AVX-NEXT:    movb %cl, %al
; AVX-NEXT:    shrb $7, %al
; AVX-NEXT:    sarb $2, %cl
; AVX-NEXT:    addb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %div
}
830
define <4 x i32> @test8(<4 x i32> %a) #0 {
; SSE41-LABEL: test8:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuldq %xmm2, %xmm3
; SSE41-NEXT:    pmuldq %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    paddd %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrld $31, %xmm0
; SSE41-NEXT:    psrad $2, %xmm1
; SSE41-NEXT:    paddd %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE-LABEL: test8:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    pmuludq %xmm1, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    paddd %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm4, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE-NEXT:    psubd %xmm2, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    psrad $2, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test8:
; AVX:       # BB#0:
; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
; AVX-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrad $2, %xmm0, %xmm0
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Signed divide of every i32 lane by the constant 7. The autogenerated
  ; CHECK lines above pin the lowering: no hardware division is emitted;
  ; instead llc multiplies by the splat constant 2454267027 (pmuldq/vpmuldq,
  ; taking the high halves via pshufd/blend) and applies the psrld $31 /
  ; psrad $2 / paddd sign-correction sequence.
  %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %div
}
892
define <8 x i32> @test9(<8 x i32> %a) #0 {
; SSE41-LABEL: test9:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuldq %xmm4, %xmm5
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pmuldq %xmm3, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; SSE41-NEXT:    paddd %xmm0, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    psrld $31, %xmm0
; SSE41-NEXT:    psrad $2, %xmm2
; SSE41-NEXT:    paddd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    pmuldq %xmm4, %xmm0
; SSE41-NEXT:    pmuldq %xmm1, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm0[2,3],xmm3[4,5],xmm0[6,7]
; SSE41-NEXT:    paddd %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    psrld $31, %xmm0
; SSE41-NEXT:    psrad $2, %xmm3
; SSE41-NEXT:    paddd %xmm0, %xmm3
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE-LABEL: test9:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; SSE-NEXT:    movdqa %xmm3, %xmm4
; SSE-NEXT:    psrad $31, %xmm4
; SSE-NEXT:    movdqa %xmm4, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm2, %xmm5
; SSE-NEXT:    psrad $31, %xmm5
; SSE-NEXT:    pand %xmm3, %xmm5
; SSE-NEXT:    paddd %xmm0, %xmm5
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    pmuludq %xmm3, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[1,1,3,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm6, %xmm7
; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
; SSE-NEXT:    psubd %xmm5, %xmm0
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrld $31, %xmm2
; SSE-NEXT:    psrad $2, %xmm0
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm4
; SSE-NEXT:    movdqa %xmm1, %xmm5
; SSE-NEXT:    psrad $31, %xmm5
; SSE-NEXT:    pand %xmm3, %xmm5
; SSE-NEXT:    paddd %xmm4, %xmm5
; SSE-NEXT:    pmuludq %xmm1, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm6, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    psubd %xmm5, %xmm2
; SSE-NEXT:    paddd %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    psrad $2, %xmm2
; SSE-NEXT:    paddd %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test9:
; AVX:       # BB#0:
; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpmuldq %ymm2, %ymm3, %ymm2
; AVX-NEXT:    vpmuldq %ymm1, %ymm0, %ymm1
; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vpsrld $31, %ymm0, %ymm1
; AVX-NEXT:    vpsrad $2, %ymm0, %ymm0
; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  ; 256-bit widening of test8: signed divide of eight i32 lanes by 7.
  ; Per the autogenerated checks, the SSE/SSE4.1 targets process the vector
  ; as two 128-bit halves (xmm0/xmm1), while AVX2 handles all eight lanes at
  ; once with ymm registers (vpmuldq by the broadcast magic constant plus
  ; the vpsrld $31 / vpsrad $2 / vpaddd fix-up).
  %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %div
}
986
define <8 x i32> @test10(<8 x i32> %a) #0 {
; SSE41-LABEL: test10:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm3, %xmm4
; SSE41-NEXT:    movdqa %xmm0, %xmm5
; SSE41-NEXT:    pmuludq %xmm2, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    psubd %xmm5, %xmm4
; SSE41-NEXT:    psrld $1, %xmm4
; SSE41-NEXT:    paddd %xmm5, %xmm4
; SSE41-NEXT:    psrld $2, %xmm4
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [7,7,7,7]
; SSE41-NEXT:    pmulld %xmm5, %xmm4
; SSE41-NEXT:    psubd %xmm4, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm3, %xmm4
; SSE41-NEXT:    pmuludq %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    psubd %xmm2, %xmm3
; SSE41-NEXT:    psrld $1, %xmm3
; SSE41-NEXT:    paddd %xmm2, %xmm3
; SSE41-NEXT:    psrld $2, %xmm3
; SSE41-NEXT:    pmulld %xmm5, %xmm3
; SSE41-NEXT:    psubd %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE-LABEL: test10:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = [613566757,613566757,613566757,613566757]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pmuludq %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm4, %xmm5
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
; SSE-NEXT:    movdqa %xmm0, %xmm5
; SSE-NEXT:    psubd %xmm2, %xmm5
; SSE-NEXT:    psrld $1, %xmm5
; SSE-NEXT:    paddd %xmm2, %xmm5
; SSE-NEXT:    psrld $2, %xmm5
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [7,7,7,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm2, %xmm5
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE-NEXT:    pmuludq %xmm2, %xmm6
; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
; SSE-NEXT:    psubd %xmm5, %xmm0
; SSE-NEXT:    pmuludq %xmm1, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm4, %xmm5
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT:    movdqa %xmm1, %xmm4
; SSE-NEXT:    psubd %xmm3, %xmm4
; SSE-NEXT:    psrld $1, %xmm4
; SSE-NEXT:    paddd %xmm3, %xmm4
; SSE-NEXT:    psrld $2, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm2, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT:    pmuludq %xmm2, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; SSE-NEXT:    psubd %xmm4, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test10:
; AVX:       # BB#0:
; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpmuludq %ymm2, %ymm3, %ymm2
; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm1
; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX-NEXT:    vpsubd %ymm1, %ymm0, %ymm2
; AVX-NEXT:    vpsrld $1, %ymm2, %ymm2
; AVX-NEXT:    vpaddd %ymm1, %ymm2, %ymm1
; AVX-NEXT:    vpsrld $2, %ymm1, %ymm1
; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
; AVX-NEXT:    vpmulld %ymm2, %ymm1, %ymm1
; AVX-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  ; Unsigned remainder of eight i32 lanes by 7. The checks above show the
  ; expected lowering: compute the unsigned quotient via a pmuludq multiply
  ; by the splat constant 613566757 plus psrld/paddd fix-ups, then rebuild
  ; the remainder as a - (a/7)*7 (multiply by the splat [7,7,7,7] and psubd).
  %rem = urem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %rem
}
1084
define <8 x i32> @test11(<8 x i32> %a) #0 {
; SSE41-LABEL: test11:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuldq %xmm3, %xmm4
; SSE41-NEXT:    movdqa %xmm0, %xmm5
; SSE41-NEXT:    pmuldq %xmm2, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; SSE41-NEXT:    paddd %xmm0, %xmm5
; SSE41-NEXT:    movdqa %xmm5, %xmm4
; SSE41-NEXT:    psrld $31, %xmm4
; SSE41-NEXT:    psrad $2, %xmm5
; SSE41-NEXT:    paddd %xmm4, %xmm5
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [7,7,7,7]
; SSE41-NEXT:    pmulld %xmm4, %xmm5
; SSE41-NEXT:    psubd %xmm5, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE41-NEXT:    pmuldq %xmm3, %xmm5
; SSE41-NEXT:    pmuldq %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; SSE41-NEXT:    paddd %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm3
; SSE41-NEXT:    psrld $31, %xmm3
; SSE41-NEXT:    psrad $2, %xmm2
; SSE41-NEXT:    paddd %xmm3, %xmm2
; SSE41-NEXT:    pmulld %xmm4, %xmm2
; SSE41-NEXT:    psubd %xmm2, %xmm1
; SSE41-NEXT:    retq
;
; SSE-LABEL: test11:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    psrad $31, %xmm3
; SSE-NEXT:    movdqa %xmm3, %xmm4
; SSE-NEXT:    pand %xmm0, %xmm4
; SSE-NEXT:    movdqa %xmm0, %xmm6
; SSE-NEXT:    psrad $31, %xmm6
; SSE-NEXT:    pand %xmm2, %xmm6
; SSE-NEXT:    paddd %xmm4, %xmm6
; SSE-NEXT:    movdqa %xmm0, %xmm4
; SSE-NEXT:    pmuludq %xmm2, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm5, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1]
; SSE-NEXT:    psubd %xmm6, %xmm7
; SSE-NEXT:    paddd %xmm0, %xmm7
; SSE-NEXT:    movdqa %xmm7, %xmm4
; SSE-NEXT:    psrld $31, %xmm4
; SSE-NEXT:    psrad $2, %xmm7
; SSE-NEXT:    paddd %xmm4, %xmm7
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [7,7,7,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm4, %xmm7
; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[0,2,2,3]
; SSE-NEXT:    pmuludq %xmm4, %xmm6
; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; SSE-NEXT:    psubd %xmm7, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm3
; SSE-NEXT:    movdqa %xmm1, %xmm6
; SSE-NEXT:    psrad $31, %xmm6
; SSE-NEXT:    pand %xmm2, %xmm6
; SSE-NEXT:    paddd %xmm3, %xmm6
; SSE-NEXT:    pmuludq %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm5, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    psubd %xmm6, %xmm2
; SSE-NEXT:    paddd %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    psrld $31, %xmm3
; SSE-NEXT:    psrad $2, %xmm2
; SSE-NEXT:    paddd %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm4, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT:    pmuludq %xmm4, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    psubd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test11:
; AVX:       # BB#0:
; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpmuldq %ymm2, %ymm3, %ymm2
; AVX-NEXT:    vpmuldq %ymm1, %ymm0, %ymm1
; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX-NEXT:    vpaddd %ymm0, %ymm1, %ymm1
; AVX-NEXT:    vpsrld $31, %ymm1, %ymm2
; AVX-NEXT:    vpsrad $2, %ymm1, %ymm1
; AVX-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
; AVX-NEXT:    vpmulld %ymm2, %ymm1, %ymm1
; AVX-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  ; Signed remainder of eight i32 lanes by 7: the signed-division sequence
  ; from test9 (multiply by the splat constant 2454267027 with psrld $31 /
  ; psrad $2 sign fix-ups) followed by a multiply-by-7 and subtract to
  ; recover a - (a/7)*7, as pinned by the autogenerated checks above.
  %rem = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %rem
}
1197
define <2 x i16> @test12() #0 {
; SSE41-LABEL: test12:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; SSE-LABEL: test12:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test12:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  ; The vector built below is all constants and x urem x is 0 in every lane,
  ; so the whole computation constant-folds: all three targets are expected
  ; to emit nothing but an xor that zeroes xmm0 (see checks above).
  %I8 = insertelement <2 x i16> zeroinitializer, i16 -1, i32 0
  %I9 = insertelement <2 x i16> %I8, i16 -1, i32 1
  %B9 = urem <2 x i16> %I9, %I9
  ret <2 x i16> %B9
}
1218
define <4 x i32> @PR20355(<4 x i32> %a) #0 {
; SSE41-LABEL: PR20355:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuldq %xmm2, %xmm3
; SSE41-NEXT:    pmuldq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrld $31, %xmm0
; SSE41-NEXT:    paddd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE-LABEL: PR20355:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pand %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    psrad $31, %xmm3
; SSE-NEXT:    pand %xmm1, %xmm3
; SSE-NEXT:    paddd %xmm2, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
; SSE-NEXT:    psubd %xmm3, %xmm4
; SSE-NEXT:    movdqa %xmm4, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    paddd %xmm4, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: PR20355:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT:    vpmuldq %xmm2, %xmm3, %xmm2
; AVX-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  ; Signed divide of each i32 lane by 3 (presumably a regression test for
  ; llvm.org bug PR20355, per the function name — confirm against the
  ; tracker). Checks expect a multiply by the splat constant 1431655766
  ; plus a psrld $31 / paddd sign fix-up instead of a real division.
  %sdiv = sdiv <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %sdiv
}
1273
1274attributes #0 = { nounwind }
1275