• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
4
5;
6; sdiv by 7
7;
8
; Signed division of every lane of an <8 x i64> vector by the constant 7.
; Both llc invocations (+avx512f and +avx512bw) are expected to produce the
; same code here, so the assertions use the shared "AVX" prefix.
; The expected lowering is scalarized: each 64-bit lane is moved into %rax
; (vpextrq / vmovq) and multiplied by the magic constant 0x4924924924924925
; with one-operand imulq, and the value left in %rdx is then used to form the
; quotient as (%rdx >> 1, arithmetic) + (sign bit of %rdx). The lanes are
; re-packed with vpunpcklqdq, vinserti128 and vinserti64x4.
9define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
10; AVX-LABEL: test_div7_8i64:
11; AVX:       # BB#0:
12; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
13; AVX-NEXT:    vpextrq $1, %xmm1, %rax
14; AVX-NEXT:    movabsq $5270498306774157605, %rcx # imm = 0x4924924924924925
15; AVX-NEXT:    imulq %rcx
16; AVX-NEXT:    movq %rdx, %rax
17; AVX-NEXT:    shrq $63, %rax
18; AVX-NEXT:    sarq %rdx
19; AVX-NEXT:    addq %rax, %rdx
20; AVX-NEXT:    vmovq %rdx, %xmm2
21; AVX-NEXT:    vmovq %xmm1, %rax
22; AVX-NEXT:    imulq %rcx
23; AVX-NEXT:    movq %rdx, %rax
24; AVX-NEXT:    shrq $63, %rax
25; AVX-NEXT:    sarq %rdx
26; AVX-NEXT:    addq %rax, %rdx
27; AVX-NEXT:    vmovq %rdx, %xmm1
28; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
29; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
30; AVX-NEXT:    vpextrq $1, %xmm2, %rax
31; AVX-NEXT:    imulq %rcx
32; AVX-NEXT:    movq %rdx, %rax
33; AVX-NEXT:    shrq $63, %rax
34; AVX-NEXT:    sarq %rdx
35; AVX-NEXT:    addq %rax, %rdx
36; AVX-NEXT:    vmovq %rdx, %xmm3
37; AVX-NEXT:    vmovq %xmm2, %rax
38; AVX-NEXT:    imulq %rcx
39; AVX-NEXT:    movq %rdx, %rax
40; AVX-NEXT:    shrq $63, %rax
41; AVX-NEXT:    sarq %rdx
42; AVX-NEXT:    addq %rax, %rdx
43; AVX-NEXT:    vmovq %rdx, %xmm2
44; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
45; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
46; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
47; AVX-NEXT:    vpextrq $1, %xmm2, %rax
48; AVX-NEXT:    imulq %rcx
49; AVX-NEXT:    movq %rdx, %rax
50; AVX-NEXT:    shrq $63, %rax
51; AVX-NEXT:    sarq %rdx
52; AVX-NEXT:    addq %rax, %rdx
53; AVX-NEXT:    vmovq %rdx, %xmm3
54; AVX-NEXT:    vmovq %xmm2, %rax
55; AVX-NEXT:    imulq %rcx
56; AVX-NEXT:    movq %rdx, %rax
57; AVX-NEXT:    shrq $63, %rax
58; AVX-NEXT:    sarq %rdx
59; AVX-NEXT:    addq %rax, %rdx
60; AVX-NEXT:    vmovq %rdx, %xmm2
61; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
62; AVX-NEXT:    vpextrq $1, %xmm0, %rax
63; AVX-NEXT:    imulq %rcx
64; AVX-NEXT:    movq %rdx, %rax
65; AVX-NEXT:    shrq $63, %rax
66; AVX-NEXT:    sarq %rdx
67; AVX-NEXT:    addq %rax, %rdx
68; AVX-NEXT:    vmovq %rdx, %xmm3
69; AVX-NEXT:    vmovq %xmm0, %rax
70; AVX-NEXT:    imulq %rcx
71; AVX-NEXT:    movq %rdx, %rax
72; AVX-NEXT:    shrq $63, %rax
73; AVX-NEXT:    sarq %rdx
74; AVX-NEXT:    addq %rax, %rdx
75; AVX-NEXT:    vmovq %rdx, %xmm0
76; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
77; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
78; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
79; AVX-NEXT:    retq
80  %res = sdiv <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
81  ret <8 x i64> %res
82}
83
; Signed division of every lane of a <16 x i32> vector by the constant 7.
; The assertions use the shared "AVX" prefix, i.e. the same output is
; expected from both llc invocations (+avx512f and +avx512bw).
; The expected lowering is scalarized per 32-bit lane: the lane is extracted
; (vpextrd / vmovd), sign-extended to 64 bits (cltq / movslq) and multiplied
; by the magic constant 0x92492493 (-1840700269); the upper 32 bits of that
; product are added back onto the original lane value. The quotient is then
; (sum >> 2, arithmetic) + (sign bit of sum). Each result is re-inserted with
; vpinsrd and the 128-bit pieces are merged with vinserti128 / vinserti64x4.
84define <16 x i32> @test_div7_16i32(<16 x i32> %a) nounwind {
85; AVX-LABEL: test_div7_16i32:
86; AVX:       # BB#0:
87; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
88; AVX-NEXT:    vpextrd $1, %xmm1, %eax
89; AVX-NEXT:    cltq
90; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
91; AVX-NEXT:    shrq $32, %rcx
92; AVX-NEXT:    addl %ecx, %eax
93; AVX-NEXT:    movl %eax, %ecx
94; AVX-NEXT:    shrl $31, %ecx
95; AVX-NEXT:    sarl $2, %eax
96; AVX-NEXT:    addl %ecx, %eax
97; AVX-NEXT:    vmovd %xmm1, %ecx
98; AVX-NEXT:    movslq %ecx, %rcx
99; AVX-NEXT:    imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
100; AVX-NEXT:    shrq $32, %rdx
101; AVX-NEXT:    addl %edx, %ecx
102; AVX-NEXT:    movl %ecx, %edx
103; AVX-NEXT:    shrl $31, %edx
104; AVX-NEXT:    sarl $2, %ecx
105; AVX-NEXT:    addl %edx, %ecx
106; AVX-NEXT:    vmovd %ecx, %xmm2
107; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
108; AVX-NEXT:    vpextrd $2, %xmm1, %eax
109; AVX-NEXT:    cltq
110; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
111; AVX-NEXT:    shrq $32, %rcx
112; AVX-NEXT:    addl %ecx, %eax
113; AVX-NEXT:    movl %eax, %ecx
114; AVX-NEXT:    shrl $31, %ecx
115; AVX-NEXT:    sarl $2, %eax
116; AVX-NEXT:    addl %ecx, %eax
117; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
118; AVX-NEXT:    vpextrd $3, %xmm1, %eax
119; AVX-NEXT:    cltq
120; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
121; AVX-NEXT:    shrq $32, %rcx
122; AVX-NEXT:    addl %ecx, %eax
123; AVX-NEXT:    movl %eax, %ecx
124; AVX-NEXT:    shrl $31, %ecx
125; AVX-NEXT:    sarl $2, %eax
126; AVX-NEXT:    addl %ecx, %eax
127; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
128; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
129; AVX-NEXT:    vpextrd $1, %xmm2, %eax
130; AVX-NEXT:    cltq
131; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
132; AVX-NEXT:    shrq $32, %rcx
133; AVX-NEXT:    addl %ecx, %eax
134; AVX-NEXT:    movl %eax, %ecx
135; AVX-NEXT:    shrl $31, %ecx
136; AVX-NEXT:    sarl $2, %eax
137; AVX-NEXT:    addl %ecx, %eax
138; AVX-NEXT:    vmovd %xmm2, %ecx
139; AVX-NEXT:    movslq %ecx, %rcx
140; AVX-NEXT:    imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
141; AVX-NEXT:    shrq $32, %rdx
142; AVX-NEXT:    addl %edx, %ecx
143; AVX-NEXT:    movl %ecx, %edx
144; AVX-NEXT:    shrl $31, %edx
145; AVX-NEXT:    sarl $2, %ecx
146; AVX-NEXT:    addl %edx, %ecx
147; AVX-NEXT:    vmovd %ecx, %xmm3
148; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
149; AVX-NEXT:    vpextrd $2, %xmm2, %eax
150; AVX-NEXT:    cltq
151; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
152; AVX-NEXT:    shrq $32, %rcx
153; AVX-NEXT:    addl %ecx, %eax
154; AVX-NEXT:    movl %eax, %ecx
155; AVX-NEXT:    shrl $31, %ecx
156; AVX-NEXT:    sarl $2, %eax
157; AVX-NEXT:    addl %ecx, %eax
158; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
159; AVX-NEXT:    vpextrd $3, %xmm2, %eax
160; AVX-NEXT:    cltq
161; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
162; AVX-NEXT:    shrq $32, %rcx
163; AVX-NEXT:    addl %ecx, %eax
164; AVX-NEXT:    movl %eax, %ecx
165; AVX-NEXT:    shrl $31, %ecx
166; AVX-NEXT:    sarl $2, %eax
167; AVX-NEXT:    addl %ecx, %eax
168; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
169; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
170; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
171; AVX-NEXT:    vpextrd $1, %xmm2, %eax
172; AVX-NEXT:    cltq
173; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
174; AVX-NEXT:    shrq $32, %rcx
175; AVX-NEXT:    addl %ecx, %eax
176; AVX-NEXT:    movl %eax, %ecx
177; AVX-NEXT:    shrl $31, %ecx
178; AVX-NEXT:    sarl $2, %eax
179; AVX-NEXT:    addl %ecx, %eax
180; AVX-NEXT:    vmovd %xmm2, %ecx
181; AVX-NEXT:    movslq %ecx, %rcx
182; AVX-NEXT:    imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
183; AVX-NEXT:    shrq $32, %rdx
184; AVX-NEXT:    addl %edx, %ecx
185; AVX-NEXT:    movl %ecx, %edx
186; AVX-NEXT:    shrl $31, %edx
187; AVX-NEXT:    sarl $2, %ecx
188; AVX-NEXT:    addl %edx, %ecx
189; AVX-NEXT:    vmovd %ecx, %xmm3
190; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
191; AVX-NEXT:    vpextrd $2, %xmm2, %eax
192; AVX-NEXT:    cltq
193; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
194; AVX-NEXT:    shrq $32, %rcx
195; AVX-NEXT:    addl %ecx, %eax
196; AVX-NEXT:    movl %eax, %ecx
197; AVX-NEXT:    shrl $31, %ecx
198; AVX-NEXT:    sarl $2, %eax
199; AVX-NEXT:    addl %ecx, %eax
200; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
201; AVX-NEXT:    vpextrd $3, %xmm2, %eax
202; AVX-NEXT:    cltq
203; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
204; AVX-NEXT:    shrq $32, %rcx
205; AVX-NEXT:    addl %ecx, %eax
206; AVX-NEXT:    movl %eax, %ecx
207; AVX-NEXT:    shrl $31, %ecx
208; AVX-NEXT:    sarl $2, %eax
209; AVX-NEXT:    addl %ecx, %eax
210; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
211; AVX-NEXT:    vpextrd $1, %xmm0, %eax
212; AVX-NEXT:    cltq
213; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
214; AVX-NEXT:    shrq $32, %rcx
215; AVX-NEXT:    addl %ecx, %eax
216; AVX-NEXT:    movl %eax, %ecx
217; AVX-NEXT:    shrl $31, %ecx
218; AVX-NEXT:    sarl $2, %eax
219; AVX-NEXT:    addl %ecx, %eax
220; AVX-NEXT:    vmovd %xmm0, %ecx
221; AVX-NEXT:    movslq %ecx, %rcx
222; AVX-NEXT:    imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
223; AVX-NEXT:    shrq $32, %rdx
224; AVX-NEXT:    addl %edx, %ecx
225; AVX-NEXT:    movl %ecx, %edx
226; AVX-NEXT:    shrl $31, %edx
227; AVX-NEXT:    sarl $2, %ecx
228; AVX-NEXT:    addl %edx, %ecx
229; AVX-NEXT:    vmovd %ecx, %xmm3
230; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
231; AVX-NEXT:    vpextrd $2, %xmm0, %eax
232; AVX-NEXT:    cltq
233; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
234; AVX-NEXT:    shrq $32, %rcx
235; AVX-NEXT:    addl %ecx, %eax
236; AVX-NEXT:    movl %eax, %ecx
237; AVX-NEXT:    shrl $31, %ecx
238; AVX-NEXT:    sarl $2, %eax
239; AVX-NEXT:    addl %ecx, %eax
240; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
241; AVX-NEXT:    vpextrd $3, %xmm0, %eax
242; AVX-NEXT:    cltq
243; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
244; AVX-NEXT:    shrq $32, %rcx
245; AVX-NEXT:    addl %ecx, %eax
246; AVX-NEXT:    movl %eax, %ecx
247; AVX-NEXT:    shrl $31, %ecx
248; AVX-NEXT:    sarl $2, %eax
249; AVX-NEXT:    addl %ecx, %eax
250; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm0
251; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
252; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
253; AVX-NEXT:    retq
254  %res = sdiv <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
255  ret <16 x i32> %res
256}
257
; Signed division of every lane of a <32 x i16> vector by the constant 7.
; The two llc invocations are expected to differ here, so each one is
; checked under its own prefix instead of the shared one.
; With only +avx512f the expected code handles the vector as two 256-bit
; halves (%ymm0 and %ymm1) and reuses a single splat of 18725 in %ymm2 as
; the vpmulhw multiplier for both halves.
; With +avx512bw the expected code is one 512-bit sequence on %zmm0 whose
; vpmulhw multiplier is a RIP-relative memory operand.
; In both variants the quotient is formed from the vpmulhw result as
; (result >> 1, arithmetic) + (sign bit of result), using vpsraw $1,
; vpsrlw $15 and vpaddw.
258define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind {
259; AVX512F-LABEL: test_div7_32i16:
260; AVX512F:       # BB#0:
261; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725]
262; AVX512F-NEXT:    vpmulhw %ymm2, %ymm0, %ymm0
263; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm3
264; AVX512F-NEXT:    vpsraw $1, %ymm0, %ymm0
265; AVX512F-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
266; AVX512F-NEXT:    vpmulhw %ymm2, %ymm1, %ymm1
267; AVX512F-NEXT:    vpsrlw $15, %ymm1, %ymm2
268; AVX512F-NEXT:    vpsraw $1, %ymm1, %ymm1
269; AVX512F-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
270; AVX512F-NEXT:    retq
271;
272; AVX512BW-LABEL: test_div7_32i16:
273; AVX512BW:       # BB#0:
274; AVX512BW-NEXT:    vpmulhw {{.*}}(%rip), %zmm0, %zmm0
275; AVX512BW-NEXT:    vpsrlw $15, %zmm0, %zmm1
276; AVX512BW-NEXT:    vpsraw $1, %zmm0, %zmm0
277; AVX512BW-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
278; AVX512BW-NEXT:    retq
279  %res = sdiv <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
280  ret <32 x i16> %res
281}
282
283define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
284; AVX512F-LABEL: test_div7_64i8:
285; AVX512F:       # BB#0:
286; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147]
287; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
288; AVX512F-NEXT:    vpmovsxbw %xmm3, %ymm3
289; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
290; AVX512F-NEXT:    vpmovsxbw %xmm4, %ymm4
291; AVX512F-NEXT:    vpmullw %ymm3, %ymm4, %ymm4
292; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm4
293; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
294; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm5
295; AVX512F-NEXT:    vpmullw %ymm2, %ymm5, %ymm5
296; AVX512F-NEXT:    vpsrlw $8, %ymm5, %ymm5
297; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm6 = ymm5[2,3],ymm4[2,3]
298; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm5, %ymm4
299; AVX512F-NEXT:    vpackuswb %ymm6, %ymm4, %ymm4
300; AVX512F-NEXT:    vpaddb %ymm0, %ymm4, %ymm0
301; AVX512F-NEXT:    vpsrlw $7, %ymm0, %ymm4
302; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
303; AVX512F-NEXT:    vpand %ymm5, %ymm4, %ymm4
304; AVX512F-NEXT:    vpsrlw $2, %ymm0, %ymm0
305; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
306; AVX512F-NEXT:    vpand %ymm6, %ymm0, %ymm0
307; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
308; AVX512F-NEXT:    vpxor %ymm7, %ymm0, %ymm0
309; AVX512F-NEXT:    vpsubb %ymm7, %ymm0, %ymm0
310; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
311; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm4
312; AVX512F-NEXT:    vpmovsxbw %xmm4, %ymm4
313; AVX512F-NEXT:    vpmullw %ymm3, %ymm4, %ymm3
314; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3
315; AVX512F-NEXT:    vpmovsxbw %xmm1, %ymm4
316; AVX512F-NEXT:    vpmullw %ymm2, %ymm4, %ymm2
317; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
318; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
319; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
320; AVX512F-NEXT:    vpackuswb %ymm4, %ymm2, %ymm2
321; AVX512F-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
322; AVX512F-NEXT:    vpsrlw $7, %ymm1, %ymm2
323; AVX512F-NEXT:    vpand %ymm5, %ymm2, %ymm2
324; AVX512F-NEXT:    vpsrlw $2, %ymm1, %ymm1
325; AVX512F-NEXT:    vpand %ymm6, %ymm1, %ymm1
326; AVX512F-NEXT:    vpxor %ymm7, %ymm1, %ymm1
327; AVX512F-NEXT:    vpsubb %ymm7, %ymm1, %ymm1
328; AVX512F-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
329; AVX512F-NEXT:    retq
330;
331; AVX512BW-LABEL: test_div7_64i8:
332; AVX512BW:       # BB#0:
333; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
334; AVX512BW-NEXT:    vpextrb $1, %xmm1, %eax
335; AVX512BW-NEXT:    movsbl %al, %eax
336; AVX512BW-NEXT:    imull $-109, %eax, %ecx
337; AVX512BW-NEXT:    shrl $8, %ecx
338; AVX512BW-NEXT:    addb %cl, %al
339; AVX512BW-NEXT:    movl %eax, %ecx
340; AVX512BW-NEXT:    shrb $7, %cl
341; AVX512BW-NEXT:    sarb $2, %al
342; AVX512BW-NEXT:    addb %cl, %al
343; AVX512BW-NEXT:    movzbl %al, %eax
344; AVX512BW-NEXT:    vpextrb $0, %xmm1, %ecx
345; AVX512BW-NEXT:    movsbl %cl, %ecx
346; AVX512BW-NEXT:    imull $-109, %ecx, %edx
347; AVX512BW-NEXT:    shrl $8, %edx
348; AVX512BW-NEXT:    addb %dl, %cl
349; AVX512BW-NEXT:    movl %ecx, %edx
350; AVX512BW-NEXT:    shrb $7, %dl
351; AVX512BW-NEXT:    sarb $2, %cl
352; AVX512BW-NEXT:    addb %dl, %cl
353; AVX512BW-NEXT:    movzbl %cl, %ecx
354; AVX512BW-NEXT:    vmovd %ecx, %xmm2
355; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
356; AVX512BW-NEXT:    vpextrb $2, %xmm1, %eax
357; AVX512BW-NEXT:    movsbl %al, %eax
358; AVX512BW-NEXT:    imull $-109, %eax, %ecx
359; AVX512BW-NEXT:    shrl $8, %ecx
360; AVX512BW-NEXT:    addb %cl, %al
361; AVX512BW-NEXT:    movl %eax, %ecx
362; AVX512BW-NEXT:    shrb $7, %cl
363; AVX512BW-NEXT:    sarb $2, %al
364; AVX512BW-NEXT:    addb %cl, %al
365; AVX512BW-NEXT:    movzbl %al, %eax
366; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
367; AVX512BW-NEXT:    vpextrb $3, %xmm1, %eax
368; AVX512BW-NEXT:    movsbl %al, %eax
369; AVX512BW-NEXT:    imull $-109, %eax, %ecx
370; AVX512BW-NEXT:    shrl $8, %ecx
371; AVX512BW-NEXT:    addb %cl, %al
372; AVX512BW-NEXT:    movl %eax, %ecx
373; AVX512BW-NEXT:    shrb $7, %cl
374; AVX512BW-NEXT:    sarb $2, %al
375; AVX512BW-NEXT:    addb %cl, %al
376; AVX512BW-NEXT:    movzbl %al, %eax
377; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
378; AVX512BW-NEXT:    vpextrb $4, %xmm1, %eax
379; AVX512BW-NEXT:    movsbl %al, %eax
380; AVX512BW-NEXT:    imull $-109, %eax, %ecx
381; AVX512BW-NEXT:    shrl $8, %ecx
382; AVX512BW-NEXT:    addb %cl, %al
383; AVX512BW-NEXT:    movl %eax, %ecx
384; AVX512BW-NEXT:    shrb $7, %cl
385; AVX512BW-NEXT:    sarb $2, %al
386; AVX512BW-NEXT:    addb %cl, %al
387; AVX512BW-NEXT:    movzbl %al, %eax
388; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
389; AVX512BW-NEXT:    vpextrb $5, %xmm1, %eax
390; AVX512BW-NEXT:    movsbl %al, %eax
391; AVX512BW-NEXT:    imull $-109, %eax, %ecx
392; AVX512BW-NEXT:    shrl $8, %ecx
393; AVX512BW-NEXT:    addb %cl, %al
394; AVX512BW-NEXT:    movl %eax, %ecx
395; AVX512BW-NEXT:    shrb $7, %cl
396; AVX512BW-NEXT:    sarb $2, %al
397; AVX512BW-NEXT:    addb %cl, %al
398; AVX512BW-NEXT:    movzbl %al, %eax
399; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
400; AVX512BW-NEXT:    vpextrb $6, %xmm1, %eax
401; AVX512BW-NEXT:    movsbl %al, %eax
402; AVX512BW-NEXT:    imull $-109, %eax, %ecx
403; AVX512BW-NEXT:    shrl $8, %ecx
404; AVX512BW-NEXT:    addb %cl, %al
405; AVX512BW-NEXT:    movl %eax, %ecx
406; AVX512BW-NEXT:    shrb $7, %cl
407; AVX512BW-NEXT:    sarb $2, %al
408; AVX512BW-NEXT:    addb %cl, %al
409; AVX512BW-NEXT:    movzbl %al, %eax
410; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
411; AVX512BW-NEXT:    vpextrb $7, %xmm1, %eax
412; AVX512BW-NEXT:    movsbl %al, %eax
413; AVX512BW-NEXT:    imull $-109, %eax, %ecx
414; AVX512BW-NEXT:    shrl $8, %ecx
415; AVX512BW-NEXT:    addb %cl, %al
416; AVX512BW-NEXT:    movl %eax, %ecx
417; AVX512BW-NEXT:    shrb $7, %cl
418; AVX512BW-NEXT:    sarb $2, %al
419; AVX512BW-NEXT:    addb %cl, %al
420; AVX512BW-NEXT:    movzbl %al, %eax
421; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
422; AVX512BW-NEXT:    vpextrb $8, %xmm1, %eax
423; AVX512BW-NEXT:    movsbl %al, %eax
424; AVX512BW-NEXT:    imull $-109, %eax, %ecx
425; AVX512BW-NEXT:    shrl $8, %ecx
426; AVX512BW-NEXT:    addb %cl, %al
427; AVX512BW-NEXT:    movl %eax, %ecx
428; AVX512BW-NEXT:    shrb $7, %cl
429; AVX512BW-NEXT:    sarb $2, %al
430; AVX512BW-NEXT:    addb %cl, %al
431; AVX512BW-NEXT:    movzbl %al, %eax
432; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
433; AVX512BW-NEXT:    vpextrb $9, %xmm1, %eax
434; AVX512BW-NEXT:    movsbl %al, %eax
435; AVX512BW-NEXT:    imull $-109, %eax, %ecx
436; AVX512BW-NEXT:    shrl $8, %ecx
437; AVX512BW-NEXT:    addb %cl, %al
438; AVX512BW-NEXT:    movl %eax, %ecx
439; AVX512BW-NEXT:    shrb $7, %cl
440; AVX512BW-NEXT:    sarb $2, %al
441; AVX512BW-NEXT:    addb %cl, %al
442; AVX512BW-NEXT:    movzbl %al, %eax
443; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
444; AVX512BW-NEXT:    vpextrb $10, %xmm1, %eax
445; AVX512BW-NEXT:    movsbl %al, %eax
446; AVX512BW-NEXT:    imull $-109, %eax, %ecx
447; AVX512BW-NEXT:    shrl $8, %ecx
448; AVX512BW-NEXT:    addb %cl, %al
449; AVX512BW-NEXT:    movl %eax, %ecx
450; AVX512BW-NEXT:    shrb $7, %cl
451; AVX512BW-NEXT:    sarb $2, %al
452; AVX512BW-NEXT:    addb %cl, %al
453; AVX512BW-NEXT:    movzbl %al, %eax
454; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
455; AVX512BW-NEXT:    vpextrb $11, %xmm1, %eax
456; AVX512BW-NEXT:    movsbl %al, %eax
457; AVX512BW-NEXT:    imull $-109, %eax, %ecx
458; AVX512BW-NEXT:    shrl $8, %ecx
459; AVX512BW-NEXT:    addb %cl, %al
460; AVX512BW-NEXT:    movl %eax, %ecx
461; AVX512BW-NEXT:    shrb $7, %cl
462; AVX512BW-NEXT:    sarb $2, %al
463; AVX512BW-NEXT:    addb %cl, %al
464; AVX512BW-NEXT:    movzbl %al, %eax
465; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
466; AVX512BW-NEXT:    vpextrb $12, %xmm1, %eax
467; AVX512BW-NEXT:    movsbl %al, %eax
468; AVX512BW-NEXT:    imull $-109, %eax, %ecx
469; AVX512BW-NEXT:    shrl $8, %ecx
470; AVX512BW-NEXT:    addb %cl, %al
471; AVX512BW-NEXT:    movl %eax, %ecx
472; AVX512BW-NEXT:    shrb $7, %cl
473; AVX512BW-NEXT:    sarb $2, %al
474; AVX512BW-NEXT:    addb %cl, %al
475; AVX512BW-NEXT:    movzbl %al, %eax
476; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
477; AVX512BW-NEXT:    vpextrb $13, %xmm1, %eax
478; AVX512BW-NEXT:    movsbl %al, %eax
479; AVX512BW-NEXT:    imull $-109, %eax, %ecx
480; AVX512BW-NEXT:    shrl $8, %ecx
481; AVX512BW-NEXT:    addb %cl, %al
482; AVX512BW-NEXT:    movl %eax, %ecx
483; AVX512BW-NEXT:    shrb $7, %cl
484; AVX512BW-NEXT:    sarb $2, %al
485; AVX512BW-NEXT:    addb %cl, %al
486; AVX512BW-NEXT:    movzbl %al, %eax
487; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
488; AVX512BW-NEXT:    vpextrb $14, %xmm1, %eax
489; AVX512BW-NEXT:    movsbl %al, %eax
490; AVX512BW-NEXT:    imull $-109, %eax, %ecx
491; AVX512BW-NEXT:    shrl $8, %ecx
492; AVX512BW-NEXT:    addb %cl, %al
493; AVX512BW-NEXT:    movl %eax, %ecx
494; AVX512BW-NEXT:    shrb $7, %cl
495; AVX512BW-NEXT:    sarb $2, %al
496; AVX512BW-NEXT:    addb %cl, %al
497; AVX512BW-NEXT:    movzbl %al, %eax
498; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
499; AVX512BW-NEXT:    vpextrb $15, %xmm1, %eax
500; AVX512BW-NEXT:    movsbl %al, %eax
501; AVX512BW-NEXT:    imull $-109, %eax, %ecx
502; AVX512BW-NEXT:    shrl $8, %ecx
503; AVX512BW-NEXT:    addb %cl, %al
504; AVX512BW-NEXT:    movl %eax, %ecx
505; AVX512BW-NEXT:    shrb $7, %cl
506; AVX512BW-NEXT:    sarb $2, %al
507; AVX512BW-NEXT:    addb %cl, %al
508; AVX512BW-NEXT:    movzbl %al, %eax
509; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
510; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
511; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
512; AVX512BW-NEXT:    movsbl %al, %eax
513; AVX512BW-NEXT:    imull $-109, %eax, %ecx
514; AVX512BW-NEXT:    shrl $8, %ecx
515; AVX512BW-NEXT:    addb %cl, %al
516; AVX512BW-NEXT:    movl %eax, %ecx
517; AVX512BW-NEXT:    shrb $7, %cl
518; AVX512BW-NEXT:    sarb $2, %al
519; AVX512BW-NEXT:    addb %cl, %al
520; AVX512BW-NEXT:    movzbl %al, %eax
521; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
522; AVX512BW-NEXT:    movsbl %cl, %ecx
523; AVX512BW-NEXT:    imull $-109, %ecx, %edx
524; AVX512BW-NEXT:    shrl $8, %edx
525; AVX512BW-NEXT:    addb %dl, %cl
526; AVX512BW-NEXT:    movl %ecx, %edx
527; AVX512BW-NEXT:    shrb $7, %dl
528; AVX512BW-NEXT:    sarb $2, %cl
529; AVX512BW-NEXT:    addb %dl, %cl
530; AVX512BW-NEXT:    movzbl %cl, %ecx
531; AVX512BW-NEXT:    vmovd %ecx, %xmm3
532; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
533; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
534; AVX512BW-NEXT:    movsbl %al, %eax
535; AVX512BW-NEXT:    imull $-109, %eax, %ecx
536; AVX512BW-NEXT:    shrl $8, %ecx
537; AVX512BW-NEXT:    addb %cl, %al
538; AVX512BW-NEXT:    movl %eax, %ecx
539; AVX512BW-NEXT:    shrb $7, %cl
540; AVX512BW-NEXT:    sarb $2, %al
541; AVX512BW-NEXT:    addb %cl, %al
542; AVX512BW-NEXT:    movzbl %al, %eax
543; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
544; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
545; AVX512BW-NEXT:    movsbl %al, %eax
546; AVX512BW-NEXT:    imull $-109, %eax, %ecx
547; AVX512BW-NEXT:    shrl $8, %ecx
548; AVX512BW-NEXT:    addb %cl, %al
549; AVX512BW-NEXT:    movl %eax, %ecx
550; AVX512BW-NEXT:    shrb $7, %cl
551; AVX512BW-NEXT:    sarb $2, %al
552; AVX512BW-NEXT:    addb %cl, %al
553; AVX512BW-NEXT:    movzbl %al, %eax
554; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
555; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
556; AVX512BW-NEXT:    movsbl %al, %eax
557; AVX512BW-NEXT:    imull $-109, %eax, %ecx
558; AVX512BW-NEXT:    shrl $8, %ecx
559; AVX512BW-NEXT:    addb %cl, %al
560; AVX512BW-NEXT:    movl %eax, %ecx
561; AVX512BW-NEXT:    shrb $7, %cl
562; AVX512BW-NEXT:    sarb $2, %al
563; AVX512BW-NEXT:    addb %cl, %al
564; AVX512BW-NEXT:    movzbl %al, %eax
565; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
566; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
567; AVX512BW-NEXT:    movsbl %al, %eax
568; AVX512BW-NEXT:    imull $-109, %eax, %ecx
569; AVX512BW-NEXT:    shrl $8, %ecx
570; AVX512BW-NEXT:    addb %cl, %al
571; AVX512BW-NEXT:    movl %eax, %ecx
572; AVX512BW-NEXT:    shrb $7, %cl
573; AVX512BW-NEXT:    sarb $2, %al
574; AVX512BW-NEXT:    addb %cl, %al
575; AVX512BW-NEXT:    movzbl %al, %eax
576; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
577; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
578; AVX512BW-NEXT:    movsbl %al, %eax
579; AVX512BW-NEXT:    imull $-109, %eax, %ecx
580; AVX512BW-NEXT:    shrl $8, %ecx
581; AVX512BW-NEXT:    addb %cl, %al
582; AVX512BW-NEXT:    movl %eax, %ecx
583; AVX512BW-NEXT:    shrb $7, %cl
584; AVX512BW-NEXT:    sarb $2, %al
585; AVX512BW-NEXT:    addb %cl, %al
586; AVX512BW-NEXT:    movzbl %al, %eax
587; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
588; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
589; AVX512BW-NEXT:    movsbl %al, %eax
590; AVX512BW-NEXT:    imull $-109, %eax, %ecx
591; AVX512BW-NEXT:    shrl $8, %ecx
592; AVX512BW-NEXT:    addb %cl, %al
593; AVX512BW-NEXT:    movl %eax, %ecx
594; AVX512BW-NEXT:    shrb $7, %cl
595; AVX512BW-NEXT:    sarb $2, %al
596; AVX512BW-NEXT:    addb %cl, %al
597; AVX512BW-NEXT:    movzbl %al, %eax
598; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
599; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
600; AVX512BW-NEXT:    movsbl %al, %eax
601; AVX512BW-NEXT:    imull $-109, %eax, %ecx
602; AVX512BW-NEXT:    shrl $8, %ecx
603; AVX512BW-NEXT:    addb %cl, %al
604; AVX512BW-NEXT:    movl %eax, %ecx
605; AVX512BW-NEXT:    shrb $7, %cl
606; AVX512BW-NEXT:    sarb $2, %al
607; AVX512BW-NEXT:    addb %cl, %al
608; AVX512BW-NEXT:    movzbl %al, %eax
609; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
610; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
611; AVX512BW-NEXT:    movsbl %al, %eax
612; AVX512BW-NEXT:    imull $-109, %eax, %ecx
613; AVX512BW-NEXT:    shrl $8, %ecx
614; AVX512BW-NEXT:    addb %cl, %al
615; AVX512BW-NEXT:    movl %eax, %ecx
616; AVX512BW-NEXT:    shrb $7, %cl
617; AVX512BW-NEXT:    sarb $2, %al
618; AVX512BW-NEXT:    addb %cl, %al
619; AVX512BW-NEXT:    movzbl %al, %eax
620; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
621; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
622; AVX512BW-NEXT:    movsbl %al, %eax
623; AVX512BW-NEXT:    imull $-109, %eax, %ecx
624; AVX512BW-NEXT:    shrl $8, %ecx
625; AVX512BW-NEXT:    addb %cl, %al
626; AVX512BW-NEXT:    movl %eax, %ecx
627; AVX512BW-NEXT:    shrb $7, %cl
628; AVX512BW-NEXT:    sarb $2, %al
629; AVX512BW-NEXT:    addb %cl, %al
630; AVX512BW-NEXT:    movzbl %al, %eax
631; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
632; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
633; AVX512BW-NEXT:    movsbl %al, %eax
634; AVX512BW-NEXT:    imull $-109, %eax, %ecx
635; AVX512BW-NEXT:    shrl $8, %ecx
636; AVX512BW-NEXT:    addb %cl, %al
637; AVX512BW-NEXT:    movl %eax, %ecx
638; AVX512BW-NEXT:    shrb $7, %cl
639; AVX512BW-NEXT:    sarb $2, %al
640; AVX512BW-NEXT:    addb %cl, %al
641; AVX512BW-NEXT:    movzbl %al, %eax
642; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
643; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
644; AVX512BW-NEXT:    movsbl %al, %eax
645; AVX512BW-NEXT:    imull $-109, %eax, %ecx
646; AVX512BW-NEXT:    shrl $8, %ecx
647; AVX512BW-NEXT:    addb %cl, %al
648; AVX512BW-NEXT:    movl %eax, %ecx
649; AVX512BW-NEXT:    shrb $7, %cl
650; AVX512BW-NEXT:    sarb $2, %al
651; AVX512BW-NEXT:    addb %cl, %al
652; AVX512BW-NEXT:    movzbl %al, %eax
653; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
654; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
655; AVX512BW-NEXT:    movsbl %al, %eax
656; AVX512BW-NEXT:    imull $-109, %eax, %ecx
657; AVX512BW-NEXT:    shrl $8, %ecx
658; AVX512BW-NEXT:    addb %cl, %al
659; AVX512BW-NEXT:    movl %eax, %ecx
660; AVX512BW-NEXT:    shrb $7, %cl
661; AVX512BW-NEXT:    sarb $2, %al
662; AVX512BW-NEXT:    addb %cl, %al
663; AVX512BW-NEXT:    movzbl %al, %eax
664; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
665; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
666; AVX512BW-NEXT:    movsbl %al, %eax
667; AVX512BW-NEXT:    imull $-109, %eax, %ecx
668; AVX512BW-NEXT:    shrl $8, %ecx
669; AVX512BW-NEXT:    addb %cl, %al
670; AVX512BW-NEXT:    movl %eax, %ecx
671; AVX512BW-NEXT:    shrb $7, %cl
672; AVX512BW-NEXT:    sarb $2, %al
673; AVX512BW-NEXT:    addb %cl, %al
674; AVX512BW-NEXT:    movzbl %al, %eax
675; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
676; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
677; AVX512BW-NEXT:    movsbl %al, %eax
678; AVX512BW-NEXT:    imull $-109, %eax, %ecx
679; AVX512BW-NEXT:    shrl $8, %ecx
680; AVX512BW-NEXT:    addb %cl, %al
681; AVX512BW-NEXT:    movl %eax, %ecx
682; AVX512BW-NEXT:    shrb $7, %cl
683; AVX512BW-NEXT:    sarb $2, %al
684; AVX512BW-NEXT:    addb %cl, %al
685; AVX512BW-NEXT:    movzbl %al, %eax
686; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
687; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
688; AVX512BW-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
689; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
690; AVX512BW-NEXT:    movsbl %al, %eax
691; AVX512BW-NEXT:    imull $-109, %eax, %ecx
692; AVX512BW-NEXT:    shrl $8, %ecx
693; AVX512BW-NEXT:    addb %cl, %al
694; AVX512BW-NEXT:    movl %eax, %ecx
695; AVX512BW-NEXT:    shrb $7, %cl
696; AVX512BW-NEXT:    sarb $2, %al
697; AVX512BW-NEXT:    addb %cl, %al
698; AVX512BW-NEXT:    movzbl %al, %eax
699; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
700; AVX512BW-NEXT:    movsbl %cl, %ecx
701; AVX512BW-NEXT:    imull $-109, %ecx, %edx
702; AVX512BW-NEXT:    shrl $8, %edx
703; AVX512BW-NEXT:    addb %dl, %cl
704; AVX512BW-NEXT:    movl %ecx, %edx
705; AVX512BW-NEXT:    shrb $7, %dl
706; AVX512BW-NEXT:    sarb $2, %cl
707; AVX512BW-NEXT:    addb %dl, %cl
708; AVX512BW-NEXT:    movzbl %cl, %ecx
709; AVX512BW-NEXT:    vmovd %ecx, %xmm3
710; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
711; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
712; AVX512BW-NEXT:    movsbl %al, %eax
713; AVX512BW-NEXT:    imull $-109, %eax, %ecx
714; AVX512BW-NEXT:    shrl $8, %ecx
715; AVX512BW-NEXT:    addb %cl, %al
716; AVX512BW-NEXT:    movl %eax, %ecx
717; AVX512BW-NEXT:    shrb $7, %cl
718; AVX512BW-NEXT:    sarb $2, %al
719; AVX512BW-NEXT:    addb %cl, %al
720; AVX512BW-NEXT:    movzbl %al, %eax
721; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
722; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
723; AVX512BW-NEXT:    movsbl %al, %eax
724; AVX512BW-NEXT:    imull $-109, %eax, %ecx
725; AVX512BW-NEXT:    shrl $8, %ecx
726; AVX512BW-NEXT:    addb %cl, %al
727; AVX512BW-NEXT:    movl %eax, %ecx
728; AVX512BW-NEXT:    shrb $7, %cl
729; AVX512BW-NEXT:    sarb $2, %al
730; AVX512BW-NEXT:    addb %cl, %al
731; AVX512BW-NEXT:    movzbl %al, %eax
732; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
733; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
734; AVX512BW-NEXT:    movsbl %al, %eax
735; AVX512BW-NEXT:    imull $-109, %eax, %ecx
736; AVX512BW-NEXT:    shrl $8, %ecx
737; AVX512BW-NEXT:    addb %cl, %al
738; AVX512BW-NEXT:    movl %eax, %ecx
739; AVX512BW-NEXT:    shrb $7, %cl
740; AVX512BW-NEXT:    sarb $2, %al
741; AVX512BW-NEXT:    addb %cl, %al
742; AVX512BW-NEXT:    movzbl %al, %eax
743; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
744; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
745; AVX512BW-NEXT:    movsbl %al, %eax
746; AVX512BW-NEXT:    imull $-109, %eax, %ecx
747; AVX512BW-NEXT:    shrl $8, %ecx
748; AVX512BW-NEXT:    addb %cl, %al
749; AVX512BW-NEXT:    movl %eax, %ecx
750; AVX512BW-NEXT:    shrb $7, %cl
751; AVX512BW-NEXT:    sarb $2, %al
752; AVX512BW-NEXT:    addb %cl, %al
753; AVX512BW-NEXT:    movzbl %al, %eax
754; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
755; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
756; AVX512BW-NEXT:    movsbl %al, %eax
757; AVX512BW-NEXT:    imull $-109, %eax, %ecx
758; AVX512BW-NEXT:    shrl $8, %ecx
759; AVX512BW-NEXT:    addb %cl, %al
760; AVX512BW-NEXT:    movl %eax, %ecx
761; AVX512BW-NEXT:    shrb $7, %cl
762; AVX512BW-NEXT:    sarb $2, %al
763; AVX512BW-NEXT:    addb %cl, %al
764; AVX512BW-NEXT:    movzbl %al, %eax
765; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
766; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
767; AVX512BW-NEXT:    movsbl %al, %eax
768; AVX512BW-NEXT:    imull $-109, %eax, %ecx
769; AVX512BW-NEXT:    shrl $8, %ecx
770; AVX512BW-NEXT:    addb %cl, %al
771; AVX512BW-NEXT:    movl %eax, %ecx
772; AVX512BW-NEXT:    shrb $7, %cl
773; AVX512BW-NEXT:    sarb $2, %al
774; AVX512BW-NEXT:    addb %cl, %al
775; AVX512BW-NEXT:    movzbl %al, %eax
776; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
777; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
778; AVX512BW-NEXT:    movsbl %al, %eax
779; AVX512BW-NEXT:    imull $-109, %eax, %ecx
780; AVX512BW-NEXT:    shrl $8, %ecx
781; AVX512BW-NEXT:    addb %cl, %al
782; AVX512BW-NEXT:    movl %eax, %ecx
783; AVX512BW-NEXT:    shrb $7, %cl
784; AVX512BW-NEXT:    sarb $2, %al
785; AVX512BW-NEXT:    addb %cl, %al
786; AVX512BW-NEXT:    movzbl %al, %eax
787; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
788; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
789; AVX512BW-NEXT:    movsbl %al, %eax
790; AVX512BW-NEXT:    imull $-109, %eax, %ecx
791; AVX512BW-NEXT:    shrl $8, %ecx
792; AVX512BW-NEXT:    addb %cl, %al
793; AVX512BW-NEXT:    movl %eax, %ecx
794; AVX512BW-NEXT:    shrb $7, %cl
795; AVX512BW-NEXT:    sarb $2, %al
796; AVX512BW-NEXT:    addb %cl, %al
797; AVX512BW-NEXT:    movzbl %al, %eax
798; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
799; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
800; AVX512BW-NEXT:    movsbl %al, %eax
801; AVX512BW-NEXT:    imull $-109, %eax, %ecx
802; AVX512BW-NEXT:    shrl $8, %ecx
803; AVX512BW-NEXT:    addb %cl, %al
804; AVX512BW-NEXT:    movl %eax, %ecx
805; AVX512BW-NEXT:    shrb $7, %cl
806; AVX512BW-NEXT:    sarb $2, %al
807; AVX512BW-NEXT:    addb %cl, %al
808; AVX512BW-NEXT:    movzbl %al, %eax
809; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
810; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
811; AVX512BW-NEXT:    movsbl %al, %eax
812; AVX512BW-NEXT:    imull $-109, %eax, %ecx
813; AVX512BW-NEXT:    shrl $8, %ecx
814; AVX512BW-NEXT:    addb %cl, %al
815; AVX512BW-NEXT:    movl %eax, %ecx
816; AVX512BW-NEXT:    shrb $7, %cl
817; AVX512BW-NEXT:    sarb $2, %al
818; AVX512BW-NEXT:    addb %cl, %al
819; AVX512BW-NEXT:    movzbl %al, %eax
820; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
821; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
822; AVX512BW-NEXT:    movsbl %al, %eax
823; AVX512BW-NEXT:    imull $-109, %eax, %ecx
824; AVX512BW-NEXT:    shrl $8, %ecx
825; AVX512BW-NEXT:    addb %cl, %al
826; AVX512BW-NEXT:    movl %eax, %ecx
827; AVX512BW-NEXT:    shrb $7, %cl
828; AVX512BW-NEXT:    sarb $2, %al
829; AVX512BW-NEXT:    addb %cl, %al
830; AVX512BW-NEXT:    movzbl %al, %eax
831; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
832; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
833; AVX512BW-NEXT:    movsbl %al, %eax
834; AVX512BW-NEXT:    imull $-109, %eax, %ecx
835; AVX512BW-NEXT:    shrl $8, %ecx
836; AVX512BW-NEXT:    addb %cl, %al
837; AVX512BW-NEXT:    movl %eax, %ecx
838; AVX512BW-NEXT:    shrb $7, %cl
839; AVX512BW-NEXT:    sarb $2, %al
840; AVX512BW-NEXT:    addb %cl, %al
841; AVX512BW-NEXT:    movzbl %al, %eax
842; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
843; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
844; AVX512BW-NEXT:    movsbl %al, %eax
845; AVX512BW-NEXT:    imull $-109, %eax, %ecx
846; AVX512BW-NEXT:    shrl $8, %ecx
847; AVX512BW-NEXT:    addb %cl, %al
848; AVX512BW-NEXT:    movl %eax, %ecx
849; AVX512BW-NEXT:    shrb $7, %cl
850; AVX512BW-NEXT:    sarb $2, %al
851; AVX512BW-NEXT:    addb %cl, %al
852; AVX512BW-NEXT:    movzbl %al, %eax
853; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
854; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
855; AVX512BW-NEXT:    movsbl %al, %eax
856; AVX512BW-NEXT:    imull $-109, %eax, %ecx
857; AVX512BW-NEXT:    shrl $8, %ecx
858; AVX512BW-NEXT:    addb %cl, %al
859; AVX512BW-NEXT:    movl %eax, %ecx
860; AVX512BW-NEXT:    shrb $7, %cl
861; AVX512BW-NEXT:    sarb $2, %al
862; AVX512BW-NEXT:    addb %cl, %al
863; AVX512BW-NEXT:    movzbl %al, %eax
864; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
865; AVX512BW-NEXT:    vpextrb $1, %xmm0, %eax
866; AVX512BW-NEXT:    movsbl %al, %eax
867; AVX512BW-NEXT:    imull $-109, %eax, %ecx
868; AVX512BW-NEXT:    shrl $8, %ecx
869; AVX512BW-NEXT:    addb %cl, %al
870; AVX512BW-NEXT:    movl %eax, %ecx
871; AVX512BW-NEXT:    shrb $7, %cl
872; AVX512BW-NEXT:    sarb $2, %al
873; AVX512BW-NEXT:    addb %cl, %al
874; AVX512BW-NEXT:    movzbl %al, %eax
875; AVX512BW-NEXT:    vpextrb $0, %xmm0, %ecx
876; AVX512BW-NEXT:    movsbl %cl, %ecx
877; AVX512BW-NEXT:    imull $-109, %ecx, %edx
878; AVX512BW-NEXT:    shrl $8, %edx
879; AVX512BW-NEXT:    addb %dl, %cl
880; AVX512BW-NEXT:    movl %ecx, %edx
881; AVX512BW-NEXT:    shrb $7, %dl
882; AVX512BW-NEXT:    sarb $2, %cl
883; AVX512BW-NEXT:    addb %dl, %cl
884; AVX512BW-NEXT:    movzbl %cl, %ecx
885; AVX512BW-NEXT:    vmovd %ecx, %xmm3
886; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
887; AVX512BW-NEXT:    vpextrb $2, %xmm0, %eax
888; AVX512BW-NEXT:    movsbl %al, %eax
889; AVX512BW-NEXT:    imull $-109, %eax, %ecx
890; AVX512BW-NEXT:    shrl $8, %ecx
891; AVX512BW-NEXT:    addb %cl, %al
892; AVX512BW-NEXT:    movl %eax, %ecx
893; AVX512BW-NEXT:    shrb $7, %cl
894; AVX512BW-NEXT:    sarb $2, %al
895; AVX512BW-NEXT:    addb %cl, %al
896; AVX512BW-NEXT:    movzbl %al, %eax
897; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
898; AVX512BW-NEXT:    vpextrb $3, %xmm0, %eax
899; AVX512BW-NEXT:    movsbl %al, %eax
900; AVX512BW-NEXT:    imull $-109, %eax, %ecx
901; AVX512BW-NEXT:    shrl $8, %ecx
902; AVX512BW-NEXT:    addb %cl, %al
903; AVX512BW-NEXT:    movl %eax, %ecx
904; AVX512BW-NEXT:    shrb $7, %cl
905; AVX512BW-NEXT:    sarb $2, %al
906; AVX512BW-NEXT:    addb %cl, %al
907; AVX512BW-NEXT:    movzbl %al, %eax
908; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
909; AVX512BW-NEXT:    vpextrb $4, %xmm0, %eax
910; AVX512BW-NEXT:    movsbl %al, %eax
911; AVX512BW-NEXT:    imull $-109, %eax, %ecx
912; AVX512BW-NEXT:    shrl $8, %ecx
913; AVX512BW-NEXT:    addb %cl, %al
914; AVX512BW-NEXT:    movl %eax, %ecx
915; AVX512BW-NEXT:    shrb $7, %cl
916; AVX512BW-NEXT:    sarb $2, %al
917; AVX512BW-NEXT:    addb %cl, %al
918; AVX512BW-NEXT:    movzbl %al, %eax
919; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
920; AVX512BW-NEXT:    vpextrb $5, %xmm0, %eax
921; AVX512BW-NEXT:    movsbl %al, %eax
922; AVX512BW-NEXT:    imull $-109, %eax, %ecx
923; AVX512BW-NEXT:    shrl $8, %ecx
924; AVX512BW-NEXT:    addb %cl, %al
925; AVX512BW-NEXT:    movl %eax, %ecx
926; AVX512BW-NEXT:    shrb $7, %cl
927; AVX512BW-NEXT:    sarb $2, %al
928; AVX512BW-NEXT:    addb %cl, %al
929; AVX512BW-NEXT:    movzbl %al, %eax
930; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
931; AVX512BW-NEXT:    vpextrb $6, %xmm0, %eax
932; AVX512BW-NEXT:    movsbl %al, %eax
933; AVX512BW-NEXT:    imull $-109, %eax, %ecx
934; AVX512BW-NEXT:    shrl $8, %ecx
935; AVX512BW-NEXT:    addb %cl, %al
936; AVX512BW-NEXT:    movl %eax, %ecx
937; AVX512BW-NEXT:    shrb $7, %cl
938; AVX512BW-NEXT:    sarb $2, %al
939; AVX512BW-NEXT:    addb %cl, %al
940; AVX512BW-NEXT:    movzbl %al, %eax
941; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
942; AVX512BW-NEXT:    vpextrb $7, %xmm0, %eax
943; AVX512BW-NEXT:    movsbl %al, %eax
944; AVX512BW-NEXT:    imull $-109, %eax, %ecx
945; AVX512BW-NEXT:    shrl $8, %ecx
946; AVX512BW-NEXT:    addb %cl, %al
947; AVX512BW-NEXT:    movl %eax, %ecx
948; AVX512BW-NEXT:    shrb $7, %cl
949; AVX512BW-NEXT:    sarb $2, %al
950; AVX512BW-NEXT:    addb %cl, %al
951; AVX512BW-NEXT:    movzbl %al, %eax
952; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
953; AVX512BW-NEXT:    vpextrb $8, %xmm0, %eax
954; AVX512BW-NEXT:    movsbl %al, %eax
955; AVX512BW-NEXT:    imull $-109, %eax, %ecx
956; AVX512BW-NEXT:    shrl $8, %ecx
957; AVX512BW-NEXT:    addb %cl, %al
958; AVX512BW-NEXT:    movl %eax, %ecx
959; AVX512BW-NEXT:    shrb $7, %cl
960; AVX512BW-NEXT:    sarb $2, %al
961; AVX512BW-NEXT:    addb %cl, %al
962; AVX512BW-NEXT:    movzbl %al, %eax
963; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
964; AVX512BW-NEXT:    vpextrb $9, %xmm0, %eax
965; AVX512BW-NEXT:    movsbl %al, %eax
966; AVX512BW-NEXT:    imull $-109, %eax, %ecx
967; AVX512BW-NEXT:    shrl $8, %ecx
968; AVX512BW-NEXT:    addb %cl, %al
969; AVX512BW-NEXT:    movl %eax, %ecx
970; AVX512BW-NEXT:    shrb $7, %cl
971; AVX512BW-NEXT:    sarb $2, %al
972; AVX512BW-NEXT:    addb %cl, %al
973; AVX512BW-NEXT:    movzbl %al, %eax
974; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
975; AVX512BW-NEXT:    vpextrb $10, %xmm0, %eax
976; AVX512BW-NEXT:    movsbl %al, %eax
977; AVX512BW-NEXT:    imull $-109, %eax, %ecx
978; AVX512BW-NEXT:    shrl $8, %ecx
979; AVX512BW-NEXT:    addb %cl, %al
980; AVX512BW-NEXT:    movl %eax, %ecx
981; AVX512BW-NEXT:    shrb $7, %cl
982; AVX512BW-NEXT:    sarb $2, %al
983; AVX512BW-NEXT:    addb %cl, %al
984; AVX512BW-NEXT:    movzbl %al, %eax
985; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
986; AVX512BW-NEXT:    vpextrb $11, %xmm0, %eax
987; AVX512BW-NEXT:    movsbl %al, %eax
988; AVX512BW-NEXT:    imull $-109, %eax, %ecx
989; AVX512BW-NEXT:    shrl $8, %ecx
990; AVX512BW-NEXT:    addb %cl, %al
991; AVX512BW-NEXT:    movl %eax, %ecx
992; AVX512BW-NEXT:    shrb $7, %cl
993; AVX512BW-NEXT:    sarb $2, %al
994; AVX512BW-NEXT:    addb %cl, %al
995; AVX512BW-NEXT:    movzbl %al, %eax
996; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
997; AVX512BW-NEXT:    vpextrb $12, %xmm0, %eax
998; AVX512BW-NEXT:    movsbl %al, %eax
999; AVX512BW-NEXT:    imull $-109, %eax, %ecx
1000; AVX512BW-NEXT:    shrl $8, %ecx
1001; AVX512BW-NEXT:    addb %cl, %al
1002; AVX512BW-NEXT:    movl %eax, %ecx
1003; AVX512BW-NEXT:    shrb $7, %cl
1004; AVX512BW-NEXT:    sarb $2, %al
1005; AVX512BW-NEXT:    addb %cl, %al
1006; AVX512BW-NEXT:    movzbl %al, %eax
1007; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
1008; AVX512BW-NEXT:    vpextrb $13, %xmm0, %eax
1009; AVX512BW-NEXT:    movsbl %al, %eax
1010; AVX512BW-NEXT:    imull $-109, %eax, %ecx
1011; AVX512BW-NEXT:    shrl $8, %ecx
1012; AVX512BW-NEXT:    addb %cl, %al
1013; AVX512BW-NEXT:    movl %eax, %ecx
1014; AVX512BW-NEXT:    shrb $7, %cl
1015; AVX512BW-NEXT:    sarb $2, %al
1016; AVX512BW-NEXT:    addb %cl, %al
1017; AVX512BW-NEXT:    movzbl %al, %eax
1018; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
1019; AVX512BW-NEXT:    vpextrb $14, %xmm0, %eax
1020; AVX512BW-NEXT:    movsbl %al, %eax
1021; AVX512BW-NEXT:    imull $-109, %eax, %ecx
1022; AVX512BW-NEXT:    shrl $8, %ecx
1023; AVX512BW-NEXT:    addb %cl, %al
1024; AVX512BW-NEXT:    movl %eax, %ecx
1025; AVX512BW-NEXT:    shrb $7, %cl
1026; AVX512BW-NEXT:    sarb $2, %al
1027; AVX512BW-NEXT:    addb %cl, %al
1028; AVX512BW-NEXT:    movzbl %al, %eax
1029; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
1030; AVX512BW-NEXT:    vpextrb $15, %xmm0, %eax
1031; AVX512BW-NEXT:    movsbl %al, %eax
1032; AVX512BW-NEXT:    imull $-109, %eax, %ecx
1033; AVX512BW-NEXT:    shrl $8, %ecx
1034; AVX512BW-NEXT:    addb %cl, %al
1035; AVX512BW-NEXT:    movl %eax, %ecx
1036; AVX512BW-NEXT:    shrb $7, %cl
1037; AVX512BW-NEXT:    sarb $2, %al
1038; AVX512BW-NEXT:    addb %cl, %al
1039; AVX512BW-NEXT:    movzbl %al, %eax
1040; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
1041; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1042; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1043; AVX512BW-NEXT:    retq
1044  %res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
1045  ret <64 x i8> %res
1046}
1047
1048;
1049; srem by 7
1050;
1051
; Signed remainder of each of the eight i64 lanes of %a by the constant 7.
; The expectations use the AVX check prefix that both RUN lines share, so the
; +avx512f and +avx512bw configurations must both produce this code.
; As the checks show, the work is done one element at a time in
; general-purpose registers:
;   * the upper three 128-bit quarters of zmm0 are pulled out with
;     vextracti32x4 (the lowest is read directly from xmm0) and each quadword
;     is moved to rcx with vpextrq / vmovq;
;   * imulq by 0x4924924924924925 leaves the high half of the product in rdx;
;     the quotient is rdx shifted right arithmetically by one (sarq) plus the
;     sign bit of rdx (shrq $63);
;   * quotient * 7 is formed as quotient * 8 - quotient (leaq / subq) and
;     subtracted from the original element in rcx, giving the remainder;
;   * the results are repacked with vpunpcklqdq, vinserti128 and vinserti64x4.
; The check lines are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1052define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
1053; AVX-LABEL: test_rem7_8i64:
1054; AVX:       # BB#0:
1055; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
1056; AVX-NEXT:    vpextrq $1, %xmm1, %rcx
1057; AVX-NEXT:    movabsq $5270498306774157605, %rsi # imm = 0x4924924924924925
1058; AVX-NEXT:    movq %rcx, %rax
1059; AVX-NEXT:    imulq %rsi
1060; AVX-NEXT:    movq %rdx, %rax
1061; AVX-NEXT:    shrq $63, %rax
1062; AVX-NEXT:    sarq %rdx
1063; AVX-NEXT:    addq %rax, %rdx
1064; AVX-NEXT:    leaq (,%rdx,8), %rax
1065; AVX-NEXT:    subq %rdx, %rax
1066; AVX-NEXT:    subq %rax, %rcx
1067; AVX-NEXT:    vmovq %rcx, %xmm2
1068; AVX-NEXT:    vmovq %xmm1, %rcx
1069; AVX-NEXT:    movq %rcx, %rax
1070; AVX-NEXT:    imulq %rsi
1071; AVX-NEXT:    movq %rdx, %rax
1072; AVX-NEXT:    shrq $63, %rax
1073; AVX-NEXT:    sarq %rdx
1074; AVX-NEXT:    addq %rax, %rdx
1075; AVX-NEXT:    leaq (,%rdx,8), %rax
1076; AVX-NEXT:    subq %rdx, %rax
1077; AVX-NEXT:    subq %rax, %rcx
1078; AVX-NEXT:    vmovq %rcx, %xmm1
1079; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1080; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
1081; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
1082; AVX-NEXT:    movq %rcx, %rax
1083; AVX-NEXT:    imulq %rsi
1084; AVX-NEXT:    movq %rdx, %rax
1085; AVX-NEXT:    shrq $63, %rax
1086; AVX-NEXT:    sarq %rdx
1087; AVX-NEXT:    addq %rax, %rdx
1088; AVX-NEXT:    leaq (,%rdx,8), %rax
1089; AVX-NEXT:    subq %rdx, %rax
1090; AVX-NEXT:    subq %rax, %rcx
1091; AVX-NEXT:    vmovq %rcx, %xmm3
1092; AVX-NEXT:    vmovq %xmm2, %rcx
1093; AVX-NEXT:    movq %rcx, %rax
1094; AVX-NEXT:    imulq %rsi
1095; AVX-NEXT:    movq %rdx, %rax
1096; AVX-NEXT:    shrq $63, %rax
1097; AVX-NEXT:    sarq %rdx
1098; AVX-NEXT:    addq %rax, %rdx
1099; AVX-NEXT:    leaq (,%rdx,8), %rax
1100; AVX-NEXT:    subq %rdx, %rax
1101; AVX-NEXT:    subq %rax, %rcx
1102; AVX-NEXT:    vmovq %rcx, %xmm2
1103; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1104; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
1105; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
1106; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
1107; AVX-NEXT:    movq %rcx, %rax
1108; AVX-NEXT:    imulq %rsi
1109; AVX-NEXT:    movq %rdx, %rax
1110; AVX-NEXT:    shrq $63, %rax
1111; AVX-NEXT:    sarq %rdx
1112; AVX-NEXT:    addq %rax, %rdx
1113; AVX-NEXT:    leaq (,%rdx,8), %rax
1114; AVX-NEXT:    subq %rdx, %rax
1115; AVX-NEXT:    subq %rax, %rcx
1116; AVX-NEXT:    vmovq %rcx, %xmm3
1117; AVX-NEXT:    vmovq %xmm2, %rcx
1118; AVX-NEXT:    movq %rcx, %rax
1119; AVX-NEXT:    imulq %rsi
1120; AVX-NEXT:    movq %rdx, %rax
1121; AVX-NEXT:    shrq $63, %rax
1122; AVX-NEXT:    sarq %rdx
1123; AVX-NEXT:    addq %rax, %rdx
1124; AVX-NEXT:    leaq (,%rdx,8), %rax
1125; AVX-NEXT:    subq %rdx, %rax
1126; AVX-NEXT:    subq %rax, %rcx
1127; AVX-NEXT:    vmovq %rcx, %xmm2
1128; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1129; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
1130; AVX-NEXT:    movq %rcx, %rax
1131; AVX-NEXT:    imulq %rsi
1132; AVX-NEXT:    movq %rdx, %rax
1133; AVX-NEXT:    shrq $63, %rax
1134; AVX-NEXT:    sarq %rdx
1135; AVX-NEXT:    addq %rax, %rdx
1136; AVX-NEXT:    leaq (,%rdx,8), %rax
1137; AVX-NEXT:    subq %rdx, %rax
1138; AVX-NEXT:    subq %rax, %rcx
1139; AVX-NEXT:    vmovq %rcx, %xmm3
1140; AVX-NEXT:    vmovq %xmm0, %rcx
1141; AVX-NEXT:    movq %rcx, %rax
1142; AVX-NEXT:    imulq %rsi
1143; AVX-NEXT:    movq %rdx, %rax
1144; AVX-NEXT:    shrq $63, %rax
1145; AVX-NEXT:    sarq %rdx
1146; AVX-NEXT:    addq %rax, %rdx
1147; AVX-NEXT:    leaq (,%rdx,8), %rax
1148; AVX-NEXT:    subq %rdx, %rax
1149; AVX-NEXT:    subq %rax, %rcx
1150; AVX-NEXT:    vmovq %rcx, %xmm0
1151; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
1152; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1153; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1154; AVX-NEXT:    retq
1155  %res = srem <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
1156  ret <8 x i64> %res
1157}
1158
; Signed remainder of each of the sixteen i32 lanes of %a by the constant 7.
; The expectations use the AVX check prefix that both RUN lines share, so the
; +avx512f and +avx512bw configurations must both produce this code.
; As the checks show, the work is done one element at a time in
; general-purpose registers:
;   * each element is sign-extended to 64 bits (cltq / movslq) and multiplied
;     by -1840700269 (0x92492493) with imulq; the upper 32 bits of the product
;     (shrq $32) are added to the original element;
;   * the quotient is that sum shifted right arithmetically by two (sarl $2)
;     plus the sum's sign bit (shrl $31);
;   * quotient * 7 is formed as quotient * 8 - quotient (leal / subl) and
;     subtracted from the element, giving the remainder;
;   * the four results of every 128-bit quarter are assembled with vmovd and
;     vpinsrd, and the quarters are joined with vinserti128 and vinserti64x4.
; The check lines are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1159define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind {
1160; AVX-LABEL: test_rem7_16i32:
1161; AVX:       # BB#0:
1162; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
1163; AVX-NEXT:    vpextrd $1, %xmm1, %eax
1164; AVX-NEXT:    cltq
1165; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1166; AVX-NEXT:    shrq $32, %rcx
1167; AVX-NEXT:    addl %eax, %ecx
1168; AVX-NEXT:    movl %ecx, %edx
1169; AVX-NEXT:    shrl $31, %edx
1170; AVX-NEXT:    sarl $2, %ecx
1171; AVX-NEXT:    addl %edx, %ecx
1172; AVX-NEXT:    leal (,%rcx,8), %edx
1173; AVX-NEXT:    subl %ecx, %edx
1174; AVX-NEXT:    subl %edx, %eax
1175; AVX-NEXT:    vmovd %xmm1, %ecx
1176; AVX-NEXT:    movslq %ecx, %rcx
1177; AVX-NEXT:    imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
1178; AVX-NEXT:    shrq $32, %rdx
1179; AVX-NEXT:    addl %ecx, %edx
1180; AVX-NEXT:    movl %edx, %esi
1181; AVX-NEXT:    shrl $31, %esi
1182; AVX-NEXT:    sarl $2, %edx
1183; AVX-NEXT:    addl %esi, %edx
1184; AVX-NEXT:    leal (,%rdx,8), %esi
1185; AVX-NEXT:    subl %edx, %esi
1186; AVX-NEXT:    subl %esi, %ecx
1187; AVX-NEXT:    vmovd %ecx, %xmm2
1188; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
1189; AVX-NEXT:    vpextrd $2, %xmm1, %eax
1190; AVX-NEXT:    cltq
1191; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1192; AVX-NEXT:    shrq $32, %rcx
1193; AVX-NEXT:    addl %eax, %ecx
1194; AVX-NEXT:    movl %ecx, %edx
1195; AVX-NEXT:    shrl $31, %edx
1196; AVX-NEXT:    sarl $2, %ecx
1197; AVX-NEXT:    addl %edx, %ecx
1198; AVX-NEXT:    leal (,%rcx,8), %edx
1199; AVX-NEXT:    subl %ecx, %edx
1200; AVX-NEXT:    subl %edx, %eax
1201; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
1202; AVX-NEXT:    vpextrd $3, %xmm1, %eax
1203; AVX-NEXT:    cltq
1204; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1205; AVX-NEXT:    shrq $32, %rcx
1206; AVX-NEXT:    addl %eax, %ecx
1207; AVX-NEXT:    movl %ecx, %edx
1208; AVX-NEXT:    shrl $31, %edx
1209; AVX-NEXT:    sarl $2, %ecx
1210; AVX-NEXT:    addl %edx, %ecx
1211; AVX-NEXT:    leal (,%rcx,8), %edx
1212; AVX-NEXT:    subl %ecx, %edx
1213; AVX-NEXT:    subl %edx, %eax
1214; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
1215; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
1216; AVX-NEXT:    vpextrd $1, %xmm2, %eax
1217; AVX-NEXT:    cltq
1218; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1219; AVX-NEXT:    shrq $32, %rcx
1220; AVX-NEXT:    addl %eax, %ecx
1221; AVX-NEXT:    movl %ecx, %edx
1222; AVX-NEXT:    shrl $31, %edx
1223; AVX-NEXT:    sarl $2, %ecx
1224; AVX-NEXT:    addl %edx, %ecx
1225; AVX-NEXT:    leal (,%rcx,8), %edx
1226; AVX-NEXT:    subl %ecx, %edx
1227; AVX-NEXT:    subl %edx, %eax
1228; AVX-NEXT:    vmovd %xmm2, %ecx
1229; AVX-NEXT:    movslq %ecx, %rcx
1230; AVX-NEXT:    imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
1231; AVX-NEXT:    shrq $32, %rdx
1232; AVX-NEXT:    addl %ecx, %edx
1233; AVX-NEXT:    movl %edx, %esi
1234; AVX-NEXT:    shrl $31, %esi
1235; AVX-NEXT:    sarl $2, %edx
1236; AVX-NEXT:    addl %esi, %edx
1237; AVX-NEXT:    leal (,%rdx,8), %esi
1238; AVX-NEXT:    subl %edx, %esi
1239; AVX-NEXT:    subl %esi, %ecx
1240; AVX-NEXT:    vmovd %ecx, %xmm3
1241; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
1242; AVX-NEXT:    vpextrd $2, %xmm2, %eax
1243; AVX-NEXT:    cltq
1244; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1245; AVX-NEXT:    shrq $32, %rcx
1246; AVX-NEXT:    addl %eax, %ecx
1247; AVX-NEXT:    movl %ecx, %edx
1248; AVX-NEXT:    shrl $31, %edx
1249; AVX-NEXT:    sarl $2, %ecx
1250; AVX-NEXT:    addl %edx, %ecx
1251; AVX-NEXT:    leal (,%rcx,8), %edx
1252; AVX-NEXT:    subl %ecx, %edx
1253; AVX-NEXT:    subl %edx, %eax
1254; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
1255; AVX-NEXT:    vpextrd $3, %xmm2, %eax
1256; AVX-NEXT:    cltq
1257; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1258; AVX-NEXT:    shrq $32, %rcx
1259; AVX-NEXT:    addl %eax, %ecx
1260; AVX-NEXT:    movl %ecx, %edx
1261; AVX-NEXT:    shrl $31, %edx
1262; AVX-NEXT:    sarl $2, %ecx
1263; AVX-NEXT:    addl %edx, %ecx
1264; AVX-NEXT:    leal (,%rcx,8), %edx
1265; AVX-NEXT:    subl %ecx, %edx
1266; AVX-NEXT:    subl %edx, %eax
1267; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
1268; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
1269; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
1270; AVX-NEXT:    vpextrd $1, %xmm2, %eax
1271; AVX-NEXT:    cltq
1272; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1273; AVX-NEXT:    shrq $32, %rcx
1274; AVX-NEXT:    addl %eax, %ecx
1275; AVX-NEXT:    movl %ecx, %edx
1276; AVX-NEXT:    shrl $31, %edx
1277; AVX-NEXT:    sarl $2, %ecx
1278; AVX-NEXT:    addl %edx, %ecx
1279; AVX-NEXT:    leal (,%rcx,8), %edx
1280; AVX-NEXT:    subl %ecx, %edx
1281; AVX-NEXT:    subl %edx, %eax
1282; AVX-NEXT:    vmovd %xmm2, %ecx
1283; AVX-NEXT:    movslq %ecx, %rcx
1284; AVX-NEXT:    imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
1285; AVX-NEXT:    shrq $32, %rdx
1286; AVX-NEXT:    addl %ecx, %edx
1287; AVX-NEXT:    movl %edx, %esi
1288; AVX-NEXT:    shrl $31, %esi
1289; AVX-NEXT:    sarl $2, %edx
1290; AVX-NEXT:    addl %esi, %edx
1291; AVX-NEXT:    leal (,%rdx,8), %esi
1292; AVX-NEXT:    subl %edx, %esi
1293; AVX-NEXT:    subl %esi, %ecx
1294; AVX-NEXT:    vmovd %ecx, %xmm3
1295; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
1296; AVX-NEXT:    vpextrd $2, %xmm2, %eax
1297; AVX-NEXT:    cltq
1298; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1299; AVX-NEXT:    shrq $32, %rcx
1300; AVX-NEXT:    addl %eax, %ecx
1301; AVX-NEXT:    movl %ecx, %edx
1302; AVX-NEXT:    shrl $31, %edx
1303; AVX-NEXT:    sarl $2, %ecx
1304; AVX-NEXT:    addl %edx, %ecx
1305; AVX-NEXT:    leal (,%rcx,8), %edx
1306; AVX-NEXT:    subl %ecx, %edx
1307; AVX-NEXT:    subl %edx, %eax
1308; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
1309; AVX-NEXT:    vpextrd $3, %xmm2, %eax
1310; AVX-NEXT:    cltq
1311; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1312; AVX-NEXT:    shrq $32, %rcx
1313; AVX-NEXT:    addl %eax, %ecx
1314; AVX-NEXT:    movl %ecx, %edx
1315; AVX-NEXT:    shrl $31, %edx
1316; AVX-NEXT:    sarl $2, %ecx
1317; AVX-NEXT:    addl %edx, %ecx
1318; AVX-NEXT:    leal (,%rcx,8), %edx
1319; AVX-NEXT:    subl %ecx, %edx
1320; AVX-NEXT:    subl %edx, %eax
1321; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
1322; AVX-NEXT:    vpextrd $1, %xmm0, %eax
1323; AVX-NEXT:    cltq
1324; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1325; AVX-NEXT:    shrq $32, %rcx
1326; AVX-NEXT:    addl %eax, %ecx
1327; AVX-NEXT:    movl %ecx, %edx
1328; AVX-NEXT:    shrl $31, %edx
1329; AVX-NEXT:    sarl $2, %ecx
1330; AVX-NEXT:    addl %edx, %ecx
1331; AVX-NEXT:    leal (,%rcx,8), %edx
1332; AVX-NEXT:    subl %ecx, %edx
1333; AVX-NEXT:    subl %edx, %eax
1334; AVX-NEXT:    vmovd %xmm0, %ecx
1335; AVX-NEXT:    movslq %ecx, %rcx
1336; AVX-NEXT:    imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
1337; AVX-NEXT:    shrq $32, %rdx
1338; AVX-NEXT:    addl %ecx, %edx
1339; AVX-NEXT:    movl %edx, %esi
1340; AVX-NEXT:    shrl $31, %esi
1341; AVX-NEXT:    sarl $2, %edx
1342; AVX-NEXT:    addl %esi, %edx
1343; AVX-NEXT:    leal (,%rdx,8), %esi
1344; AVX-NEXT:    subl %edx, %esi
1345; AVX-NEXT:    subl %esi, %ecx
1346; AVX-NEXT:    vmovd %ecx, %xmm3
1347; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
1348; AVX-NEXT:    vpextrd $2, %xmm0, %eax
1349; AVX-NEXT:    cltq
1350; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1351; AVX-NEXT:    shrq $32, %rcx
1352; AVX-NEXT:    addl %eax, %ecx
1353; AVX-NEXT:    movl %ecx, %edx
1354; AVX-NEXT:    shrl $31, %edx
1355; AVX-NEXT:    sarl $2, %ecx
1356; AVX-NEXT:    addl %edx, %ecx
1357; AVX-NEXT:    leal (,%rcx,8), %edx
1358; AVX-NEXT:    subl %ecx, %edx
1359; AVX-NEXT:    subl %edx, %eax
1360; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
1361; AVX-NEXT:    vpextrd $3, %xmm0, %eax
1362; AVX-NEXT:    cltq
1363; AVX-NEXT:    imulq $-1840700269, %rax, %rcx # imm = 0x92492493
1364; AVX-NEXT:    shrq $32, %rcx
1365; AVX-NEXT:    addl %eax, %ecx
1366; AVX-NEXT:    movl %ecx, %edx
1367; AVX-NEXT:    shrl $31, %edx
1368; AVX-NEXT:    sarl $2, %ecx
1369; AVX-NEXT:    addl %edx, %ecx
1370; AVX-NEXT:    leal (,%rcx,8), %edx
1371; AVX-NEXT:    subl %ecx, %edx
1372; AVX-NEXT:    subl %edx, %eax
1373; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm0
1374; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1375; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1376; AVX-NEXT:    retq
1377  %res = srem <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
1378  ret <16 x i32> %res
1379}
1380
; Signed remainder of each of the thirty-two i16 lanes of %a by the constant 7.
; The expectations here are vector code and differ between the two RUN lines:
;   * with +avx512f the checks work separately on ymm0 and ymm1: vpmulhw by a
;     splat of 18725 gives the high half of the product, the quotient is that
;     value shifted right arithmetically by one (vpsraw $1) plus its sign bit
;     (vpsrlw $15), and the remainder is the input minus quotient * 7
;     (vpmullw by a splat of 7, then vpsubw). The two splat constants are
;     loaded once and reused for the second half;
;   * with +avx512bw the same vpmulhw / vpsrlw / vpsraw / vpaddw / vpmullw /
;     vpsubw sequence runs once on the full zmm0, and both multiplier operands
;     are RIP-relative memory operands matched by a {{.*}}(%rip) pattern, so
;     their values are not pinned by this test.
; The check lines are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1381define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind {
1382; AVX512F-LABEL: test_rem7_32i16:
1383; AVX512F:       # BB#0:
1384; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725]
1385; AVX512F-NEXT:    vpmulhw %ymm2, %ymm0, %ymm3
1386; AVX512F-NEXT:    vpsrlw $15, %ymm3, %ymm4
1387; AVX512F-NEXT:    vpsraw $1, %ymm3, %ymm3
1388; AVX512F-NEXT:    vpaddw %ymm4, %ymm3, %ymm3
1389; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1390; AVX512F-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
1391; AVX512F-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
1392; AVX512F-NEXT:    vpmulhw %ymm2, %ymm1, %ymm2
1393; AVX512F-NEXT:    vpsrlw $15, %ymm2, %ymm3
1394; AVX512F-NEXT:    vpsraw $1, %ymm2, %ymm2
1395; AVX512F-NEXT:    vpaddw %ymm3, %ymm2, %ymm2
1396; AVX512F-NEXT:    vpmullw %ymm4, %ymm2, %ymm2
1397; AVX512F-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
1398; AVX512F-NEXT:    retq
1399;
1400; AVX512BW-LABEL: test_rem7_32i16:
1401; AVX512BW:       # BB#0:
1402; AVX512BW-NEXT:    vpmulhw {{.*}}(%rip), %zmm0, %zmm1
1403; AVX512BW-NEXT:    vpsrlw $15, %zmm1, %zmm2
1404; AVX512BW-NEXT:    vpsraw $1, %zmm1, %zmm1
1405; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
1406; AVX512BW-NEXT:    vpmullw {{.*}}(%rip), %zmm1, %zmm1
1407; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
1408; AVX512BW-NEXT:    retq
1409  %res = srem <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1410  ret <32 x i16> %res
1411}
1412
1413define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
1414; AVX512F-LABEL: test_rem7_64i8:
1415; AVX512F:       # BB#0:
1416; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147]
1417; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm2
1418; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
1419; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
1420; AVX512F-NEXT:    vpmovsxbw %xmm4, %ymm4
1421; AVX512F-NEXT:    vpmullw %ymm2, %ymm4, %ymm4
1422; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm4
1423; AVX512F-NEXT:    vpmovsxbw %xmm3, %ymm3
1424; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm5
1425; AVX512F-NEXT:    vpmullw %ymm3, %ymm5, %ymm5
1426; AVX512F-NEXT:    vpsrlw $8, %ymm5, %ymm5
1427; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm6 = ymm5[2,3],ymm4[2,3]
1428; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm5, %ymm4
1429; AVX512F-NEXT:    vpackuswb %ymm6, %ymm4, %ymm4
1430; AVX512F-NEXT:    vpaddb %ymm0, %ymm4, %ymm4
1431; AVX512F-NEXT:    vpsrlw $7, %ymm4, %ymm6
1432; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm10 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1433; AVX512F-NEXT:    vpand %ymm10, %ymm6, %ymm8
1434; AVX512F-NEXT:    vpsrlw $2, %ymm4, %ymm4
1435; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
1436; AVX512F-NEXT:    vpand %ymm6, %ymm4, %ymm4
1437; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
1438; AVX512F-NEXT:    vpxor %ymm7, %ymm4, %ymm4
1439; AVX512F-NEXT:    vpsubb %ymm7, %ymm4, %ymm4
1440; AVX512F-NEXT:    vpaddb %ymm8, %ymm4, %ymm8
1441; AVX512F-NEXT:    vpmovsxbw %xmm8, %ymm9
1442; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm4
1443; AVX512F-NEXT:    vpmullw %ymm4, %ymm9, %ymm9
1444; AVX512F-NEXT:    vpmovsxwd %ymm9, %zmm9
1445; AVX512F-NEXT:    vpmovdb %zmm9, %xmm9
1446; AVX512F-NEXT:    vextracti128 $1, %ymm8, %xmm5
1447; AVX512F-NEXT:    vpmovsxbw %xmm5, %ymm5
1448; AVX512F-NEXT:    vpmullw %ymm4, %ymm5, %ymm5
1449; AVX512F-NEXT:    vpmovsxwd %ymm5, %zmm5
1450; AVX512F-NEXT:    vpmovdb %zmm5, %xmm5
1451; AVX512F-NEXT:    vinserti128 $1, %xmm5, %ymm9, %ymm5
1452; AVX512F-NEXT:    vpsubb %ymm5, %ymm0, %ymm0
1453; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm5
1454; AVX512F-NEXT:    vpmovsxbw %xmm5, %ymm5
1455; AVX512F-NEXT:    vpmullw %ymm2, %ymm5, %ymm2
1456; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
1457; AVX512F-NEXT:    vpmovsxbw %xmm1, %ymm5
1458; AVX512F-NEXT:    vpmullw %ymm3, %ymm5, %ymm3
1459; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3
1460; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm5 = ymm3[2,3],ymm2[2,3]
1461; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
1462; AVX512F-NEXT:    vpackuswb %ymm5, %ymm2, %ymm2
1463; AVX512F-NEXT:    vpaddb %ymm1, %ymm2, %ymm2
1464; AVX512F-NEXT:    vpsrlw $7, %ymm2, %ymm3
1465; AVX512F-NEXT:    vpand %ymm10, %ymm3, %ymm3
1466; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm2
1467; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
1468; AVX512F-NEXT:    vpxor %ymm7, %ymm2, %ymm2
1469; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
1470; AVX512F-NEXT:    vpaddb %ymm3, %ymm2, %ymm2
1471; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm3
1472; AVX512F-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
1473; AVX512F-NEXT:    vpmovsxwd %ymm3, %zmm3
1474; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
1475; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
1476; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
1477; AVX512F-NEXT:    vpmullw %ymm4, %ymm2, %ymm2
1478; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
1479; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
1480; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
1481; AVX512F-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
1482; AVX512F-NEXT:    retq
1483;
1484; AVX512BW-LABEL: test_rem7_64i8:
1485; AVX512BW:       # BB#0:
1486; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
1487; AVX512BW-NEXT:    vpextrb $1, %xmm1, %eax
1488; AVX512BW-NEXT:    movsbl %al, %edx
1489; AVX512BW-NEXT:    imull $-109, %edx, %eax
1490; AVX512BW-NEXT:    shrl $8, %eax
1491; AVX512BW-NEXT:    addb %dl, %al
1492; AVX512BW-NEXT:    movl %eax, %ecx
1493; AVX512BW-NEXT:    shrb $7, %cl
1494; AVX512BW-NEXT:    sarb $2, %al
1495; AVX512BW-NEXT:    addb %cl, %al
1496; AVX512BW-NEXT:    movb $7, %dil
1497; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1498; AVX512BW-NEXT:    mulb %dil
1499; AVX512BW-NEXT:    subb %al, %dl
1500; AVX512BW-NEXT:    movzbl %dl, %edx
1501; AVX512BW-NEXT:    vpextrb $0, %xmm1, %eax
1502; AVX512BW-NEXT:    movsbl %al, %esi
1503; AVX512BW-NEXT:    imull $-109, %esi, %eax
1504; AVX512BW-NEXT:    shrl $8, %eax
1505; AVX512BW-NEXT:    addb %sil, %al
1506; AVX512BW-NEXT:    movl %eax, %ecx
1507; AVX512BW-NEXT:    shrb $7, %cl
1508; AVX512BW-NEXT:    sarb $2, %al
1509; AVX512BW-NEXT:    addb %cl, %al
1510; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1511; AVX512BW-NEXT:    mulb %dil
1512; AVX512BW-NEXT:    subb %al, %sil
1513; AVX512BW-NEXT:    movzbl %sil, %eax
1514; AVX512BW-NEXT:    vmovd %eax, %xmm2
1515; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
1516; AVX512BW-NEXT:    vpextrb $2, %xmm1, %eax
1517; AVX512BW-NEXT:    movsbl %al, %ecx
1518; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1519; AVX512BW-NEXT:    shrl $8, %eax
1520; AVX512BW-NEXT:    addb %cl, %al
1521; AVX512BW-NEXT:    movl %eax, %edx
1522; AVX512BW-NEXT:    shrb $7, %dl
1523; AVX512BW-NEXT:    sarb $2, %al
1524; AVX512BW-NEXT:    addb %dl, %al
1525; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1526; AVX512BW-NEXT:    mulb %dil
1527; AVX512BW-NEXT:    subb %al, %cl
1528; AVX512BW-NEXT:    movzbl %cl, %eax
1529; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
1530; AVX512BW-NEXT:    vpextrb $3, %xmm1, %eax
1531; AVX512BW-NEXT:    movsbl %al, %ecx
1532; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1533; AVX512BW-NEXT:    shrl $8, %eax
1534; AVX512BW-NEXT:    addb %cl, %al
1535; AVX512BW-NEXT:    movl %eax, %edx
1536; AVX512BW-NEXT:    shrb $7, %dl
1537; AVX512BW-NEXT:    sarb $2, %al
1538; AVX512BW-NEXT:    addb %dl, %al
1539; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1540; AVX512BW-NEXT:    mulb %dil
1541; AVX512BW-NEXT:    subb %al, %cl
1542; AVX512BW-NEXT:    movzbl %cl, %eax
1543; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
1544; AVX512BW-NEXT:    vpextrb $4, %xmm1, %eax
1545; AVX512BW-NEXT:    movsbl %al, %ecx
1546; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1547; AVX512BW-NEXT:    shrl $8, %eax
1548; AVX512BW-NEXT:    addb %cl, %al
1549; AVX512BW-NEXT:    movl %eax, %edx
1550; AVX512BW-NEXT:    shrb $7, %dl
1551; AVX512BW-NEXT:    sarb $2, %al
1552; AVX512BW-NEXT:    addb %dl, %al
1553; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1554; AVX512BW-NEXT:    mulb %dil
1555; AVX512BW-NEXT:    subb %al, %cl
1556; AVX512BW-NEXT:    movzbl %cl, %eax
1557; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
1558; AVX512BW-NEXT:    vpextrb $5, %xmm1, %eax
1559; AVX512BW-NEXT:    movsbl %al, %ecx
1560; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1561; AVX512BW-NEXT:    shrl $8, %eax
1562; AVX512BW-NEXT:    addb %cl, %al
1563; AVX512BW-NEXT:    movl %eax, %edx
1564; AVX512BW-NEXT:    shrb $7, %dl
1565; AVX512BW-NEXT:    sarb $2, %al
1566; AVX512BW-NEXT:    addb %dl, %al
1567; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1568; AVX512BW-NEXT:    mulb %dil
1569; AVX512BW-NEXT:    subb %al, %cl
1570; AVX512BW-NEXT:    movzbl %cl, %eax
1571; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
1572; AVX512BW-NEXT:    vpextrb $6, %xmm1, %eax
1573; AVX512BW-NEXT:    movsbl %al, %ecx
1574; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1575; AVX512BW-NEXT:    shrl $8, %eax
1576; AVX512BW-NEXT:    addb %cl, %al
1577; AVX512BW-NEXT:    movl %eax, %edx
1578; AVX512BW-NEXT:    shrb $7, %dl
1579; AVX512BW-NEXT:    sarb $2, %al
1580; AVX512BW-NEXT:    addb %dl, %al
1581; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1582; AVX512BW-NEXT:    mulb %dil
1583; AVX512BW-NEXT:    subb %al, %cl
1584; AVX512BW-NEXT:    movzbl %cl, %eax
1585; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
1586; AVX512BW-NEXT:    vpextrb $7, %xmm1, %eax
1587; AVX512BW-NEXT:    movsbl %al, %ecx
1588; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1589; AVX512BW-NEXT:    shrl $8, %eax
1590; AVX512BW-NEXT:    addb %cl, %al
1591; AVX512BW-NEXT:    movl %eax, %edx
1592; AVX512BW-NEXT:    shrb $7, %dl
1593; AVX512BW-NEXT:    sarb $2, %al
1594; AVX512BW-NEXT:    addb %dl, %al
1595; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1596; AVX512BW-NEXT:    mulb %dil
1597; AVX512BW-NEXT:    subb %al, %cl
1598; AVX512BW-NEXT:    movzbl %cl, %eax
1599; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
1600; AVX512BW-NEXT:    vpextrb $8, %xmm1, %eax
1601; AVX512BW-NEXT:    movsbl %al, %ecx
1602; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1603; AVX512BW-NEXT:    shrl $8, %eax
1604; AVX512BW-NEXT:    addb %cl, %al
1605; AVX512BW-NEXT:    movl %eax, %edx
1606; AVX512BW-NEXT:    shrb $7, %dl
1607; AVX512BW-NEXT:    sarb $2, %al
1608; AVX512BW-NEXT:    addb %dl, %al
1609; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1610; AVX512BW-NEXT:    mulb %dil
1611; AVX512BW-NEXT:    subb %al, %cl
1612; AVX512BW-NEXT:    movzbl %cl, %eax
1613; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
1614; AVX512BW-NEXT:    vpextrb $9, %xmm1, %eax
1615; AVX512BW-NEXT:    movsbl %al, %ecx
1616; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1617; AVX512BW-NEXT:    shrl $8, %eax
1618; AVX512BW-NEXT:    addb %cl, %al
1619; AVX512BW-NEXT:    movl %eax, %edx
1620; AVX512BW-NEXT:    shrb $7, %dl
1621; AVX512BW-NEXT:    sarb $2, %al
1622; AVX512BW-NEXT:    addb %dl, %al
1623; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1624; AVX512BW-NEXT:    mulb %dil
1625; AVX512BW-NEXT:    subb %al, %cl
1626; AVX512BW-NEXT:    movzbl %cl, %eax
1627; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
1628; AVX512BW-NEXT:    vpextrb $10, %xmm1, %eax
1629; AVX512BW-NEXT:    movsbl %al, %ecx
1630; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1631; AVX512BW-NEXT:    shrl $8, %eax
1632; AVX512BW-NEXT:    addb %cl, %al
1633; AVX512BW-NEXT:    movl %eax, %edx
1634; AVX512BW-NEXT:    shrb $7, %dl
1635; AVX512BW-NEXT:    sarb $2, %al
1636; AVX512BW-NEXT:    addb %dl, %al
1637; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1638; AVX512BW-NEXT:    mulb %dil
1639; AVX512BW-NEXT:    subb %al, %cl
1640; AVX512BW-NEXT:    movzbl %cl, %eax
1641; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
1642; AVX512BW-NEXT:    vpextrb $11, %xmm1, %eax
1643; AVX512BW-NEXT:    movsbl %al, %ecx
1644; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1645; AVX512BW-NEXT:    shrl $8, %eax
1646; AVX512BW-NEXT:    addb %cl, %al
1647; AVX512BW-NEXT:    movl %eax, %edx
1648; AVX512BW-NEXT:    shrb $7, %dl
1649; AVX512BW-NEXT:    sarb $2, %al
1650; AVX512BW-NEXT:    addb %dl, %al
1651; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1652; AVX512BW-NEXT:    mulb %dil
1653; AVX512BW-NEXT:    subb %al, %cl
1654; AVX512BW-NEXT:    movzbl %cl, %eax
1655; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
1656; AVX512BW-NEXT:    vpextrb $12, %xmm1, %eax
1657; AVX512BW-NEXT:    movsbl %al, %ecx
1658; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1659; AVX512BW-NEXT:    shrl $8, %eax
1660; AVX512BW-NEXT:    addb %cl, %al
1661; AVX512BW-NEXT:    movl %eax, %edx
1662; AVX512BW-NEXT:    shrb $7, %dl
1663; AVX512BW-NEXT:    sarb $2, %al
1664; AVX512BW-NEXT:    addb %dl, %al
1665; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1666; AVX512BW-NEXT:    mulb %dil
1667; AVX512BW-NEXT:    subb %al, %cl
1668; AVX512BW-NEXT:    movzbl %cl, %eax
1669; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
1670; AVX512BW-NEXT:    vpextrb $13, %xmm1, %eax
1671; AVX512BW-NEXT:    movsbl %al, %ecx
1672; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1673; AVX512BW-NEXT:    shrl $8, %eax
1674; AVX512BW-NEXT:    addb %cl, %al
1675; AVX512BW-NEXT:    movl %eax, %edx
1676; AVX512BW-NEXT:    shrb $7, %dl
1677; AVX512BW-NEXT:    sarb $2, %al
1678; AVX512BW-NEXT:    addb %dl, %al
1679; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1680; AVX512BW-NEXT:    mulb %dil
1681; AVX512BW-NEXT:    subb %al, %cl
1682; AVX512BW-NEXT:    movzbl %cl, %eax
1683; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
1684; AVX512BW-NEXT:    vpextrb $14, %xmm1, %eax
1685; AVX512BW-NEXT:    movsbl %al, %ecx
1686; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1687; AVX512BW-NEXT:    shrl $8, %eax
1688; AVX512BW-NEXT:    addb %cl, %al
1689; AVX512BW-NEXT:    movl %eax, %edx
1690; AVX512BW-NEXT:    shrb $7, %dl
1691; AVX512BW-NEXT:    sarb $2, %al
1692; AVX512BW-NEXT:    addb %dl, %al
1693; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1694; AVX512BW-NEXT:    mulb %dil
1695; AVX512BW-NEXT:    subb %al, %cl
1696; AVX512BW-NEXT:    movzbl %cl, %eax
1697; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
1698; AVX512BW-NEXT:    vpextrb $15, %xmm1, %eax
1699; AVX512BW-NEXT:    movsbl %al, %ecx
1700; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1701; AVX512BW-NEXT:    shrl $8, %eax
1702; AVX512BW-NEXT:    addb %cl, %al
1703; AVX512BW-NEXT:    movl %eax, %edx
1704; AVX512BW-NEXT:    shrb $7, %dl
1705; AVX512BW-NEXT:    sarb $2, %al
1706; AVX512BW-NEXT:    addb %dl, %al
1707; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1708; AVX512BW-NEXT:    mulb %dil
1709; AVX512BW-NEXT:    subb %al, %cl
1710; AVX512BW-NEXT:    movzbl %cl, %eax
1711; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
1712; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
1713; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
1714; AVX512BW-NEXT:    movsbl %al, %ecx
1715; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1716; AVX512BW-NEXT:    shrl $8, %eax
1717; AVX512BW-NEXT:    addb %cl, %al
1718; AVX512BW-NEXT:    movl %eax, %edx
1719; AVX512BW-NEXT:    shrb $7, %dl
1720; AVX512BW-NEXT:    sarb $2, %al
1721; AVX512BW-NEXT:    addb %dl, %al
1722; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1723; AVX512BW-NEXT:    mulb %dil
1724; AVX512BW-NEXT:    subb %al, %cl
1725; AVX512BW-NEXT:    movzbl %cl, %esi
1726; AVX512BW-NEXT:    vpextrb $0, %xmm2, %eax
1727; AVX512BW-NEXT:    movsbl %al, %edx
1728; AVX512BW-NEXT:    imull $-109, %edx, %eax
1729; AVX512BW-NEXT:    shrl $8, %eax
1730; AVX512BW-NEXT:    addb %dl, %al
1731; AVX512BW-NEXT:    movl %eax, %ecx
1732; AVX512BW-NEXT:    shrb $7, %cl
1733; AVX512BW-NEXT:    sarb $2, %al
1734; AVX512BW-NEXT:    addb %cl, %al
1735; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1736; AVX512BW-NEXT:    mulb %dil
1737; AVX512BW-NEXT:    subb %al, %dl
1738; AVX512BW-NEXT:    movzbl %dl, %eax
1739; AVX512BW-NEXT:    vmovd %eax, %xmm3
1740; AVX512BW-NEXT:    vpinsrb $1, %esi, %xmm3, %xmm3
1741; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
1742; AVX512BW-NEXT:    movsbl %al, %ecx
1743; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1744; AVX512BW-NEXT:    shrl $8, %eax
1745; AVX512BW-NEXT:    addb %cl, %al
1746; AVX512BW-NEXT:    movl %eax, %edx
1747; AVX512BW-NEXT:    shrb $7, %dl
1748; AVX512BW-NEXT:    sarb $2, %al
1749; AVX512BW-NEXT:    addb %dl, %al
1750; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1751; AVX512BW-NEXT:    mulb %dil
1752; AVX512BW-NEXT:    subb %al, %cl
1753; AVX512BW-NEXT:    movzbl %cl, %eax
1754; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
1755; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
1756; AVX512BW-NEXT:    movsbl %al, %ecx
1757; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1758; AVX512BW-NEXT:    shrl $8, %eax
1759; AVX512BW-NEXT:    addb %cl, %al
1760; AVX512BW-NEXT:    movl %eax, %edx
1761; AVX512BW-NEXT:    shrb $7, %dl
1762; AVX512BW-NEXT:    sarb $2, %al
1763; AVX512BW-NEXT:    addb %dl, %al
1764; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1765; AVX512BW-NEXT:    mulb %dil
1766; AVX512BW-NEXT:    subb %al, %cl
1767; AVX512BW-NEXT:    movzbl %cl, %eax
1768; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
1769; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
1770; AVX512BW-NEXT:    movsbl %al, %ecx
1771; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1772; AVX512BW-NEXT:    shrl $8, %eax
1773; AVX512BW-NEXT:    addb %cl, %al
1774; AVX512BW-NEXT:    movl %eax, %edx
1775; AVX512BW-NEXT:    shrb $7, %dl
1776; AVX512BW-NEXT:    sarb $2, %al
1777; AVX512BW-NEXT:    addb %dl, %al
1778; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1779; AVX512BW-NEXT:    mulb %dil
1780; AVX512BW-NEXT:    subb %al, %cl
1781; AVX512BW-NEXT:    movzbl %cl, %eax
1782; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
1783; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
1784; AVX512BW-NEXT:    movsbl %al, %ecx
1785; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1786; AVX512BW-NEXT:    shrl $8, %eax
1787; AVX512BW-NEXT:    addb %cl, %al
1788; AVX512BW-NEXT:    movl %eax, %edx
1789; AVX512BW-NEXT:    shrb $7, %dl
1790; AVX512BW-NEXT:    sarb $2, %al
1791; AVX512BW-NEXT:    addb %dl, %al
1792; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1793; AVX512BW-NEXT:    mulb %dil
1794; AVX512BW-NEXT:    subb %al, %cl
1795; AVX512BW-NEXT:    movzbl %cl, %eax
1796; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
1797; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
1798; AVX512BW-NEXT:    movsbl %al, %ecx
1799; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1800; AVX512BW-NEXT:    shrl $8, %eax
1801; AVX512BW-NEXT:    addb %cl, %al
1802; AVX512BW-NEXT:    movl %eax, %edx
1803; AVX512BW-NEXT:    shrb $7, %dl
1804; AVX512BW-NEXT:    sarb $2, %al
1805; AVX512BW-NEXT:    addb %dl, %al
1806; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1807; AVX512BW-NEXT:    mulb %dil
1808; AVX512BW-NEXT:    subb %al, %cl
1809; AVX512BW-NEXT:    movzbl %cl, %eax
1810; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
1811; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
1812; AVX512BW-NEXT:    movsbl %al, %ecx
1813; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1814; AVX512BW-NEXT:    shrl $8, %eax
1815; AVX512BW-NEXT:    addb %cl, %al
1816; AVX512BW-NEXT:    movl %eax, %edx
1817; AVX512BW-NEXT:    shrb $7, %dl
1818; AVX512BW-NEXT:    sarb $2, %al
1819; AVX512BW-NEXT:    addb %dl, %al
1820; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1821; AVX512BW-NEXT:    mulb %dil
1822; AVX512BW-NEXT:    subb %al, %cl
1823; AVX512BW-NEXT:    movzbl %cl, %eax
1824; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
1825; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
1826; AVX512BW-NEXT:    movsbl %al, %ecx
1827; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1828; AVX512BW-NEXT:    shrl $8, %eax
1829; AVX512BW-NEXT:    addb %cl, %al
1830; AVX512BW-NEXT:    movl %eax, %edx
1831; AVX512BW-NEXT:    shrb $7, %dl
1832; AVX512BW-NEXT:    sarb $2, %al
1833; AVX512BW-NEXT:    addb %dl, %al
1834; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1835; AVX512BW-NEXT:    mulb %dil
1836; AVX512BW-NEXT:    subb %al, %cl
1837; AVX512BW-NEXT:    movzbl %cl, %eax
1838; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
1839; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
1840; AVX512BW-NEXT:    movsbl %al, %ecx
1841; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1842; AVX512BW-NEXT:    shrl $8, %eax
1843; AVX512BW-NEXT:    addb %cl, %al
1844; AVX512BW-NEXT:    movl %eax, %edx
1845; AVX512BW-NEXT:    shrb $7, %dl
1846; AVX512BW-NEXT:    sarb $2, %al
1847; AVX512BW-NEXT:    addb %dl, %al
1848; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1849; AVX512BW-NEXT:    mulb %dil
1850; AVX512BW-NEXT:    subb %al, %cl
1851; AVX512BW-NEXT:    movzbl %cl, %eax
1852; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
1853; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
1854; AVX512BW-NEXT:    movsbl %al, %ecx
1855; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1856; AVX512BW-NEXT:    shrl $8, %eax
1857; AVX512BW-NEXT:    addb %cl, %al
1858; AVX512BW-NEXT:    movl %eax, %edx
1859; AVX512BW-NEXT:    shrb $7, %dl
1860; AVX512BW-NEXT:    sarb $2, %al
1861; AVX512BW-NEXT:    addb %dl, %al
1862; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1863; AVX512BW-NEXT:    mulb %dil
1864; AVX512BW-NEXT:    subb %al, %cl
1865; AVX512BW-NEXT:    movzbl %cl, %eax
1866; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
1867; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
1868; AVX512BW-NEXT:    movsbl %al, %ecx
1869; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1870; AVX512BW-NEXT:    shrl $8, %eax
1871; AVX512BW-NEXT:    addb %cl, %al
1872; AVX512BW-NEXT:    movl %eax, %edx
1873; AVX512BW-NEXT:    shrb $7, %dl
1874; AVX512BW-NEXT:    sarb $2, %al
1875; AVX512BW-NEXT:    addb %dl, %al
1876; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1877; AVX512BW-NEXT:    mulb %dil
1878; AVX512BW-NEXT:    subb %al, %cl
1879; AVX512BW-NEXT:    movzbl %cl, %eax
1880; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
1881; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
1882; AVX512BW-NEXT:    movsbl %al, %ecx
1883; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1884; AVX512BW-NEXT:    shrl $8, %eax
1885; AVX512BW-NEXT:    addb %cl, %al
1886; AVX512BW-NEXT:    movl %eax, %edx
1887; AVX512BW-NEXT:    shrb $7, %dl
1888; AVX512BW-NEXT:    sarb $2, %al
1889; AVX512BW-NEXT:    addb %dl, %al
1890; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1891; AVX512BW-NEXT:    mulb %dil
1892; AVX512BW-NEXT:    subb %al, %cl
1893; AVX512BW-NEXT:    movzbl %cl, %eax
1894; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
1895; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
1896; AVX512BW-NEXT:    movsbl %al, %ecx
1897; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1898; AVX512BW-NEXT:    shrl $8, %eax
1899; AVX512BW-NEXT:    addb %cl, %al
1900; AVX512BW-NEXT:    movl %eax, %edx
1901; AVX512BW-NEXT:    shrb $7, %dl
1902; AVX512BW-NEXT:    sarb $2, %al
1903; AVX512BW-NEXT:    addb %dl, %al
1904; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1905; AVX512BW-NEXT:    mulb %dil
1906; AVX512BW-NEXT:    subb %al, %cl
1907; AVX512BW-NEXT:    movzbl %cl, %eax
1908; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
1909; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
1910; AVX512BW-NEXT:    movsbl %al, %ecx
1911; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1912; AVX512BW-NEXT:    shrl $8, %eax
1913; AVX512BW-NEXT:    addb %cl, %al
1914; AVX512BW-NEXT:    movl %eax, %edx
1915; AVX512BW-NEXT:    shrb $7, %dl
1916; AVX512BW-NEXT:    sarb $2, %al
1917; AVX512BW-NEXT:    addb %dl, %al
1918; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1919; AVX512BW-NEXT:    mulb %dil
1920; AVX512BW-NEXT:    subb %al, %cl
1921; AVX512BW-NEXT:    movzbl %cl, %eax
1922; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
1923; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
1924; AVX512BW-NEXT:    movsbl %al, %ecx
1925; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1926; AVX512BW-NEXT:    shrl $8, %eax
1927; AVX512BW-NEXT:    addb %cl, %al
1928; AVX512BW-NEXT:    movl %eax, %edx
1929; AVX512BW-NEXT:    shrb $7, %dl
1930; AVX512BW-NEXT:    sarb $2, %al
1931; AVX512BW-NEXT:    addb %dl, %al
1932; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1933; AVX512BW-NEXT:    mulb %dil
1934; AVX512BW-NEXT:    subb %al, %cl
1935; AVX512BW-NEXT:    movzbl %cl, %eax
1936; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
1937; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
1938; AVX512BW-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
1939; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
1940; AVX512BW-NEXT:    movsbl %al, %ecx
1941; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1942; AVX512BW-NEXT:    shrl $8, %eax
1943; AVX512BW-NEXT:    addb %cl, %al
1944; AVX512BW-NEXT:    movl %eax, %edx
1945; AVX512BW-NEXT:    shrb $7, %dl
1946; AVX512BW-NEXT:    sarb $2, %al
1947; AVX512BW-NEXT:    addb %dl, %al
1948; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1949; AVX512BW-NEXT:    mulb %dil
1950; AVX512BW-NEXT:    subb %al, %cl
1951; AVX512BW-NEXT:    movzbl %cl, %esi
1952; AVX512BW-NEXT:    vpextrb $0, %xmm2, %eax
1953; AVX512BW-NEXT:    movsbl %al, %edx
1954; AVX512BW-NEXT:    imull $-109, %edx, %eax
1955; AVX512BW-NEXT:    shrl $8, %eax
1956; AVX512BW-NEXT:    addb %dl, %al
1957; AVX512BW-NEXT:    movl %eax, %ecx
1958; AVX512BW-NEXT:    shrb $7, %cl
1959; AVX512BW-NEXT:    sarb $2, %al
1960; AVX512BW-NEXT:    addb %cl, %al
1961; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1962; AVX512BW-NEXT:    mulb %dil
1963; AVX512BW-NEXT:    subb %al, %dl
1964; AVX512BW-NEXT:    movzbl %dl, %eax
1965; AVX512BW-NEXT:    vmovd %eax, %xmm3
1966; AVX512BW-NEXT:    vpinsrb $1, %esi, %xmm3, %xmm3
1967; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
1968; AVX512BW-NEXT:    movsbl %al, %ecx
1969; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1970; AVX512BW-NEXT:    shrl $8, %eax
1971; AVX512BW-NEXT:    addb %cl, %al
1972; AVX512BW-NEXT:    movl %eax, %edx
1973; AVX512BW-NEXT:    shrb $7, %dl
1974; AVX512BW-NEXT:    sarb $2, %al
1975; AVX512BW-NEXT:    addb %dl, %al
1976; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1977; AVX512BW-NEXT:    mulb %dil
1978; AVX512BW-NEXT:    subb %al, %cl
1979; AVX512BW-NEXT:    movzbl %cl, %eax
1980; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
1981; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
1982; AVX512BW-NEXT:    movsbl %al, %ecx
1983; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1984; AVX512BW-NEXT:    shrl $8, %eax
1985; AVX512BW-NEXT:    addb %cl, %al
1986; AVX512BW-NEXT:    movl %eax, %edx
1987; AVX512BW-NEXT:    shrb $7, %dl
1988; AVX512BW-NEXT:    sarb $2, %al
1989; AVX512BW-NEXT:    addb %dl, %al
1990; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
1991; AVX512BW-NEXT:    mulb %dil
1992; AVX512BW-NEXT:    subb %al, %cl
1993; AVX512BW-NEXT:    movzbl %cl, %eax
1994; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
1995; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
1996; AVX512BW-NEXT:    movsbl %al, %ecx
1997; AVX512BW-NEXT:    imull $-109, %ecx, %eax
1998; AVX512BW-NEXT:    shrl $8, %eax
1999; AVX512BW-NEXT:    addb %cl, %al
2000; AVX512BW-NEXT:    movl %eax, %edx
2001; AVX512BW-NEXT:    shrb $7, %dl
2002; AVX512BW-NEXT:    sarb $2, %al
2003; AVX512BW-NEXT:    addb %dl, %al
2004; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2005; AVX512BW-NEXT:    mulb %dil
2006; AVX512BW-NEXT:    subb %al, %cl
2007; AVX512BW-NEXT:    movzbl %cl, %eax
2008; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
2009; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
2010; AVX512BW-NEXT:    movsbl %al, %ecx
2011; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2012; AVX512BW-NEXT:    shrl $8, %eax
2013; AVX512BW-NEXT:    addb %cl, %al
2014; AVX512BW-NEXT:    movl %eax, %edx
2015; AVX512BW-NEXT:    shrb $7, %dl
2016; AVX512BW-NEXT:    sarb $2, %al
2017; AVX512BW-NEXT:    addb %dl, %al
2018; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2019; AVX512BW-NEXT:    mulb %dil
2020; AVX512BW-NEXT:    subb %al, %cl
2021; AVX512BW-NEXT:    movzbl %cl, %eax
2022; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
2023; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
2024; AVX512BW-NEXT:    movsbl %al, %ecx
2025; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2026; AVX512BW-NEXT:    shrl $8, %eax
2027; AVX512BW-NEXT:    addb %cl, %al
2028; AVX512BW-NEXT:    movl %eax, %edx
2029; AVX512BW-NEXT:    shrb $7, %dl
2030; AVX512BW-NEXT:    sarb $2, %al
2031; AVX512BW-NEXT:    addb %dl, %al
2032; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2033; AVX512BW-NEXT:    mulb %dil
2034; AVX512BW-NEXT:    subb %al, %cl
2035; AVX512BW-NEXT:    movzbl %cl, %eax
2036; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
2037; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
2038; AVX512BW-NEXT:    movsbl %al, %ecx
2039; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2040; AVX512BW-NEXT:    shrl $8, %eax
2041; AVX512BW-NEXT:    addb %cl, %al
2042; AVX512BW-NEXT:    movl %eax, %edx
2043; AVX512BW-NEXT:    shrb $7, %dl
2044; AVX512BW-NEXT:    sarb $2, %al
2045; AVX512BW-NEXT:    addb %dl, %al
2046; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2047; AVX512BW-NEXT:    mulb %dil
2048; AVX512BW-NEXT:    subb %al, %cl
2049; AVX512BW-NEXT:    movzbl %cl, %eax
2050; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
2051; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
2052; AVX512BW-NEXT:    movsbl %al, %ecx
2053; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2054; AVX512BW-NEXT:    shrl $8, %eax
2055; AVX512BW-NEXT:    addb %cl, %al
2056; AVX512BW-NEXT:    movl %eax, %edx
2057; AVX512BW-NEXT:    shrb $7, %dl
2058; AVX512BW-NEXT:    sarb $2, %al
2059; AVX512BW-NEXT:    addb %dl, %al
2060; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2061; AVX512BW-NEXT:    mulb %dil
2062; AVX512BW-NEXT:    subb %al, %cl
2063; AVX512BW-NEXT:    movzbl %cl, %eax
2064; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
2065; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
2066; AVX512BW-NEXT:    movsbl %al, %ecx
2067; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2068; AVX512BW-NEXT:    shrl $8, %eax
2069; AVX512BW-NEXT:    addb %cl, %al
2070; AVX512BW-NEXT:    movl %eax, %edx
2071; AVX512BW-NEXT:    shrb $7, %dl
2072; AVX512BW-NEXT:    sarb $2, %al
2073; AVX512BW-NEXT:    addb %dl, %al
2074; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2075; AVX512BW-NEXT:    mulb %dil
2076; AVX512BW-NEXT:    subb %al, %cl
2077; AVX512BW-NEXT:    movzbl %cl, %eax
2078; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
2079; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
2080; AVX512BW-NEXT:    movsbl %al, %ecx
2081; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2082; AVX512BW-NEXT:    shrl $8, %eax
2083; AVX512BW-NEXT:    addb %cl, %al
2084; AVX512BW-NEXT:    movl %eax, %edx
2085; AVX512BW-NEXT:    shrb $7, %dl
2086; AVX512BW-NEXT:    sarb $2, %al
2087; AVX512BW-NEXT:    addb %dl, %al
2088; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2089; AVX512BW-NEXT:    mulb %dil
2090; AVX512BW-NEXT:    subb %al, %cl
2091; AVX512BW-NEXT:    movzbl %cl, %eax
2092; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
2093; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
2094; AVX512BW-NEXT:    movsbl %al, %ecx
2095; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2096; AVX512BW-NEXT:    shrl $8, %eax
2097; AVX512BW-NEXT:    addb %cl, %al
2098; AVX512BW-NEXT:    movl %eax, %edx
2099; AVX512BW-NEXT:    shrb $7, %dl
2100; AVX512BW-NEXT:    sarb $2, %al
2101; AVX512BW-NEXT:    addb %dl, %al
2102; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2103; AVX512BW-NEXT:    mulb %dil
2104; AVX512BW-NEXT:    subb %al, %cl
2105; AVX512BW-NEXT:    movzbl %cl, %eax
2106; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
2107; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
2108; AVX512BW-NEXT:    movsbl %al, %ecx
2109; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2110; AVX512BW-NEXT:    shrl $8, %eax
2111; AVX512BW-NEXT:    addb %cl, %al
2112; AVX512BW-NEXT:    movl %eax, %edx
2113; AVX512BW-NEXT:    shrb $7, %dl
2114; AVX512BW-NEXT:    sarb $2, %al
2115; AVX512BW-NEXT:    addb %dl, %al
2116; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2117; AVX512BW-NEXT:    mulb %dil
2118; AVX512BW-NEXT:    subb %al, %cl
2119; AVX512BW-NEXT:    movzbl %cl, %eax
2120; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
2121; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
2122; AVX512BW-NEXT:    movsbl %al, %ecx
2123; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2124; AVX512BW-NEXT:    shrl $8, %eax
2125; AVX512BW-NEXT:    addb %cl, %al
2126; AVX512BW-NEXT:    movl %eax, %edx
2127; AVX512BW-NEXT:    shrb $7, %dl
2128; AVX512BW-NEXT:    sarb $2, %al
2129; AVX512BW-NEXT:    addb %dl, %al
2130; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2131; AVX512BW-NEXT:    mulb %dil
2132; AVX512BW-NEXT:    subb %al, %cl
2133; AVX512BW-NEXT:    movzbl %cl, %eax
2134; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
2135; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
2136; AVX512BW-NEXT:    movsbl %al, %ecx
2137; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2138; AVX512BW-NEXT:    shrl $8, %eax
2139; AVX512BW-NEXT:    addb %cl, %al
2140; AVX512BW-NEXT:    movl %eax, %edx
2141; AVX512BW-NEXT:    shrb $7, %dl
2142; AVX512BW-NEXT:    sarb $2, %al
2143; AVX512BW-NEXT:    addb %dl, %al
2144; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2145; AVX512BW-NEXT:    mulb %dil
2146; AVX512BW-NEXT:    subb %al, %cl
2147; AVX512BW-NEXT:    movzbl %cl, %eax
2148; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
2149; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
2150; AVX512BW-NEXT:    movsbl %al, %ecx
2151; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2152; AVX512BW-NEXT:    shrl $8, %eax
2153; AVX512BW-NEXT:    addb %cl, %al
2154; AVX512BW-NEXT:    movl %eax, %edx
2155; AVX512BW-NEXT:    shrb $7, %dl
2156; AVX512BW-NEXT:    sarb $2, %al
2157; AVX512BW-NEXT:    addb %dl, %al
2158; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2159; AVX512BW-NEXT:    mulb %dil
2160; AVX512BW-NEXT:    subb %al, %cl
2161; AVX512BW-NEXT:    movzbl %cl, %eax
2162; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
2163; AVX512BW-NEXT:    vpextrb $1, %xmm0, %eax
2164; AVX512BW-NEXT:    movsbl %al, %ecx
2165; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2166; AVX512BW-NEXT:    shrl $8, %eax
2167; AVX512BW-NEXT:    addb %cl, %al
2168; AVX512BW-NEXT:    movl %eax, %edx
2169; AVX512BW-NEXT:    shrb $7, %dl
2170; AVX512BW-NEXT:    sarb $2, %al
2171; AVX512BW-NEXT:    addb %dl, %al
2172; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2173; AVX512BW-NEXT:    mulb %dil
2174; AVX512BW-NEXT:    subb %al, %cl
2175; AVX512BW-NEXT:    movzbl %cl, %esi
2176; AVX512BW-NEXT:    vpextrb $0, %xmm0, %eax
2177; AVX512BW-NEXT:    movsbl %al, %edx
2178; AVX512BW-NEXT:    imull $-109, %edx, %eax
2179; AVX512BW-NEXT:    shrl $8, %eax
2180; AVX512BW-NEXT:    addb %dl, %al
2181; AVX512BW-NEXT:    movl %eax, %ecx
2182; AVX512BW-NEXT:    shrb $7, %cl
2183; AVX512BW-NEXT:    sarb $2, %al
2184; AVX512BW-NEXT:    addb %cl, %al
2185; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2186; AVX512BW-NEXT:    mulb %dil
2187; AVX512BW-NEXT:    subb %al, %dl
2188; AVX512BW-NEXT:    movzbl %dl, %eax
2189; AVX512BW-NEXT:    vmovd %eax, %xmm3
2190; AVX512BW-NEXT:    vpinsrb $1, %esi, %xmm3, %xmm3
2191; AVX512BW-NEXT:    vpextrb $2, %xmm0, %eax
2192; AVX512BW-NEXT:    movsbl %al, %ecx
2193; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2194; AVX512BW-NEXT:    shrl $8, %eax
2195; AVX512BW-NEXT:    addb %cl, %al
2196; AVX512BW-NEXT:    movl %eax, %edx
2197; AVX512BW-NEXT:    shrb $7, %dl
2198; AVX512BW-NEXT:    sarb $2, %al
2199; AVX512BW-NEXT:    addb %dl, %al
2200; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2201; AVX512BW-NEXT:    mulb %dil
2202; AVX512BW-NEXT:    subb %al, %cl
2203; AVX512BW-NEXT:    movzbl %cl, %eax
2204; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
2205; AVX512BW-NEXT:    vpextrb $3, %xmm0, %eax
2206; AVX512BW-NEXT:    movsbl %al, %ecx
2207; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2208; AVX512BW-NEXT:    shrl $8, %eax
2209; AVX512BW-NEXT:    addb %cl, %al
2210; AVX512BW-NEXT:    movl %eax, %edx
2211; AVX512BW-NEXT:    shrb $7, %dl
2212; AVX512BW-NEXT:    sarb $2, %al
2213; AVX512BW-NEXT:    addb %dl, %al
2214; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2215; AVX512BW-NEXT:    mulb %dil
2216; AVX512BW-NEXT:    subb %al, %cl
2217; AVX512BW-NEXT:    movzbl %cl, %eax
2218; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
2219; AVX512BW-NEXT:    vpextrb $4, %xmm0, %eax
2220; AVX512BW-NEXT:    movsbl %al, %ecx
2221; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2222; AVX512BW-NEXT:    shrl $8, %eax
2223; AVX512BW-NEXT:    addb %cl, %al
2224; AVX512BW-NEXT:    movl %eax, %edx
2225; AVX512BW-NEXT:    shrb $7, %dl
2226; AVX512BW-NEXT:    sarb $2, %al
2227; AVX512BW-NEXT:    addb %dl, %al
2228; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2229; AVX512BW-NEXT:    mulb %dil
2230; AVX512BW-NEXT:    subb %al, %cl
2231; AVX512BW-NEXT:    movzbl %cl, %eax
2232; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
2233; AVX512BW-NEXT:    vpextrb $5, %xmm0, %eax
2234; AVX512BW-NEXT:    movsbl %al, %ecx
2235; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2236; AVX512BW-NEXT:    shrl $8, %eax
2237; AVX512BW-NEXT:    addb %cl, %al
2238; AVX512BW-NEXT:    movl %eax, %edx
2239; AVX512BW-NEXT:    shrb $7, %dl
2240; AVX512BW-NEXT:    sarb $2, %al
2241; AVX512BW-NEXT:    addb %dl, %al
2242; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2243; AVX512BW-NEXT:    mulb %dil
2244; AVX512BW-NEXT:    subb %al, %cl
2245; AVX512BW-NEXT:    movzbl %cl, %eax
2246; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
2247; AVX512BW-NEXT:    vpextrb $6, %xmm0, %eax
2248; AVX512BW-NEXT:    movsbl %al, %ecx
2249; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2250; AVX512BW-NEXT:    shrl $8, %eax
2251; AVX512BW-NEXT:    addb %cl, %al
2252; AVX512BW-NEXT:    movl %eax, %edx
2253; AVX512BW-NEXT:    shrb $7, %dl
2254; AVX512BW-NEXT:    sarb $2, %al
2255; AVX512BW-NEXT:    addb %dl, %al
2256; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2257; AVX512BW-NEXT:    mulb %dil
2258; AVX512BW-NEXT:    subb %al, %cl
2259; AVX512BW-NEXT:    movzbl %cl, %eax
2260; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
2261; AVX512BW-NEXT:    vpextrb $7, %xmm0, %eax
2262; AVX512BW-NEXT:    movsbl %al, %ecx
2263; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2264; AVX512BW-NEXT:    shrl $8, %eax
2265; AVX512BW-NEXT:    addb %cl, %al
2266; AVX512BW-NEXT:    movl %eax, %edx
2267; AVX512BW-NEXT:    shrb $7, %dl
2268; AVX512BW-NEXT:    sarb $2, %al
2269; AVX512BW-NEXT:    addb %dl, %al
2270; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2271; AVX512BW-NEXT:    mulb %dil
2272; AVX512BW-NEXT:    subb %al, %cl
2273; AVX512BW-NEXT:    movzbl %cl, %eax
2274; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
2275; AVX512BW-NEXT:    vpextrb $8, %xmm0, %eax
2276; AVX512BW-NEXT:    movsbl %al, %ecx
2277; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2278; AVX512BW-NEXT:    shrl $8, %eax
2279; AVX512BW-NEXT:    addb %cl, %al
2280; AVX512BW-NEXT:    movl %eax, %edx
2281; AVX512BW-NEXT:    shrb $7, %dl
2282; AVX512BW-NEXT:    sarb $2, %al
2283; AVX512BW-NEXT:    addb %dl, %al
2284; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2285; AVX512BW-NEXT:    mulb %dil
2286; AVX512BW-NEXT:    subb %al, %cl
2287; AVX512BW-NEXT:    movzbl %cl, %eax
2288; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
2289; AVX512BW-NEXT:    vpextrb $9, %xmm0, %eax
2290; AVX512BW-NEXT:    movsbl %al, %ecx
2291; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2292; AVX512BW-NEXT:    shrl $8, %eax
2293; AVX512BW-NEXT:    addb %cl, %al
2294; AVX512BW-NEXT:    movl %eax, %edx
2295; AVX512BW-NEXT:    shrb $7, %dl
2296; AVX512BW-NEXT:    sarb $2, %al
2297; AVX512BW-NEXT:    addb %dl, %al
2298; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2299; AVX512BW-NEXT:    mulb %dil
2300; AVX512BW-NEXT:    subb %al, %cl
2301; AVX512BW-NEXT:    movzbl %cl, %eax
2302; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
2303; AVX512BW-NEXT:    vpextrb $10, %xmm0, %eax
2304; AVX512BW-NEXT:    movsbl %al, %ecx
2305; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2306; AVX512BW-NEXT:    shrl $8, %eax
2307; AVX512BW-NEXT:    addb %cl, %al
2308; AVX512BW-NEXT:    movl %eax, %edx
2309; AVX512BW-NEXT:    shrb $7, %dl
2310; AVX512BW-NEXT:    sarb $2, %al
2311; AVX512BW-NEXT:    addb %dl, %al
2312; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2313; AVX512BW-NEXT:    mulb %dil
2314; AVX512BW-NEXT:    subb %al, %cl
2315; AVX512BW-NEXT:    movzbl %cl, %eax
2316; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
2317; AVX512BW-NEXT:    vpextrb $11, %xmm0, %eax
2318; AVX512BW-NEXT:    movsbl %al, %ecx
2319; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2320; AVX512BW-NEXT:    shrl $8, %eax
2321; AVX512BW-NEXT:    addb %cl, %al
2322; AVX512BW-NEXT:    movl %eax, %edx
2323; AVX512BW-NEXT:    shrb $7, %dl
2324; AVX512BW-NEXT:    sarb $2, %al
2325; AVX512BW-NEXT:    addb %dl, %al
2326; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2327; AVX512BW-NEXT:    mulb %dil
2328; AVX512BW-NEXT:    subb %al, %cl
2329; AVX512BW-NEXT:    movzbl %cl, %eax
2330; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
2331; AVX512BW-NEXT:    vpextrb $12, %xmm0, %eax
2332; AVX512BW-NEXT:    movsbl %al, %ecx
2333; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2334; AVX512BW-NEXT:    shrl $8, %eax
2335; AVX512BW-NEXT:    addb %cl, %al
2336; AVX512BW-NEXT:    movl %eax, %edx
2337; AVX512BW-NEXT:    shrb $7, %dl
2338; AVX512BW-NEXT:    sarb $2, %al
2339; AVX512BW-NEXT:    addb %dl, %al
2340; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2341; AVX512BW-NEXT:    mulb %dil
2342; AVX512BW-NEXT:    subb %al, %cl
2343; AVX512BW-NEXT:    movzbl %cl, %eax
2344; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
2345; AVX512BW-NEXT:    vpextrb $13, %xmm0, %eax
2346; AVX512BW-NEXT:    movsbl %al, %ecx
2347; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2348; AVX512BW-NEXT:    shrl $8, %eax
2349; AVX512BW-NEXT:    addb %cl, %al
2350; AVX512BW-NEXT:    movl %eax, %edx
2351; AVX512BW-NEXT:    shrb $7, %dl
2352; AVX512BW-NEXT:    sarb $2, %al
2353; AVX512BW-NEXT:    addb %dl, %al
2354; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2355; AVX512BW-NEXT:    mulb %dil
2356; AVX512BW-NEXT:    subb %al, %cl
2357; AVX512BW-NEXT:    movzbl %cl, %eax
2358; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
2359; AVX512BW-NEXT:    vpextrb $14, %xmm0, %eax
2360; AVX512BW-NEXT:    movsbl %al, %ecx
2361; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2362; AVX512BW-NEXT:    shrl $8, %eax
2363; AVX512BW-NEXT:    addb %cl, %al
2364; AVX512BW-NEXT:    movl %eax, %edx
2365; AVX512BW-NEXT:    shrb $7, %dl
2366; AVX512BW-NEXT:    sarb $2, %al
2367; AVX512BW-NEXT:    addb %dl, %al
2368; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2369; AVX512BW-NEXT:    mulb %dil
2370; AVX512BW-NEXT:    subb %al, %cl
2371; AVX512BW-NEXT:    movzbl %cl, %eax
2372; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
2373; AVX512BW-NEXT:    vpextrb $15, %xmm0, %eax
2374; AVX512BW-NEXT:    movsbl %al, %ecx
2375; AVX512BW-NEXT:    imull $-109, %ecx, %eax
2376; AVX512BW-NEXT:    shrl $8, %eax
2377; AVX512BW-NEXT:    addb %cl, %al
2378; AVX512BW-NEXT:    movl %eax, %edx
2379; AVX512BW-NEXT:    shrb $7, %dl
2380; AVX512BW-NEXT:    sarb $2, %al
2381; AVX512BW-NEXT:    addb %dl, %al
2382; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
2383; AVX512BW-NEXT:    mulb %dil
2384; AVX512BW-NEXT:    subb %al, %cl
2385; AVX512BW-NEXT:    movzbl %cl, %eax
2386; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
2387; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
2388; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2389; AVX512BW-NEXT:    retq
2390  %res = srem <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
2391  ret <64 x i8> %res
2392}
2393