• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
4
5;
6; udiv by 7
7;
8
; test_div7_8i64: unsigned division of every lane of an <8 x i64> vector by
; the constant 7 (see the udiv at the end of the function).
;
; All assertions use the shared AVX prefix, which both RUN lines pass to
; FileCheck, so the same instruction sequence is expected for +avx512f and
; +avx512bw. In the expected output each 64-bit lane is moved into %rcx
; (vextracti32x4 to pick a 128-bit chunk, then vpextrq/vmovq) and handled with
; scalar code:
;   mulq by 0x2492492492492493, then subq / shrq / addq / shrq $2
; The eight scalar results are put back together with vpunpcklqdq,
; vinserti128 and vinserti64x4.
;
; NOTE: these check lines are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
9define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
10; AVX-LABEL: test_div7_8i64:
11; AVX:       # BB#0:
12; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
13; AVX-NEXT:    vpextrq $1, %xmm1, %rcx
14; AVX-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
15; AVX-NEXT:    movq %rcx, %rax
16; AVX-NEXT:    mulq %rsi
17; AVX-NEXT:    subq %rdx, %rcx
18; AVX-NEXT:    shrq %rcx
19; AVX-NEXT:    addq %rdx, %rcx
20; AVX-NEXT:    shrq $2, %rcx
21; AVX-NEXT:    vmovq %rcx, %xmm2
22; AVX-NEXT:    vmovq %xmm1, %rcx
23; AVX-NEXT:    movq %rcx, %rax
24; AVX-NEXT:    mulq %rsi
25; AVX-NEXT:    subq %rdx, %rcx
26; AVX-NEXT:    shrq %rcx
27; AVX-NEXT:    addq %rdx, %rcx
28; AVX-NEXT:    shrq $2, %rcx
29; AVX-NEXT:    vmovq %rcx, %xmm1
30; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
31; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
32; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
33; AVX-NEXT:    movq %rcx, %rax
34; AVX-NEXT:    mulq %rsi
35; AVX-NEXT:    subq %rdx, %rcx
36; AVX-NEXT:    shrq %rcx
37; AVX-NEXT:    addq %rdx, %rcx
38; AVX-NEXT:    shrq $2, %rcx
39; AVX-NEXT:    vmovq %rcx, %xmm3
40; AVX-NEXT:    vmovq %xmm2, %rcx
41; AVX-NEXT:    movq %rcx, %rax
42; AVX-NEXT:    mulq %rsi
43; AVX-NEXT:    subq %rdx, %rcx
44; AVX-NEXT:    shrq %rcx
45; AVX-NEXT:    addq %rdx, %rcx
46; AVX-NEXT:    shrq $2, %rcx
47; AVX-NEXT:    vmovq %rcx, %xmm2
48; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
49; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
50; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
51; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
52; AVX-NEXT:    movq %rcx, %rax
53; AVX-NEXT:    mulq %rsi
54; AVX-NEXT:    subq %rdx, %rcx
55; AVX-NEXT:    shrq %rcx
56; AVX-NEXT:    addq %rdx, %rcx
57; AVX-NEXT:    shrq $2, %rcx
58; AVX-NEXT:    vmovq %rcx, %xmm3
59; AVX-NEXT:    vmovq %xmm2, %rcx
60; AVX-NEXT:    movq %rcx, %rax
61; AVX-NEXT:    mulq %rsi
62; AVX-NEXT:    subq %rdx, %rcx
63; AVX-NEXT:    shrq %rcx
64; AVX-NEXT:    addq %rdx, %rcx
65; AVX-NEXT:    shrq $2, %rcx
66; AVX-NEXT:    vmovq %rcx, %xmm2
67; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
68; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
69; AVX-NEXT:    movq %rcx, %rax
70; AVX-NEXT:    mulq %rsi
71; AVX-NEXT:    subq %rdx, %rcx
72; AVX-NEXT:    shrq %rcx
73; AVX-NEXT:    addq %rdx, %rcx
74; AVX-NEXT:    shrq $2, %rcx
75; AVX-NEXT:    vmovq %rcx, %xmm3
76; AVX-NEXT:    vmovq %xmm0, %rcx
77; AVX-NEXT:    movq %rcx, %rax
78; AVX-NEXT:    mulq %rsi
79; AVX-NEXT:    subq %rdx, %rcx
80; AVX-NEXT:    shrq %rcx
81; AVX-NEXT:    addq %rdx, %rcx
82; AVX-NEXT:    shrq $2, %rcx
83; AVX-NEXT:    vmovq %rcx, %xmm0
84; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
85; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
86; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
87; AVX-NEXT:    retq
88  %res = udiv <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
89  ret <8 x i64> %res
90}
91
; test_div7_16i32: unsigned division of every lane of a <16 x i32> vector by
; the constant 7 (see the udiv at the end of the function).
;
; All assertions use the shared AVX prefix, which both RUN lines pass to
; FileCheck, so the same instruction sequence is expected for +avx512f and
; +avx512bw. In the expected output each 32-bit lane is moved into a
; general-purpose register (vextracti32x4 to pick a 128-bit chunk, then
; vpextrd/vmovd) and handled with scalar code:
;   imulq by 613566757 (0x24924925), shrq $32, then subl / shrl / addl / shrl $2
; Each group of four results is rebuilt with vmovd + vpinsrd, and the four
; 128-bit pieces are combined with vinserti128 and vinserti64x4.
;
; NOTE: these check lines are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
92define <16 x i32> @test_div7_16i32(<16 x i32> %a) nounwind {
93; AVX-LABEL: test_div7_16i32:
94; AVX:       # BB#0:
95; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
96; AVX-NEXT:    vpextrd $1, %xmm1, %eax
97; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
98; AVX-NEXT:    shrq $32, %rcx
99; AVX-NEXT:    subl %ecx, %eax
100; AVX-NEXT:    shrl %eax
101; AVX-NEXT:    addl %ecx, %eax
102; AVX-NEXT:    shrl $2, %eax
103; AVX-NEXT:    vmovd %xmm1, %ecx
104; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
105; AVX-NEXT:    shrq $32, %rdx
106; AVX-NEXT:    subl %edx, %ecx
107; AVX-NEXT:    shrl %ecx
108; AVX-NEXT:    addl %edx, %ecx
109; AVX-NEXT:    shrl $2, %ecx
110; AVX-NEXT:    vmovd %ecx, %xmm2
111; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
112; AVX-NEXT:    vpextrd $2, %xmm1, %eax
113; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
114; AVX-NEXT:    shrq $32, %rcx
115; AVX-NEXT:    subl %ecx, %eax
116; AVX-NEXT:    shrl %eax
117; AVX-NEXT:    addl %ecx, %eax
118; AVX-NEXT:    shrl $2, %eax
119; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
120; AVX-NEXT:    vpextrd $3, %xmm1, %eax
121; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
122; AVX-NEXT:    shrq $32, %rcx
123; AVX-NEXT:    subl %ecx, %eax
124; AVX-NEXT:    shrl %eax
125; AVX-NEXT:    addl %ecx, %eax
126; AVX-NEXT:    shrl $2, %eax
127; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
128; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
129; AVX-NEXT:    vpextrd $1, %xmm2, %eax
130; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
131; AVX-NEXT:    shrq $32, %rcx
132; AVX-NEXT:    subl %ecx, %eax
133; AVX-NEXT:    shrl %eax
134; AVX-NEXT:    addl %ecx, %eax
135; AVX-NEXT:    shrl $2, %eax
136; AVX-NEXT:    vmovd %xmm2, %ecx
137; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
138; AVX-NEXT:    shrq $32, %rdx
139; AVX-NEXT:    subl %edx, %ecx
140; AVX-NEXT:    shrl %ecx
141; AVX-NEXT:    addl %edx, %ecx
142; AVX-NEXT:    shrl $2, %ecx
143; AVX-NEXT:    vmovd %ecx, %xmm3
144; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
145; AVX-NEXT:    vpextrd $2, %xmm2, %eax
146; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
147; AVX-NEXT:    shrq $32, %rcx
148; AVX-NEXT:    subl %ecx, %eax
149; AVX-NEXT:    shrl %eax
150; AVX-NEXT:    addl %ecx, %eax
151; AVX-NEXT:    shrl $2, %eax
152; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
153; AVX-NEXT:    vpextrd $3, %xmm2, %eax
154; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
155; AVX-NEXT:    shrq $32, %rcx
156; AVX-NEXT:    subl %ecx, %eax
157; AVX-NEXT:    shrl %eax
158; AVX-NEXT:    addl %ecx, %eax
159; AVX-NEXT:    shrl $2, %eax
160; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
161; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
162; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
163; AVX-NEXT:    vpextrd $1, %xmm2, %eax
164; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
165; AVX-NEXT:    shrq $32, %rcx
166; AVX-NEXT:    subl %ecx, %eax
167; AVX-NEXT:    shrl %eax
168; AVX-NEXT:    addl %ecx, %eax
169; AVX-NEXT:    shrl $2, %eax
170; AVX-NEXT:    vmovd %xmm2, %ecx
171; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
172; AVX-NEXT:    shrq $32, %rdx
173; AVX-NEXT:    subl %edx, %ecx
174; AVX-NEXT:    shrl %ecx
175; AVX-NEXT:    addl %edx, %ecx
176; AVX-NEXT:    shrl $2, %ecx
177; AVX-NEXT:    vmovd %ecx, %xmm3
178; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
179; AVX-NEXT:    vpextrd $2, %xmm2, %eax
180; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
181; AVX-NEXT:    shrq $32, %rcx
182; AVX-NEXT:    subl %ecx, %eax
183; AVX-NEXT:    shrl %eax
184; AVX-NEXT:    addl %ecx, %eax
185; AVX-NEXT:    shrl $2, %eax
186; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
187; AVX-NEXT:    vpextrd $3, %xmm2, %eax
188; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
189; AVX-NEXT:    shrq $32, %rcx
190; AVX-NEXT:    subl %ecx, %eax
191; AVX-NEXT:    shrl %eax
192; AVX-NEXT:    addl %ecx, %eax
193; AVX-NEXT:    shrl $2, %eax
194; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
195; AVX-NEXT:    vpextrd $1, %xmm0, %eax
196; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
197; AVX-NEXT:    shrq $32, %rcx
198; AVX-NEXT:    subl %ecx, %eax
199; AVX-NEXT:    shrl %eax
200; AVX-NEXT:    addl %ecx, %eax
201; AVX-NEXT:    shrl $2, %eax
202; AVX-NEXT:    vmovd %xmm0, %ecx
203; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
204; AVX-NEXT:    shrq $32, %rdx
205; AVX-NEXT:    subl %edx, %ecx
206; AVX-NEXT:    shrl %ecx
207; AVX-NEXT:    addl %edx, %ecx
208; AVX-NEXT:    shrl $2, %ecx
209; AVX-NEXT:    vmovd %ecx, %xmm3
210; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
211; AVX-NEXT:    vpextrd $2, %xmm0, %eax
212; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
213; AVX-NEXT:    shrq $32, %rcx
214; AVX-NEXT:    subl %ecx, %eax
215; AVX-NEXT:    shrl %eax
216; AVX-NEXT:    addl %ecx, %eax
217; AVX-NEXT:    shrl $2, %eax
218; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
219; AVX-NEXT:    vpextrd $3, %xmm0, %eax
220; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
221; AVX-NEXT:    shrq $32, %rcx
222; AVX-NEXT:    subl %ecx, %eax
223; AVX-NEXT:    shrl %eax
224; AVX-NEXT:    addl %ecx, %eax
225; AVX-NEXT:    shrl $2, %eax
226; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm0
227; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
228; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
229; AVX-NEXT:    retq
230  %res = udiv <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
231  ret <16 x i32> %res
232}
233
; test_div7_32i16: unsigned division of every lane of a <32 x i16> vector by
; the constant 7 (see the udiv at the end of the function).
;
; Unlike the i64/i32 tests, the expected output stays in vector registers and
; differs per RUN line, so the assertions use the target-specific prefixes:
;   AVX512F  - the same five-instruction sequence (vpmulhuw, vpsubw,
;              vpsrlw $1, vpaddw, vpsrlw $2) is applied to %ymm0 and to %ymm1
;              separately, with a 9363 splat held in %ymm2.
;   AVX512BW - one pass over %zmm0 using the same five operations, with the
;              vpmulhuw multiplier read from a RIP-relative memory operand.
;
; NOTE: these check lines are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
234define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind {
235; AVX512F-LABEL: test_div7_32i16:
236; AVX512F:       # BB#0:
237; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363]
238; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm3
239; AVX512F-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
240; AVX512F-NEXT:    vpsrlw $1, %ymm0, %ymm0
241; AVX512F-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
242; AVX512F-NEXT:    vpsrlw $2, %ymm0, %ymm0
243; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm2
244; AVX512F-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
245; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
246; AVX512F-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
247; AVX512F-NEXT:    vpsrlw $2, %ymm1, %ymm1
248; AVX512F-NEXT:    retq
249;
250; AVX512BW-LABEL: test_div7_32i16:
251; AVX512BW:       # BB#0:
252; AVX512BW-NEXT:    vpmulhuw {{.*}}(%rip), %zmm0, %zmm1
253; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
254; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm0
255; AVX512BW-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
256; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm0
257; AVX512BW-NEXT:    retq
258  %res = udiv <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
259  ret <32 x i16> %res
260}
261
262define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
263; AVX512F-LABEL: test_div7_64i8:
264; AVX512F:       # BB#0:
265; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
266; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
267; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
268; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
269; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
270; AVX512F-NEXT:    vpmullw %ymm3, %ymm4, %ymm4
271; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm4
272; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
273; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
274; AVX512F-NEXT:    vpmullw %ymm2, %ymm5, %ymm5
275; AVX512F-NEXT:    vpsrlw $8, %ymm5, %ymm5
276; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm6 = ymm5[2,3],ymm4[2,3]
277; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm5, %ymm4
278; AVX512F-NEXT:    vpackuswb %ymm6, %ymm4, %ymm4
279; AVX512F-NEXT:    vpsubb %ymm4, %ymm0, %ymm0
280; AVX512F-NEXT:    vpsrlw $1, %ymm0, %ymm0
281; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
282; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0
283; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
284; AVX512F-NEXT:    vpsrlw $2, %ymm0, %ymm0
285; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
286; AVX512F-NEXT:    vpand %ymm4, %ymm0, %ymm0
287; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm6
288; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
289; AVX512F-NEXT:    vpmullw %ymm3, %ymm6, %ymm3
290; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3
291; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm6 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
292; AVX512F-NEXT:    vpmullw %ymm2, %ymm6, %ymm2
293; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
294; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm6 = ymm2[2,3],ymm3[2,3]
295; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
296; AVX512F-NEXT:    vpackuswb %ymm6, %ymm2, %ymm2
297; AVX512F-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
298; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
299; AVX512F-NEXT:    vpand %ymm5, %ymm1, %ymm1
300; AVX512F-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
301; AVX512F-NEXT:    vpsrlw $2, %ymm1, %ymm1
302; AVX512F-NEXT:    vpand %ymm4, %ymm1, %ymm1
303; AVX512F-NEXT:    retq
304;
305; AVX512BW-LABEL: test_div7_64i8:
306; AVX512BW:       # BB#0:
307; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
308; AVX512BW-NEXT:    vpextrb $1, %xmm1, %eax
309; AVX512BW-NEXT:    imull $37, %eax, %ecx
310; AVX512BW-NEXT:    shrl $8, %ecx
311; AVX512BW-NEXT:    subb %cl, %al
312; AVX512BW-NEXT:    shrb %al
313; AVX512BW-NEXT:    addb %cl, %al
314; AVX512BW-NEXT:    shrb $2, %al
315; AVX512BW-NEXT:    movzbl %al, %eax
316; AVX512BW-NEXT:    vpextrb $0, %xmm1, %ecx
317; AVX512BW-NEXT:    imull $37, %ecx, %edx
318; AVX512BW-NEXT:    shrl $8, %edx
319; AVX512BW-NEXT:    subb %dl, %cl
320; AVX512BW-NEXT:    shrb %cl
321; AVX512BW-NEXT:    addb %dl, %cl
322; AVX512BW-NEXT:    shrb $2, %cl
323; AVX512BW-NEXT:    movzbl %cl, %ecx
324; AVX512BW-NEXT:    vmovd %ecx, %xmm2
325; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
326; AVX512BW-NEXT:    vpextrb $2, %xmm1, %eax
327; AVX512BW-NEXT:    imull $37, %eax, %ecx
328; AVX512BW-NEXT:    shrl $8, %ecx
329; AVX512BW-NEXT:    subb %cl, %al
330; AVX512BW-NEXT:    shrb %al
331; AVX512BW-NEXT:    addb %cl, %al
332; AVX512BW-NEXT:    shrb $2, %al
333; AVX512BW-NEXT:    movzbl %al, %eax
334; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
335; AVX512BW-NEXT:    vpextrb $3, %xmm1, %eax
336; AVX512BW-NEXT:    imull $37, %eax, %ecx
337; AVX512BW-NEXT:    shrl $8, %ecx
338; AVX512BW-NEXT:    subb %cl, %al
339; AVX512BW-NEXT:    shrb %al
340; AVX512BW-NEXT:    addb %cl, %al
341; AVX512BW-NEXT:    shrb $2, %al
342; AVX512BW-NEXT:    movzbl %al, %eax
343; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
344; AVX512BW-NEXT:    vpextrb $4, %xmm1, %eax
345; AVX512BW-NEXT:    imull $37, %eax, %ecx
346; AVX512BW-NEXT:    shrl $8, %ecx
347; AVX512BW-NEXT:    subb %cl, %al
348; AVX512BW-NEXT:    shrb %al
349; AVX512BW-NEXT:    addb %cl, %al
350; AVX512BW-NEXT:    shrb $2, %al
351; AVX512BW-NEXT:    movzbl %al, %eax
352; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
353; AVX512BW-NEXT:    vpextrb $5, %xmm1, %eax
354; AVX512BW-NEXT:    imull $37, %eax, %ecx
355; AVX512BW-NEXT:    shrl $8, %ecx
356; AVX512BW-NEXT:    subb %cl, %al
357; AVX512BW-NEXT:    shrb %al
358; AVX512BW-NEXT:    addb %cl, %al
359; AVX512BW-NEXT:    shrb $2, %al
360; AVX512BW-NEXT:    movzbl %al, %eax
361; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
362; AVX512BW-NEXT:    vpextrb $6, %xmm1, %eax
363; AVX512BW-NEXT:    imull $37, %eax, %ecx
364; AVX512BW-NEXT:    shrl $8, %ecx
365; AVX512BW-NEXT:    subb %cl, %al
366; AVX512BW-NEXT:    shrb %al
367; AVX512BW-NEXT:    addb %cl, %al
368; AVX512BW-NEXT:    shrb $2, %al
369; AVX512BW-NEXT:    movzbl %al, %eax
370; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
371; AVX512BW-NEXT:    vpextrb $7, %xmm1, %eax
372; AVX512BW-NEXT:    imull $37, %eax, %ecx
373; AVX512BW-NEXT:    shrl $8, %ecx
374; AVX512BW-NEXT:    subb %cl, %al
375; AVX512BW-NEXT:    shrb %al
376; AVX512BW-NEXT:    addb %cl, %al
377; AVX512BW-NEXT:    shrb $2, %al
378; AVX512BW-NEXT:    movzbl %al, %eax
379; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
380; AVX512BW-NEXT:    vpextrb $8, %xmm1, %eax
381; AVX512BW-NEXT:    imull $37, %eax, %ecx
382; AVX512BW-NEXT:    shrl $8, %ecx
383; AVX512BW-NEXT:    subb %cl, %al
384; AVX512BW-NEXT:    shrb %al
385; AVX512BW-NEXT:    addb %cl, %al
386; AVX512BW-NEXT:    shrb $2, %al
387; AVX512BW-NEXT:    movzbl %al, %eax
388; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
389; AVX512BW-NEXT:    vpextrb $9, %xmm1, %eax
390; AVX512BW-NEXT:    imull $37, %eax, %ecx
391; AVX512BW-NEXT:    shrl $8, %ecx
392; AVX512BW-NEXT:    subb %cl, %al
393; AVX512BW-NEXT:    shrb %al
394; AVX512BW-NEXT:    addb %cl, %al
395; AVX512BW-NEXT:    shrb $2, %al
396; AVX512BW-NEXT:    movzbl %al, %eax
397; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
398; AVX512BW-NEXT:    vpextrb $10, %xmm1, %eax
399; AVX512BW-NEXT:    imull $37, %eax, %ecx
400; AVX512BW-NEXT:    shrl $8, %ecx
401; AVX512BW-NEXT:    subb %cl, %al
402; AVX512BW-NEXT:    shrb %al
403; AVX512BW-NEXT:    addb %cl, %al
404; AVX512BW-NEXT:    shrb $2, %al
405; AVX512BW-NEXT:    movzbl %al, %eax
406; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
407; AVX512BW-NEXT:    vpextrb $11, %xmm1, %eax
408; AVX512BW-NEXT:    imull $37, %eax, %ecx
409; AVX512BW-NEXT:    shrl $8, %ecx
410; AVX512BW-NEXT:    subb %cl, %al
411; AVX512BW-NEXT:    shrb %al
412; AVX512BW-NEXT:    addb %cl, %al
413; AVX512BW-NEXT:    shrb $2, %al
414; AVX512BW-NEXT:    movzbl %al, %eax
415; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
416; AVX512BW-NEXT:    vpextrb $12, %xmm1, %eax
417; AVX512BW-NEXT:    imull $37, %eax, %ecx
418; AVX512BW-NEXT:    shrl $8, %ecx
419; AVX512BW-NEXT:    subb %cl, %al
420; AVX512BW-NEXT:    shrb %al
421; AVX512BW-NEXT:    addb %cl, %al
422; AVX512BW-NEXT:    shrb $2, %al
423; AVX512BW-NEXT:    movzbl %al, %eax
424; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
425; AVX512BW-NEXT:    vpextrb $13, %xmm1, %eax
426; AVX512BW-NEXT:    imull $37, %eax, %ecx
427; AVX512BW-NEXT:    shrl $8, %ecx
428; AVX512BW-NEXT:    subb %cl, %al
429; AVX512BW-NEXT:    shrb %al
430; AVX512BW-NEXT:    addb %cl, %al
431; AVX512BW-NEXT:    shrb $2, %al
432; AVX512BW-NEXT:    movzbl %al, %eax
433; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
434; AVX512BW-NEXT:    vpextrb $14, %xmm1, %eax
435; AVX512BW-NEXT:    imull $37, %eax, %ecx
436; AVX512BW-NEXT:    shrl $8, %ecx
437; AVX512BW-NEXT:    subb %cl, %al
438; AVX512BW-NEXT:    shrb %al
439; AVX512BW-NEXT:    addb %cl, %al
440; AVX512BW-NEXT:    shrb $2, %al
441; AVX512BW-NEXT:    movzbl %al, %eax
442; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
443; AVX512BW-NEXT:    vpextrb $15, %xmm1, %eax
444; AVX512BW-NEXT:    imull $37, %eax, %ecx
445; AVX512BW-NEXT:    shrl $8, %ecx
446; AVX512BW-NEXT:    subb %cl, %al
447; AVX512BW-NEXT:    shrb %al
448; AVX512BW-NEXT:    addb %cl, %al
449; AVX512BW-NEXT:    shrb $2, %al
450; AVX512BW-NEXT:    movzbl %al, %eax
451; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
452; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
453; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
454; AVX512BW-NEXT:    imull $37, %eax, %ecx
455; AVX512BW-NEXT:    shrl $8, %ecx
456; AVX512BW-NEXT:    subb %cl, %al
457; AVX512BW-NEXT:    shrb %al
458; AVX512BW-NEXT:    addb %cl, %al
459; AVX512BW-NEXT:    shrb $2, %al
460; AVX512BW-NEXT:    movzbl %al, %eax
461; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
462; AVX512BW-NEXT:    imull $37, %ecx, %edx
463; AVX512BW-NEXT:    shrl $8, %edx
464; AVX512BW-NEXT:    subb %dl, %cl
465; AVX512BW-NEXT:    shrb %cl
466; AVX512BW-NEXT:    addb %dl, %cl
467; AVX512BW-NEXT:    shrb $2, %cl
468; AVX512BW-NEXT:    movzbl %cl, %ecx
469; AVX512BW-NEXT:    vmovd %ecx, %xmm3
470; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
471; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
472; AVX512BW-NEXT:    imull $37, %eax, %ecx
473; AVX512BW-NEXT:    shrl $8, %ecx
474; AVX512BW-NEXT:    subb %cl, %al
475; AVX512BW-NEXT:    shrb %al
476; AVX512BW-NEXT:    addb %cl, %al
477; AVX512BW-NEXT:    shrb $2, %al
478; AVX512BW-NEXT:    movzbl %al, %eax
479; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
480; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
481; AVX512BW-NEXT:    imull $37, %eax, %ecx
482; AVX512BW-NEXT:    shrl $8, %ecx
483; AVX512BW-NEXT:    subb %cl, %al
484; AVX512BW-NEXT:    shrb %al
485; AVX512BW-NEXT:    addb %cl, %al
486; AVX512BW-NEXT:    shrb $2, %al
487; AVX512BW-NEXT:    movzbl %al, %eax
488; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
489; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
490; AVX512BW-NEXT:    imull $37, %eax, %ecx
491; AVX512BW-NEXT:    shrl $8, %ecx
492; AVX512BW-NEXT:    subb %cl, %al
493; AVX512BW-NEXT:    shrb %al
494; AVX512BW-NEXT:    addb %cl, %al
495; AVX512BW-NEXT:    shrb $2, %al
496; AVX512BW-NEXT:    movzbl %al, %eax
497; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
498; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
499; AVX512BW-NEXT:    imull $37, %eax, %ecx
500; AVX512BW-NEXT:    shrl $8, %ecx
501; AVX512BW-NEXT:    subb %cl, %al
502; AVX512BW-NEXT:    shrb %al
503; AVX512BW-NEXT:    addb %cl, %al
504; AVX512BW-NEXT:    shrb $2, %al
505; AVX512BW-NEXT:    movzbl %al, %eax
506; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
507; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
508; AVX512BW-NEXT:    imull $37, %eax, %ecx
509; AVX512BW-NEXT:    shrl $8, %ecx
510; AVX512BW-NEXT:    subb %cl, %al
511; AVX512BW-NEXT:    shrb %al
512; AVX512BW-NEXT:    addb %cl, %al
513; AVX512BW-NEXT:    shrb $2, %al
514; AVX512BW-NEXT:    movzbl %al, %eax
515; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
516; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
517; AVX512BW-NEXT:    imull $37, %eax, %ecx
518; AVX512BW-NEXT:    shrl $8, %ecx
519; AVX512BW-NEXT:    subb %cl, %al
520; AVX512BW-NEXT:    shrb %al
521; AVX512BW-NEXT:    addb %cl, %al
522; AVX512BW-NEXT:    shrb $2, %al
523; AVX512BW-NEXT:    movzbl %al, %eax
524; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
525; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
526; AVX512BW-NEXT:    imull $37, %eax, %ecx
527; AVX512BW-NEXT:    shrl $8, %ecx
528; AVX512BW-NEXT:    subb %cl, %al
529; AVX512BW-NEXT:    shrb %al
530; AVX512BW-NEXT:    addb %cl, %al
531; AVX512BW-NEXT:    shrb $2, %al
532; AVX512BW-NEXT:    movzbl %al, %eax
533; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
534; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
535; AVX512BW-NEXT:    imull $37, %eax, %ecx
536; AVX512BW-NEXT:    shrl $8, %ecx
537; AVX512BW-NEXT:    subb %cl, %al
538; AVX512BW-NEXT:    shrb %al
539; AVX512BW-NEXT:    addb %cl, %al
540; AVX512BW-NEXT:    shrb $2, %al
541; AVX512BW-NEXT:    movzbl %al, %eax
542; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
543; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
544; AVX512BW-NEXT:    imull $37, %eax, %ecx
545; AVX512BW-NEXT:    shrl $8, %ecx
546; AVX512BW-NEXT:    subb %cl, %al
547; AVX512BW-NEXT:    shrb %al
548; AVX512BW-NEXT:    addb %cl, %al
549; AVX512BW-NEXT:    shrb $2, %al
550; AVX512BW-NEXT:    movzbl %al, %eax
551; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
552; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
553; AVX512BW-NEXT:    imull $37, %eax, %ecx
554; AVX512BW-NEXT:    shrl $8, %ecx
555; AVX512BW-NEXT:    subb %cl, %al
556; AVX512BW-NEXT:    shrb %al
557; AVX512BW-NEXT:    addb %cl, %al
558; AVX512BW-NEXT:    shrb $2, %al
559; AVX512BW-NEXT:    movzbl %al, %eax
560; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
561; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
562; AVX512BW-NEXT:    imull $37, %eax, %ecx
563; AVX512BW-NEXT:    shrl $8, %ecx
564; AVX512BW-NEXT:    subb %cl, %al
565; AVX512BW-NEXT:    shrb %al
566; AVX512BW-NEXT:    addb %cl, %al
567; AVX512BW-NEXT:    shrb $2, %al
568; AVX512BW-NEXT:    movzbl %al, %eax
569; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
570; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
571; AVX512BW-NEXT:    imull $37, %eax, %ecx
572; AVX512BW-NEXT:    shrl $8, %ecx
573; AVX512BW-NEXT:    subb %cl, %al
574; AVX512BW-NEXT:    shrb %al
575; AVX512BW-NEXT:    addb %cl, %al
576; AVX512BW-NEXT:    shrb $2, %al
577; AVX512BW-NEXT:    movzbl %al, %eax
578; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
579; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
580; AVX512BW-NEXT:    imull $37, %eax, %ecx
581; AVX512BW-NEXT:    shrl $8, %ecx
582; AVX512BW-NEXT:    subb %cl, %al
583; AVX512BW-NEXT:    shrb %al
584; AVX512BW-NEXT:    addb %cl, %al
585; AVX512BW-NEXT:    shrb $2, %al
586; AVX512BW-NEXT:    movzbl %al, %eax
587; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
588; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
589; AVX512BW-NEXT:    imull $37, %eax, %ecx
590; AVX512BW-NEXT:    shrl $8, %ecx
591; AVX512BW-NEXT:    subb %cl, %al
592; AVX512BW-NEXT:    shrb %al
593; AVX512BW-NEXT:    addb %cl, %al
594; AVX512BW-NEXT:    shrb $2, %al
595; AVX512BW-NEXT:    movzbl %al, %eax
596; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
597; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
598; AVX512BW-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
599; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
600; AVX512BW-NEXT:    imull $37, %eax, %ecx
601; AVX512BW-NEXT:    shrl $8, %ecx
602; AVX512BW-NEXT:    subb %cl, %al
603; AVX512BW-NEXT:    shrb %al
604; AVX512BW-NEXT:    addb %cl, %al
605; AVX512BW-NEXT:    shrb $2, %al
606; AVX512BW-NEXT:    movzbl %al, %eax
607; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
608; AVX512BW-NEXT:    imull $37, %ecx, %edx
609; AVX512BW-NEXT:    shrl $8, %edx
610; AVX512BW-NEXT:    subb %dl, %cl
611; AVX512BW-NEXT:    shrb %cl
612; AVX512BW-NEXT:    addb %dl, %cl
613; AVX512BW-NEXT:    shrb $2, %cl
614; AVX512BW-NEXT:    movzbl %cl, %ecx
615; AVX512BW-NEXT:    vmovd %ecx, %xmm3
616; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
617; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
618; AVX512BW-NEXT:    imull $37, %eax, %ecx
619; AVX512BW-NEXT:    shrl $8, %ecx
620; AVX512BW-NEXT:    subb %cl, %al
621; AVX512BW-NEXT:    shrb %al
622; AVX512BW-NEXT:    addb %cl, %al
623; AVX512BW-NEXT:    shrb $2, %al
624; AVX512BW-NEXT:    movzbl %al, %eax
625; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
626; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
627; AVX512BW-NEXT:    imull $37, %eax, %ecx
628; AVX512BW-NEXT:    shrl $8, %ecx
629; AVX512BW-NEXT:    subb %cl, %al
630; AVX512BW-NEXT:    shrb %al
631; AVX512BW-NEXT:    addb %cl, %al
632; AVX512BW-NEXT:    shrb $2, %al
633; AVX512BW-NEXT:    movzbl %al, %eax
634; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
635; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
636; AVX512BW-NEXT:    imull $37, %eax, %ecx
637; AVX512BW-NEXT:    shrl $8, %ecx
638; AVX512BW-NEXT:    subb %cl, %al
639; AVX512BW-NEXT:    shrb %al
640; AVX512BW-NEXT:    addb %cl, %al
641; AVX512BW-NEXT:    shrb $2, %al
642; AVX512BW-NEXT:    movzbl %al, %eax
643; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
644; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
645; AVX512BW-NEXT:    imull $37, %eax, %ecx
646; AVX512BW-NEXT:    shrl $8, %ecx
647; AVX512BW-NEXT:    subb %cl, %al
648; AVX512BW-NEXT:    shrb %al
649; AVX512BW-NEXT:    addb %cl, %al
650; AVX512BW-NEXT:    shrb $2, %al
651; AVX512BW-NEXT:    movzbl %al, %eax
652; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
653; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
654; AVX512BW-NEXT:    imull $37, %eax, %ecx
655; AVX512BW-NEXT:    shrl $8, %ecx
656; AVX512BW-NEXT:    subb %cl, %al
657; AVX512BW-NEXT:    shrb %al
658; AVX512BW-NEXT:    addb %cl, %al
659; AVX512BW-NEXT:    shrb $2, %al
660; AVX512BW-NEXT:    movzbl %al, %eax
661; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
662; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
663; AVX512BW-NEXT:    imull $37, %eax, %ecx
664; AVX512BW-NEXT:    shrl $8, %ecx
665; AVX512BW-NEXT:    subb %cl, %al
666; AVX512BW-NEXT:    shrb %al
667; AVX512BW-NEXT:    addb %cl, %al
668; AVX512BW-NEXT:    shrb $2, %al
669; AVX512BW-NEXT:    movzbl %al, %eax
670; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
671; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
672; AVX512BW-NEXT:    imull $37, %eax, %ecx
673; AVX512BW-NEXT:    shrl $8, %ecx
674; AVX512BW-NEXT:    subb %cl, %al
675; AVX512BW-NEXT:    shrb %al
676; AVX512BW-NEXT:    addb %cl, %al
677; AVX512BW-NEXT:    shrb $2, %al
678; AVX512BW-NEXT:    movzbl %al, %eax
679; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
680; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
681; AVX512BW-NEXT:    imull $37, %eax, %ecx
682; AVX512BW-NEXT:    shrl $8, %ecx
683; AVX512BW-NEXT:    subb %cl, %al
684; AVX512BW-NEXT:    shrb %al
685; AVX512BW-NEXT:    addb %cl, %al
686; AVX512BW-NEXT:    shrb $2, %al
687; AVX512BW-NEXT:    movzbl %al, %eax
688; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
689; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
690; AVX512BW-NEXT:    imull $37, %eax, %ecx
691; AVX512BW-NEXT:    shrl $8, %ecx
692; AVX512BW-NEXT:    subb %cl, %al
693; AVX512BW-NEXT:    shrb %al
694; AVX512BW-NEXT:    addb %cl, %al
695; AVX512BW-NEXT:    shrb $2, %al
696; AVX512BW-NEXT:    movzbl %al, %eax
697; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
698; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
699; AVX512BW-NEXT:    imull $37, %eax, %ecx
700; AVX512BW-NEXT:    shrl $8, %ecx
701; AVX512BW-NEXT:    subb %cl, %al
702; AVX512BW-NEXT:    shrb %al
703; AVX512BW-NEXT:    addb %cl, %al
704; AVX512BW-NEXT:    shrb $2, %al
705; AVX512BW-NEXT:    movzbl %al, %eax
706; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
707; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
708; AVX512BW-NEXT:    imull $37, %eax, %ecx
709; AVX512BW-NEXT:    shrl $8, %ecx
710; AVX512BW-NEXT:    subb %cl, %al
711; AVX512BW-NEXT:    shrb %al
712; AVX512BW-NEXT:    addb %cl, %al
713; AVX512BW-NEXT:    shrb $2, %al
714; AVX512BW-NEXT:    movzbl %al, %eax
715; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
716; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
717; AVX512BW-NEXT:    imull $37, %eax, %ecx
718; AVX512BW-NEXT:    shrl $8, %ecx
719; AVX512BW-NEXT:    subb %cl, %al
720; AVX512BW-NEXT:    shrb %al
721; AVX512BW-NEXT:    addb %cl, %al
722; AVX512BW-NEXT:    shrb $2, %al
723; AVX512BW-NEXT:    movzbl %al, %eax
724; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
725; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
726; AVX512BW-NEXT:    imull $37, %eax, %ecx
727; AVX512BW-NEXT:    shrl $8, %ecx
728; AVX512BW-NEXT:    subb %cl, %al
729; AVX512BW-NEXT:    shrb %al
730; AVX512BW-NEXT:    addb %cl, %al
731; AVX512BW-NEXT:    shrb $2, %al
732; AVX512BW-NEXT:    movzbl %al, %eax
733; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
734; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
735; AVX512BW-NEXT:    imull $37, %eax, %ecx
736; AVX512BW-NEXT:    shrl $8, %ecx
737; AVX512BW-NEXT:    subb %cl, %al
738; AVX512BW-NEXT:    shrb %al
739; AVX512BW-NEXT:    addb %cl, %al
740; AVX512BW-NEXT:    shrb $2, %al
741; AVX512BW-NEXT:    movzbl %al, %eax
742; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
743; AVX512BW-NEXT:    vpextrb $1, %xmm0, %eax
744; AVX512BW-NEXT:    imull $37, %eax, %ecx
745; AVX512BW-NEXT:    shrl $8, %ecx
746; AVX512BW-NEXT:    subb %cl, %al
747; AVX512BW-NEXT:    shrb %al
748; AVX512BW-NEXT:    addb %cl, %al
749; AVX512BW-NEXT:    shrb $2, %al
750; AVX512BW-NEXT:    movzbl %al, %eax
751; AVX512BW-NEXT:    vpextrb $0, %xmm0, %ecx
752; AVX512BW-NEXT:    imull $37, %ecx, %edx
753; AVX512BW-NEXT:    shrl $8, %edx
754; AVX512BW-NEXT:    subb %dl, %cl
755; AVX512BW-NEXT:    shrb %cl
756; AVX512BW-NEXT:    addb %dl, %cl
757; AVX512BW-NEXT:    shrb $2, %cl
758; AVX512BW-NEXT:    movzbl %cl, %ecx
759; AVX512BW-NEXT:    vmovd %ecx, %xmm3
760; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
761; AVX512BW-NEXT:    vpextrb $2, %xmm0, %eax
762; AVX512BW-NEXT:    imull $37, %eax, %ecx
763; AVX512BW-NEXT:    shrl $8, %ecx
764; AVX512BW-NEXT:    subb %cl, %al
765; AVX512BW-NEXT:    shrb %al
766; AVX512BW-NEXT:    addb %cl, %al
767; AVX512BW-NEXT:    shrb $2, %al
768; AVX512BW-NEXT:    movzbl %al, %eax
769; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
770; AVX512BW-NEXT:    vpextrb $3, %xmm0, %eax
771; AVX512BW-NEXT:    imull $37, %eax, %ecx
772; AVX512BW-NEXT:    shrl $8, %ecx
773; AVX512BW-NEXT:    subb %cl, %al
774; AVX512BW-NEXT:    shrb %al
775; AVX512BW-NEXT:    addb %cl, %al
776; AVX512BW-NEXT:    shrb $2, %al
777; AVX512BW-NEXT:    movzbl %al, %eax
778; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
779; AVX512BW-NEXT:    vpextrb $4, %xmm0, %eax
780; AVX512BW-NEXT:    imull $37, %eax, %ecx
781; AVX512BW-NEXT:    shrl $8, %ecx
782; AVX512BW-NEXT:    subb %cl, %al
783; AVX512BW-NEXT:    shrb %al
784; AVX512BW-NEXT:    addb %cl, %al
785; AVX512BW-NEXT:    shrb $2, %al
786; AVX512BW-NEXT:    movzbl %al, %eax
787; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
788; AVX512BW-NEXT:    vpextrb $5, %xmm0, %eax
789; AVX512BW-NEXT:    imull $37, %eax, %ecx
790; AVX512BW-NEXT:    shrl $8, %ecx
791; AVX512BW-NEXT:    subb %cl, %al
792; AVX512BW-NEXT:    shrb %al
793; AVX512BW-NEXT:    addb %cl, %al
794; AVX512BW-NEXT:    shrb $2, %al
795; AVX512BW-NEXT:    movzbl %al, %eax
796; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
797; AVX512BW-NEXT:    vpextrb $6, %xmm0, %eax
798; AVX512BW-NEXT:    imull $37, %eax, %ecx
799; AVX512BW-NEXT:    shrl $8, %ecx
800; AVX512BW-NEXT:    subb %cl, %al
801; AVX512BW-NEXT:    shrb %al
802; AVX512BW-NEXT:    addb %cl, %al
803; AVX512BW-NEXT:    shrb $2, %al
804; AVX512BW-NEXT:    movzbl %al, %eax
805; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
806; AVX512BW-NEXT:    vpextrb $7, %xmm0, %eax
807; AVX512BW-NEXT:    imull $37, %eax, %ecx
808; AVX512BW-NEXT:    shrl $8, %ecx
809; AVX512BW-NEXT:    subb %cl, %al
810; AVX512BW-NEXT:    shrb %al
811; AVX512BW-NEXT:    addb %cl, %al
812; AVX512BW-NEXT:    shrb $2, %al
813; AVX512BW-NEXT:    movzbl %al, %eax
814; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
815; AVX512BW-NEXT:    vpextrb $8, %xmm0, %eax
816; AVX512BW-NEXT:    imull $37, %eax, %ecx
817; AVX512BW-NEXT:    shrl $8, %ecx
818; AVX512BW-NEXT:    subb %cl, %al
819; AVX512BW-NEXT:    shrb %al
820; AVX512BW-NEXT:    addb %cl, %al
821; AVX512BW-NEXT:    shrb $2, %al
822; AVX512BW-NEXT:    movzbl %al, %eax
823; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
824; AVX512BW-NEXT:    vpextrb $9, %xmm0, %eax
825; AVX512BW-NEXT:    imull $37, %eax, %ecx
826; AVX512BW-NEXT:    shrl $8, %ecx
827; AVX512BW-NEXT:    subb %cl, %al
828; AVX512BW-NEXT:    shrb %al
829; AVX512BW-NEXT:    addb %cl, %al
830; AVX512BW-NEXT:    shrb $2, %al
831; AVX512BW-NEXT:    movzbl %al, %eax
832; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
833; AVX512BW-NEXT:    vpextrb $10, %xmm0, %eax
834; AVX512BW-NEXT:    imull $37, %eax, %ecx
835; AVX512BW-NEXT:    shrl $8, %ecx
836; AVX512BW-NEXT:    subb %cl, %al
837; AVX512BW-NEXT:    shrb %al
838; AVX512BW-NEXT:    addb %cl, %al
839; AVX512BW-NEXT:    shrb $2, %al
840; AVX512BW-NEXT:    movzbl %al, %eax
841; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
842; AVX512BW-NEXT:    vpextrb $11, %xmm0, %eax
843; AVX512BW-NEXT:    imull $37, %eax, %ecx
844; AVX512BW-NEXT:    shrl $8, %ecx
845; AVX512BW-NEXT:    subb %cl, %al
846; AVX512BW-NEXT:    shrb %al
847; AVX512BW-NEXT:    addb %cl, %al
848; AVX512BW-NEXT:    shrb $2, %al
849; AVX512BW-NEXT:    movzbl %al, %eax
850; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
851; AVX512BW-NEXT:    vpextrb $12, %xmm0, %eax
852; AVX512BW-NEXT:    imull $37, %eax, %ecx
853; AVX512BW-NEXT:    shrl $8, %ecx
854; AVX512BW-NEXT:    subb %cl, %al
855; AVX512BW-NEXT:    shrb %al
856; AVX512BW-NEXT:    addb %cl, %al
857; AVX512BW-NEXT:    shrb $2, %al
858; AVX512BW-NEXT:    movzbl %al, %eax
859; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
860; AVX512BW-NEXT:    vpextrb $13, %xmm0, %eax
861; AVX512BW-NEXT:    imull $37, %eax, %ecx
862; AVX512BW-NEXT:    shrl $8, %ecx
863; AVX512BW-NEXT:    subb %cl, %al
864; AVX512BW-NEXT:    shrb %al
865; AVX512BW-NEXT:    addb %cl, %al
866; AVX512BW-NEXT:    shrb $2, %al
867; AVX512BW-NEXT:    movzbl %al, %eax
868; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
869; AVX512BW-NEXT:    vpextrb $14, %xmm0, %eax
870; AVX512BW-NEXT:    imull $37, %eax, %ecx
871; AVX512BW-NEXT:    shrl $8, %ecx
872; AVX512BW-NEXT:    subb %cl, %al
873; AVX512BW-NEXT:    shrb %al
874; AVX512BW-NEXT:    addb %cl, %al
875; AVX512BW-NEXT:    shrb $2, %al
876; AVX512BW-NEXT:    movzbl %al, %eax
877; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
878; AVX512BW-NEXT:    vpextrb $15, %xmm0, %eax
879; AVX512BW-NEXT:    imull $37, %eax, %ecx
880; AVX512BW-NEXT:    shrl $8, %ecx
881; AVX512BW-NEXT:    subb %cl, %al
882; AVX512BW-NEXT:    shrb %al
883; AVX512BW-NEXT:    addb %cl, %al
884; AVX512BW-NEXT:    shrb $2, %al
885; AVX512BW-NEXT:    movzbl %al, %eax
886; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
887; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
888; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
889; AVX512BW-NEXT:    retq
890  %res = udiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
891  ret <64 x i8> %res
892}
893
894;
895; urem by 7
896;
897
; test_rem7_8i64: unsigned remainder of every lane of an <8 x i64> vector by
; the constant 7. The expectations use the common AVX prefix, which both RUN
; lines pass to FileCheck, so the same output is required for +avx512f and
; +avx512bw.
;
; The expected code is fully scalarized. Each 64-bit lane n is moved into
; %rcx (vextracti32x4 / vpextrq / vmovq) and then:
;   mulq %rsi                -> %rdx = high 64 bits of n * 0x2492492492492493
;   sub / shr / add / shr $2 -> %rax = q = (((n - hi) >> 1) + hi) >> 2
;   leaq (,%rax,8) ; subq    -> %rdx = 8*q - q = 7*q
;   subq %rdx, %rcx          -> %rcx = n - 7*q, the remainder
; The eight results are packed back with vmovq / vpunpcklqdq, then
; vinserti128 and vinserti64x4 rebuild the 512-bit result in %zmm0.
898define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
899; AVX-LABEL: test_rem7_8i64:
900; AVX:       # BB#0:
901; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
902; AVX-NEXT:    vpextrq $1, %xmm1, %rcx
903; AVX-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
904; AVX-NEXT:    movq %rcx, %rax
905; AVX-NEXT:    mulq %rsi
906; AVX-NEXT:    movq %rcx, %rax
907; AVX-NEXT:    subq %rdx, %rax
908; AVX-NEXT:    shrq %rax
909; AVX-NEXT:    addq %rdx, %rax
910; AVX-NEXT:    shrq $2, %rax
911; AVX-NEXT:    leaq (,%rax,8), %rdx
912; AVX-NEXT:    subq %rax, %rdx
913; AVX-NEXT:    subq %rdx, %rcx
914; AVX-NEXT:    vmovq %rcx, %xmm2
915; AVX-NEXT:    vmovq %xmm1, %rcx
916; AVX-NEXT:    movq %rcx, %rax
917; AVX-NEXT:    mulq %rsi
918; AVX-NEXT:    movq %rcx, %rax
919; AVX-NEXT:    subq %rdx, %rax
920; AVX-NEXT:    shrq %rax
921; AVX-NEXT:    addq %rdx, %rax
922; AVX-NEXT:    shrq $2, %rax
923; AVX-NEXT:    leaq (,%rax,8), %rdx
924; AVX-NEXT:    subq %rax, %rdx
925; AVX-NEXT:    subq %rdx, %rcx
926; AVX-NEXT:    vmovq %rcx, %xmm1
927; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
928; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
929; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
930; AVX-NEXT:    movq %rcx, %rax
931; AVX-NEXT:    mulq %rsi
932; AVX-NEXT:    movq %rcx, %rax
933; AVX-NEXT:    subq %rdx, %rax
934; AVX-NEXT:    shrq %rax
935; AVX-NEXT:    addq %rdx, %rax
936; AVX-NEXT:    shrq $2, %rax
937; AVX-NEXT:    leaq (,%rax,8), %rdx
938; AVX-NEXT:    subq %rax, %rdx
939; AVX-NEXT:    subq %rdx, %rcx
940; AVX-NEXT:    vmovq %rcx, %xmm3
941; AVX-NEXT:    vmovq %xmm2, %rcx
942; AVX-NEXT:    movq %rcx, %rax
943; AVX-NEXT:    mulq %rsi
944; AVX-NEXT:    movq %rcx, %rax
945; AVX-NEXT:    subq %rdx, %rax
946; AVX-NEXT:    shrq %rax
947; AVX-NEXT:    addq %rdx, %rax
948; AVX-NEXT:    shrq $2, %rax
949; AVX-NEXT:    leaq (,%rax,8), %rdx
950; AVX-NEXT:    subq %rax, %rdx
951; AVX-NEXT:    subq %rdx, %rcx
952; AVX-NEXT:    vmovq %rcx, %xmm2
953; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
954; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
955; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
956; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
957; AVX-NEXT:    movq %rcx, %rax
958; AVX-NEXT:    mulq %rsi
959; AVX-NEXT:    movq %rcx, %rax
960; AVX-NEXT:    subq %rdx, %rax
961; AVX-NEXT:    shrq %rax
962; AVX-NEXT:    addq %rdx, %rax
963; AVX-NEXT:    shrq $2, %rax
964; AVX-NEXT:    leaq (,%rax,8), %rdx
965; AVX-NEXT:    subq %rax, %rdx
966; AVX-NEXT:    subq %rdx, %rcx
967; AVX-NEXT:    vmovq %rcx, %xmm3
968; AVX-NEXT:    vmovq %xmm2, %rcx
969; AVX-NEXT:    movq %rcx, %rax
970; AVX-NEXT:    mulq %rsi
971; AVX-NEXT:    movq %rcx, %rax
972; AVX-NEXT:    subq %rdx, %rax
973; AVX-NEXT:    shrq %rax
974; AVX-NEXT:    addq %rdx, %rax
975; AVX-NEXT:    shrq $2, %rax
976; AVX-NEXT:    leaq (,%rax,8), %rdx
977; AVX-NEXT:    subq %rax, %rdx
978; AVX-NEXT:    subq %rdx, %rcx
979; AVX-NEXT:    vmovq %rcx, %xmm2
980; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
981; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
982; AVX-NEXT:    movq %rcx, %rax
983; AVX-NEXT:    mulq %rsi
984; AVX-NEXT:    movq %rcx, %rax
985; AVX-NEXT:    subq %rdx, %rax
986; AVX-NEXT:    shrq %rax
987; AVX-NEXT:    addq %rdx, %rax
988; AVX-NEXT:    shrq $2, %rax
989; AVX-NEXT:    leaq (,%rax,8), %rdx
990; AVX-NEXT:    subq %rax, %rdx
991; AVX-NEXT:    subq %rdx, %rcx
992; AVX-NEXT:    vmovq %rcx, %xmm3
993; AVX-NEXT:    vmovq %xmm0, %rcx
994; AVX-NEXT:    movq %rcx, %rax
995; AVX-NEXT:    mulq %rsi
996; AVX-NEXT:    movq %rcx, %rax
997; AVX-NEXT:    subq %rdx, %rax
998; AVX-NEXT:    shrq %rax
999; AVX-NEXT:    addq %rdx, %rax
1000; AVX-NEXT:    shrq $2, %rax
1001; AVX-NEXT:    leaq (,%rax,8), %rdx
1002; AVX-NEXT:    subq %rax, %rdx
1003; AVX-NEXT:    subq %rdx, %rcx
1004; AVX-NEXT:    vmovq %rcx, %xmm0
1005; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
1006; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1007; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1008; AVX-NEXT:    retq
1009  %res = urem <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
1010  ret <8 x i64> %res
1011}
1012
; test_rem7_16i32: unsigned remainder of every lane of a <16 x i32> vector by
; the constant 7. The expectations use the common AVX prefix, which both RUN
; lines pass to FileCheck, so the same output is required for +avx512f and
; +avx512bw.
;
; The expected code is fully scalarized. Each 32-bit lane n is extracted into
; a general-purpose register (vextracti32x4 / vpextrd / vmovd) and then:
;   imulq $613566757 ; shrq $32 -> hi = high 32 bits of n * 0x24924925
;   sub / shr / add / shr $2    -> q = (((n - hi) >> 1) + hi) >> 2
;   leal (,%reg,8) ; subl       -> 8*q - q = 7*q
;   subl                        -> n - 7*q, the remainder
; Each group of four results is rebuilt with vmovd / vpinsrd, then vinserti128
; and vinserti64x4 reassemble the 512-bit result in %zmm0.
1013define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind {
1014; AVX-LABEL: test_rem7_16i32:
1015; AVX:       # BB#0:
1016; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
1017; AVX-NEXT:    vpextrd $1, %xmm1, %eax
1018; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1019; AVX-NEXT:    shrq $32, %rcx
1020; AVX-NEXT:    movl %eax, %edx
1021; AVX-NEXT:    subl %ecx, %edx
1022; AVX-NEXT:    shrl %edx
1023; AVX-NEXT:    addl %ecx, %edx
1024; AVX-NEXT:    shrl $2, %edx
1025; AVX-NEXT:    leal (,%rdx,8), %ecx
1026; AVX-NEXT:    subl %edx, %ecx
1027; AVX-NEXT:    subl %ecx, %eax
1028; AVX-NEXT:    vmovd %xmm1, %ecx
1029; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
1030; AVX-NEXT:    shrq $32, %rdx
1031; AVX-NEXT:    movl %ecx, %esi
1032; AVX-NEXT:    subl %edx, %esi
1033; AVX-NEXT:    shrl %esi
1034; AVX-NEXT:    addl %edx, %esi
1035; AVX-NEXT:    shrl $2, %esi
1036; AVX-NEXT:    leal (,%rsi,8), %edx
1037; AVX-NEXT:    subl %esi, %edx
1038; AVX-NEXT:    subl %edx, %ecx
1039; AVX-NEXT:    vmovd %ecx, %xmm2
1040; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
1041; AVX-NEXT:    vpextrd $2, %xmm1, %eax
1042; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1043; AVX-NEXT:    shrq $32, %rcx
1044; AVX-NEXT:    movl %eax, %edx
1045; AVX-NEXT:    subl %ecx, %edx
1046; AVX-NEXT:    shrl %edx
1047; AVX-NEXT:    addl %ecx, %edx
1048; AVX-NEXT:    shrl $2, %edx
1049; AVX-NEXT:    leal (,%rdx,8), %ecx
1050; AVX-NEXT:    subl %edx, %ecx
1051; AVX-NEXT:    subl %ecx, %eax
1052; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
1053; AVX-NEXT:    vpextrd $3, %xmm1, %eax
1054; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1055; AVX-NEXT:    shrq $32, %rcx
1056; AVX-NEXT:    movl %eax, %edx
1057; AVX-NEXT:    subl %ecx, %edx
1058; AVX-NEXT:    shrl %edx
1059; AVX-NEXT:    addl %ecx, %edx
1060; AVX-NEXT:    shrl $2, %edx
1061; AVX-NEXT:    leal (,%rdx,8), %ecx
1062; AVX-NEXT:    subl %edx, %ecx
1063; AVX-NEXT:    subl %ecx, %eax
1064; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
1065; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
1066; AVX-NEXT:    vpextrd $1, %xmm2, %eax
1067; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1068; AVX-NEXT:    shrq $32, %rcx
1069; AVX-NEXT:    movl %eax, %edx
1070; AVX-NEXT:    subl %ecx, %edx
1071; AVX-NEXT:    shrl %edx
1072; AVX-NEXT:    addl %ecx, %edx
1073; AVX-NEXT:    shrl $2, %edx
1074; AVX-NEXT:    leal (,%rdx,8), %ecx
1075; AVX-NEXT:    subl %edx, %ecx
1076; AVX-NEXT:    subl %ecx, %eax
1077; AVX-NEXT:    vmovd %xmm2, %ecx
1078; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
1079; AVX-NEXT:    shrq $32, %rdx
1080; AVX-NEXT:    movl %ecx, %esi
1081; AVX-NEXT:    subl %edx, %esi
1082; AVX-NEXT:    shrl %esi
1083; AVX-NEXT:    addl %edx, %esi
1084; AVX-NEXT:    shrl $2, %esi
1085; AVX-NEXT:    leal (,%rsi,8), %edx
1086; AVX-NEXT:    subl %esi, %edx
1087; AVX-NEXT:    subl %edx, %ecx
1088; AVX-NEXT:    vmovd %ecx, %xmm3
1089; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
1090; AVX-NEXT:    vpextrd $2, %xmm2, %eax
1091; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1092; AVX-NEXT:    shrq $32, %rcx
1093; AVX-NEXT:    movl %eax, %edx
1094; AVX-NEXT:    subl %ecx, %edx
1095; AVX-NEXT:    shrl %edx
1096; AVX-NEXT:    addl %ecx, %edx
1097; AVX-NEXT:    shrl $2, %edx
1098; AVX-NEXT:    leal (,%rdx,8), %ecx
1099; AVX-NEXT:    subl %edx, %ecx
1100; AVX-NEXT:    subl %ecx, %eax
1101; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
1102; AVX-NEXT:    vpextrd $3, %xmm2, %eax
1103; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1104; AVX-NEXT:    shrq $32, %rcx
1105; AVX-NEXT:    movl %eax, %edx
1106; AVX-NEXT:    subl %ecx, %edx
1107; AVX-NEXT:    shrl %edx
1108; AVX-NEXT:    addl %ecx, %edx
1109; AVX-NEXT:    shrl $2, %edx
1110; AVX-NEXT:    leal (,%rdx,8), %ecx
1111; AVX-NEXT:    subl %edx, %ecx
1112; AVX-NEXT:    subl %ecx, %eax
1113; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
1114; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
1115; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
1116; AVX-NEXT:    vpextrd $1, %xmm2, %eax
1117; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1118; AVX-NEXT:    shrq $32, %rcx
1119; AVX-NEXT:    movl %eax, %edx
1120; AVX-NEXT:    subl %ecx, %edx
1121; AVX-NEXT:    shrl %edx
1122; AVX-NEXT:    addl %ecx, %edx
1123; AVX-NEXT:    shrl $2, %edx
1124; AVX-NEXT:    leal (,%rdx,8), %ecx
1125; AVX-NEXT:    subl %edx, %ecx
1126; AVX-NEXT:    subl %ecx, %eax
1127; AVX-NEXT:    vmovd %xmm2, %ecx
1128; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
1129; AVX-NEXT:    shrq $32, %rdx
1130; AVX-NEXT:    movl %ecx, %esi
1131; AVX-NEXT:    subl %edx, %esi
1132; AVX-NEXT:    shrl %esi
1133; AVX-NEXT:    addl %edx, %esi
1134; AVX-NEXT:    shrl $2, %esi
1135; AVX-NEXT:    leal (,%rsi,8), %edx
1136; AVX-NEXT:    subl %esi, %edx
1137; AVX-NEXT:    subl %edx, %ecx
1138; AVX-NEXT:    vmovd %ecx, %xmm3
1139; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
1140; AVX-NEXT:    vpextrd $2, %xmm2, %eax
1141; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1142; AVX-NEXT:    shrq $32, %rcx
1143; AVX-NEXT:    movl %eax, %edx
1144; AVX-NEXT:    subl %ecx, %edx
1145; AVX-NEXT:    shrl %edx
1146; AVX-NEXT:    addl %ecx, %edx
1147; AVX-NEXT:    shrl $2, %edx
1148; AVX-NEXT:    leal (,%rdx,8), %ecx
1149; AVX-NEXT:    subl %edx, %ecx
1150; AVX-NEXT:    subl %ecx, %eax
1151; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
1152; AVX-NEXT:    vpextrd $3, %xmm2, %eax
1153; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1154; AVX-NEXT:    shrq $32, %rcx
1155; AVX-NEXT:    movl %eax, %edx
1156; AVX-NEXT:    subl %ecx, %edx
1157; AVX-NEXT:    shrl %edx
1158; AVX-NEXT:    addl %ecx, %edx
1159; AVX-NEXT:    shrl $2, %edx
1160; AVX-NEXT:    leal (,%rdx,8), %ecx
1161; AVX-NEXT:    subl %edx, %ecx
1162; AVX-NEXT:    subl %ecx, %eax
1163; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
1164; AVX-NEXT:    vpextrd $1, %xmm0, %eax
1165; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1166; AVX-NEXT:    shrq $32, %rcx
1167; AVX-NEXT:    movl %eax, %edx
1168; AVX-NEXT:    subl %ecx, %edx
1169; AVX-NEXT:    shrl %edx
1170; AVX-NEXT:    addl %ecx, %edx
1171; AVX-NEXT:    shrl $2, %edx
1172; AVX-NEXT:    leal (,%rdx,8), %ecx
1173; AVX-NEXT:    subl %edx, %ecx
1174; AVX-NEXT:    subl %ecx, %eax
1175; AVX-NEXT:    vmovd %xmm0, %ecx
1176; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
1177; AVX-NEXT:    shrq $32, %rdx
1178; AVX-NEXT:    movl %ecx, %esi
1179; AVX-NEXT:    subl %edx, %esi
1180; AVX-NEXT:    shrl %esi
1181; AVX-NEXT:    addl %edx, %esi
1182; AVX-NEXT:    shrl $2, %esi
1183; AVX-NEXT:    leal (,%rsi,8), %edx
1184; AVX-NEXT:    subl %esi, %edx
1185; AVX-NEXT:    subl %edx, %ecx
1186; AVX-NEXT:    vmovd %ecx, %xmm3
1187; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
1188; AVX-NEXT:    vpextrd $2, %xmm0, %eax
1189; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1190; AVX-NEXT:    shrq $32, %rcx
1191; AVX-NEXT:    movl %eax, %edx
1192; AVX-NEXT:    subl %ecx, %edx
1193; AVX-NEXT:    shrl %edx
1194; AVX-NEXT:    addl %ecx, %edx
1195; AVX-NEXT:    shrl $2, %edx
1196; AVX-NEXT:    leal (,%rdx,8), %ecx
1197; AVX-NEXT:    subl %edx, %ecx
1198; AVX-NEXT:    subl %ecx, %eax
1199; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
1200; AVX-NEXT:    vpextrd $3, %xmm0, %eax
1201; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
1202; AVX-NEXT:    shrq $32, %rcx
1203; AVX-NEXT:    movl %eax, %edx
1204; AVX-NEXT:    subl %ecx, %edx
1205; AVX-NEXT:    shrl %edx
1206; AVX-NEXT:    addl %ecx, %edx
1207; AVX-NEXT:    shrl $2, %edx
1208; AVX-NEXT:    leal (,%rdx,8), %ecx
1209; AVX-NEXT:    subl %edx, %ecx
1210; AVX-NEXT:    subl %ecx, %eax
1211; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm0
1212; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1213; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1214; AVX-NEXT:    retq
1215  %res = urem <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
1216  ret <16 x i32> %res
1217}
1218
; test_rem7_32i16: unsigned remainder of every lane of a <32 x i16> vector by
; the constant 7. Unlike the i64/i32 cases, the expected code stays in vector
; registers, and the two feature sets are checked under separate prefixes.
;
; AVX512F: the value is processed as two 256-bit halves (%ymm0 and %ymm1)
; that are never recombined. For each half:
;   vpmulhuw by splat(9363)    -> hi = high 16 bits of n * 9363
;   vpsubw / vpsrlw $1 / vpaddw / vpsrlw $2
;                              -> q = (((n - hi) >> 1) + hi) >> 2
;   vpmullw by splat(7)        -> 7*q
;   vpsubw                     -> n - 7*q, the remainder
; The splat(9363) and splat(7) constants are loaded once and reused for the
; second half.
;
; AVX512BW: the same sequence runs once on the full 512-bit %zmm0. Both
; multipliers are memory operands matched by {{.*}}(%rip), so the checks do
; not pin the constant-pool label.
1219define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind {
1220; AVX512F-LABEL: test_rem7_32i16:
1221; AVX512F:       # BB#0:
1222; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363]
1223; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm3
1224; AVX512F-NEXT:    vpsubw %ymm3, %ymm0, %ymm4
1225; AVX512F-NEXT:    vpsrlw $1, %ymm4, %ymm4
1226; AVX512F-NEXT:    vpaddw %ymm3, %ymm4, %ymm3
1227; AVX512F-NEXT:    vpsrlw $2, %ymm3, %ymm3
1228; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1229; AVX512F-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
1230; AVX512F-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
1231; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm2
1232; AVX512F-NEXT:    vpsubw %ymm2, %ymm1, %ymm3
1233; AVX512F-NEXT:    vpsrlw $1, %ymm3, %ymm3
1234; AVX512F-NEXT:    vpaddw %ymm2, %ymm3, %ymm2
1235; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm2
1236; AVX512F-NEXT:    vpmullw %ymm4, %ymm2, %ymm2
1237; AVX512F-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
1238; AVX512F-NEXT:    retq
1239;
1240; AVX512BW-LABEL: test_rem7_32i16:
1241; AVX512BW:       # BB#0:
1242; AVX512BW-NEXT:    vpmulhuw {{.*}}(%rip), %zmm0, %zmm1
1243; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm2
1244; AVX512BW-NEXT:    vpsrlw $1, %zmm2, %zmm2
1245; AVX512BW-NEXT:    vpaddw %zmm1, %zmm2, %zmm1
1246; AVX512BW-NEXT:    vpsrlw $2, %zmm1, %zmm1
1247; AVX512BW-NEXT:    vpmullw {{.*}}(%rip), %zmm1, %zmm1
1248; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
1249; AVX512BW-NEXT:    retq
1250  %res = urem <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1251  ret <32 x i16> %res
1252}
1253
1254define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
1255; AVX512F-LABEL: test_rem7_64i8:
1256; AVX512F:       # BB#0:
1257; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
1258; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm2
1259; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
1260; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
1261; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
1262; AVX512F-NEXT:    vpmullw %ymm2, %ymm4, %ymm4
1263; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm5
1264; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
1265; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1266; AVX512F-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
1267; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3
1268; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm6 = ymm3[2,3],ymm5[2,3]
1269; AVX512F-NEXT:    vinserti128 $1, %xmm5, %ymm3, %ymm3
1270; AVX512F-NEXT:    vpackuswb %ymm6, %ymm3, %ymm3
1271; AVX512F-NEXT:    vpsubb %ymm3, %ymm0, %ymm5
1272; AVX512F-NEXT:    vpsrlw $1, %ymm5, %ymm6
1273; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1274; AVX512F-NEXT:    vpand %ymm5, %ymm6, %ymm6
1275; AVX512F-NEXT:    vpaddb %ymm3, %ymm6, %ymm3
1276; AVX512F-NEXT:    vpsrlw $2, %ymm3, %ymm3
1277; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
1278; AVX512F-NEXT:    vpand %ymm6, %ymm3, %ymm7
1279; AVX512F-NEXT:    vpmovsxbw %xmm7, %ymm8
1280; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
1281; AVX512F-NEXT:    vpmullw %ymm3, %ymm8, %ymm8
1282; AVX512F-NEXT:    vpmovsxwd %ymm8, %zmm8
1283; AVX512F-NEXT:    vpmovdb %zmm8, %xmm8
1284; AVX512F-NEXT:    vextracti128 $1, %ymm7, %xmm7
1285; AVX512F-NEXT:    vpmovsxbw %xmm7, %ymm7
1286; AVX512F-NEXT:    vpmullw %ymm3, %ymm7, %ymm7
1287; AVX512F-NEXT:    vpmovsxwd %ymm7, %zmm7
1288; AVX512F-NEXT:    vpmovdb %zmm7, %xmm7
1289; AVX512F-NEXT:    vinserti128 $1, %xmm7, %ymm8, %ymm7
1290; AVX512F-NEXT:    vpsubb %ymm7, %ymm0, %ymm0
1291; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm7
1292; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
1293; AVX512F-NEXT:    vpmullw %ymm2, %ymm7, %ymm2
1294; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
1295; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm7 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1296; AVX512F-NEXT:    vpmullw %ymm4, %ymm7, %ymm4
1297; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm4
1298; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm7 = ymm4[2,3],ymm2[2,3]
1299; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm4, %ymm2
1300; AVX512F-NEXT:    vpackuswb %ymm7, %ymm2, %ymm2
1301; AVX512F-NEXT:    vpsubb %ymm2, %ymm1, %ymm4
1302; AVX512F-NEXT:    vpsrlw $1, %ymm4, %ymm4
1303; AVX512F-NEXT:    vpand %ymm5, %ymm4, %ymm4
1304; AVX512F-NEXT:    vpaddb %ymm2, %ymm4, %ymm2
1305; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm2
1306; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
1307; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm4
1308; AVX512F-NEXT:    vpmullw %ymm3, %ymm4, %ymm4
1309; AVX512F-NEXT:    vpmovsxwd %ymm4, %zmm4
1310; AVX512F-NEXT:    vpmovdb %zmm4, %xmm4
1311; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
1312; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
1313; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
1314; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
1315; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
1316; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm4, %ymm2
1317; AVX512F-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
1318; AVX512F-NEXT:    retq
1319;
1320; AVX512BW-LABEL: test_rem7_64i8:
1321; AVX512BW:       # BB#0:
1322; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
1323; AVX512BW-NEXT:    vpextrb $1, %xmm1, %edx
1324; AVX512BW-NEXT:    imull $37, %edx, %ecx
1325; AVX512BW-NEXT:    shrl $8, %ecx
1326; AVX512BW-NEXT:    movl %edx, %eax
1327; AVX512BW-NEXT:    subb %cl, %al
1328; AVX512BW-NEXT:    shrb %al
1329; AVX512BW-NEXT:    addb %cl, %al
1330; AVX512BW-NEXT:    shrb $2, %al
1331; AVX512BW-NEXT:    movb $7, %cl
1332; AVX512BW-NEXT:    mulb %cl
1333; AVX512BW-NEXT:    subb %al, %dl
1334; AVX512BW-NEXT:    movzbl %dl, %edx
1335; AVX512BW-NEXT:    vpextrb $0, %xmm1, %esi
1336; AVX512BW-NEXT:    imull $37, %esi, %edi
1337; AVX512BW-NEXT:    shrl $8, %edi
1338; AVX512BW-NEXT:    movl %esi, %eax
1339; AVX512BW-NEXT:    subb %dil, %al
1340; AVX512BW-NEXT:    shrb %al
1341; AVX512BW-NEXT:    addb %dil, %al
1342; AVX512BW-NEXT:    shrb $2, %al
1343; AVX512BW-NEXT:    mulb %cl
1344; AVX512BW-NEXT:    subb %al, %sil
1345; AVX512BW-NEXT:    movzbl %sil, %eax
1346; AVX512BW-NEXT:    vmovd %eax, %xmm2
1347; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
1348; AVX512BW-NEXT:    vpextrb $2, %xmm1, %edx
1349; AVX512BW-NEXT:    imull $37, %edx, %esi
1350; AVX512BW-NEXT:    shrl $8, %esi
1351; AVX512BW-NEXT:    movl %edx, %eax
1352; AVX512BW-NEXT:    subb %sil, %al
1353; AVX512BW-NEXT:    shrb %al
1354; AVX512BW-NEXT:    addb %sil, %al
1355; AVX512BW-NEXT:    shrb $2, %al
1356; AVX512BW-NEXT:    mulb %cl
1357; AVX512BW-NEXT:    subb %al, %dl
1358; AVX512BW-NEXT:    movzbl %dl, %eax
1359; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
1360; AVX512BW-NEXT:    vpextrb $3, %xmm1, %edx
1361; AVX512BW-NEXT:    imull $37, %edx, %esi
1362; AVX512BW-NEXT:    shrl $8, %esi
1363; AVX512BW-NEXT:    movl %edx, %eax
1364; AVX512BW-NEXT:    subb %sil, %al
1365; AVX512BW-NEXT:    shrb %al
1366; AVX512BW-NEXT:    addb %sil, %al
1367; AVX512BW-NEXT:    shrb $2, %al
1368; AVX512BW-NEXT:    mulb %cl
1369; AVX512BW-NEXT:    subb %al, %dl
1370; AVX512BW-NEXT:    movzbl %dl, %eax
1371; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
1372; AVX512BW-NEXT:    vpextrb $4, %xmm1, %edx
1373; AVX512BW-NEXT:    imull $37, %edx, %esi
1374; AVX512BW-NEXT:    shrl $8, %esi
1375; AVX512BW-NEXT:    movl %edx, %eax
1376; AVX512BW-NEXT:    subb %sil, %al
1377; AVX512BW-NEXT:    shrb %al
1378; AVX512BW-NEXT:    addb %sil, %al
1379; AVX512BW-NEXT:    shrb $2, %al
1380; AVX512BW-NEXT:    mulb %cl
1381; AVX512BW-NEXT:    subb %al, %dl
1382; AVX512BW-NEXT:    movzbl %dl, %eax
1383; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
1384; AVX512BW-NEXT:    vpextrb $5, %xmm1, %edx
1385; AVX512BW-NEXT:    imull $37, %edx, %esi
1386; AVX512BW-NEXT:    shrl $8, %esi
1387; AVX512BW-NEXT:    movl %edx, %eax
1388; AVX512BW-NEXT:    subb %sil, %al
1389; AVX512BW-NEXT:    shrb %al
1390; AVX512BW-NEXT:    addb %sil, %al
1391; AVX512BW-NEXT:    shrb $2, %al
1392; AVX512BW-NEXT:    mulb %cl
1393; AVX512BW-NEXT:    subb %al, %dl
1394; AVX512BW-NEXT:    movzbl %dl, %eax
1395; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
1396; AVX512BW-NEXT:    vpextrb $6, %xmm1, %edx
1397; AVX512BW-NEXT:    imull $37, %edx, %esi
1398; AVX512BW-NEXT:    shrl $8, %esi
1399; AVX512BW-NEXT:    movl %edx, %eax
1400; AVX512BW-NEXT:    subb %sil, %al
1401; AVX512BW-NEXT:    shrb %al
1402; AVX512BW-NEXT:    addb %sil, %al
1403; AVX512BW-NEXT:    shrb $2, %al
1404; AVX512BW-NEXT:    mulb %cl
1405; AVX512BW-NEXT:    subb %al, %dl
1406; AVX512BW-NEXT:    movzbl %dl, %eax
1407; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
1408; AVX512BW-NEXT:    vpextrb $7, %xmm1, %edx
1409; AVX512BW-NEXT:    imull $37, %edx, %esi
1410; AVX512BW-NEXT:    shrl $8, %esi
1411; AVX512BW-NEXT:    movl %edx, %eax
1412; AVX512BW-NEXT:    subb %sil, %al
1413; AVX512BW-NEXT:    shrb %al
1414; AVX512BW-NEXT:    addb %sil, %al
1415; AVX512BW-NEXT:    shrb $2, %al
1416; AVX512BW-NEXT:    mulb %cl
1417; AVX512BW-NEXT:    subb %al, %dl
1418; AVX512BW-NEXT:    movzbl %dl, %eax
1419; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
1420; AVX512BW-NEXT:    vpextrb $8, %xmm1, %edx
1421; AVX512BW-NEXT:    imull $37, %edx, %esi
1422; AVX512BW-NEXT:    shrl $8, %esi
1423; AVX512BW-NEXT:    movl %edx, %eax
1424; AVX512BW-NEXT:    subb %sil, %al
1425; AVX512BW-NEXT:    shrb %al
1426; AVX512BW-NEXT:    addb %sil, %al
1427; AVX512BW-NEXT:    shrb $2, %al
1428; AVX512BW-NEXT:    mulb %cl
1429; AVX512BW-NEXT:    subb %al, %dl
1430; AVX512BW-NEXT:    movzbl %dl, %eax
1431; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
1432; AVX512BW-NEXT:    vpextrb $9, %xmm1, %edx
1433; AVX512BW-NEXT:    imull $37, %edx, %esi
1434; AVX512BW-NEXT:    shrl $8, %esi
1435; AVX512BW-NEXT:    movl %edx, %eax
1436; AVX512BW-NEXT:    subb %sil, %al
1437; AVX512BW-NEXT:    shrb %al
1438; AVX512BW-NEXT:    addb %sil, %al
1439; AVX512BW-NEXT:    shrb $2, %al
1440; AVX512BW-NEXT:    mulb %cl
1441; AVX512BW-NEXT:    subb %al, %dl
1442; AVX512BW-NEXT:    movzbl %dl, %eax
1443; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
1444; AVX512BW-NEXT:    vpextrb $10, %xmm1, %edx
1445; AVX512BW-NEXT:    imull $37, %edx, %esi
1446; AVX512BW-NEXT:    shrl $8, %esi
1447; AVX512BW-NEXT:    movl %edx, %eax
1448; AVX512BW-NEXT:    subb %sil, %al
1449; AVX512BW-NEXT:    shrb %al
1450; AVX512BW-NEXT:    addb %sil, %al
1451; AVX512BW-NEXT:    shrb $2, %al
1452; AVX512BW-NEXT:    mulb %cl
1453; AVX512BW-NEXT:    subb %al, %dl
1454; AVX512BW-NEXT:    movzbl %dl, %eax
1455; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
1456; AVX512BW-NEXT:    vpextrb $11, %xmm1, %edx
1457; AVX512BW-NEXT:    imull $37, %edx, %esi
1458; AVX512BW-NEXT:    shrl $8, %esi
1459; AVX512BW-NEXT:    movl %edx, %eax
1460; AVX512BW-NEXT:    subb %sil, %al
1461; AVX512BW-NEXT:    shrb %al
1462; AVX512BW-NEXT:    addb %sil, %al
1463; AVX512BW-NEXT:    shrb $2, %al
1464; AVX512BW-NEXT:    mulb %cl
1465; AVX512BW-NEXT:    subb %al, %dl
1466; AVX512BW-NEXT:    movzbl %dl, %eax
1467; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
1468; AVX512BW-NEXT:    vpextrb $12, %xmm1, %edx
1469; AVX512BW-NEXT:    imull $37, %edx, %esi
1470; AVX512BW-NEXT:    shrl $8, %esi
1471; AVX512BW-NEXT:    movl %edx, %eax
1472; AVX512BW-NEXT:    subb %sil, %al
1473; AVX512BW-NEXT:    shrb %al
1474; AVX512BW-NEXT:    addb %sil, %al
1475; AVX512BW-NEXT:    shrb $2, %al
1476; AVX512BW-NEXT:    mulb %cl
1477; AVX512BW-NEXT:    subb %al, %dl
1478; AVX512BW-NEXT:    movzbl %dl, %eax
1479; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
1480; AVX512BW-NEXT:    vpextrb $13, %xmm1, %edx
1481; AVX512BW-NEXT:    imull $37, %edx, %esi
1482; AVX512BW-NEXT:    shrl $8, %esi
1483; AVX512BW-NEXT:    movl %edx, %eax
1484; AVX512BW-NEXT:    subb %sil, %al
1485; AVX512BW-NEXT:    shrb %al
1486; AVX512BW-NEXT:    addb %sil, %al
1487; AVX512BW-NEXT:    shrb $2, %al
1488; AVX512BW-NEXT:    mulb %cl
1489; AVX512BW-NEXT:    subb %al, %dl
1490; AVX512BW-NEXT:    movzbl %dl, %eax
1491; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
1492; AVX512BW-NEXT:    vpextrb $14, %xmm1, %edx
1493; AVX512BW-NEXT:    imull $37, %edx, %esi
1494; AVX512BW-NEXT:    shrl $8, %esi
1495; AVX512BW-NEXT:    movl %edx, %eax
1496; AVX512BW-NEXT:    subb %sil, %al
1497; AVX512BW-NEXT:    shrb %al
1498; AVX512BW-NEXT:    addb %sil, %al
1499; AVX512BW-NEXT:    shrb $2, %al
1500; AVX512BW-NEXT:    mulb %cl
1501; AVX512BW-NEXT:    subb %al, %dl
1502; AVX512BW-NEXT:    movzbl %dl, %eax
1503; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
1504; AVX512BW-NEXT:    vpextrb $15, %xmm1, %edx
1505; AVX512BW-NEXT:    imull $37, %edx, %esi
1506; AVX512BW-NEXT:    shrl $8, %esi
1507; AVX512BW-NEXT:    movl %edx, %eax
1508; AVX512BW-NEXT:    subb %sil, %al
1509; AVX512BW-NEXT:    shrb %al
1510; AVX512BW-NEXT:    addb %sil, %al
1511; AVX512BW-NEXT:    shrb $2, %al
1512; AVX512BW-NEXT:    mulb %cl
1513; AVX512BW-NEXT:    subb %al, %dl
1514; AVX512BW-NEXT:    movzbl %dl, %eax
1515; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
1516; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
1517; AVX512BW-NEXT:    vpextrb $1, %xmm2, %edx
1518; AVX512BW-NEXT:    imull $37, %edx, %esi
1519; AVX512BW-NEXT:    shrl $8, %esi
1520; AVX512BW-NEXT:    movl %edx, %eax
1521; AVX512BW-NEXT:    subb %sil, %al
1522; AVX512BW-NEXT:    shrb %al
1523; AVX512BW-NEXT:    addb %sil, %al
1524; AVX512BW-NEXT:    shrb $2, %al
1525; AVX512BW-NEXT:    mulb %cl
1526; AVX512BW-NEXT:    subb %al, %dl
1527; AVX512BW-NEXT:    movzbl %dl, %edx
1528; AVX512BW-NEXT:    vpextrb $0, %xmm2, %esi
1529; AVX512BW-NEXT:    imull $37, %esi, %edi
1530; AVX512BW-NEXT:    shrl $8, %edi
1531; AVX512BW-NEXT:    movl %esi, %eax
1532; AVX512BW-NEXT:    subb %dil, %al
1533; AVX512BW-NEXT:    shrb %al
1534; AVX512BW-NEXT:    addb %dil, %al
1535; AVX512BW-NEXT:    shrb $2, %al
1536; AVX512BW-NEXT:    mulb %cl
1537; AVX512BW-NEXT:    subb %al, %sil
1538; AVX512BW-NEXT:    movzbl %sil, %eax
1539; AVX512BW-NEXT:    vmovd %eax, %xmm3
1540; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
1541; AVX512BW-NEXT:    vpextrb $2, %xmm2, %edx
1542; AVX512BW-NEXT:    imull $37, %edx, %esi
1543; AVX512BW-NEXT:    shrl $8, %esi
1544; AVX512BW-NEXT:    movl %edx, %eax
1545; AVX512BW-NEXT:    subb %sil, %al
1546; AVX512BW-NEXT:    shrb %al
1547; AVX512BW-NEXT:    addb %sil, %al
1548; AVX512BW-NEXT:    shrb $2, %al
1549; AVX512BW-NEXT:    mulb %cl
1550; AVX512BW-NEXT:    subb %al, %dl
1551; AVX512BW-NEXT:    movzbl %dl, %eax
1552; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
1553; AVX512BW-NEXT:    vpextrb $3, %xmm2, %edx
1554; AVX512BW-NEXT:    imull $37, %edx, %esi
1555; AVX512BW-NEXT:    shrl $8, %esi
1556; AVX512BW-NEXT:    movl %edx, %eax
1557; AVX512BW-NEXT:    subb %sil, %al
1558; AVX512BW-NEXT:    shrb %al
1559; AVX512BW-NEXT:    addb %sil, %al
1560; AVX512BW-NEXT:    shrb $2, %al
1561; AVX512BW-NEXT:    mulb %cl
1562; AVX512BW-NEXT:    subb %al, %dl
1563; AVX512BW-NEXT:    movzbl %dl, %eax
1564; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
1565; AVX512BW-NEXT:    vpextrb $4, %xmm2, %edx
1566; AVX512BW-NEXT:    imull $37, %edx, %esi
1567; AVX512BW-NEXT:    shrl $8, %esi
1568; AVX512BW-NEXT:    movl %edx, %eax
1569; AVX512BW-NEXT:    subb %sil, %al
1570; AVX512BW-NEXT:    shrb %al
1571; AVX512BW-NEXT:    addb %sil, %al
1572; AVX512BW-NEXT:    shrb $2, %al
1573; AVX512BW-NEXT:    mulb %cl
1574; AVX512BW-NEXT:    subb %al, %dl
1575; AVX512BW-NEXT:    movzbl %dl, %eax
1576; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
1577; AVX512BW-NEXT:    vpextrb $5, %xmm2, %edx
1578; AVX512BW-NEXT:    imull $37, %edx, %esi
1579; AVX512BW-NEXT:    shrl $8, %esi
1580; AVX512BW-NEXT:    movl %edx, %eax
1581; AVX512BW-NEXT:    subb %sil, %al
1582; AVX512BW-NEXT:    shrb %al
1583; AVX512BW-NEXT:    addb %sil, %al
1584; AVX512BW-NEXT:    shrb $2, %al
1585; AVX512BW-NEXT:    mulb %cl
1586; AVX512BW-NEXT:    subb %al, %dl
1587; AVX512BW-NEXT:    movzbl %dl, %eax
1588; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
1589; AVX512BW-NEXT:    vpextrb $6, %xmm2, %edx
1590; AVX512BW-NEXT:    imull $37, %edx, %esi
1591; AVX512BW-NEXT:    shrl $8, %esi
1592; AVX512BW-NEXT:    movl %edx, %eax
1593; AVX512BW-NEXT:    subb %sil, %al
1594; AVX512BW-NEXT:    shrb %al
1595; AVX512BW-NEXT:    addb %sil, %al
1596; AVX512BW-NEXT:    shrb $2, %al
1597; AVX512BW-NEXT:    mulb %cl
1598; AVX512BW-NEXT:    subb %al, %dl
1599; AVX512BW-NEXT:    movzbl %dl, %eax
1600; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
1601; AVX512BW-NEXT:    vpextrb $7, %xmm2, %edx
1602; AVX512BW-NEXT:    imull $37, %edx, %esi
1603; AVX512BW-NEXT:    shrl $8, %esi
1604; AVX512BW-NEXT:    movl %edx, %eax
1605; AVX512BW-NEXT:    subb %sil, %al
1606; AVX512BW-NEXT:    shrb %al
1607; AVX512BW-NEXT:    addb %sil, %al
1608; AVX512BW-NEXT:    shrb $2, %al
1609; AVX512BW-NEXT:    mulb %cl
1610; AVX512BW-NEXT:    subb %al, %dl
1611; AVX512BW-NEXT:    movzbl %dl, %eax
1612; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
1613; AVX512BW-NEXT:    vpextrb $8, %xmm2, %edx
1614; AVX512BW-NEXT:    imull $37, %edx, %esi
1615; AVX512BW-NEXT:    shrl $8, %esi
1616; AVX512BW-NEXT:    movl %edx, %eax
1617; AVX512BW-NEXT:    subb %sil, %al
1618; AVX512BW-NEXT:    shrb %al
1619; AVX512BW-NEXT:    addb %sil, %al
1620; AVX512BW-NEXT:    shrb $2, %al
1621; AVX512BW-NEXT:    mulb %cl
1622; AVX512BW-NEXT:    subb %al, %dl
1623; AVX512BW-NEXT:    movzbl %dl, %eax
1624; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
1625; AVX512BW-NEXT:    vpextrb $9, %xmm2, %edx
1626; AVX512BW-NEXT:    imull $37, %edx, %esi
1627; AVX512BW-NEXT:    shrl $8, %esi
1628; AVX512BW-NEXT:    movl %edx, %eax
1629; AVX512BW-NEXT:    subb %sil, %al
1630; AVX512BW-NEXT:    shrb %al
1631; AVX512BW-NEXT:    addb %sil, %al
1632; AVX512BW-NEXT:    shrb $2, %al
1633; AVX512BW-NEXT:    mulb %cl
1634; AVX512BW-NEXT:    subb %al, %dl
1635; AVX512BW-NEXT:    movzbl %dl, %eax
1636; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
1637; AVX512BW-NEXT:    vpextrb $10, %xmm2, %edx
1638; AVX512BW-NEXT:    imull $37, %edx, %esi
1639; AVX512BW-NEXT:    shrl $8, %esi
1640; AVX512BW-NEXT:    movl %edx, %eax
1641; AVX512BW-NEXT:    subb %sil, %al
1642; AVX512BW-NEXT:    shrb %al
1643; AVX512BW-NEXT:    addb %sil, %al
1644; AVX512BW-NEXT:    shrb $2, %al
1645; AVX512BW-NEXT:    mulb %cl
1646; AVX512BW-NEXT:    subb %al, %dl
1647; AVX512BW-NEXT:    movzbl %dl, %eax
1648; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
1649; AVX512BW-NEXT:    vpextrb $11, %xmm2, %edx
1650; AVX512BW-NEXT:    imull $37, %edx, %esi
1651; AVX512BW-NEXT:    shrl $8, %esi
1652; AVX512BW-NEXT:    movl %edx, %eax
1653; AVX512BW-NEXT:    subb %sil, %al
1654; AVX512BW-NEXT:    shrb %al
1655; AVX512BW-NEXT:    addb %sil, %al
1656; AVX512BW-NEXT:    shrb $2, %al
1657; AVX512BW-NEXT:    mulb %cl
1658; AVX512BW-NEXT:    subb %al, %dl
1659; AVX512BW-NEXT:    movzbl %dl, %eax
1660; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
1661; AVX512BW-NEXT:    vpextrb $12, %xmm2, %edx
1662; AVX512BW-NEXT:    imull $37, %edx, %esi
1663; AVX512BW-NEXT:    shrl $8, %esi
1664; AVX512BW-NEXT:    movl %edx, %eax
1665; AVX512BW-NEXT:    subb %sil, %al
1666; AVX512BW-NEXT:    shrb %al
1667; AVX512BW-NEXT:    addb %sil, %al
1668; AVX512BW-NEXT:    shrb $2, %al
1669; AVX512BW-NEXT:    mulb %cl
1670; AVX512BW-NEXT:    subb %al, %dl
1671; AVX512BW-NEXT:    movzbl %dl, %eax
1672; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
1673; AVX512BW-NEXT:    vpextrb $13, %xmm2, %edx
1674; AVX512BW-NEXT:    imull $37, %edx, %esi
1675; AVX512BW-NEXT:    shrl $8, %esi
1676; AVX512BW-NEXT:    movl %edx, %eax
1677; AVX512BW-NEXT:    subb %sil, %al
1678; AVX512BW-NEXT:    shrb %al
1679; AVX512BW-NEXT:    addb %sil, %al
1680; AVX512BW-NEXT:    shrb $2, %al
1681; AVX512BW-NEXT:    mulb %cl
1682; AVX512BW-NEXT:    subb %al, %dl
1683; AVX512BW-NEXT:    movzbl %dl, %eax
1684; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
1685; AVX512BW-NEXT:    vpextrb $14, %xmm2, %edx
1686; AVX512BW-NEXT:    imull $37, %edx, %esi
1687; AVX512BW-NEXT:    shrl $8, %esi
1688; AVX512BW-NEXT:    movl %edx, %eax
1689; AVX512BW-NEXT:    subb %sil, %al
1690; AVX512BW-NEXT:    shrb %al
1691; AVX512BW-NEXT:    addb %sil, %al
1692; AVX512BW-NEXT:    shrb $2, %al
1693; AVX512BW-NEXT:    mulb %cl
1694; AVX512BW-NEXT:    subb %al, %dl
1695; AVX512BW-NEXT:    movzbl %dl, %eax
1696; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
1697; AVX512BW-NEXT:    vpextrb $15, %xmm2, %edx
1698; AVX512BW-NEXT:    imull $37, %edx, %esi
1699; AVX512BW-NEXT:    shrl $8, %esi
1700; AVX512BW-NEXT:    movl %edx, %eax
1701; AVX512BW-NEXT:    subb %sil, %al
1702; AVX512BW-NEXT:    shrb %al
1703; AVX512BW-NEXT:    addb %sil, %al
1704; AVX512BW-NEXT:    shrb $2, %al
1705; AVX512BW-NEXT:    mulb %cl
1706; AVX512BW-NEXT:    subb %al, %dl
1707; AVX512BW-NEXT:    movzbl %dl, %eax
1708; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
1709; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
1710; AVX512BW-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
1711; AVX512BW-NEXT:    vpextrb $1, %xmm2, %edx
1712; AVX512BW-NEXT:    imull $37, %edx, %esi
1713; AVX512BW-NEXT:    shrl $8, %esi
1714; AVX512BW-NEXT:    movl %edx, %eax
1715; AVX512BW-NEXT:    subb %sil, %al
1716; AVX512BW-NEXT:    shrb %al
1717; AVX512BW-NEXT:    addb %sil, %al
1718; AVX512BW-NEXT:    shrb $2, %al
1719; AVX512BW-NEXT:    mulb %cl
1720; AVX512BW-NEXT:    subb %al, %dl
1721; AVX512BW-NEXT:    movzbl %dl, %edx
1722; AVX512BW-NEXT:    vpextrb $0, %xmm2, %esi
1723; AVX512BW-NEXT:    imull $37, %esi, %edi
1724; AVX512BW-NEXT:    shrl $8, %edi
1725; AVX512BW-NEXT:    movl %esi, %eax
1726; AVX512BW-NEXT:    subb %dil, %al
1727; AVX512BW-NEXT:    shrb %al
1728; AVX512BW-NEXT:    addb %dil, %al
1729; AVX512BW-NEXT:    shrb $2, %al
1730; AVX512BW-NEXT:    mulb %cl
1731; AVX512BW-NEXT:    subb %al, %sil
1732; AVX512BW-NEXT:    movzbl %sil, %eax
1733; AVX512BW-NEXT:    vmovd %eax, %xmm3
1734; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
1735; AVX512BW-NEXT:    vpextrb $2, %xmm2, %edx
1736; AVX512BW-NEXT:    imull $37, %edx, %esi
1737; AVX512BW-NEXT:    shrl $8, %esi
1738; AVX512BW-NEXT:    movl %edx, %eax
1739; AVX512BW-NEXT:    subb %sil, %al
1740; AVX512BW-NEXT:    shrb %al
1741; AVX512BW-NEXT:    addb %sil, %al
1742; AVX512BW-NEXT:    shrb $2, %al
1743; AVX512BW-NEXT:    mulb %cl
1744; AVX512BW-NEXT:    subb %al, %dl
1745; AVX512BW-NEXT:    movzbl %dl, %eax
1746; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
1747; AVX512BW-NEXT:    vpextrb $3, %xmm2, %edx
1748; AVX512BW-NEXT:    imull $37, %edx, %esi
1749; AVX512BW-NEXT:    shrl $8, %esi
1750; AVX512BW-NEXT:    movl %edx, %eax
1751; AVX512BW-NEXT:    subb %sil, %al
1752; AVX512BW-NEXT:    shrb %al
1753; AVX512BW-NEXT:    addb %sil, %al
1754; AVX512BW-NEXT:    shrb $2, %al
1755; AVX512BW-NEXT:    mulb %cl
1756; AVX512BW-NEXT:    subb %al, %dl
1757; AVX512BW-NEXT:    movzbl %dl, %eax
1758; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
1759; AVX512BW-NEXT:    vpextrb $4, %xmm2, %edx
1760; AVX512BW-NEXT:    imull $37, %edx, %esi
1761; AVX512BW-NEXT:    shrl $8, %esi
1762; AVX512BW-NEXT:    movl %edx, %eax
1763; AVX512BW-NEXT:    subb %sil, %al
1764; AVX512BW-NEXT:    shrb %al
1765; AVX512BW-NEXT:    addb %sil, %al
1766; AVX512BW-NEXT:    shrb $2, %al
1767; AVX512BW-NEXT:    mulb %cl
1768; AVX512BW-NEXT:    subb %al, %dl
1769; AVX512BW-NEXT:    movzbl %dl, %eax
1770; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
1771; AVX512BW-NEXT:    vpextrb $5, %xmm2, %edx
1772; AVX512BW-NEXT:    imull $37, %edx, %esi
1773; AVX512BW-NEXT:    shrl $8, %esi
1774; AVX512BW-NEXT:    movl %edx, %eax
1775; AVX512BW-NEXT:    subb %sil, %al
1776; AVX512BW-NEXT:    shrb %al
1777; AVX512BW-NEXT:    addb %sil, %al
1778; AVX512BW-NEXT:    shrb $2, %al
1779; AVX512BW-NEXT:    mulb %cl
1780; AVX512BW-NEXT:    subb %al, %dl
1781; AVX512BW-NEXT:    movzbl %dl, %eax
1782; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
1783; AVX512BW-NEXT:    vpextrb $6, %xmm2, %edx
1784; AVX512BW-NEXT:    imull $37, %edx, %esi
1785; AVX512BW-NEXT:    shrl $8, %esi
1786; AVX512BW-NEXT:    movl %edx, %eax
1787; AVX512BW-NEXT:    subb %sil, %al
1788; AVX512BW-NEXT:    shrb %al
1789; AVX512BW-NEXT:    addb %sil, %al
1790; AVX512BW-NEXT:    shrb $2, %al
1791; AVX512BW-NEXT:    mulb %cl
1792; AVX512BW-NEXT:    subb %al, %dl
1793; AVX512BW-NEXT:    movzbl %dl, %eax
1794; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
1795; AVX512BW-NEXT:    vpextrb $7, %xmm2, %edx
1796; AVX512BW-NEXT:    imull $37, %edx, %esi
1797; AVX512BW-NEXT:    shrl $8, %esi
1798; AVX512BW-NEXT:    movl %edx, %eax
1799; AVX512BW-NEXT:    subb %sil, %al
1800; AVX512BW-NEXT:    shrb %al
1801; AVX512BW-NEXT:    addb %sil, %al
1802; AVX512BW-NEXT:    shrb $2, %al
1803; AVX512BW-NEXT:    mulb %cl
1804; AVX512BW-NEXT:    subb %al, %dl
1805; AVX512BW-NEXT:    movzbl %dl, %eax
1806; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
1807; AVX512BW-NEXT:    vpextrb $8, %xmm2, %edx
1808; AVX512BW-NEXT:    imull $37, %edx, %esi
1809; AVX512BW-NEXT:    shrl $8, %esi
1810; AVX512BW-NEXT:    movl %edx, %eax
1811; AVX512BW-NEXT:    subb %sil, %al
1812; AVX512BW-NEXT:    shrb %al
1813; AVX512BW-NEXT:    addb %sil, %al
1814; AVX512BW-NEXT:    shrb $2, %al
1815; AVX512BW-NEXT:    mulb %cl
1816; AVX512BW-NEXT:    subb %al, %dl
1817; AVX512BW-NEXT:    movzbl %dl, %eax
1818; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
1819; AVX512BW-NEXT:    vpextrb $9, %xmm2, %edx
1820; AVX512BW-NEXT:    imull $37, %edx, %esi
1821; AVX512BW-NEXT:    shrl $8, %esi
1822; AVX512BW-NEXT:    movl %edx, %eax
1823; AVX512BW-NEXT:    subb %sil, %al
1824; AVX512BW-NEXT:    shrb %al
1825; AVX512BW-NEXT:    addb %sil, %al
1826; AVX512BW-NEXT:    shrb $2, %al
1827; AVX512BW-NEXT:    mulb %cl
1828; AVX512BW-NEXT:    subb %al, %dl
1829; AVX512BW-NEXT:    movzbl %dl, %eax
1830; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
1831; AVX512BW-NEXT:    vpextrb $10, %xmm2, %edx
1832; AVX512BW-NEXT:    imull $37, %edx, %esi
1833; AVX512BW-NEXT:    shrl $8, %esi
1834; AVX512BW-NEXT:    movl %edx, %eax
1835; AVX512BW-NEXT:    subb %sil, %al
1836; AVX512BW-NEXT:    shrb %al
1837; AVX512BW-NEXT:    addb %sil, %al
1838; AVX512BW-NEXT:    shrb $2, %al
1839; AVX512BW-NEXT:    mulb %cl
1840; AVX512BW-NEXT:    subb %al, %dl
1841; AVX512BW-NEXT:    movzbl %dl, %eax
1842; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
1843; AVX512BW-NEXT:    vpextrb $11, %xmm2, %edx
1844; AVX512BW-NEXT:    imull $37, %edx, %esi
1845; AVX512BW-NEXT:    shrl $8, %esi
1846; AVX512BW-NEXT:    movl %edx, %eax
1847; AVX512BW-NEXT:    subb %sil, %al
1848; AVX512BW-NEXT:    shrb %al
1849; AVX512BW-NEXT:    addb %sil, %al
1850; AVX512BW-NEXT:    shrb $2, %al
1851; AVX512BW-NEXT:    mulb %cl
1852; AVX512BW-NEXT:    subb %al, %dl
1853; AVX512BW-NEXT:    movzbl %dl, %eax
1854; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
1855; AVX512BW-NEXT:    vpextrb $12, %xmm2, %edx
1856; AVX512BW-NEXT:    imull $37, %edx, %esi
1857; AVX512BW-NEXT:    shrl $8, %esi
1858; AVX512BW-NEXT:    movl %edx, %eax
1859; AVX512BW-NEXT:    subb %sil, %al
1860; AVX512BW-NEXT:    shrb %al
1861; AVX512BW-NEXT:    addb %sil, %al
1862; AVX512BW-NEXT:    shrb $2, %al
1863; AVX512BW-NEXT:    mulb %cl
1864; AVX512BW-NEXT:    subb %al, %dl
1865; AVX512BW-NEXT:    movzbl %dl, %eax
1866; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
1867; AVX512BW-NEXT:    vpextrb $13, %xmm2, %edx
1868; AVX512BW-NEXT:    imull $37, %edx, %esi
1869; AVX512BW-NEXT:    shrl $8, %esi
1870; AVX512BW-NEXT:    movl %edx, %eax
1871; AVX512BW-NEXT:    subb %sil, %al
1872; AVX512BW-NEXT:    shrb %al
1873; AVX512BW-NEXT:    addb %sil, %al
1874; AVX512BW-NEXT:    shrb $2, %al
1875; AVX512BW-NEXT:    mulb %cl
1876; AVX512BW-NEXT:    subb %al, %dl
1877; AVX512BW-NEXT:    movzbl %dl, %eax
1878; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
1879; AVX512BW-NEXT:    vpextrb $14, %xmm2, %edx
1880; AVX512BW-NEXT:    imull $37, %edx, %esi
1881; AVX512BW-NEXT:    shrl $8, %esi
1882; AVX512BW-NEXT:    movl %edx, %eax
1883; AVX512BW-NEXT:    subb %sil, %al
1884; AVX512BW-NEXT:    shrb %al
1885; AVX512BW-NEXT:    addb %sil, %al
1886; AVX512BW-NEXT:    shrb $2, %al
1887; AVX512BW-NEXT:    mulb %cl
1888; AVX512BW-NEXT:    subb %al, %dl
1889; AVX512BW-NEXT:    movzbl %dl, %eax
1890; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
1891; AVX512BW-NEXT:    vpextrb $15, %xmm2, %edx
1892; AVX512BW-NEXT:    imull $37, %edx, %esi
1893; AVX512BW-NEXT:    shrl $8, %esi
1894; AVX512BW-NEXT:    movl %edx, %eax
1895; AVX512BW-NEXT:    subb %sil, %al
1896; AVX512BW-NEXT:    shrb %al
1897; AVX512BW-NEXT:    addb %sil, %al
1898; AVX512BW-NEXT:    shrb $2, %al
1899; AVX512BW-NEXT:    mulb %cl
1900; AVX512BW-NEXT:    subb %al, %dl
1901; AVX512BW-NEXT:    movzbl %dl, %eax
1902; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
1903; AVX512BW-NEXT:    vpextrb $1, %xmm0, %edx
1904; AVX512BW-NEXT:    imull $37, %edx, %esi
1905; AVX512BW-NEXT:    shrl $8, %esi
1906; AVX512BW-NEXT:    movl %edx, %eax
1907; AVX512BW-NEXT:    subb %sil, %al
1908; AVX512BW-NEXT:    shrb %al
1909; AVX512BW-NEXT:    addb %sil, %al
1910; AVX512BW-NEXT:    shrb $2, %al
1911; AVX512BW-NEXT:    mulb %cl
1912; AVX512BW-NEXT:    subb %al, %dl
1913; AVX512BW-NEXT:    movzbl %dl, %edx
1914; AVX512BW-NEXT:    vpextrb $0, %xmm0, %esi
1915; AVX512BW-NEXT:    imull $37, %esi, %edi
1916; AVX512BW-NEXT:    shrl $8, %edi
1917; AVX512BW-NEXT:    movl %esi, %eax
1918; AVX512BW-NEXT:    subb %dil, %al
1919; AVX512BW-NEXT:    shrb %al
1920; AVX512BW-NEXT:    addb %dil, %al
1921; AVX512BW-NEXT:    shrb $2, %al
1922; AVX512BW-NEXT:    mulb %cl
1923; AVX512BW-NEXT:    subb %al, %sil
1924; AVX512BW-NEXT:    movzbl %sil, %eax
1925; AVX512BW-NEXT:    vmovd %eax, %xmm3
1926; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
1927; AVX512BW-NEXT:    vpextrb $2, %xmm0, %edx
1928; AVX512BW-NEXT:    imull $37, %edx, %esi
1929; AVX512BW-NEXT:    shrl $8, %esi
1930; AVX512BW-NEXT:    movl %edx, %eax
1931; AVX512BW-NEXT:    subb %sil, %al
1932; AVX512BW-NEXT:    shrb %al
1933; AVX512BW-NEXT:    addb %sil, %al
1934; AVX512BW-NEXT:    shrb $2, %al
1935; AVX512BW-NEXT:    mulb %cl
1936; AVX512BW-NEXT:    subb %al, %dl
1937; AVX512BW-NEXT:    movzbl %dl, %eax
1938; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
1939; AVX512BW-NEXT:    vpextrb $3, %xmm0, %edx
1940; AVX512BW-NEXT:    imull $37, %edx, %esi
1941; AVX512BW-NEXT:    shrl $8, %esi
1942; AVX512BW-NEXT:    movl %edx, %eax
1943; AVX512BW-NEXT:    subb %sil, %al
1944; AVX512BW-NEXT:    shrb %al
1945; AVX512BW-NEXT:    addb %sil, %al
1946; AVX512BW-NEXT:    shrb $2, %al
1947; AVX512BW-NEXT:    mulb %cl
1948; AVX512BW-NEXT:    subb %al, %dl
1949; AVX512BW-NEXT:    movzbl %dl, %eax
1950; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
1951; AVX512BW-NEXT:    vpextrb $4, %xmm0, %edx
1952; AVX512BW-NEXT:    imull $37, %edx, %esi
1953; AVX512BW-NEXT:    shrl $8, %esi
1954; AVX512BW-NEXT:    movl %edx, %eax
1955; AVX512BW-NEXT:    subb %sil, %al
1956; AVX512BW-NEXT:    shrb %al
1957; AVX512BW-NEXT:    addb %sil, %al
1958; AVX512BW-NEXT:    shrb $2, %al
1959; AVX512BW-NEXT:    mulb %cl
1960; AVX512BW-NEXT:    subb %al, %dl
1961; AVX512BW-NEXT:    movzbl %dl, %eax
1962; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
1963; AVX512BW-NEXT:    vpextrb $5, %xmm0, %edx
1964; AVX512BW-NEXT:    imull $37, %edx, %esi
1965; AVX512BW-NEXT:    shrl $8, %esi
1966; AVX512BW-NEXT:    movl %edx, %eax
1967; AVX512BW-NEXT:    subb %sil, %al
1968; AVX512BW-NEXT:    shrb %al
1969; AVX512BW-NEXT:    addb %sil, %al
1970; AVX512BW-NEXT:    shrb $2, %al
1971; AVX512BW-NEXT:    mulb %cl
1972; AVX512BW-NEXT:    subb %al, %dl
1973; AVX512BW-NEXT:    movzbl %dl, %eax
1974; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
1975; AVX512BW-NEXT:    vpextrb $6, %xmm0, %edx
1976; AVX512BW-NEXT:    imull $37, %edx, %esi
1977; AVX512BW-NEXT:    shrl $8, %esi
1978; AVX512BW-NEXT:    movl %edx, %eax
1979; AVX512BW-NEXT:    subb %sil, %al
1980; AVX512BW-NEXT:    shrb %al
1981; AVX512BW-NEXT:    addb %sil, %al
1982; AVX512BW-NEXT:    shrb $2, %al
1983; AVX512BW-NEXT:    mulb %cl
1984; AVX512BW-NEXT:    subb %al, %dl
1985; AVX512BW-NEXT:    movzbl %dl, %eax
1986; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
1987; AVX512BW-NEXT:    vpextrb $7, %xmm0, %edx
1988; AVX512BW-NEXT:    imull $37, %edx, %esi
1989; AVX512BW-NEXT:    shrl $8, %esi
1990; AVX512BW-NEXT:    movl %edx, %eax
1991; AVX512BW-NEXT:    subb %sil, %al
1992; AVX512BW-NEXT:    shrb %al
1993; AVX512BW-NEXT:    addb %sil, %al
1994; AVX512BW-NEXT:    shrb $2, %al
1995; AVX512BW-NEXT:    mulb %cl
1996; AVX512BW-NEXT:    subb %al, %dl
1997; AVX512BW-NEXT:    movzbl %dl, %eax
1998; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
1999; AVX512BW-NEXT:    vpextrb $8, %xmm0, %edx
2000; AVX512BW-NEXT:    imull $37, %edx, %esi
2001; AVX512BW-NEXT:    shrl $8, %esi
2002; AVX512BW-NEXT:    movl %edx, %eax
2003; AVX512BW-NEXT:    subb %sil, %al
2004; AVX512BW-NEXT:    shrb %al
2005; AVX512BW-NEXT:    addb %sil, %al
2006; AVX512BW-NEXT:    shrb $2, %al
2007; AVX512BW-NEXT:    mulb %cl
2008; AVX512BW-NEXT:    subb %al, %dl
2009; AVX512BW-NEXT:    movzbl %dl, %eax
2010; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
2011; AVX512BW-NEXT:    vpextrb $9, %xmm0, %edx
2012; AVX512BW-NEXT:    imull $37, %edx, %esi
2013; AVX512BW-NEXT:    shrl $8, %esi
2014; AVX512BW-NEXT:    movl %edx, %eax
2015; AVX512BW-NEXT:    subb %sil, %al
2016; AVX512BW-NEXT:    shrb %al
2017; AVX512BW-NEXT:    addb %sil, %al
2018; AVX512BW-NEXT:    shrb $2, %al
2019; AVX512BW-NEXT:    mulb %cl
2020; AVX512BW-NEXT:    subb %al, %dl
2021; AVX512BW-NEXT:    movzbl %dl, %eax
2022; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
2023; AVX512BW-NEXT:    vpextrb $10, %xmm0, %edx
2024; AVX512BW-NEXT:    imull $37, %edx, %esi
2025; AVX512BW-NEXT:    shrl $8, %esi
2026; AVX512BW-NEXT:    movl %edx, %eax
2027; AVX512BW-NEXT:    subb %sil, %al
2028; AVX512BW-NEXT:    shrb %al
2029; AVX512BW-NEXT:    addb %sil, %al
2030; AVX512BW-NEXT:    shrb $2, %al
2031; AVX512BW-NEXT:    mulb %cl
2032; AVX512BW-NEXT:    subb %al, %dl
2033; AVX512BW-NEXT:    movzbl %dl, %eax
2034; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
2035; AVX512BW-NEXT:    vpextrb $11, %xmm0, %edx
2036; AVX512BW-NEXT:    imull $37, %edx, %esi
2037; AVX512BW-NEXT:    shrl $8, %esi
2038; AVX512BW-NEXT:    movl %edx, %eax
2039; AVX512BW-NEXT:    subb %sil, %al
2040; AVX512BW-NEXT:    shrb %al
2041; AVX512BW-NEXT:    addb %sil, %al
2042; AVX512BW-NEXT:    shrb $2, %al
2043; AVX512BW-NEXT:    mulb %cl
2044; AVX512BW-NEXT:    subb %al, %dl
2045; AVX512BW-NEXT:    movzbl %dl, %eax
2046; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
2047; AVX512BW-NEXT:    vpextrb $12, %xmm0, %edx
2048; AVX512BW-NEXT:    imull $37, %edx, %esi
2049; AVX512BW-NEXT:    shrl $8, %esi
2050; AVX512BW-NEXT:    movl %edx, %eax
2051; AVX512BW-NEXT:    subb %sil, %al
2052; AVX512BW-NEXT:    shrb %al
2053; AVX512BW-NEXT:    addb %sil, %al
2054; AVX512BW-NEXT:    shrb $2, %al
2055; AVX512BW-NEXT:    mulb %cl
2056; AVX512BW-NEXT:    subb %al, %dl
2057; AVX512BW-NEXT:    movzbl %dl, %eax
2058; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
2059; AVX512BW-NEXT:    vpextrb $13, %xmm0, %edx
2060; AVX512BW-NEXT:    imull $37, %edx, %esi
2061; AVX512BW-NEXT:    shrl $8, %esi
2062; AVX512BW-NEXT:    movl %edx, %eax
2063; AVX512BW-NEXT:    subb %sil, %al
2064; AVX512BW-NEXT:    shrb %al
2065; AVX512BW-NEXT:    addb %sil, %al
2066; AVX512BW-NEXT:    shrb $2, %al
2067; AVX512BW-NEXT:    mulb %cl
2068; AVX512BW-NEXT:    subb %al, %dl
2069; AVX512BW-NEXT:    movzbl %dl, %eax
2070; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
2071; AVX512BW-NEXT:    vpextrb $14, %xmm0, %edx
2072; AVX512BW-NEXT:    imull $37, %edx, %esi
2073; AVX512BW-NEXT:    shrl $8, %esi
2074; AVX512BW-NEXT:    movl %edx, %eax
2075; AVX512BW-NEXT:    subb %sil, %al
2076; AVX512BW-NEXT:    shrb %al
2077; AVX512BW-NEXT:    addb %sil, %al
2078; AVX512BW-NEXT:    shrb $2, %al
2079; AVX512BW-NEXT:    mulb %cl
2080; AVX512BW-NEXT:    subb %al, %dl
2081; AVX512BW-NEXT:    movzbl %dl, %eax
2082; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
2083; AVX512BW-NEXT:    vpextrb $15, %xmm0, %edx
2084; AVX512BW-NEXT:    imull $37, %edx, %esi
2085; AVX512BW-NEXT:    shrl $8, %esi
2086; AVX512BW-NEXT:    movl %edx, %eax
2087; AVX512BW-NEXT:    subb %sil, %al
2088; AVX512BW-NEXT:    shrb %al
2089; AVX512BW-NEXT:    addb %sil, %al
2090; AVX512BW-NEXT:    shrb $2, %al
2091; AVX512BW-NEXT:    mulb %cl
2092; AVX512BW-NEXT:    subb %al, %dl
2093; AVX512BW-NEXT:    movzbl %dl, %eax
2094; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
2095; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
2096; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2097; AVX512BW-NEXT:    retq
2098  %res = urem <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
2099  ret <64 x i8> %res
2100}
2101