• Home
  • Raw
  • Download

Lines Matching refs:AVX512F

2 …ple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
103 ; AVX512F-LABEL: test_div7_32i16:
104 ; AVX512F: # %bb.0:
105 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
106 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [18725,18725,18725,18725,18725,18725,18725,18725,18725,1…
107 ; AVX512F-NEXT: vpmulhw %ymm2, %ymm1, %ymm1
108 ; AVX512F-NEXT: vpsrlw $15, %ymm1, %ymm3
109 ; AVX512F-NEXT: vpsraw $1, %ymm1, %ymm1
110 ; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1
111 ; AVX512F-NEXT: vpmulhw %ymm2, %ymm0, %ymm0
112 ; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm2
113 ; AVX512F-NEXT: vpsraw $1, %ymm0, %ymm0
114 ; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm0
115 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
116 ; AVX512F-NEXT: retq
130 ; AVX512F-LABEL: test_div7_64i8:
131 ; AVX512F: # %bb.0:
132 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
133 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
134 ; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2
135 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [65427,65427,65427,65427,65427,65427,65427,65427,65427,6…
136 ; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
137 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
138 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
139 ; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
140 ; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4
141 ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
142 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm4, %ymm2
143 ; AVX512F-NEXT: vpaddb %ymm1, %ymm2, %ymm1
144 ; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm2
145 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1…
146 ; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
147 ; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
148 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,6…
149 ; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
150 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,3…
151 ; AVX512F-NEXT: vpxor %ymm6, %ymm1, %ymm1
152 ; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1
153 ; AVX512F-NEXT: vpsubb %ymm6, %ymm1, %ymm1
154 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
155 ; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2
156 ; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
157 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
158 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm7 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
159 ; AVX512F-NEXT: vpsraw $8, %ymm7, %ymm7
160 ; AVX512F-NEXT: vpmullw %ymm3, %ymm7, %ymm3
161 ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
162 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
163 ; AVX512F-NEXT: vpaddb %ymm0, %ymm2, %ymm0
164 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2
165 ; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
166 ; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0
167 ; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
168 ; AVX512F-NEXT: vpxor %ymm6, %ymm0, %ymm0
169 ; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
170 ; AVX512F-NEXT: vpsubb %ymm6, %ymm0, %ymm0
171 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
172 ; AVX512F-NEXT: retq
204 ; AVX512F-LABEL: test_divconstant_64i8:
205 ; AVX512F: # %bb.0:
206 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
207 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
208 ; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2
209 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2
210 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
211 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
212 ; AVX512F-NEXT: vpsraw $8, %ymm1, %ymm1
213 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
214 ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
215 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
216 ; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm2
217 ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm3
218 ; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
219 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
220 ; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3
221 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm3, %ymm3
222 ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
223 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
224 ; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
225 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm4, %ymm4
226 ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
227 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
228 ; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm1
229 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1…
230 ; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
231 ; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm1
232 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
233 ; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3
234 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm3, %ymm3
235 ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
236 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
237 ; AVX512F-NEXT: vpsraw $8, %ymm0, %ymm0
238 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
239 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
240 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
241 ; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
242 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
243 ; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2
244 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2
245 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
246 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
247 ; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3
248 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm3, %ymm3
249 ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
250 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
251 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm0
252 ; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
253 ; AVX512F-NEXT: vpaddb %ymm0, %ymm2, %ymm0
254 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
255 ; AVX512F-NEXT: retq
419 ; AVX512F-LABEL: test_rem7_32i16:
420 ; AVX512F: # %bb.0:
421 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
422 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [18725,18725,18725,18725,18725,18725,18725,18725,18725,1…
423 ; AVX512F-NEXT: vpmulhw %ymm2, %ymm1, %ymm3
424 ; AVX512F-NEXT: vpsrlw $15, %ymm3, %ymm4
425 ; AVX512F-NEXT: vpsraw $1, %ymm3, %ymm3
426 ; AVX512F-NEXT: vpaddw %ymm4, %ymm3, %ymm3
427 ; AVX512F-NEXT: vpsllw $3, %ymm3, %ymm4
428 ; AVX512F-NEXT: vpsubw %ymm4, %ymm3, %ymm3
429 ; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1
430 ; AVX512F-NEXT: vpmulhw %ymm2, %ymm0, %ymm2
431 ; AVX512F-NEXT: vpsrlw $15, %ymm2, %ymm3
432 ; AVX512F-NEXT: vpsraw $1, %ymm2, %ymm2
433 ; AVX512F-NEXT: vpaddw %ymm3, %ymm2, %ymm2
434 ; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm3
435 ; AVX512F-NEXT: vpsubw %ymm3, %ymm2, %ymm2
436 ; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm0
437 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
438 ; AVX512F-NEXT: retq
454 ; AVX512F-LABEL: test_rem7_64i8:
455 ; AVX512F: # %bb.0:
456 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
457 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
458 ; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2
459 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [65427,65427,65427,65427,65427,65427,65427,65427,65427,6…
460 ; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
461 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
462 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
463 ; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
464 ; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4
465 ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
466 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm4, %ymm2
467 ; AVX512F-NEXT: vpaddb %ymm1, %ymm2, %ymm2
468 ; AVX512F-NEXT: vpsrlw $7, %ymm2, %ymm4
469 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1…
470 ; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
471 ; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
472 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,6…
473 ; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
474 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,3…
475 ; AVX512F-NEXT: vpxor %ymm7, %ymm2, %ymm2
476 ; AVX512F-NEXT: vpaddb %ymm4, %ymm2, %ymm2
477 ; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
478 ; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm4
479 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248…
480 ; AVX512F-NEXT: vpand %ymm4, %ymm8, %ymm4
481 ; AVX512F-NEXT: vpsubb %ymm4, %ymm2, %ymm2
482 ; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1
483 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
484 ; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2
485 ; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
486 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
487 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
488 ; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
489 ; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm3
490 ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
491 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
492 ; AVX512F-NEXT: vpaddb %ymm0, %ymm2, %ymm2
493 ; AVX512F-NEXT: vpsrlw $7, %ymm2, %ymm3
494 ; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm3
495 ; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
496 ; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
497 ; AVX512F-NEXT: vpxor %ymm7, %ymm2, %ymm2
498 ; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2
499 ; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
500 ; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm3
501 ; AVX512F-NEXT: vpand %ymm3, %ymm8, %ymm3
502 ; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2
503 ; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
504 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
505 ; AVX512F-NEXT: retq
541 ; AVX512F-LABEL: test_remconstant_64i8:
542 ; AVX512F: # %bb.0:
543 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
544 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
545 ; AVX512F-NEXT: vpsraw $8, %ymm1, %ymm1
546 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
547 ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
548 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
549 ; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2
550 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2
551 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
552 ; AVX512F-NEXT: vpackuswb %ymm1, %ymm2, %ymm2
553 ; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm1
554 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
555 ; AVX512F-NEXT: vpaddb %ymm4, %ymm2, %ymm2
556 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
557 ; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
558 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm4, %ymm4
559 ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
560 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
561 ; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5
562 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm5, %ymm5
563 ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
564 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm5, %ymm5
565 ; AVX512F-NEXT: vpsrlw $7, %ymm2, %ymm2
566 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1…
567 ; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
568 ; AVX512F-NEXT: vpaddb %ymm2, %ymm5, %ymm5
569 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
570 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm6
571 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255…
572 ; AVX512F-NEXT: vpand %ymm2, %ymm6, %ymm6
573 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
574 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm5, %ymm5
575 ; AVX512F-NEXT: vpand %ymm2, %ymm5, %ymm5
576 ; AVX512F-NEXT: vpackuswb %ymm6, %ymm5, %ymm5
577 ; AVX512F-NEXT: vpsubb %ymm5, %ymm3, %ymm3
578 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
579 ; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5
580 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm5, %ymm5
581 ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
582 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm6 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
583 ; AVX512F-NEXT: vpsraw $8, %ymm6, %ymm6
584 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm6, %ymm6
585 ; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
586 ; AVX512F-NEXT: vpackuswb %ymm5, %ymm6, %ymm5
587 ; AVX512F-NEXT: vpaddb %ymm1, %ymm5, %ymm1
588 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
589 ; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5
590 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm5, %ymm5
591 ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
592 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm6 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
593 ; AVX512F-NEXT: vpsraw $8, %ymm6, %ymm6
594 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm6, %ymm6
595 ; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
596 ; AVX512F-NEXT: vpackuswb %ymm5, %ymm6, %ymm5
597 ; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm1
598 ; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
599 ; AVX512F-NEXT: vpaddb %ymm1, %ymm5, %ymm1
600 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,2…
601 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm4, %ymm4
602 ; AVX512F-NEXT: vpand %ymm2, %ymm4, %ymm4
603 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,1…
604 ; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
605 ; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
606 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
607 ; AVX512F-NEXT: vpsubb %ymm1, %ymm0, %ymm0
608 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
609 ; AVX512F-NEXT: retq