• Home
  • Raw
  • Download

Lines Matching refs:xmm0

17 ; SSE2-NEXT:    movd %xmm0, %rax
23 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
24 ; SSE2-NEXT: movd %xmm0, %rax
28 ; SSE2-NEXT: movd %rax, %xmm0
29 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
30 ; SSE2-NEXT: movdqa %xmm1, %xmm0
35 ; SSE3-NEXT: movd %xmm0, %rax
41 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
42 ; SSE3-NEXT: movd %xmm0, %rax
46 ; SSE3-NEXT: movd %rax, %xmm0
47 ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
48 ; SSE3-NEXT: movdqa %xmm1, %xmm0
53 ; SSSE3-NEXT: movd %xmm0, %rax
59 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
60 ; SSSE3-NEXT: movd %xmm0, %rax
64 ; SSSE3-NEXT: movd %rax, %xmm0
65 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
66 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
71 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
77 ; SSE41-NEXT: movd %xmm0, %rax
81 ; SSE41-NEXT: movd %rax, %xmm0
82 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
87 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
93 ; AVX-NEXT: vmovq %xmm0, %rax
97 ; AVX-NEXT: vmovq %rax, %xmm0
98 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
103 ; AVX512VLCD-NEXT: vplzcntq %xmm0, %xmm0
116 ; X32-SSE-NEXT: pextrd $2, %xmm0, %eax
122 ; X32-SSE-NEXT: pextrd $3, %xmm0, %edx
128 ; X32-SSE-NEXT: movd %xmm0, %eax
133 ; X32-SSE-NEXT: pextrd $1, %xmm0, %ecx
138 ; X32-SSE-NEXT: movd %edx, %xmm0
139 ; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
150 ; SSE2-NEXT: movd %xmm0, %rax
154 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
155 ; SSE2-NEXT: movd %xmm0, %rax
158 ; SSE2-NEXT: movd %rax, %xmm0
159 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
160 ; SSE2-NEXT: movdqa %xmm1, %xmm0
165 ; SSE3-NEXT: movd %xmm0, %rax
169 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
170 ; SSE3-NEXT: movd %xmm0, %rax
173 ; SSE3-NEXT: movd %rax, %xmm0
174 ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
175 ; SSE3-NEXT: movdqa %xmm1, %xmm0
180 ; SSSE3-NEXT: movd %xmm0, %rax
184 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
185 ; SSSE3-NEXT: movd %xmm0, %rax
188 ; SSSE3-NEXT: movd %rax, %xmm0
189 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
190 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
195 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
199 ; SSE41-NEXT: movd %xmm0, %rax
202 ; SSE41-NEXT: movd %rax, %xmm0
203 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
208 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
212 ; AVX-NEXT: vmovq %xmm0, %rax
215 ; AVX-NEXT: vmovq %rax, %xmm0
216 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
221 ; AVX512VLCD-NEXT: vplzcntq %xmm0, %xmm0
233 ; X32-SSE-NEXT: pextrd $3, %xmm0, %eax
236 ; X32-SSE-NEXT: pextrd $2, %xmm0, %edx
243 ; X32-SSE-NEXT: pextrd $1, %xmm0, %eax
246 ; X32-SSE-NEXT: movd %xmm0, %edx
252 ; X32-SSE-NEXT: movd %edx, %xmm0
253 ; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
263 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
270 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
277 ; SSE2-NEXT: movd %xmm0, %eax
282 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
283 ; SSE2-NEXT: movd %xmm0, %eax
287 ; SSE2-NEXT: movd %eax, %xmm0
288 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
290 ; SSE2-NEXT: movdqa %xmm1, %xmm0
295 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
302 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
309 ; SSE3-NEXT: movd %xmm0, %eax
314 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
315 ; SSE3-NEXT: movd %xmm0, %eax
319 ; SSE3-NEXT: movd %eax, %xmm0
320 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
322 ; SSE3-NEXT: movdqa %xmm1, %xmm0
327 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
334 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
341 ; SSSE3-NEXT: movd %xmm0, %eax
346 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
347 ; SSSE3-NEXT: movd %xmm0, %eax
351 ; SSSE3-NEXT: movd %eax, %xmm0
352 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
354 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
359 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
364 ; SSE41-NEXT: movd %xmm0, %edx
370 ; SSE41-NEXT: pextrd $2, %xmm0, %eax
375 ; SSE41-NEXT: pextrd $3, %xmm0, %eax
380 ; SSE41-NEXT: movdqa %xmm1, %xmm0
385 ; AVX-NEXT: vpextrd $1, %xmm0, %eax
390 ; AVX-NEXT: vmovd %xmm0, %edx
396 ; AVX-NEXT: vpextrd $2, %xmm0, %eax
401 ; AVX-NEXT: vpextrd $3, %xmm0, %eax
405 ; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
410 ; AVX512VLCD-NEXT: vplzcntd %xmm0, %xmm0
422 ; X32-SSE-NEXT: pextrd $1, %xmm0, %eax
427 ; X32-SSE-NEXT: movd %xmm0, %edx
433 ; X32-SSE-NEXT: pextrd $2, %xmm0, %eax
438 ; X32-SSE-NEXT: pextrd $3, %xmm0, %eax
443 ; X32-SSE-NEXT: movdqa %xmm1, %xmm0
453 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
458 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
464 ; SSE2-NEXT: movd %xmm0, %eax
468 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
469 ; SSE2-NEXT: movd %xmm0, %eax
472 ; SSE2-NEXT: movd %eax, %xmm0
473 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
475 ; SSE2-NEXT: movdqa %xmm1, %xmm0
480 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
485 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
491 ; SSE3-NEXT: movd %xmm0, %eax
495 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
496 ; SSE3-NEXT: movd %xmm0, %eax
499 ; SSE3-NEXT: movd %eax, %xmm0
500 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
502 ; SSE3-NEXT: movdqa %xmm1, %xmm0
507 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
512 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
518 ; SSSE3-NEXT: movd %xmm0, %eax
522 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
523 ; SSSE3-NEXT: movd %xmm0, %eax
526 ; SSSE3-NEXT: movd %eax, %xmm0
527 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
529 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
534 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
537 ; SSE41-NEXT: movd %xmm0, %ecx
542 ; SSE41-NEXT: pextrd $2, %xmm0, %eax
546 ; SSE41-NEXT: pextrd $3, %xmm0, %eax
550 ; SSE41-NEXT: movdqa %xmm1, %xmm0
555 ; AVX-NEXT: vpextrd $1, %xmm0, %eax
558 ; AVX-NEXT: vmovd %xmm0, %ecx
563 ; AVX-NEXT: vpextrd $2, %xmm0, %eax
567 ; AVX-NEXT: vpextrd $3, %xmm0, %eax
570 ; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
575 ; AVX512VLCD-NEXT: vplzcntd %xmm0, %xmm0
587 ; X32-SSE-NEXT: pextrd $1, %xmm0, %eax
590 ; X32-SSE-NEXT: movd %xmm0, %ecx
595 ; X32-SSE-NEXT: pextrd $2, %xmm0, %eax
599 ; X32-SSE-NEXT: pextrd $3, %xmm0, %eax
603 ; X32-SSE-NEXT: movdqa %xmm1, %xmm0
613 ; SSE2-NEXT: pextrw $7, %xmm0, %eax
619 ; SSE2-NEXT: pextrw $3, %xmm0, %ecx
625 ; SSE2-NEXT: pextrw $5, %xmm0, %ecx
630 ; SSE2-NEXT: pextrw $1, %xmm0, %ecx
637 ; SSE2-NEXT: pextrw $6, %xmm0, %ecx
642 ; SSE2-NEXT: pextrw $2, %xmm0, %ecx
648 ; SSE2-NEXT: pextrw $4, %xmm0, %ecx
653 ; SSE2-NEXT: movd %xmm0, %ecx
657 ; SSE2-NEXT: movd %ecx, %xmm0
658 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],x…
659 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],x…
660 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],x…
665 ; SSE3-NEXT: pextrw $7, %xmm0, %eax
671 ; SSE3-NEXT: pextrw $3, %xmm0, %ecx
677 ; SSE3-NEXT: pextrw $5, %xmm0, %ecx
682 ; SSE3-NEXT: pextrw $1, %xmm0, %ecx
689 ; SSE3-NEXT: pextrw $6, %xmm0, %ecx
694 ; SSE3-NEXT: pextrw $2, %xmm0, %ecx
700 ; SSE3-NEXT: pextrw $4, %xmm0, %ecx
705 ; SSE3-NEXT: movd %xmm0, %ecx
709 ; SSE3-NEXT: movd %ecx, %xmm0
710 ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],x…
711 ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],x…
712 ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],x…
718 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
723 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
731 ; SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
732 ; SSSE3-NEXT: psrlw $8, %xmm0
733 ; SSSE3-NEXT: pand %xmm1, %xmm0
735 ; SSSE3-NEXT: paddw %xmm0, %xmm1
736 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
742 ; SSE41-NEXT: movdqa %xmm0, %xmm1
747 ; SSE41-NEXT: movdqa %xmm0, %xmm1
755 ; SSE41-NEXT: pcmpeqb %xmm2, %xmm0
756 ; SSE41-NEXT: psrlw $8, %xmm0
757 ; SSE41-NEXT: pand %xmm1, %xmm0
759 ; SSE41-NEXT: paddw %xmm0, %xmm1
760 ; SSE41-NEXT: movdqa %xmm1, %xmm0
766 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
769 ; AVX-NEXT: vpsrlw $4, %xmm0, %xmm4
776 ; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
777 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
778 ; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
780 ; AVX-NEXT: vpaddw %xmm0, %xmm1, %xmm0
785 … vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0
787 ; AVX512VLCD-NEXT: vpmovdw %ymm0, %xmm0
788 ; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
793 … vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0
796 ; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
802 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
807 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
815 ; X32-SSE-NEXT: pcmpeqb %xmm2, %xmm0
816 ; X32-SSE-NEXT: psrlw $8, %xmm0
817 ; X32-SSE-NEXT: pand %xmm1, %xmm0
819 ; X32-SSE-NEXT: paddw %xmm0, %xmm1
820 ; X32-SSE-NEXT: movdqa %xmm1, %xmm0
829 ; SSE2-NEXT: pextrw $7, %xmm0, %eax
833 ; SSE2-NEXT: pextrw $3, %xmm0, %eax
838 ; SSE2-NEXT: pextrw $5, %xmm0, %eax
842 ; SSE2-NEXT: pextrw $1, %xmm0, %eax
848 ; SSE2-NEXT: pextrw $6, %xmm0, %eax
852 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
857 ; SSE2-NEXT: pextrw $4, %xmm0, %eax
861 ; SSE2-NEXT: movd %xmm0, %eax
864 ; SSE2-NEXT: movd %eax, %xmm0
865 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],x…
866 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],x…
867 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],x…
872 ; SSE3-NEXT: pextrw $7, %xmm0, %eax
876 ; SSE3-NEXT: pextrw $3, %xmm0, %eax
881 ; SSE3-NEXT: pextrw $5, %xmm0, %eax
885 ; SSE3-NEXT: pextrw $1, %xmm0, %eax
891 ; SSE3-NEXT: pextrw $6, %xmm0, %eax
895 ; SSE3-NEXT: pextrw $2, %xmm0, %eax
900 ; SSE3-NEXT: pextrw $4, %xmm0, %eax
904 ; SSE3-NEXT: movd %xmm0, %eax
907 ; SSE3-NEXT: movd %eax, %xmm0
908 ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],x…
909 ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],x…
910 ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],x…
916 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
921 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
929 ; SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
930 ; SSSE3-NEXT: psrlw $8, %xmm0
931 ; SSSE3-NEXT: pand %xmm1, %xmm0
933 ; SSSE3-NEXT: paddw %xmm0, %xmm1
934 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
940 ; SSE41-NEXT: movdqa %xmm0, %xmm1
945 ; SSE41-NEXT: movdqa %xmm0, %xmm1
953 ; SSE41-NEXT: pcmpeqb %xmm2, %xmm0
954 ; SSE41-NEXT: psrlw $8, %xmm0
955 ; SSE41-NEXT: pand %xmm1, %xmm0
957 ; SSE41-NEXT: paddw %xmm0, %xmm1
958 ; SSE41-NEXT: movdqa %xmm1, %xmm0
964 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
967 ; AVX-NEXT: vpsrlw $4, %xmm0, %xmm4
974 ; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
975 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
976 ; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
978 ; AVX-NEXT: vpaddw %xmm0, %xmm1, %xmm0
983 … vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0
985 ; AVX512VLCD-NEXT: vpmovdw %ymm0, %xmm0
986 ; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
991 … vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0
994 ; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
1000 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
1005 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
1013 ; X32-SSE-NEXT: pcmpeqb %xmm2, %xmm0
1014 ; X32-SSE-NEXT: psrlw $8, %xmm0
1015 ; X32-SSE-NEXT: pand %xmm1, %xmm0
1017 ; X32-SSE-NEXT: paddw %xmm0, %xmm1
1018 ; X32-SSE-NEXT: movdqa %xmm1, %xmm0
1029 ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1035 ; SSE2-NEXT: movd %ecx, %xmm0
1048 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
1060 ; SSE2-NEXT: movd %ebp, %xmm0
1061 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2…
1062 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1084 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
1088 ; SSE2-NEXT: movd %ecx, %xmm0
1093 …}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],
1097 ; SSE2-NEXT: movd %ecx, %xmm0
1102 …}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],
1107 ; SSE2-NEXT: movd %ecx, %xmm0
1112 …}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],
1121 ; SSE2-NEXT: movd %ecx, %xmm0
1122 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4…
1123 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3…
1124 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2…
1125 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1134 ; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1140 ; SSE3-NEXT: movd %ecx, %xmm0
1153 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
1165 ; SSE3-NEXT: movd %ebp, %xmm0
1166 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2…
1167 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1189 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
1193 ; SSE3-NEXT: movd %ecx, %xmm0
1198 …}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],
1202 ; SSE3-NEXT: movd %ecx, %xmm0
1207 …}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],
1212 ; SSE3-NEXT: movd %ecx, %xmm0
1217 …}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],
1226 ; SSE3-NEXT: movd %ecx, %xmm0
1227 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4…
1228 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3…
1229 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2…
1230 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1238 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
1243 ; SSSE3-NEXT: psrlw $4, %xmm0
1244 ; SSSE3-NEXT: pand %xmm2, %xmm0
1246 ; SSSE3-NEXT: pcmpeqb %xmm0, %xmm2
1248 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
1250 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1256 ; SSE41-NEXT: movdqa %xmm0, %xmm3
1261 ; SSE41-NEXT: psrlw $4, %xmm0
1262 ; SSE41-NEXT: pand %xmm2, %xmm0
1264 ; SSE41-NEXT: pcmpeqb %xmm0, %xmm2
1266 ; SSE41-NEXT: pshufb %xmm0, %xmm1
1268 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1274 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
1277 ; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
1278 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
1280 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
1282 ; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1283 ; AVX-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1288xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4…
1290 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
1291 ; AVX512-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1297 ; X32-SSE-NEXT: movdqa %xmm0, %xmm3
1302 ; X32-SSE-NEXT: psrlw $4, %xmm0
1303 ; X32-SSE-NEXT: pand %xmm2, %xmm0
1305 ; X32-SSE-NEXT: pcmpeqb %xmm0, %xmm2
1307 ; X32-SSE-NEXT: pshufb %xmm0, %xmm1
1309 ; X32-SSE-NEXT: movdqa %xmm1, %xmm0
1319 ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1323 ; SSE2-NEXT: movd %eax, %xmm0
1335 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
1338 ; SSE2-NEXT: movd %eax, %xmm0
1346 …}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],
1350 ; SSE2-NEXT: movd %edx, %xmm0
1354 …}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],
1357 ; SSE2-NEXT: movd %ecx, %xmm0
1363 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
1368 ; SSE2-NEXT: movd %edx, %xmm0
1372 …}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],
1375 ; SSE2-NEXT: movd %eax, %xmm0
1379 …}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],
1383 ; SSE2-NEXT: movd %eax, %xmm0
1387 …}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],
1394 ; SSE2-NEXT: movd %eax, %xmm0
1395 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4…
1396 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2…
1397 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3…
1398 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1405 ; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1409 ; SSE3-NEXT: movd %eax, %xmm0
1421 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
1424 ; SSE3-NEXT: movd %eax, %xmm0
1432 …}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],
1436 ; SSE3-NEXT: movd %edx, %xmm0
1440 …}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],
1443 ; SSE3-NEXT: movd %ecx, %xmm0
1449 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
1454 ; SSE3-NEXT: movd %edx, %xmm0
1458 …}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],
1461 ; SSE3-NEXT: movd %eax, %xmm0
1465 …}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],
1469 ; SSE3-NEXT: movd %eax, %xmm0
1473 …}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],
1480 ; SSE3-NEXT: movd %eax, %xmm0
1481 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4…
1482 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2…
1483 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3…
1484 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1491 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
1496 ; SSSE3-NEXT: psrlw $4, %xmm0
1497 ; SSSE3-NEXT: pand %xmm2, %xmm0
1499 ; SSSE3-NEXT: pcmpeqb %xmm0, %xmm2
1501 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
1503 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1509 ; SSE41-NEXT: movdqa %xmm0, %xmm3
1514 ; SSE41-NEXT: psrlw $4, %xmm0
1515 ; SSE41-NEXT: pand %xmm2, %xmm0
1517 ; SSE41-NEXT: pcmpeqb %xmm0, %xmm2
1519 ; SSE41-NEXT: pshufb %xmm0, %xmm1
1521 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1527 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
1530 ; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
1531 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
1533 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
1535 ; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1536 ; AVX-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1541xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4…
1543 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
1544 ; AVX512-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1550 ; X32-SSE-NEXT: movdqa %xmm0, %xmm3
1555 ; X32-SSE-NEXT: psrlw $4, %xmm0
1556 ; X32-SSE-NEXT: pand %xmm2, %xmm0
1558 ; X32-SSE-NEXT: pcmpeqb %xmm0, %xmm2
1560 ; X32-SSE-NEXT: pshufb %xmm0, %xmm1
1562 ; X32-SSE-NEXT: movdqa %xmm1, %xmm0
1572 ; SSE-NEXT: movd %rax, %xmm0
1578 ; AVX-NEXT: vmovq %rax, %xmm0
1584 ; AVX512-NEXT: vmovq %rax, %xmm0
1590 ; X32-SSE-NEXT: movd %eax, %xmm0
1600 ; SSE-NEXT: movd %rax, %xmm0
1606 ; AVX-NEXT: vmovq %rax, %xmm0
1612 ; AVX512-NEXT: vmovq %rax, %xmm0
1618 ; X32-SSE-NEXT: movd %eax, %xmm0
1627 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
1632 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
1637 ; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
1642 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
1647 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
1656 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
1661 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
1666 ; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
1671 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
1676 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
1685 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1690 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1695 ; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1700 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1705 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1714 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1719 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1724 ; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1729 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1734 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
1743 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
1748 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
1753 ; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
1758 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
1763 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
1772 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
1777 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
1782 ; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
1787 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
1792 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]