• Home
  • Raw
  • Download

Lines Matching refs:xmm0

14 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
15 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
16 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
22 ; SSSE3-NEXT: pshufb %xmm1, %xmm0
28 ; SSE41-NEXT: pshufb %xmm1, %xmm0
34 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
39 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
48 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
49 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
50 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
55 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
60 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
65 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
74 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
75 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
76 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
77 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
78 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
83 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
88 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
93 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
102 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
103 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
108 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
109 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
118 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
119 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
124 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
125 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
134 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
135 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
136 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
137 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
138 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
139 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
144 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
149 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
154 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
163 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
168 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
177 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
178 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
183 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
184 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
189 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
198 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
203 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
212 … {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1…
217 … {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1…
226 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
231 ; SSE2-NEXT: pandn %xmm0, %xmm2
233 ; SSE2-NEXT: movdqa %xmm2, %xmm0
240 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
241 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
248 …}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],
249 ; SSE41-NEXT: movdqa %xmm1, %xmm0
256 …w {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],…
262 …w {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],…
272 ; SSE2-NEXT: movdqa %xmm0, %xmm2
276 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
277 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
278 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
279 ; SSE2-NEXT: packuswb %xmm2, %xmm0
284 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
289 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
294 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
307 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2…
308 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
309 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
310 ; SSE2-NEXT: packuswb %xmm1, %xmm0
315 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
316 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
321 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
322 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
327 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
328 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
338 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
339 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
341 ; SSE2-NEXT: movdqa %xmm0, %xmm2
344 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
345 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
348 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
351 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
352 ; SSE2-NEXT: packuswb %xmm3, %xmm0
358 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
359 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
365 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
366 ; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
372 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
373 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
383 ; SSE2-NEXT: andps %xmm2, %xmm0
385 ; SSE2-NEXT: orps %xmm2, %xmm0
391 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
392 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
397 ; SSE41-NEXT: movdqa %xmm0, %xmm2
398 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
400 ; SSE41-NEXT: movdqa %xmm1, %xmm0
406 ; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
416 ; SSE2-NEXT: andps %xmm2, %xmm0
418 ; SSE2-NEXT: orps %xmm2, %xmm0
424 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12…
425 ; SSSE3-NEXT: por %xmm1, %xmm0
430 ; SSE41-NEXT: movdqa %xmm0, %xmm2
431 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
433 ; SSE41-NEXT: movdqa %xmm1, %xmm0
439 ; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
448 ; SSE-NEXT: andps {{.*}}(%rip), %xmm0
453 ; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
463 ; SSE2-NEXT: andps %xmm2, %xmm0
465 ; SSE2-NEXT: orps %xmm2, %xmm0
471 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0
472 ; SSSE3-NEXT: por %xmm1, %xmm0
477 ; SSE41-NEXT: movdqa %xmm0, %xmm2
478 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
480 ; SSE41-NEXT: movdqa %xmm1, %xmm0
486 ; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
497 ; SSE2-NEXT: andnps %xmm0, %xmm2
499 ; SSE2-NEXT: movaps %xmm2, %xmm0
504 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zer…
506 ; SSSE3-NEXT: por %xmm1, %xmm0
511 ; SSE41-NEXT: movdqa %xmm0, %xmm2
512 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
514 ; SSE41-NEXT: movdqa %xmm2, %xmm0
520 ; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
529 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
530 ; SSE2-NEXT: packuswb %xmm0, %xmm0
531 ; SSE2-NEXT: packuswb %xmm0, %xmm0
536 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
541 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
546 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
600 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
601 ; SSE2-NEXT: packuswb %xmm0, %xmm0
602 ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
607 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero…
612 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero…
617 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,…
627 ; SSE-NEXT: movd %eax, %xmm0
633 ; AVX-NEXT: vmovd %eax, %xmm0
644 ; SSE2-NEXT: pxor %xmm0, %xmm0
645 ; SSE2-NEXT: pinsrw $2, %edi, %xmm0
651 ; SSSE3-NEXT: pxor %xmm0, %xmm0
652 ; SSSE3-NEXT: pinsrw $2, %edi, %xmm0
657 ; SSE41-NEXT: pxor %xmm0, %xmm0
658 ; SSE41-NEXT: pinsrb $5, %edi, %xmm0
663 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
664 ; AVX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
675 ; SSE2-NEXT: pxor %xmm0, %xmm0
676 ; SSE2-NEXT: pinsrw $7, %edi, %xmm0
682 ; SSSE3-NEXT: pxor %xmm0, %xmm0
683 ; SSSE3-NEXT: pinsrw $7, %edi, %xmm0
688 ; SSE41-NEXT: pxor %xmm0, %xmm0
689 ; SSE41-NEXT: pinsrb $15, %edi, %xmm0
694 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
695 ; AVX-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0
706 ; SSE2-NEXT: pxor %xmm0, %xmm0
707 ; SSE2-NEXT: pinsrw $1, %eax, %xmm0
713 ; SSSE3-NEXT: pxor %xmm0, %xmm0
714 ; SSSE3-NEXT: pinsrw $1, %eax, %xmm0
719 ; SSE41-NEXT: pxor %xmm0, %xmm0
720 ; SSE41-NEXT: pinsrb $2, %edi, %xmm0
725 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
726 ; AVX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0
736 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,x…
741 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,
750 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zer…
755 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ze…
765 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
766 ; SSE2-NEXT: por %xmm1, %xmm0
771 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
776 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
781 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
790 ; SSE2-NEXT: movdqa %xmm0, %xmm1
792 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
793 ; SSE2-NEXT: por %xmm1, %xmm0
798 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
803 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
808 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
818 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,…
819 ; SSE2-NEXT: por %xmm1, %xmm0
824 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
829 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
834 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
843 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
845 ; SSE2-NEXT: por %xmm1, %xmm0
850 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
851 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
856 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
857 ; SSE41-NEXT: movdqa %xmm1, %xmm0
862 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
871 ; SSE2-NEXT: movdqa %xmm0, %xmm1
873 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,…
874 ; SSE2-NEXT: por %xmm1, %xmm0
879 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
884 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
889 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
898 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,z…
900 ; SSE2-NEXT: por %xmm1, %xmm0
905 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
906 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
911 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
912 ; SSE41-NEXT: movdqa %xmm1, %xmm0
917 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
926 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
927 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
928 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
933 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zer…
938 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,z…
943 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,ze…
953 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
954 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],x…
955 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
960 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zer…
965 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,z…
970 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,ze…
979 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
980 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
985 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
986 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
991 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zer…
996 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero…
1006 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1007 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],x…
1013 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1014 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
1019 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zer…
1024 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero…
1033 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1038 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1043 …XT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,x…
1048 …T: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,x…
1058 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1064 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1069 …XT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,x…
1074 …T: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,x…
1084 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1090 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2…
1091 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,3]
1099 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,1,3]
1100 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1101 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4]
1102 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
1103 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1105 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
1106 ; SSE2-NEXT: pand %xmm0, %xmm2
1109 ; SSE2-NEXT: pandn %xmm1, %xmm0
1110 ; SSE2-NEXT: por %xmm2, %xmm0
1116 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm…
1117 ; SSSE3-NEXT: por %xmm1, %xmm0
1123 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm…
1124 ; SSE41-NEXT: por %xmm1, %xmm0
1130 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0
1131 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
1154 ; SSE-NEXT: xorps %xmm0, %xmm0
1155 ; SSE-NEXT: movaps %xmm0, (%rdi)
1156 ; SSE-NEXT: movaps %xmm0, (%rsi)
1161 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1162 ; AVX-NEXT: vmovaps %xmm0, (%rdi)
1163 ; AVX-NEXT: vmovaps %xmm0, (%rsi)
1181 ; SSE-NEXT: psllw $8, %xmm0
1186 ; AVX-NEXT: vpsllw $8, %xmm0, %xmm0
1195 ; SSE-NEXT: pslld $24, %xmm0
1200 ; AVX-NEXT: vpslld $24, %xmm0, %xmm0
1209 ; SSE-NEXT: psllq $56, %xmm0
1214 ; AVX-NEXT: vpsllq $56, %xmm0, %xmm0
1223 ; SSE-NEXT: psllq $8, %xmm0
1228 ; AVX-NEXT: vpsllq $8, %xmm0, %xmm0
1237 ; SSE-NEXT: psrlw $8, %xmm0
1242 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
1251 ; SSE-NEXT: psrld $16, %xmm0
1256 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
1265 ; SSE-NEXT: psrlq $56, %xmm0
1270 ; AVX-NEXT: vpsrlq $56, %xmm0, %xmm0
1281 ; SSE2-NEXT: pand %xmm2, %xmm0
1282 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1289 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
1290 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1297 ; SSE41-NEXT: pshufb %xmm2, %xmm0
1298 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1305 ; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1306 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1316 ; SSE-NEXT: psrld $8, %xmm0
1321 ; AVX-NEXT: vpsrld $8, %xmm0, %xmm0
1330 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1335 …cklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1…
1349 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1350 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1351 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1352 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1357 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1359 ; SSSE3-NEXT: pshufb %xmm1, %xmm0
1364 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1366 ; SSE41-NEXT: pshufb %xmm1, %xmm0
1371 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1373 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1378 ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
1391 ; SSE2-NEXT: movd %eax, %xmm0
1392 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1393 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1394 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1400 ; SSSE3-NEXT: movd %eax, %xmm0
1402 ; SSSE3-NEXT: pshufb %xmm1, %xmm0
1408 ; SSE41-NEXT: movd %eax, %xmm0
1410 ; SSE41-NEXT: pshufb %xmm1, %xmm0
1416 ; AVX1-NEXT: vmovd %eax, %xmm0
1418 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1423 ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
1436 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1437 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1438 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1439 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1444 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1445 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1450 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1451 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1456 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1457 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1462 ; AVX2-NEXT: vpbroadcastb 1(%rdi), %xmm0
1474 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1475 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1476 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
1477 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1482 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1483 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1488 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1489 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1494 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1495 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1500 ; AVX2-NEXT: vpbroadcastb 2(%rdi), %xmm0
1513 ; SSE2-NEXT: movd %eax, %xmm0
1514 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1515 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1516 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1522 ; SSSE3-NEXT: movd %eax, %xmm0
1523 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1529 ; SSE41-NEXT: movd %eax, %xmm0
1530 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1536 ; AVX1-NEXT: vmovd %eax, %xmm0
1537 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1544 ; AVX2-NEXT: vmovd %eax, %xmm0
1545 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
1559 ; SSE2-NEXT: movd %eax, %xmm0
1560 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1561 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
1562 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1568 ; SSSE3-NEXT: movd %eax, %xmm0
1569 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1575 ; SSE41-NEXT: movd %eax, %xmm0
1576 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1582 ; AVX1-NEXT: vmovd %eax, %xmm0
1583 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1590 ; AVX2-NEXT: vmovd %eax, %xmm0
1591 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0