• Home
  • Raw
  • Download

Lines Matching refs:xmm0

17 ; SSE2-NEXT:    movd %xmm0, %rax
22 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
23 ; SSE2-NEXT: movd %xmm0, %rax
26 ; SSE2-NEXT: movd %rax, %xmm0
27 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
28 ; SSE2-NEXT: movdqa %xmm1, %xmm0
33 ; SSE3-NEXT: movd %xmm0, %rax
38 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
39 ; SSE3-NEXT: movd %xmm0, %rax
42 ; SSE3-NEXT: movd %rax, %xmm0
43 ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
44 ; SSE3-NEXT: movdqa %xmm1, %xmm0
49 ; SSSE3-NEXT: movd %xmm0, %rax
54 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
55 ; SSSE3-NEXT: movd %xmm0, %rax
58 ; SSSE3-NEXT: movd %rax, %xmm0
59 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
60 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
65 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
70 ; SSE41-NEXT: movd %xmm0, %rax
73 ; SSE41-NEXT: movd %rax, %xmm0
74 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
79 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
84 ; AVX-NEXT: vmovq %xmm0, %rax
87 ; AVX-NEXT: vmovq %rax, %xmm0
88 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
94 ; X32-SSE-NEXT: pextrd $3, %xmm0, %eax
99 ; X32-SSE-NEXT: pextrd $2, %xmm0, %edx
104 ; X32-SSE-NEXT: pextrd $1, %xmm0, %eax
108 ; X32-SSE-NEXT: movd %xmm0, %ecx
112 ; X32-SSE-NEXT: movd %edx, %xmm0
113 ; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
123 ; SSE2-NEXT: movd %xmm0, %rax
126 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
127 ; SSE2-NEXT: movd %xmm0, %rax
129 ; SSE2-NEXT: movd %rax, %xmm0
130 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
131 ; SSE2-NEXT: movdqa %xmm1, %xmm0
136 ; SSE3-NEXT: movd %xmm0, %rax
139 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
140 ; SSE3-NEXT: movd %xmm0, %rax
142 ; SSE3-NEXT: movd %rax, %xmm0
143 ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
144 ; SSE3-NEXT: movdqa %xmm1, %xmm0
149 ; SSSE3-NEXT: movd %xmm0, %rax
152 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
153 ; SSSE3-NEXT: movd %xmm0, %rax
155 ; SSSE3-NEXT: movd %rax, %xmm0
156 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
157 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
162 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
165 ; SSE41-NEXT: movd %xmm0, %rax
167 ; SSE41-NEXT: movd %rax, %xmm0
168 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
173 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
176 ; AVX1-NEXT: vmovq %xmm0, %rax
178 ; AVX1-NEXT: vmovq %rax, %xmm0
179 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
184 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
187 ; AVX2-NEXT: vmovq %xmm0, %rax
189 ; AVX2-NEXT: vmovq %rax, %xmm0
190 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
196 ; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm1
197 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
198 ; AVX512CDVL-NEXT: vplzcntq %xmm0, %xmm0
200 ; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm0
206 ; AVX512CD-NEXT: vpsubq %xmm0, %xmm1, %xmm1
207 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
210 ; AVX512CD-NEXT: vpsubq %xmm0, %xmm1, %xmm0
215 ; X32-SSE-NEXT: pextrd $2, %xmm0, %eax
217 ; X32-SSE-NEXT: pextrd $3, %xmm0, %edx
223 ; X32-SSE-NEXT: movd %xmm0, %eax
225 ; X32-SSE-NEXT: pextrd $1, %xmm0, %edx
230 ; X32-SSE-NEXT: movd %edx, %xmm0
231 ; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
242 ; SSE2-NEXT: psubd %xmm0, %xmm2
243 ; SSE2-NEXT: pand %xmm0, %xmm2
245 ; SSE2-NEXT: movdqa %xmm2, %xmm0
246 ; SSE2-NEXT: psrld $1, %xmm0
247 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
248 ; SSE2-NEXT: psubd %xmm0, %xmm2
249 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
251 ; SSE2-NEXT: pand %xmm0, %xmm3
253 ; SSE2-NEXT: pand %xmm0, %xmm2
255 ; SSE2-NEXT: movdqa %xmm2, %xmm0
256 ; SSE2-NEXT: psrld $4, %xmm0
257 ; SSE2-NEXT: paddd %xmm2, %xmm0
258 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
259 ; SSE2-NEXT: movdqa %xmm0, %xmm2
262 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
263 ; SSE2-NEXT: psadbw %xmm1, %xmm0
264 ; SSE2-NEXT: packuswb %xmm2, %xmm0
271 ; SSE3-NEXT: psubd %xmm0, %xmm2
272 ; SSE3-NEXT: pand %xmm0, %xmm2
274 ; SSE3-NEXT: movdqa %xmm2, %xmm0
275 ; SSE3-NEXT: psrld $1, %xmm0
276 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
277 ; SSE3-NEXT: psubd %xmm0, %xmm2
278 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
280 ; SSE3-NEXT: pand %xmm0, %xmm3
282 ; SSE3-NEXT: pand %xmm0, %xmm2
284 ; SSE3-NEXT: movdqa %xmm2, %xmm0
285 ; SSE3-NEXT: psrld $4, %xmm0
286 ; SSE3-NEXT: paddd %xmm2, %xmm0
287 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
288 ; SSE3-NEXT: movdqa %xmm0, %xmm2
291 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
292 ; SSE3-NEXT: psadbw %xmm1, %xmm0
293 ; SSE3-NEXT: packuswb %xmm2, %xmm0
300 ; SSSE3-NEXT: psubd %xmm0, %xmm2
301 ; SSSE3-NEXT: pand %xmm0, %xmm2
306 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
307 ; SSSE3-NEXT: movdqa %xmm0, %xmm5
311 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
312 ; SSSE3-NEXT: paddb %xmm5, %xmm0
313 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
316 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
317 ; SSSE3-NEXT: psadbw %xmm1, %xmm0
318 ; SSSE3-NEXT: packuswb %xmm2, %xmm0
325 ; SSE41-NEXT: psubd %xmm0, %xmm2
326 ; SSE41-NEXT: pand %xmm0, %xmm2
331 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
332 ; SSE41-NEXT: movdqa %xmm0, %xmm5
336 ; SSE41-NEXT: pshufb %xmm2, %xmm0
337 ; SSE41-NEXT: paddb %xmm5, %xmm0
338 ; SSE41-NEXT: movdqa %xmm0, %xmm2
341 ; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
342 ; SSE41-NEXT: psadbw %xmm1, %xmm0
343 ; SSE41-NEXT: packuswb %xmm2, %xmm0
349 ; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm2
350 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
351 ; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
353 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3
356 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
357 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
358 ; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
359 ; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
360 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
362 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
363 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
364 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
370 ; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm2
371 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
373 ; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0
375 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm3
378 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
379 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
380 ; AVX2-NEXT: vpshufb %xmm0, %xmm4, %xmm0
381 ; AVX2-NEXT: vpaddb %xmm3, %xmm0, %xmm0
382 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
384 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
385 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
386 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
392 ; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm2
393 ; AVX512CDVL-NEXT: vpandd %xmm2, %xmm0, %xmm0
394 ; AVX512CDVL-NEXT: vpsubd {{.*}}(%rip){1to4}, %xmm0, %xmm0
396 ; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm3
399 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
400 ; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm0
401 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm4, %xmm0
402 ; AVX512CDVL-NEXT: vpaddb %xmm3, %xmm0, %xmm0
403 ; AVX512CDVL-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
405 ; AVX512CDVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
406 ; AVX512CDVL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
407 ; AVX512CDVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
413 ; AVX512CD-NEXT: vpsubd %xmm0, %xmm1, %xmm2
414 ; AVX512CD-NEXT: vpand %xmm2, %xmm0, %xmm0
416 ; AVX512CD-NEXT: vpsubd %xmm2, %xmm0, %xmm0
418 ; AVX512CD-NEXT: vpand %xmm2, %xmm0, %xmm3
421 ; AVX512CD-NEXT: vpsrlw $4, %xmm0, %xmm0
422 ; AVX512CD-NEXT: vpand %xmm2, %xmm0, %xmm0
423 ; AVX512CD-NEXT: vpshufb %xmm0, %xmm4, %xmm0
424 ; AVX512CD-NEXT: vpaddb %xmm3, %xmm0, %xmm0
425 ; AVX512CD-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
427 ; AVX512CD-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
428 ; AVX512CD-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
429 ; AVX512CD-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
436 ; X32-SSE-NEXT: psubd %xmm0, %xmm2
437 ; X32-SSE-NEXT: pand %xmm0, %xmm2
442 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
443 ; X32-SSE-NEXT: movdqa %xmm0, %xmm5
447 ; X32-SSE-NEXT: pshufb %xmm2, %xmm0
448 ; X32-SSE-NEXT: paddb %xmm5, %xmm0
449 ; X32-SSE-NEXT: movdqa %xmm0, %xmm2
452 ; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
453 ; X32-SSE-NEXT: psadbw %xmm1, %xmm0
454 ; X32-SSE-NEXT: packuswb %xmm2, %xmm0
465 ; SSE2-NEXT: psubd %xmm0, %xmm2
466 ; SSE2-NEXT: pand %xmm0, %xmm2
468 ; SSE2-NEXT: movdqa %xmm2, %xmm0
469 ; SSE2-NEXT: psrld $1, %xmm0
470 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
471 ; SSE2-NEXT: psubd %xmm0, %xmm2
472 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
474 ; SSE2-NEXT: pand %xmm0, %xmm3
476 ; SSE2-NEXT: pand %xmm0, %xmm2
478 ; SSE2-NEXT: movdqa %xmm2, %xmm0
479 ; SSE2-NEXT: psrld $4, %xmm0
480 ; SSE2-NEXT: paddd %xmm2, %xmm0
481 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
482 ; SSE2-NEXT: movdqa %xmm0, %xmm2
485 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
486 ; SSE2-NEXT: psadbw %xmm1, %xmm0
487 ; SSE2-NEXT: packuswb %xmm2, %xmm0
494 ; SSE3-NEXT: psubd %xmm0, %xmm2
495 ; SSE3-NEXT: pand %xmm0, %xmm2
497 ; SSE3-NEXT: movdqa %xmm2, %xmm0
498 ; SSE3-NEXT: psrld $1, %xmm0
499 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
500 ; SSE3-NEXT: psubd %xmm0, %xmm2
501 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
503 ; SSE3-NEXT: pand %xmm0, %xmm3
505 ; SSE3-NEXT: pand %xmm0, %xmm2
507 ; SSE3-NEXT: movdqa %xmm2, %xmm0
508 ; SSE3-NEXT: psrld $4, %xmm0
509 ; SSE3-NEXT: paddd %xmm2, %xmm0
510 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
511 ; SSE3-NEXT: movdqa %xmm0, %xmm2
514 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
515 ; SSE3-NEXT: psadbw %xmm1, %xmm0
516 ; SSE3-NEXT: packuswb %xmm2, %xmm0
523 ; SSSE3-NEXT: psubd %xmm0, %xmm2
524 ; SSSE3-NEXT: pand %xmm0, %xmm2
529 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
530 ; SSSE3-NEXT: movdqa %xmm0, %xmm5
534 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
535 ; SSSE3-NEXT: paddb %xmm5, %xmm0
536 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
539 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
540 ; SSSE3-NEXT: psadbw %xmm1, %xmm0
541 ; SSSE3-NEXT: packuswb %xmm2, %xmm0
548 ; SSE41-NEXT: psubd %xmm0, %xmm2
549 ; SSE41-NEXT: pand %xmm0, %xmm2
554 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
555 ; SSE41-NEXT: movdqa %xmm0, %xmm5
559 ; SSE41-NEXT: pshufb %xmm2, %xmm0
560 ; SSE41-NEXT: paddb %xmm5, %xmm0
561 ; SSE41-NEXT: movdqa %xmm0, %xmm2
564 ; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
565 ; SSE41-NEXT: psadbw %xmm1, %xmm0
566 ; SSE41-NEXT: packuswb %xmm2, %xmm0
572 ; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm2
573 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
574 ; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
576 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3
579 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
580 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
581 ; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
582 ; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
583 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
585 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
586 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
587 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
593 ; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm2
594 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
596 ; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0
598 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm3
601 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
602 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
603 ; AVX2-NEXT: vpshufb %xmm0, %xmm4, %xmm0
604 ; AVX2-NEXT: vpaddb %xmm3, %xmm0, %xmm0
605 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
607 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
608 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
609 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
615 ; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm1
616 ; AVX512CDVL-NEXT: vpandd %xmm1, %xmm0, %xmm0
617 ; AVX512CDVL-NEXT: vplzcntd %xmm0, %xmm0
619 ; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm0
625 ; AVX512CD-NEXT: vpsubd %xmm0, %xmm1, %xmm1
626 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
629 ; AVX512CD-NEXT: vpsubd %xmm0, %xmm1, %xmm0
636 ; X32-SSE-NEXT: psubd %xmm0, %xmm2
637 ; X32-SSE-NEXT: pand %xmm0, %xmm2
642 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
643 ; X32-SSE-NEXT: movdqa %xmm0, %xmm5
647 ; X32-SSE-NEXT: pshufb %xmm2, %xmm0
648 ; X32-SSE-NEXT: paddb %xmm5, %xmm0
649 ; X32-SSE-NEXT: movdqa %xmm0, %xmm2
652 ; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
653 ; X32-SSE-NEXT: psadbw %xmm1, %xmm0
654 ; X32-SSE-NEXT: packuswb %xmm2, %xmm0
664 ; SSE2-NEXT: psubw %xmm0, %xmm1
665 ; SSE2-NEXT: pand %xmm0, %xmm1
667 ; SSE2-NEXT: movdqa %xmm1, %xmm0
668 ; SSE2-NEXT: psrlw $1, %xmm0
669 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
670 ; SSE2-NEXT: psubw %xmm0, %xmm1
671 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
673 ; SSE2-NEXT: pand %xmm0, %xmm2
675 ; SSE2-NEXT: pand %xmm0, %xmm1
681 ; SSE2-NEXT: movdqa %xmm2, %xmm0
682 ; SSE2-NEXT: psllw $8, %xmm0
683 ; SSE2-NEXT: paddb %xmm2, %xmm0
684 ; SSE2-NEXT: psrlw $8, %xmm0
690 ; SSE3-NEXT: psubw %xmm0, %xmm1
691 ; SSE3-NEXT: pand %xmm0, %xmm1
693 ; SSE3-NEXT: movdqa %xmm1, %xmm0
694 ; SSE3-NEXT: psrlw $1, %xmm0
695 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
696 ; SSE3-NEXT: psubw %xmm0, %xmm1
697 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
699 ; SSE3-NEXT: pand %xmm0, %xmm2
701 ; SSE3-NEXT: pand %xmm0, %xmm1
707 ; SSE3-NEXT: movdqa %xmm2, %xmm0
708 ; SSE3-NEXT: psllw $8, %xmm0
709 ; SSE3-NEXT: paddb %xmm2, %xmm0
710 ; SSE3-NEXT: psrlw $8, %xmm0
716 ; SSSE3-NEXT: psubw %xmm0, %xmm1
717 ; SSSE3-NEXT: pand %xmm0, %xmm1
719 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
721 ; SSSE3-NEXT: pand %xmm0, %xmm2
726 ; SSSE3-NEXT: pand %xmm0, %xmm1
729 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
730 ; SSSE3-NEXT: psllw $8, %xmm0
731 ; SSSE3-NEXT: paddb %xmm3, %xmm0
732 ; SSSE3-NEXT: psrlw $8, %xmm0
738 ; SSE41-NEXT: psubw %xmm0, %xmm1
739 ; SSE41-NEXT: pand %xmm0, %xmm1
741 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
743 ; SSE41-NEXT: pand %xmm0, %xmm2
748 ; SSE41-NEXT: pand %xmm0, %xmm1
751 ; SSE41-NEXT: movdqa %xmm3, %xmm0
752 ; SSE41-NEXT: psllw $8, %xmm0
753 ; SSE41-NEXT: paddb %xmm3, %xmm0
754 ; SSE41-NEXT: psrlw $8, %xmm0
760 ; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm1
761 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
762 ; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
764 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
767 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
768 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
769 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
770 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
771 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
772 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
773 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
779 ; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm1
780 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
781 ; AVX2-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
783 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
786 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
787 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
788 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
789 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
790 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
791 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
792 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
798 ; AVX512CDVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
799 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
800 ; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
802 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2
805 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
806 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
807 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0
808 ; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0
809 ; AVX512CDVL-NEXT: vpsllw $8, %xmm0, %xmm1
810 ; AVX512CDVL-NEXT: vpaddb %xmm0, %xmm1, %xmm0
811 ; AVX512CDVL-NEXT: vpsrlw $8, %xmm0, %xmm0
817 ; AVX512CD-NEXT: vpsubw %xmm0, %xmm1, %xmm1
818 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
819 ; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
821 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm2
824 ; AVX512CD-NEXT: vpsrlw $4, %xmm0, %xmm0
825 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
826 ; AVX512CD-NEXT: vpshufb %xmm0, %xmm3, %xmm0
827 ; AVX512CD-NEXT: vpaddb %xmm2, %xmm0, %xmm0
828 ; AVX512CD-NEXT: vpsllw $8, %xmm0, %xmm1
829 ; AVX512CD-NEXT: vpaddb %xmm0, %xmm1, %xmm0
830 ; AVX512CD-NEXT: vpsrlw $8, %xmm0, %xmm0
836 ; X32-SSE-NEXT: psubw %xmm0, %xmm1
837 ; X32-SSE-NEXT: pand %xmm0, %xmm1
839 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
841 ; X32-SSE-NEXT: pand %xmm0, %xmm2
846 ; X32-SSE-NEXT: pand %xmm0, %xmm1
849 ; X32-SSE-NEXT: movdqa %xmm3, %xmm0
850 ; X32-SSE-NEXT: psllw $8, %xmm0
851 ; X32-SSE-NEXT: paddb %xmm3, %xmm0
852 ; X32-SSE-NEXT: psrlw $8, %xmm0
862 ; SSE2-NEXT: psubw %xmm0, %xmm1
863 ; SSE2-NEXT: pand %xmm0, %xmm1
865 ; SSE2-NEXT: movdqa %xmm1, %xmm0
866 ; SSE2-NEXT: psrlw $1, %xmm0
867 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
868 ; SSE2-NEXT: psubw %xmm0, %xmm1
869 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
871 ; SSE2-NEXT: pand %xmm0, %xmm2
873 ; SSE2-NEXT: pand %xmm0, %xmm1
879 ; SSE2-NEXT: movdqa %xmm2, %xmm0
880 ; SSE2-NEXT: psllw $8, %xmm0
881 ; SSE2-NEXT: paddb %xmm2, %xmm0
882 ; SSE2-NEXT: psrlw $8, %xmm0
888 ; SSE3-NEXT: psubw %xmm0, %xmm1
889 ; SSE3-NEXT: pand %xmm0, %xmm1
891 ; SSE3-NEXT: movdqa %xmm1, %xmm0
892 ; SSE3-NEXT: psrlw $1, %xmm0
893 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
894 ; SSE3-NEXT: psubw %xmm0, %xmm1
895 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
897 ; SSE3-NEXT: pand %xmm0, %xmm2
899 ; SSE3-NEXT: pand %xmm0, %xmm1
905 ; SSE3-NEXT: movdqa %xmm2, %xmm0
906 ; SSE3-NEXT: psllw $8, %xmm0
907 ; SSE3-NEXT: paddb %xmm2, %xmm0
908 ; SSE3-NEXT: psrlw $8, %xmm0
914 ; SSSE3-NEXT: psubw %xmm0, %xmm1
915 ; SSSE3-NEXT: pand %xmm0, %xmm1
917 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
919 ; SSSE3-NEXT: pand %xmm0, %xmm2
924 ; SSSE3-NEXT: pand %xmm0, %xmm1
927 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
928 ; SSSE3-NEXT: psllw $8, %xmm0
929 ; SSSE3-NEXT: paddb %xmm3, %xmm0
930 ; SSSE3-NEXT: psrlw $8, %xmm0
936 ; SSE41-NEXT: psubw %xmm0, %xmm1
937 ; SSE41-NEXT: pand %xmm0, %xmm1
939 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
941 ; SSE41-NEXT: pand %xmm0, %xmm2
946 ; SSE41-NEXT: pand %xmm0, %xmm1
949 ; SSE41-NEXT: movdqa %xmm3, %xmm0
950 ; SSE41-NEXT: psllw $8, %xmm0
951 ; SSE41-NEXT: paddb %xmm3, %xmm0
952 ; SSE41-NEXT: psrlw $8, %xmm0
958 ; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm1
959 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
960 ; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
962 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
965 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
966 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
967 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
968 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
969 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
970 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
971 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
977 ; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm1
978 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
979 ; AVX2-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
981 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
984 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
985 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
986 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
987 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
988 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
989 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
990 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
996 ; AVX512CDVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
997 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
998 ; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
1000 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2
1003 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
1004 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
1005 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1006 ; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1007 ; AVX512CDVL-NEXT: vpsllw $8, %xmm0, %xmm1
1008 ; AVX512CDVL-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1009 ; AVX512CDVL-NEXT: vpsrlw $8, %xmm0, %xmm0
1015 ; AVX512CD-NEXT: vpsubw %xmm0, %xmm1, %xmm1
1016 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
1017 ; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
1019 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm2
1022 ; AVX512CD-NEXT: vpsrlw $4, %xmm0, %xmm0
1023 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
1024 ; AVX512CD-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1025 ; AVX512CD-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1026 ; AVX512CD-NEXT: vpsllw $8, %xmm0, %xmm1
1027 ; AVX512CD-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1028 ; AVX512CD-NEXT: vpsrlw $8, %xmm0, %xmm0
1034 ; X32-SSE-NEXT: psubw %xmm0, %xmm1
1035 ; X32-SSE-NEXT: pand %xmm0, %xmm1
1037 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1039 ; X32-SSE-NEXT: pand %xmm0, %xmm2
1044 ; X32-SSE-NEXT: pand %xmm0, %xmm1
1047 ; X32-SSE-NEXT: movdqa %xmm3, %xmm0
1048 ; X32-SSE-NEXT: psllw $8, %xmm0
1049 ; X32-SSE-NEXT: paddb %xmm3, %xmm0
1050 ; X32-SSE-NEXT: psrlw $8, %xmm0
1060 ; SSE2-NEXT: psubb %xmm0, %xmm1
1061 ; SSE2-NEXT: pand %xmm0, %xmm1
1063 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1064 ; SSE2-NEXT: psrlw $1, %xmm0
1065 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1066 ; SSE2-NEXT: psubb %xmm0, %xmm1
1067 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1069 ; SSE2-NEXT: pand %xmm0, %xmm2
1071 ; SSE2-NEXT: pand %xmm0, %xmm1
1073 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1074 ; SSE2-NEXT: psrlw $4, %xmm0
1075 ; SSE2-NEXT: paddb %xmm1, %xmm0
1076 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1082 ; SSE3-NEXT: psubb %xmm0, %xmm1
1083 ; SSE3-NEXT: pand %xmm0, %xmm1
1085 ; SSE3-NEXT: movdqa %xmm1, %xmm0
1086 ; SSE3-NEXT: psrlw $1, %xmm0
1087 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
1088 ; SSE3-NEXT: psubb %xmm0, %xmm1
1089 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1091 ; SSE3-NEXT: pand %xmm0, %xmm2
1093 ; SSE3-NEXT: pand %xmm0, %xmm1
1095 ; SSE3-NEXT: movdqa %xmm1, %xmm0
1096 ; SSE3-NEXT: psrlw $4, %xmm0
1097 ; SSE3-NEXT: paddb %xmm1, %xmm0
1098 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
1104 ; SSSE3-NEXT: psubb %xmm0, %xmm1
1105 ; SSSE3-NEXT: pand %xmm0, %xmm1
1110 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1111 ; SSSE3-NEXT: movdqa %xmm0, %xmm4
1115 ; SSSE3-NEXT: pshufb %xmm1, %xmm0
1116 ; SSSE3-NEXT: paddb %xmm4, %xmm0
1122 ; SSE41-NEXT: psubb %xmm0, %xmm1
1123 ; SSE41-NEXT: pand %xmm0, %xmm1
1128 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1129 ; SSE41-NEXT: movdqa %xmm0, %xmm4
1133 ; SSE41-NEXT: pshufb %xmm1, %xmm0
1134 ; SSE41-NEXT: paddb %xmm4, %xmm0
1140 ; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm1
1141 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1142 ; AVX1-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1144 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1147 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1148 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1149 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1150 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1156 ; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm1
1157 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1158 ; AVX2-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1160 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1163 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1164 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1165 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1166 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1172 ; AVX512CDVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
1173 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
1174 ; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1176 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2
1179 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
1180 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
1181 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1182 ; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1188 ; AVX512CD-NEXT: vpsubb %xmm0, %xmm1, %xmm1
1189 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
1190 ; AVX512CD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1192 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm2
1195 ; AVX512CD-NEXT: vpsrlw $4, %xmm0, %xmm0
1196 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
1197 ; AVX512CD-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1198 ; AVX512CD-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1204 ; X32-SSE-NEXT: psubb %xmm0, %xmm1
1205 ; X32-SSE-NEXT: pand %xmm0, %xmm1
1210 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1211 ; X32-SSE-NEXT: movdqa %xmm0, %xmm4
1215 ; X32-SSE-NEXT: pshufb %xmm1, %xmm0
1216 ; X32-SSE-NEXT: paddb %xmm4, %xmm0
1226 ; SSE2-NEXT: psubb %xmm0, %xmm1
1227 ; SSE2-NEXT: pand %xmm0, %xmm1
1229 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1230 ; SSE2-NEXT: psrlw $1, %xmm0
1231 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1232 ; SSE2-NEXT: psubb %xmm0, %xmm1
1233 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1235 ; SSE2-NEXT: pand %xmm0, %xmm2
1237 ; SSE2-NEXT: pand %xmm0, %xmm1
1239 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1240 ; SSE2-NEXT: psrlw $4, %xmm0
1241 ; SSE2-NEXT: paddb %xmm1, %xmm0
1242 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1248 ; SSE3-NEXT: psubb %xmm0, %xmm1
1249 ; SSE3-NEXT: pand %xmm0, %xmm1
1251 ; SSE3-NEXT: movdqa %xmm1, %xmm0
1252 ; SSE3-NEXT: psrlw $1, %xmm0
1253 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
1254 ; SSE3-NEXT: psubb %xmm0, %xmm1
1255 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1257 ; SSE3-NEXT: pand %xmm0, %xmm2
1259 ; SSE3-NEXT: pand %xmm0, %xmm1
1261 ; SSE3-NEXT: movdqa %xmm1, %xmm0
1262 ; SSE3-NEXT: psrlw $4, %xmm0
1263 ; SSE3-NEXT: paddb %xmm1, %xmm0
1264 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
1270 ; SSSE3-NEXT: psubb %xmm0, %xmm1
1271 ; SSSE3-NEXT: pand %xmm0, %xmm1
1276 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1277 ; SSSE3-NEXT: movdqa %xmm0, %xmm4
1281 ; SSSE3-NEXT: pshufb %xmm1, %xmm0
1282 ; SSSE3-NEXT: paddb %xmm4, %xmm0
1288 ; SSE41-NEXT: psubb %xmm0, %xmm1
1289 ; SSE41-NEXT: pand %xmm0, %xmm1
1294 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1295 ; SSE41-NEXT: movdqa %xmm0, %xmm4
1299 ; SSE41-NEXT: pshufb %xmm1, %xmm0
1300 ; SSE41-NEXT: paddb %xmm4, %xmm0
1306 ; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm1
1307 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1308 ; AVX1-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1310 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1313 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1314 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1315 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1316 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1322 ; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm1
1323 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1324 ; AVX2-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1326 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1329 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1330 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1331 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1332 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1338 ; AVX512CDVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
1339 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
1340 ; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1342 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2
1345 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
1346 ; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
1347 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1348 ; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1354 ; AVX512CD-NEXT: vpsubb %xmm0, %xmm1, %xmm1
1355 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
1356 ; AVX512CD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1358 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm2
1361 ; AVX512CD-NEXT: vpsrlw $4, %xmm0, %xmm0
1362 ; AVX512CD-NEXT: vpand %xmm1, %xmm0, %xmm0
1363 ; AVX512CD-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1364 ; AVX512CD-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1370 ; X32-SSE-NEXT: psubb %xmm0, %xmm1
1371 ; X32-SSE-NEXT: pand %xmm0, %xmm1
1376 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1377 ; X32-SSE-NEXT: movdqa %xmm0, %xmm4
1381 ; X32-SSE-NEXT: pshufb %xmm1, %xmm0
1382 ; X32-SSE-NEXT: paddb %xmm4, %xmm0
1392 ; SSE-NEXT: movd %rax, %xmm0
1398 ; AVX-NEXT: vmovq %rax, %xmm0
1404 ; X32-SSE-NEXT: movd %eax, %xmm0
1414 ; SSE-NEXT: movd %rax, %xmm0
1420 ; AVX-NEXT: vmovq %rax, %xmm0
1426 ; X32-SSE-NEXT: movd %eax, %xmm0
1435 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
1440 ; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
1445 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
1450 ; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [8,0,32,0]
1455 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
1460 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
1469 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
1474 ; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
1479 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
1484 ; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [8,0,32,0]
1489 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
1494 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
1503 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1508 ; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1513 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1518 ; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1523 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1528 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1537 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1542 ; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1547 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1552 ; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1557 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1562 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
1571 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1576 ; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1581 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1586 ; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1591 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1596 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1605 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1610 ; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1615 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1620 ; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1625 ; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
1630 ; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]