• Home
  • Raw
  • Download

Lines Matching refs:AVX512

8 …pu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX…
82 ; AVX512-LABEL: var_shift_v2i64:
83 ; AVX512: ## BB#0:
84 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
85 ; AVX512-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
86 ; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
87 ; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
88 ; AVX512-NEXT: vpsubq %xmm3, %xmm0, %xmm0
89 ; AVX512-NEXT: retq
191 ; AVX512-LABEL: var_shift_v4i32:
192 ; AVX512: ## BB#0:
193 ; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
194 ; AVX512-NEXT: retq
325 ; AVX512-LABEL: var_shift_v8i16:
326 ; AVX512: ## BB#0:
327 ; AVX512-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
328 ; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
329 ; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
330 ; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
331 ; AVX512-NEXT: retq
501 ; AVX512-LABEL: var_shift_v16i8:
502 ; AVX512: ## BB#0:
503 ; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
504 ; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
505 ; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
506 ; AVX512-NEXT: vpsraw $4, %xmm3, %xmm4
507 ; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
508 ; AVX512-NEXT: vpsraw $2, %xmm3, %xmm4
509 ; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
510 ; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
511 ; AVX512-NEXT: vpsraw $1, %xmm3, %xmm4
512 ; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
513 ; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
514 ; AVX512-NEXT: vpsrlw $8, %xmm2, %xmm2
515 ; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
516 ; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
517 ; AVX512-NEXT: vpsraw $4, %xmm0, %xmm3
518 ; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
519 ; AVX512-NEXT: vpsraw $2, %xmm0, %xmm3
520 ; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
521 ; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
522 ; AVX512-NEXT: vpsraw $1, %xmm0, %xmm3
523 ; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
524 ; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
525 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm0
526 ; AVX512-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
527 ; AVX512-NEXT: retq
629 ; AVX512-LABEL: splatvar_shift_v2i64:
630 ; AVX512: ## BB#0:
631 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
632 ; AVX512-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
633 ; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
634 ; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
635 ; AVX512-NEXT: vpsubq %xmm2, %xmm0, %xmm0
636 ; AVX512-NEXT: retq
681 ; AVX512-LABEL: splatvar_shift_v4i32:
682 ; AVX512: ## BB#0:
683 ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
684 ; AVX512-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
685 ; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
686 ; AVX512-NEXT: retq
729 ; AVX512-LABEL: splatvar_shift_v8i16:
730 ; AVX512: ## BB#0:
731 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
732 ; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
733 ; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
734 ; AVX512-NEXT: retq
923 ; AVX512-LABEL: splatvar_shift_v16i8:
924 ; AVX512: ## BB#0:
925 ; AVX512-NEXT: vpbroadcastb %xmm1, %xmm1
926 ; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
927 ; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
928 ; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
929 ; AVX512-NEXT: vpsraw $4, %xmm3, %xmm4
930 ; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
931 ; AVX512-NEXT: vpsraw $2, %xmm3, %xmm4
932 ; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
933 ; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
934 ; AVX512-NEXT: vpsraw $1, %xmm3, %xmm4
935 ; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
936 ; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
937 ; AVX512-NEXT: vpsrlw $8, %xmm2, %xmm2
938 ; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
939 ; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
940 ; AVX512-NEXT: vpsraw $4, %xmm0, %xmm3
941 ; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
942 ; AVX512-NEXT: vpsraw $2, %xmm0, %xmm3
943 ; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
944 ; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
945 ; AVX512-NEXT: vpsraw $1, %xmm0, %xmm3
946 ; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
947 ; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
948 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm0
949 ; AVX512-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
950 ; AVX512-NEXT: retq
1070 ; AVX512-LABEL: constant_shift_v2i64:
1071 ; AVX512: ## BB#0:
1072 ; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
1073 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [4611686018427387904,72057594037927936]
1074 ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1075 ; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0
1076 ; AVX512-NEXT: retq
1154 ; AVX512-LABEL: constant_shift_v4i32:
1155 ; AVX512: ## BB#0:
1156 ; AVX512-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
1157 ; AVX512-NEXT: retq
1236 ; AVX512-LABEL: constant_shift_v8i16:
1237 ; AVX512: ## BB#0:
1238 ; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1239 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
1240 ; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
1241 ; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
1242 ; AVX512-NEXT: retq
1397 ; AVX512-LABEL: constant_shift_v16i8:
1398 ; AVX512: ## BB#0:
1399 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
1400 ; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
1401 ; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1402 ; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1403 ; AVX512-NEXT: vpsraw $4, %xmm3, %xmm4
1404 ; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
1405 ; AVX512-NEXT: vpsraw $2, %xmm3, %xmm4
1406 ; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
1407 ; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
1408 ; AVX512-NEXT: vpsraw $1, %xmm3, %xmm4
1409 ; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
1410 ; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
1411 ; AVX512-NEXT: vpsrlw $8, %xmm2, %xmm2
1412 ; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1413 ; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1414 ; AVX512-NEXT: vpsraw $4, %xmm0, %xmm3
1415 ; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
1416 ; AVX512-NEXT: vpsraw $2, %xmm0, %xmm3
1417 ; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
1418 ; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
1419 ; AVX512-NEXT: vpsraw $1, %xmm0, %xmm3
1420 ; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
1421 ; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
1422 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm0
1423 ; AVX512-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
1424 ; AVX512-NEXT: retq
1532 ; AVX512-LABEL: splatconstant_shift_v2i64:
1533 ; AVX512: ## BB#0:
1534 ; AVX512-NEXT: vpsrad $7, %xmm0, %xmm1
1535 ; AVX512-NEXT: vpsrlq $7, %xmm0, %xmm0
1536 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1537 ; AVX512-NEXT: retq
1568 ; AVX512-LABEL: splatconstant_shift_v4i32:
1569 ; AVX512: ## BB#0:
1570 ; AVX512-NEXT: vpsrad $5, %xmm0, %xmm0
1571 ; AVX512-NEXT: retq
1597 ; AVX512-LABEL: splatconstant_shift_v8i16:
1598 ; AVX512: ## BB#0:
1599 ; AVX512-NEXT: vpsraw $3, %xmm0, %xmm0
1600 ; AVX512-NEXT: retq
1636 ; AVX512-LABEL: splatconstant_shift_v16i8:
1637 ; AVX512: ## BB#0:
1638 ; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
1639 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1640 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1641 ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1642 ; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
1643 ; AVX512-NEXT: retq