; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+ssse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+xop | FileCheck %s --check-prefix=XOP
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f,+avx512vl,+avx512vbmi | FileCheck %s --check-prefix=AVX512VBMI

; Deinterleave-style shuffle: load 48 bytes and keep the 32 bytes whose index
; is not congruent to 2 (mod 3), i.e. drop every third byte of the source.
define <32 x i8> @foo(<48 x i8>* %x0) {
; SSE-LABEL: foo:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    movdqu 16(%rdi), %xmm2
; SSE-NEXT:    movdqu 32(%rdi), %xmm1
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    pshufb {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,2,3,5,6]
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,3,4,6,7,9,10,12,13,15],zero,zero,zero,zero,zero
; SSE-NEXT:    por %xmm3, %xmm0
; SSE-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[8,9,11,12,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[1,2,4,5,7,8,10,11,13,14]
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    retq
;
; XOP-LABEL: foo:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqu (%rdi), %xmm0
; XOP-NEXT:    vmovdqu 16(%rdi), %xmm1
; XOP-NEXT:    vmovdqu 32(%rdi), %xmm2
; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,1,3,4,6,7,9,10,12,13,15],xmm1[0,2,3,5,6]
; XOP-NEXT:    vpperm {{.*#+}} xmm1 = xmm1[8,9,11,12,14,15],xmm2[1,2,4,5,7,8,10,11,13,14]
; XOP-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOP-NEXT:    retq
;
; AVX2-LABEL: foo:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqu 32(%rdi), %xmm0
; AVX2-NEXT:    vmovdqu (%rdi), %ymm1
; AVX2-NEXT:    vmovdqu 16(%rdi), %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,u,u,u,0,2,3,5,6]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,3,4,6,7,9,10,12,13,15,u,u,u,u,u,24,25,27,28,30,31,u,u,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,255,255,255,255,255,255,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,1,2,4,5,7,8,10,11,13,14]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: foo:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqu (%rdi), %ymm0
; AVX512F-NEXT:    vmovdqu 32(%rdi), %xmm1
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,1,2,4,5,7,8,10,11,13,14]
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512F-NEXT:    vmovdqu 16(%rdi), %xmm2
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm2[0,2,3,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,3,4,6,7,9,10,12,13,15],zero,zero,zero,zero,zero,ymm0[24,25,27,28,30,31,u,u,u,u,u,u,u,u,u,u]
; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: foo:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqu 32(%rdi), %xmm0
; AVX512BW-NEXT:    vmovdqu (%rdi), %ymm1
; AVX512BW-NEXT:    vmovdqu 16(%rdi), %xmm2
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,u,u,u,0,2,3,5,6]
; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,3,4,6,7,9,10,12,13,15,u,u,u,u,u,24,25,27,28,30,31,u,u,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    movl $63488, %eax # imm = 0xF800
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    vmovdqu8 %ymm2, %ymm1 {%k1}
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,1,2,4,5,7,8,10,11,13,14]
; AVX512BW-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,1,2,3,4,5,6,7,8,9,10,27,28,29,30,31]
; AVX512BW-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512VBMI-LABEL: foo:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqu (%rdi), %ymm1
; AVX512VBMI-NEXT:    vmovdqu 32(%rdi), %xmm2
; AVX512VBMI-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,1,3,4,6,7,9,10,12,13,15,16,18,19,21,22,24,25,27,28,30,31,33,34,36,37,39,40,42,43,45,46]
; AVX512VBMI-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0
; AVX512VBMI-NEXT:    retq
  %1 = load <48 x i8>, <48 x i8>* %x0, align 1
  %2 = shufflevector <48 x i8> %1, <48 x i8> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 4, i32 6, i32 7, i32 9, i32 10, i32 12, i32 13, i32 15, i32 16, i32 18, i32 19, i32 21, i32 22, i32 24, i32 25, i32 27, i32 28, i32 30, i32 31, i32 33, i32 34, i32 36, i32 37, i32 39, i32 40, i32 42, i32 43, i32 45, i32 46>
  ret <32 x i8> %2
}