; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s

; Single-source shufflevector tests for x86-64 with AVX-512 F, VL and BW.
; Every shuffle pattern ("maskN") is exercised in up to three forms:
;   * unmasked (mask0 and mask3 only),
;   * merge-masked:  select(%mask == 0, shuffle, %vec2)           -> {%k1}
;   * zero-masked:   select(%mask == 0, shuffle, zeroinitializer) -> {%k1} {z}
; The *_mem_* variants load the shuffle source from the pointer %vp instead of
; taking it as a vector argument. The shuffle mask in each function body is
; the same index list that the expected instruction comment prints.

; ---- <16 x i8>, register source -------------------------------------------

define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) {
; CHECK-LABEL: test_16xi8_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
; CHECK-NEXT:    retq
  %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_16xi8_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_16xi8_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_16xi8_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_16xi8_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_16xi8_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_16xi8_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) {
; CHECK-LABEL: test_16xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
; CHECK-NEXT:    retq
  %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_16xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_16xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

; ---- <16 x i8>, memory source ---------------------------------------------

define <16 x i8> @test_16xi8_perm_mem_mask0(<16 x i8>* %vp) {
; CHECK-LABEL: test_16xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_16xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm2
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm1
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_16xi8_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm2
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm1
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_16xi8_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm2
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm1
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_16xi8_perm_mem_mask3(<16 x i8>* %vp) {
; CHECK-LABEL: test_16xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_16xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm2
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
  ret <16 x i8> %res
}

define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm1
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %vp
  %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

; ---- <32 x i8>, register source -------------------------------------------

define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) {
; CHECK-LABEL: test_32xi8_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21]
; CHECK-NEXT:    retq
  %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21>
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_32xi8_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_32xi8_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_32xi8_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_32xi8_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_32xi8_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_32xi8_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) {
; CHECK-LABEL: test_32xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18]
; CHECK-NEXT:    retq
  %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18>
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_32xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_32xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

; ---- <32 x i8>, memory source ---------------------------------------------

define <32 x i8> @test_32xi8_perm_mem_mask0(<32 x i8>* %vp) {
; CHECK-LABEL: test_32xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22>
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_32xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm2
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm1
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_32xi8_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm2
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm1
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_32xi8_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm2
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm1
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_32xi8_perm_mem_mask3(<32 x i8>* %vp) {
; CHECK-LABEL: test_32xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29>
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_32xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm2
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
  ret <32 x i8> %res
}

define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %ymm1
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29]
; CHECK-NEXT:    retq
  %vec = load <32 x i8>, <32 x i8>* %vp
  %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29>
  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

; ---- <64 x i8>, register source -------------------------------------------

define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) {
; CHECK-LABEL: test_64xi8_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62]
; CHECK-NEXT:    retq
  %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62>
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62]
; CHECK-NEXT:    retq
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49]
; CHECK-NEXT:    retq
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60]
; CHECK-NEXT:    retq
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) {
; CHECK-LABEL: test_64xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61]
; CHECK-NEXT:    retq
  %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61]
; CHECK-NEXT:    retq
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

; ---- <64 x i8>, memory source ---------------------------------------------

define <64 x i8> @test_64xi8_perm_mem_mask0(<64 x i8>* %vp) {
; CHECK-LABEL: test_64xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

define <64 x i8> @test_64xi8_perm_mem_mask3(<64 x i8>* %vp) {
; CHECK-LABEL: test_64xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_64xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
  ret <64 x i8> %res
}

define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm1
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60]
; CHECK-NEXT:    retq
  %vec = load <64 x i8>, <64 x i8>* %vp
  %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

; ---- <8 x i16>, register source (high/low word-half shuffles) --------------

define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) {
; CHECK-LABEL: test_8xi16_perm_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6]
; CHECK-NEXT:    retq
  %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6>
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_8xi16_perm_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6>
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6>
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_8xi16_perm_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_8xi16_perm_high_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5>
  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
  %res = 
select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) { ; CHECK-LABEL: test_8xi16_perm_low_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] ; CHECK-NEXT: retq %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_low_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; 
CHECK-LABEL: test_masked_8xi16_perm_high_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_low_mask5: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask5: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_8xi16_perm_high_mask6(<8 x 
i16> %vec) { ; CHECK-LABEL: test_8xi16_perm_high_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] ; CHECK-NEXT: retq %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_high_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_low_mask7: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask7: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; 
CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) { ; CHECK-LABEL: test_8xi16_perm_high_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] ; 
CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) { ; CHECK-LABEL: test_8xi16_perm_low_mem_mask3: ; CHECK: # 
%bb.0: ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask5: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) { ; CHECK-LABEL: test_8xi16_perm_high_mem_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, 
%xmm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask7: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res } define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) { ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res } define <16 x i16> 
@test_16xi16_perm_high_mask0(<16 x i16> %vec) { ; CHECK-LABEL: test_16xi16_perm_high_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] ; CHECK-NEXT: retq %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_high_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_low_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> 
@test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_high_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) { ; CHECK-LABEL: test_16xi16_perm_low_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] ; CHECK-NEXT: retq %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> 
%vec, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_low_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_high_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp 
eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_low_mask5: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask5: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) { ; CHECK-LABEL: test_16xi16_perm_high_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] ; CHECK-NEXT: retq %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_high_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 
x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_low_mask7: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask7: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { ; CHECK-LABEL: test_16xi16_perm_high_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] ; CHECK-NEXT: retq %vec = load <16 
x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] ; CHECK-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] ; CHECK-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] ; CHECK-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: 
; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] ; CHECK-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] ; CHECK-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res } define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] ; CHECK-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res } define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { ; CHECK-LABEL: test_16xi16_perm_low_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] ; CHECK-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> 
@test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
  ret <16 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
  ret <16 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
  ret <16 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

; Unmasked shuffle of the vector loaded from %vp.
define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) {
; CHECK-LABEL: test_16xi16_perm_high_mem_mask6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
  ret <16 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
  ret <16 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
  ret <16 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %shuf = shufflevector <16 x
i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

; Unmasked shuffle of %vec.
define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) {
; CHECK-LABEL: test_32xi16_perm_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28]
; CHECK-NEXT:    retq
  %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28>
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_high_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31>
  %cmp =
icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked shuffle of %vec.
define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) {
; CHECK-LABEL: test_32xi16_perm_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31]
; CHECK-NEXT:    retq
  %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_high_mask4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_low_mask5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x
i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked shuffle of %vec.
define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) {
; CHECK-LABEL: test_32xi16_perm_high_mask6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30]
; CHECK-NEXT:    retq
  %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30>
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_high_mask6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_low_mask7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31]
; CHECK-NEXT:    retq
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked shuffle of the vector loaded from %vp.
define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) {
; CHECK-LABEL: test_32xi16_perm_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <32 x i16>
@test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked shuffle of the vector loaded from %vp.
define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) {
; CHECK-LABEL: test_32xi16_perm_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec,
<32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32
x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
; The expected code is an unmasked dword vpshufd followed by a masked vmovdqu16,
; so the word-level mask below is the expansion of the dword indices in the CHECK line.
define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15]
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
; As above, the word-level mask is the expansion of the dword vpshufd indices.
define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15]
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked shuffle of the vector loaded from %vp.
define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) {
; CHECK-LABEL: test_32xi16_perm_high_mem_mask6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31]
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31]
; CHECK-NEXT:    retq
  %vec = load <32 x
i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked shuffle of %vec.
define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) {
; CHECK-LABEL: test_4xi32_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0]
; CHECK-NEXT:    retq
  %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
  ret <4 x i32> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_4xi32_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_4xi32_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_4xi32_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x
i32> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_4xi32_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_4xi32_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_4xi32_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Unmasked shuffle of %vec.
define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) {
; CHECK-LABEL: test_4xi32_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3]
; CHECK-NEXT:    retq
  %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
  ret <4 x i32> %res
}

; Merge-masking: where %mask == 0 take the shuffle of %vec, else keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_4xi32_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

; Zero-masking: where %mask == 0 take the shuffle of %vec, else zero.
define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_4xi32_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Unmasked shuffle of the vector loaded from %vp.
define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) {
; CHECK-LABEL: test_4xi32_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,3,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i32>, <4 x i32>* %vp
  %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
  ret <4 x i32> %res
}

; Merge-masking: where %mask == 0 take the shuffle of the load from %vp, else keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_4xi32_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i32>, <4 x i32>* %vp
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

; Zero-masking: where %mask == 0 take the shuffle of the load from %vp, else zero.
define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i32>, <4 x i32>* %vp
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
  %cmp = icmp eq <4 x i32> %mask,
zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer ret <4 x i32> %res } define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 ret <4 x i32> %res } define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer ret <4 x i32> %res } define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 ret <4 x i32> %res } define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf 
= shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer ret <4 x i32> %res } define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) { ; CHECK-LABEL: test_4xi32_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %res } define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 ret <4 x i32> %res } define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer ret <4 x i32> %res } define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) { ; CHECK-LABEL: test_8xi32_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] ; CHECK-NEXT: retq %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_8xi32_perm_mask0: ; CHECK: # 
%bb.0: ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res } define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_8xi32_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res } define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; CHECK-LABEL: 
test_masked_8xi32_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res } define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res } define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) { ; CHECK-LABEL: test_8xi32_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] ; CHECK-NEXT: retq %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_8xi32_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res } define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] ; CHECK-NEXT: retq %shuf = shufflevector <8 x 
i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res } define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { ; CHECK-LABEL: test_8xi32_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res } define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, 
zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res } define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res } define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res } define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { ; CHECK-LABEL: test_8xi32_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret 
<8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res } define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res } define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) { ; CHECK-LABEL: test_16xi32_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] ; CHECK-NEXT: retq %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_16xi32_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res } define <16 x i32> 
@test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_16xi32_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res } define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_16xi32_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x 
i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res } define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res } define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) { ; CHECK-LABEL: test_16xi32_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] ; CHECK-NEXT: retq %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_16xi32_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res } define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> 
%shuf, <16 x i32> zeroinitializer ret <16 x i32> %res } define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; CHECK-LABEL: test_16xi32_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res } define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x 
i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res } define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res } define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res } define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; CHECK-LABEL: test_16xi32_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = 
mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res } define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] ; CHECK-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res }