1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX 4 5; This test is an assembly of avx512 shuffling instructions to check their scheduling 6 7define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) { 8; GENERIC-LABEL: test_16xi16_perm_mask0: 9; GENERIC: # %bb.0: 10; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 11; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 12; GENERIC-NEXT: retq # sched: [1:1.00] 13; 14; SKX-LABEL: test_16xi16_perm_mask0: 15; SKX: # %bb.0: 16; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 17; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00] 18; SKX-NEXT: retq # sched: [7:1.00] 19 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 20 ret <16 x i16> %res 21} 22define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 23; GENERIC-LABEL: test_masked_16xi16_perm_mask0: 24; GENERIC: # %bb.0: 25; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 26; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 27; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 28; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 29; GENERIC-NEXT: retq # sched: [1:1.00] 30; 31; SKX-LABEL: test_masked_16xi16_perm_mask0: 32; SKX: # %bb.0: 33; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 34; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 35; SKX-NEXT: vpermw %ymm0, 
%ymm3, %ymm1 {%k1} # sched: [6:2.00] 36; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 37; SKX-NEXT: retq # sched: [7:1.00] 38 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 39 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 40 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 41 ret <16 x i16> %res 42} 43 44define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %mask) { 45; GENERIC-LABEL: test_masked_z_16xi16_perm_mask0: 46; GENERIC: # %bb.0: 47; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 48; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 49; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 50; GENERIC-NEXT: retq # sched: [1:1.00] 51; 52; SKX-LABEL: test_masked_z_16xi16_perm_mask0: 53; SKX: # %bb.0: 54; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 55; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 56; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] 57; SKX-NEXT: retq # sched: [7:1.00] 58 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 59 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 60 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 61 ret <16 x i16> %res 62} 63define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 64; GENERIC-LABEL: test_masked_16xi16_perm_mask1: 65; GENERIC: # %bb.0: 66; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 67; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 68; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 
69; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 70; GENERIC-NEXT: retq # sched: [1:1.00] 71; 72; SKX-LABEL: test_masked_16xi16_perm_mask1: 73; SKX: # %bb.0: 74; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 75; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 76; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] 77; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 78; SKX-NEXT: retq # sched: [7:1.00] 79 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 80 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 81 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 82 ret <16 x i16> %res 83} 84 85define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %mask) { 86; GENERIC-LABEL: test_masked_z_16xi16_perm_mask1: 87; GENERIC: # %bb.0: 88; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 89; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 90; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 91; GENERIC-NEXT: retq # sched: [1:1.00] 92; 93; SKX-LABEL: test_masked_z_16xi16_perm_mask1: 94; SKX: # %bb.0: 95; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 96; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 97; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] 98; SKX-NEXT: retq # sched: [7:1.00] 99 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 100 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 101 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 102 ret <16 x i16> %res 103} 104define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x 
i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 105; GENERIC-LABEL: test_masked_16xi16_perm_mask2: 106; GENERIC: # %bb.0: 107; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 108; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 109; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 110; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 111; GENERIC-NEXT: retq # sched: [1:1.00] 112; 113; SKX-LABEL: test_masked_16xi16_perm_mask2: 114; SKX: # %bb.0: 115; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 116; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 117; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] 118; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 119; SKX-NEXT: retq # sched: [7:1.00] 120 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 121 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 122 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 123 ret <16 x i16> %res 124} 125 126define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %mask) { 127; GENERIC-LABEL: test_masked_z_16xi16_perm_mask2: 128; GENERIC: # %bb.0: 129; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 130; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 131; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 132; GENERIC-NEXT: retq # sched: [1:1.00] 133; 134; SKX-LABEL: test_masked_z_16xi16_perm_mask2: 135; SKX: # %bb.0: 136; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 137; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 138; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] 139; SKX-NEXT: retq # sched: [7:1.00] 140 %shuf = 
shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 141 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 142 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 143 ret <16 x i16> %res 144} 145define <16 x i16> @test_16xi16_perm_mask3(<16 x i16> %vec) { 146; GENERIC-LABEL: test_16xi16_perm_mask3: 147; GENERIC: # %bb.0: 148; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 149; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 150; GENERIC-NEXT: retq # sched: [1:1.00] 151; 152; SKX-LABEL: test_16xi16_perm_mask3: 153; SKX: # %bb.0: 154; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 155; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00] 156; SKX-NEXT: retq # sched: [7:1.00] 157 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 158 ret <16 x i16> %res 159} 160define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 161; GENERIC-LABEL: test_masked_16xi16_perm_mask3: 162; GENERIC: # %bb.0: 163; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 164; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 165; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 166; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 167; GENERIC-NEXT: retq # sched: [1:1.00] 168; 169; SKX-LABEL: test_masked_16xi16_perm_mask3: 170; SKX: # %bb.0: 171; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 172; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 173; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] 174; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: 
[1:0.33] 175; SKX-NEXT: retq # sched: [7:1.00] 176 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 177 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 178 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 179 ret <16 x i16> %res 180} 181 182define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %mask) { 183; GENERIC-LABEL: test_masked_z_16xi16_perm_mask3: 184; GENERIC: # %bb.0: 185; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 186; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 187; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 188; GENERIC-NEXT: retq # sched: [1:1.00] 189; 190; SKX-LABEL: test_masked_z_16xi16_perm_mask3: 191; SKX: # %bb.0: 192; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 193; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 194; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] 195; SKX-NEXT: retq # sched: [7:1.00] 196 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 197 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 198 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 199 ret <16 x i16> %res 200} 201define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) { 202; GENERIC-LABEL: test_16xi16_perm_mem_mask0: 203; GENERIC: # %bb.0: 204; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 205; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 206; GENERIC-NEXT: retq # sched: [1:1.00] 207; 208; SKX-LABEL: test_16xi16_perm_mem_mask0: 209; SKX: # %bb.0: 210; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = 
[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 211; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 212; SKX-NEXT: retq # sched: [7:1.00] 213 %vec = load <16 x i16>, <16 x i16>* %vp 214 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 215 ret <16 x i16> %res 216} 217define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 218; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask0: 219; GENERIC: # %bb.0: 220; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 221; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 222; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 223; GENERIC-NEXT: retq # sched: [1:1.00] 224; 225; SKX-LABEL: test_masked_16xi16_perm_mem_mask0: 226; SKX: # %bb.0: 227; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 228; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 229; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] 230; SKX-NEXT: retq # sched: [7:1.00] 231 %vec = load <16 x i16>, <16 x i16>* %vp 232 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 233 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 234 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 235 ret <16 x i16> %res 236} 237 238define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { 239; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask0: 240; GENERIC: # %bb.0: 241; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 242; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 243; GENERIC-NEXT: 
vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 244; GENERIC-NEXT: retq # sched: [1:1.00] 245; 246; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask0: 247; SKX: # %bb.0: 248; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 249; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 250; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] 251; SKX-NEXT: retq # sched: [7:1.00] 252 %vec = load <16 x i16>, <16 x i16>* %vp 253 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 254 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 255 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 256 ret <16 x i16> %res 257} 258 259define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 260; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask1: 261; GENERIC: # %bb.0: 262; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 263; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 264; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 265; GENERIC-NEXT: retq # sched: [1:1.00] 266; 267; SKX-LABEL: test_masked_16xi16_perm_mem_mask1: 268; SKX: # %bb.0: 269; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 270; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 271; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] 272; SKX-NEXT: retq # sched: [7:1.00] 273 %vec = load <16 x i16>, <16 x i16>* %vp 274 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 275 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 276 %res = select <16 x i1> %cmp, <16 x i16> 
%shuf, <16 x i16> %vec2 277 ret <16 x i16> %res 278} 279 280define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { 281; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask1: 282; GENERIC: # %bb.0: 283; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 284; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 285; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 286; GENERIC-NEXT: retq # sched: [1:1.00] 287; 288; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask1: 289; SKX: # %bb.0: 290; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 291; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 292; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] 293; SKX-NEXT: retq # sched: [7:1.00] 294 %vec = load <16 x i16>, <16 x i16>* %vp 295 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 296 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 297 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 298 ret <16 x i16> %res 299} 300 301define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 302; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask2: 303; GENERIC: # %bb.0: 304; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 305; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 306; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 307; GENERIC-NEXT: retq # sched: [1:1.00] 308; 309; SKX-LABEL: test_masked_16xi16_perm_mem_mask2: 310; SKX: # %bb.0: 311; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 312; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 313; SKX-NEXT: vpermw (%rdi), %ymm2, 
%ymm0 {%k1} # sched: [13:2.00] 314; SKX-NEXT: retq # sched: [7:1.00] 315 %vec = load <16 x i16>, <16 x i16>* %vp 316 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 317 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 318 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 319 ret <16 x i16> %res 320} 321 322define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { 323; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask2: 324; GENERIC: # %bb.0: 325; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 326; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 327; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 328; GENERIC-NEXT: retq # sched: [1:1.00] 329; 330; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask2: 331; SKX: # %bb.0: 332; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 333; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 334; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] 335; SKX-NEXT: retq # sched: [7:1.00] 336 %vec = load <16 x i16>, <16 x i16>* %vp 337 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 338 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 339 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 340 ret <16 x i16> %res 341} 342 343define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) { 344; GENERIC-LABEL: test_16xi16_perm_mem_mask3: 345; GENERIC: # %bb.0: 346; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 347; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 348; GENERIC-NEXT: retq # sched: 
[1:1.00] 349; 350; SKX-LABEL: test_16xi16_perm_mem_mask3: 351; SKX: # %bb.0: 352; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 353; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 354; SKX-NEXT: retq # sched: [7:1.00] 355 %vec = load <16 x i16>, <16 x i16>* %vp 356 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 357 ret <16 x i16> %res 358} 359define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 360; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask3: 361; GENERIC: # %bb.0: 362; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 363; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 364; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 365; GENERIC-NEXT: retq # sched: [1:1.00] 366; 367; SKX-LABEL: test_masked_16xi16_perm_mem_mask3: 368; SKX: # %bb.0: 369; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 370; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 371; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] 372; SKX-NEXT: retq # sched: [7:1.00] 373 %vec = load <16 x i16>, <16 x i16>* %vp 374 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 375 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 376 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 377 ret <16 x i16> %res 378} 379 380define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { 381; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask3: 382; GENERIC: # %bb.0: 383; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 384; 
GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 385; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 386; GENERIC-NEXT: retq # sched: [1:1.00] 387; 388; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask3: 389; SKX: # %bb.0: 390; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 391; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 392; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] 393; SKX-NEXT: retq # sched: [7:1.00] 394 %vec = load <16 x i16>, <16 x i16>* %vp 395 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 396 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 397 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 398 ret <16 x i16> %res 399} 400 401define <32 x i16> @test_32xi16_perm_mask0(<32 x i16> %vec) { 402; GENERIC-LABEL: test_32xi16_perm_mask0: 403; GENERIC: # %bb.0: 404; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] 405; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 406; GENERIC-NEXT: retq # sched: [1:1.00] 407; 408; SKX-LABEL: test_32xi16_perm_mask0: 409; SKX: # %bb.0: 410; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] 411; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00] 412; SKX-NEXT: retq # sched: [7:1.00] 413 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10> 414 ret <32 x i16> %res 415} 416define <32 x i16> 
@test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 417; GENERIC-LABEL: test_masked_32xi16_perm_mask0: 418; GENERIC: # %bb.0: 419; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] 420; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 421; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 422; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 423; GENERIC-NEXT: retq # sched: [1:1.00] 424; 425; SKX-LABEL: test_masked_32xi16_perm_mask0: 426; SKX: # %bb.0: 427; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] 428; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 429; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] 430; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 431; SKX-NEXT: retq # sched: [7:1.00] 432 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10> 433 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 434 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 435 ret <32 x i16> %res 436} 437 438define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %mask) { 439; GENERIC-LABEL: test_masked_z_32xi16_perm_mask0: 440; GENERIC: # %bb.0: 441; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] 442; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 443; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 444; GENERIC-NEXT: retq # sched: [1:1.00] 445; 446; SKX-LABEL: 
test_masked_z_32xi16_perm_mask0: 447; SKX: # %bb.0: 448; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] 449; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 450; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] 451; SKX-NEXT: retq # sched: [7:1.00] 452 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10> 453 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 454 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 455 ret <32 x i16> %res 456} 457define <32 x i16> @test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 458; GENERIC-LABEL: test_masked_32xi16_perm_mask1: 459; GENERIC: # %bb.0: 460; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50] 461; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 462; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 463; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 464; GENERIC-NEXT: retq # sched: [1:1.00] 465; 466; SKX-LABEL: test_masked_32xi16_perm_mask1: 467; SKX: # %bb.0: 468; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50] 469; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 470; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] 471; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 472; SKX-NEXT: retq # sched: [7:1.00] 473 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, 
i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16> 474 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 475 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 476 ret <32 x i16> %res 477} 478 479define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %mask) { 480; GENERIC-LABEL: test_masked_z_32xi16_perm_mask1: 481; GENERIC: # %bb.0: 482; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50] 483; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 484; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 485; GENERIC-NEXT: retq # sched: [1:1.00] 486; 487; SKX-LABEL: test_masked_z_32xi16_perm_mask1: 488; SKX: # %bb.0: 489; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50] 490; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 491; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] 492; SKX-NEXT: retq # sched: [7:1.00] 493 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16> 494 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 495 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 496 ret <32 x i16> %res 497} 498define <32 x i16> @test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 499; GENERIC-LABEL: test_masked_32xi16_perm_mask2: 500; GENERIC: # %bb.0: 501; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50] 502; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 503; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 504; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 505; GENERIC-NEXT: retq # sched: [1:1.00] 506; 507; SKX-LABEL: test_masked_32xi16_perm_mask2: 508; SKX: # %bb.0: 509; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50] 510; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 511; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] 512; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 513; SKX-NEXT: retq # sched: [7:1.00] 514 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27> 515 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 516 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 517 ret <32 x i16> %res 518} 519 520define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %mask) { 521; GENERIC-LABEL: test_masked_z_32xi16_perm_mask2: 522; GENERIC: # %bb.0: 523; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50] 524; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 525; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 526; GENERIC-NEXT: retq # sched: [1:1.00] 527; 528; SKX-LABEL: test_masked_z_32xi16_perm_mask2: 529; SKX: # %bb.0: 530; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50] 531; SKX-NEXT: vptestnmw 
%zmm1, %zmm1, %k1 # sched: [3:1.00] 532; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] 533; SKX-NEXT: retq # sched: [7:1.00] 534 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27> 535 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 536 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 537 ret <32 x i16> %res 538} 539define <32 x i16> @test_32xi16_perm_mask3(<32 x i16> %vec) { 540; GENERIC-LABEL: test_32xi16_perm_mask3: 541; GENERIC: # %bb.0: 542; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] 543; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 544; GENERIC-NEXT: retq # sched: [1:1.00] 545; 546; SKX-LABEL: test_32xi16_perm_mask3: 547; SKX: # %bb.0: 548; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] 549; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00] 550; SKX-NEXT: retq # sched: [7:1.00] 551 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4> 552 ret <32 x i16> %res 553} 554define <32 x i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 555; GENERIC-LABEL: test_masked_32xi16_perm_mask3: 556; GENERIC: # %bb.0: 557; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] 558; 
GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 559; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 560; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 561; GENERIC-NEXT: retq # sched: [1:1.00] 562; 563; SKX-LABEL: test_masked_32xi16_perm_mask3: 564; SKX: # %bb.0: 565; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] 566; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 567; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] 568; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 569; SKX-NEXT: retq # sched: [7:1.00] 570 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4> 571 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 572 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 573 ret <32 x i16> %res 574} 575 576define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %mask) { 577; GENERIC-LABEL: test_masked_z_32xi16_perm_mask3: 578; GENERIC: # %bb.0: 579; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] 580; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 581; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 582; GENERIC-NEXT: retq # sched: [1:1.00] 583; 584; SKX-LABEL: test_masked_z_32xi16_perm_mask3: 585; SKX: # %bb.0: 586; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] 587; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 588; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] 589; 
SKX-NEXT: retq # sched: [7:1.00] 590 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4> 591 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 592 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 593 ret <32 x i16> %res 594} 595define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) { 596; GENERIC-LABEL: test_32xi16_perm_mem_mask0: 597; GENERIC: # %bb.0: 598; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] 599; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 600; GENERIC-NEXT: retq # sched: [1:1.00] 601; 602; SKX-LABEL: test_32xi16_perm_mem_mask0: 603; SKX: # %bb.0: 604; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] 605; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00] 606; SKX-NEXT: retq # sched: [7:1.00] 607 %vec = load <32 x i16>, <32 x i16>* %vp 608 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12> 609 ret <32 x i16> %res 610} 611define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 612; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask0: 613; GENERIC: # %bb.0: 614; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] 615; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # 
sched: [1:0.33] 616; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 617; GENERIC-NEXT: retq # sched: [1:1.00] 618; 619; SKX-LABEL: test_masked_32xi16_perm_mem_mask0: 620; SKX: # %bb.0: 621; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] 622; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 623; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] 624; SKX-NEXT: retq # sched: [7:1.00] 625 %vec = load <32 x i16>, <32 x i16>* %vp 626 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12> 627 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 628 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 629 ret <32 x i16> %res 630} 631 632define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { 633; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask0: 634; GENERIC: # %bb.0: 635; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] 636; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 637; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 638; GENERIC-NEXT: retq # sched: [1:1.00] 639; 640; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask0: 641; SKX: # %bb.0: 642; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] 643; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 644; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] 645; SKX-NEXT: retq # sched: [7:1.00] 646 %vec = load <32 x i16>, <32 x i16>* %vp 647 %shuf = 
shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12> 648 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 649 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 650 ret <32 x i16> %res 651} 652 653define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 654; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask1: 655; GENERIC: # %bb.0: 656; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50] 657; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 658; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 659; GENERIC-NEXT: retq # sched: [1:1.00] 660; 661; SKX-LABEL: test_masked_32xi16_perm_mem_mask1: 662; SKX: # %bb.0: 663; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50] 664; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 665; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] 666; SKX-NEXT: retq # sched: [7:1.00] 667 %vec = load <32 x i16>, <32 x i16>* %vp 668 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6> 669 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 670 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 671 ret <32 x i16> %res 672} 673 674define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { 675; 
GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask1: 676; GENERIC: # %bb.0: 677; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50] 678; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 679; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 680; GENERIC-NEXT: retq # sched: [1:1.00] 681; 682; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask1: 683; SKX: # %bb.0: 684; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50] 685; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 686; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] 687; SKX-NEXT: retq # sched: [7:1.00] 688 %vec = load <32 x i16>, <32 x i16>* %vp 689 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6> 690 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 691 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 692 ret <32 x i16> %res 693} 694 695define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 696; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask2: 697; GENERIC: # %bb.0: 698; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50] 699; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 700; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 701; GENERIC-NEXT: retq # sched: [1:1.00] 702; 703; SKX-LABEL: test_masked_32xi16_perm_mem_mask2: 704; SKX: # %bb.0: 705; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50] 706; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 707; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] 708; SKX-NEXT: retq # sched: [7:1.00] 709 %vec = load <32 x i16>, <32 x i16>* %vp 710 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25> 711 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 712 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 713 ret <32 x i16> %res 714} 715 716define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { 717; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask2: 718; GENERIC: # %bb.0: 719; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50] 720; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 721; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 722; GENERIC-NEXT: retq # sched: [1:1.00] 723; 724; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask2: 725; SKX: # %bb.0: 726; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50] 727; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 728; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] 729; SKX-NEXT: retq # sched: [7:1.00] 730 %vec = load <32 x i16>, <32 x i16>* %vp 731 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 
1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25> 732 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 733 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 734 ret <32 x i16> %res 735} 736 737define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) { 738; GENERIC-LABEL: test_32xi16_perm_mem_mask3: 739; GENERIC: # %bb.0: 740; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] 741; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 742; GENERIC-NEXT: retq # sched: [1:1.00] 743; 744; SKX-LABEL: test_32xi16_perm_mem_mask3: 745; SKX: # %bb.0: 746; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] 747; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00] 748; SKX-NEXT: retq # sched: [7:1.00] 749 %vec = load <32 x i16>, <32 x i16>* %vp 750 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27> 751 ret <32 x i16> %res 752} 753define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 754; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask3: 755; GENERIC: # %bb.0: 756; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] 757; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 758; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 759; GENERIC-NEXT: retq # sched: [1:1.00] 760; 761; SKX-LABEL: test_masked_32xi16_perm_mem_mask3: 762; SKX: # %bb.0: 763; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] 764; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 765; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] 766; SKX-NEXT: retq # sched: [7:1.00] 767 %vec = load <32 x i16>, <32 x i16>* %vp 768 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27> 769 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 770 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 771 ret <32 x i16> %res 772} 773 774define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { 775; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask3: 776; GENERIC: # %bb.0: 777; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] 778; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 779; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 780; GENERIC-NEXT: retq # sched: [1:1.00] 781; 782; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask3: 783; SKX: # %bb.0: 784; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] 785; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 786; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] 787; SKX-NEXT: retq # sched: [7:1.00] 788 %vec = load <32 x i16>, <32 x i16>* %vp 789 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, 
i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27> 790 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 791 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 792 ret <32 x i16> %res 793} 794 795define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) { 796; GENERIC-LABEL: test_8xi32_perm_mask0: 797; GENERIC: # %bb.0: 798; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 799; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 800; GENERIC-NEXT: retq # sched: [1:1.00] 801; 802; SKX-LABEL: test_8xi32_perm_mask0: 803; SKX: # %bb.0: 804; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 805; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 806; SKX-NEXT: retq # sched: [7:1.00] 807 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6> 808 ret <8 x i32> %res 809} 810define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 811; GENERIC-LABEL: test_masked_8xi32_perm_mask0: 812; GENERIC: # %bb.0: 813; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 814; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 815; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 816; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 817; GENERIC-NEXT: retq # sched: [1:1.00] 818; 819; SKX-LABEL: test_masked_8xi32_perm_mask0: 820; SKX: # %bb.0: 821; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 822; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 823; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 824; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 825; SKX-NEXT: retq # sched: [7:1.00] 826 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6> 827 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 828 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 
x i32> %vec2 829 ret <8 x i32> %res 830} 831 832define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { 833; GENERIC-LABEL: test_masked_z_8xi32_perm_mask0: 834; GENERIC: # %bb.0: 835; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 836; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 837; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 838; GENERIC-NEXT: retq # sched: [1:1.00] 839; 840; SKX-LABEL: test_masked_z_8xi32_perm_mask0: 841; SKX: # %bb.0: 842; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 843; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 844; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 845; SKX-NEXT: retq # sched: [7:1.00] 846 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6> 847 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 848 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 849 ret <8 x i32> %res 850} 851define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 852; GENERIC-LABEL: test_masked_8xi32_perm_mask1: 853; GENERIC: # %bb.0: 854; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50] 855; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 856; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 857; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 858; GENERIC-NEXT: retq # sched: [1:1.00] 859; 860; SKX-LABEL: test_masked_8xi32_perm_mask1: 861; SKX: # %bb.0: 862; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50] 863; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 864; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 865; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 866; SKX-NEXT: retq # sched: [7:1.00] 867 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 
1, i32 2, i32 6, i32 0, i32 0, i32 3> 868 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 869 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 870 ret <8 x i32> %res 871} 872 873define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { 874; GENERIC-LABEL: test_masked_z_8xi32_perm_mask1: 875; GENERIC: # %bb.0: 876; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50] 877; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 878; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 879; GENERIC-NEXT: retq # sched: [1:1.00] 880; 881; SKX-LABEL: test_masked_z_8xi32_perm_mask1: 882; SKX: # %bb.0: 883; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50] 884; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 885; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 886; SKX-NEXT: retq # sched: [7:1.00] 887 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3> 888 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 889 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 890 ret <8 x i32> %res 891} 892define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 893; GENERIC-LABEL: test_masked_8xi32_perm_mask2: 894; GENERIC: # %bb.0: 895; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50] 896; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 897; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 898; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 899; GENERIC-NEXT: retq # sched: [1:1.00] 900; 901; SKX-LABEL: test_masked_8xi32_perm_mask2: 902; SKX: # %bb.0: 903; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50] 904; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 905; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 906; SKX-NEXT: vmovdqa %ymm1, %ymm0 # 
sched: [1:0.33] 907; SKX-NEXT: retq # sched: [7:1.00] 908 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4> 909 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 910 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 911 ret <8 x i32> %res 912} 913 914define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { 915; GENERIC-LABEL: test_masked_z_8xi32_perm_mask2: 916; GENERIC: # %bb.0: 917; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50] 918; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 919; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 920; GENERIC-NEXT: retq # sched: [1:1.00] 921; 922; SKX-LABEL: test_masked_z_8xi32_perm_mask2: 923; SKX: # %bb.0: 924; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50] 925; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 926; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 927; SKX-NEXT: retq # sched: [7:1.00] 928 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4> 929 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 930 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 931 ret <8 x i32> %res 932} 933define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) { 934; GENERIC-LABEL: test_8xi32_perm_mask3: 935; GENERIC: # %bb.0: 936; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 937; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 938; GENERIC-NEXT: retq # sched: [1:1.00] 939; 940; SKX-LABEL: test_8xi32_perm_mask3: 941; SKX: # %bb.0: 942; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 943; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 944; SKX-NEXT: retq # sched: [7:1.00] 945 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 
4, i32 5, i32 0> 946 ret <8 x i32> %res 947} 948define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 949; GENERIC-LABEL: test_masked_8xi32_perm_mask3: 950; GENERIC: # %bb.0: 951; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 952; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 953; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 954; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 955; GENERIC-NEXT: retq # sched: [1:1.00] 956; 957; SKX-LABEL: test_masked_8xi32_perm_mask3: 958; SKX: # %bb.0: 959; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 960; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 961; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 962; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 963; SKX-NEXT: retq # sched: [7:1.00] 964 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0> 965 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 966 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 967 ret <8 x i32> %res 968} 969 970define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { 971; GENERIC-LABEL: test_masked_z_8xi32_perm_mask3: 972; GENERIC: # %bb.0: 973; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 974; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 975; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 976; GENERIC-NEXT: retq # sched: [1:1.00] 977; 978; SKX-LABEL: test_masked_z_8xi32_perm_mask3: 979; SKX: # %bb.0: 980; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 981; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 982; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 983; SKX-NEXT: retq # sched: [7:1.00] 984 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 
1, i32 0, i32 4, i32 5, i32 0> 985 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 986 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 987 ret <8 x i32> %res 988} 989define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { 990; GENERIC-LABEL: test_8xi32_perm_mem_mask0: 991; GENERIC: # %bb.0: 992; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 993; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 994; GENERIC-NEXT: retq # sched: [1:1.00] 995; 996; SKX-LABEL: test_8xi32_perm_mem_mask0: 997; SKX: # %bb.0: 998; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 999; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 1000; SKX-NEXT: retq # sched: [7:1.00] 1001 %vec = load <8 x i32>, <8 x i32>* %vp 1002 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5> 1003 ret <8 x i32> %res 1004} 1005define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 1006; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask0: 1007; GENERIC: # %bb.0: 1008; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 1009; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 1010; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 1011; GENERIC-NEXT: retq # sched: [1:1.00] 1012; 1013; SKX-LABEL: test_masked_8xi32_perm_mem_mask0: 1014; SKX: # %bb.0: 1015; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 1016; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 1017; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 1018; SKX-NEXT: retq # sched: [7:1.00] 1019 %vec = load <8 x i32>, <8 x i32>* %vp 1020 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5> 1021 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1022 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> 
%vec2 1023 ret <8 x i32> %res 1024} 1025 1026define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { 1027; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask0: 1028; GENERIC: # %bb.0: 1029; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 1030; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 1031; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 1032; GENERIC-NEXT: retq # sched: [1:1.00] 1033; 1034; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask0: 1035; SKX: # %bb.0: 1036; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 1037; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 1038; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 1039; SKX-NEXT: retq # sched: [7:1.00] 1040 %vec = load <8 x i32>, <8 x i32>* %vp 1041 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5> 1042 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1043 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1044 ret <8 x i32> %res 1045} 1046 1047define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 1048; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask1: 1049; GENERIC: # %bb.0: 1050; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] 1051; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 1052; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 1053; GENERIC-NEXT: retq # sched: [1:1.00] 1054; 1055; SKX-LABEL: test_masked_8xi32_perm_mem_mask1: 1056; SKX: # %bb.0: 1057; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] 1058; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 1059; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 1060; SKX-NEXT: retq # sched: [7:1.00] 1061 %vec = load <8 x i32>, <8 x i32>* %vp 1062 %shuf = shufflevector <8 x i32> %vec, <8 x 
i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5> 1063 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1064 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 1065 ret <8 x i32> %res 1066} 1067 1068define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { 1069; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask1: 1070; GENERIC: # %bb.0: 1071; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] 1072; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 1073; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 1074; GENERIC-NEXT: retq # sched: [1:1.00] 1075; 1076; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask1: 1077; SKX: # %bb.0: 1078; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] 1079; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 1080; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 1081; SKX-NEXT: retq # sched: [7:1.00] 1082 %vec = load <8 x i32>, <8 x i32>* %vp 1083 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5> 1084 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1085 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1086 ret <8 x i32> %res 1087} 1088 1089define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 1090; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask2: 1091; GENERIC: # %bb.0: 1092; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] 1093; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 1094; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 1095; GENERIC-NEXT: retq # sched: [1:1.00] 1096; 1097; SKX-LABEL: test_masked_8xi32_perm_mem_mask2: 1098; SKX: # %bb.0: 1099; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] 1100; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 1101; 
SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 1102; SKX-NEXT: retq # sched: [7:1.00] 1103 %vec = load <8 x i32>, <8 x i32>* %vp 1104 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3> 1105 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1106 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 1107 ret <8 x i32> %res 1108} 1109 1110define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { 1111; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask2: 1112; GENERIC: # %bb.0: 1113; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] 1114; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 1115; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 1116; GENERIC-NEXT: retq # sched: [1:1.00] 1117; 1118; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask2: 1119; SKX: # %bb.0: 1120; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] 1121; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 1122; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 1123; SKX-NEXT: retq # sched: [7:1.00] 1124 %vec = load <8 x i32>, <8 x i32>* %vp 1125 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3> 1126 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1127 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1128 ret <8 x i32> %res 1129} 1130 1131define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { 1132; GENERIC-LABEL: test_8xi32_perm_mem_mask3: 1133; GENERIC: # %bb.0: 1134; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1135; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 1136; GENERIC-NEXT: retq # sched: [1:1.00] 1137; 1138; SKX-LABEL: test_8xi32_perm_mem_mask3: 1139; SKX: # %bb.0: 1140; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 
1141; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 1142; SKX-NEXT: retq # sched: [7:1.00] 1143 %vec = load <8 x i32>, <8 x i32>* %vp 1144 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5> 1145 ret <8 x i32> %res 1146} 1147define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 1148; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask3: 1149; GENERIC: # %bb.0: 1150; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1151; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 1152; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 1153; GENERIC-NEXT: retq # sched: [1:1.00] 1154; 1155; SKX-LABEL: test_masked_8xi32_perm_mem_mask3: 1156; SKX: # %bb.0: 1157; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1158; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 1159; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 1160; SKX-NEXT: retq # sched: [7:1.00] 1161 %vec = load <8 x i32>, <8 x i32>* %vp 1162 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5> 1163 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1164 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 1165 ret <8 x i32> %res 1166} 1167 1168define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { 1169; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask3: 1170; GENERIC: # %bb.0: 1171; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1172; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 1173; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 1174; GENERIC-NEXT: retq # sched: [1:1.00] 1175; 1176; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask3: 1177; SKX: # %bb.0: 1178; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1179; SKX-NEXT: 
vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 1180; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 1181; SKX-NEXT: retq # sched: [7:1.00] 1182 %vec = load <8 x i32>, <8 x i32>* %vp 1183 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5> 1184 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1185 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1186 ret <8 x i32> %res 1187} 1188 1189define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { 1190; GENERIC-LABEL: test_16xi32_perm_mask0: 1191; GENERIC: # %bb.0: 1192; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] 1193; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 1194; GENERIC-NEXT: retq # sched: [1:1.00] 1195; 1196; SKX-LABEL: test_16xi32_perm_mask0: 1197; SKX: # %bb.0: 1198; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] 1199; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 1200; SKX-NEXT: retq # sched: [7:1.00] 1201 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7> 1202 ret <16 x i32> %res 1203} 1204define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 1205; GENERIC-LABEL: test_masked_16xi32_perm_mask0: 1206; GENERIC: # %bb.0: 1207; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] 1208; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 1209; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1210; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1211; GENERIC-NEXT: retq # sched: [1:1.00] 1212; 1213; SKX-LABEL: test_masked_16xi32_perm_mask0: 1214; SKX: # %bb.0: 1215; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] 1216; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 1217; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1218; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1219; SKX-NEXT: retq # sched: [7:1.00] 1220 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7> 1221 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1222 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1223 ret <16 x i32> %res 1224} 1225 1226define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { 1227; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0: 1228; GENERIC: # %bb.0: 1229; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] 1230; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1231; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1232; GENERIC-NEXT: retq # sched: [1:1.00] 1233; 1234; SKX-LABEL: test_masked_z_16xi32_perm_mask0: 1235; SKX: # %bb.0: 1236; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] 1237; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1238; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1239; SKX-NEXT: retq # sched: [7:1.00] 1240 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7> 1241 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1242 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1243 ret <16 x i32> %res 1244} 1245define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 1246; GENERIC-LABEL: test_masked_16xi32_perm_mask1: 1247; GENERIC: # %bb.0: 1248; 
GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50] 1249; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 1250; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1251; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1252; GENERIC-NEXT: retq # sched: [1:1.00] 1253; 1254; SKX-LABEL: test_masked_16xi32_perm_mask1: 1255; SKX: # %bb.0: 1256; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50] 1257; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 1258; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1259; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1260; SKX-NEXT: retq # sched: [7:1.00] 1261 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3> 1262 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1263 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1264 ret <16 x i32> %res 1265} 1266 1267define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { 1268; GENERIC-LABEL: test_masked_z_16xi32_perm_mask1: 1269; GENERIC: # %bb.0: 1270; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50] 1271; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1272; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1273; GENERIC-NEXT: retq # sched: [1:1.00] 1274; 1275; SKX-LABEL: test_masked_z_16xi32_perm_mask1: 1276; SKX: # %bb.0: 1277; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50] 1278; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1279; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1280; SKX-NEXT: retq # sched: [7:1.00] 1281 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, 
i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3> 1282 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1283 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1284 ret <16 x i32> %res 1285} 1286define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 1287; GENERIC-LABEL: test_masked_16xi32_perm_mask2: 1288; GENERIC: # %bb.0: 1289; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50] 1290; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 1291; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1292; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1293; GENERIC-NEXT: retq # sched: [1:1.00] 1294; 1295; SKX-LABEL: test_masked_16xi32_perm_mask2: 1296; SKX: # %bb.0: 1297; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50] 1298; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 1299; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1300; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1301; SKX-NEXT: retq # sched: [7:1.00] 1302 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5> 1303 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1304 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1305 ret <16 x i32> %res 1306} 1307 1308define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { 1309; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2: 1310; GENERIC: # %bb.0: 1311; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50] 1312; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1313; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1314; GENERIC-NEXT: retq # sched: [1:1.00] 1315; 1316; 
SKX-LABEL: test_masked_z_16xi32_perm_mask2: 1317; SKX: # %bb.0: 1318; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50] 1319; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1320; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1321; SKX-NEXT: retq # sched: [7:1.00] 1322 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5> 1323 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1324 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1325 ret <16 x i32> %res 1326} 1327define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) { 1328; GENERIC-LABEL: test_16xi32_perm_mask3: 1329; GENERIC: # %bb.0: 1330; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] 1331; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 1332; GENERIC-NEXT: retq # sched: [1:1.00] 1333; 1334; SKX-LABEL: test_16xi32_perm_mask3: 1335; SKX: # %bb.0: 1336; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] 1337; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 1338; SKX-NEXT: retq # sched: [7:1.00] 1339 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12> 1340 ret <16 x i32> %res 1341} 1342define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 1343; GENERIC-LABEL: test_masked_16xi32_perm_mask3: 1344; GENERIC: # %bb.0: 1345; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] 1346; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 1347; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1348; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # 
sched: [1:0.50] 1349; GENERIC-NEXT: retq # sched: [1:1.00] 1350; 1351; SKX-LABEL: test_masked_16xi32_perm_mask3: 1352; SKX: # %bb.0: 1353; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] 1354; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 1355; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1356; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1357; SKX-NEXT: retq # sched: [7:1.00] 1358 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12> 1359 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1360 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1361 ret <16 x i32> %res 1362} 1363 1364define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { 1365; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3: 1366; GENERIC: # %bb.0: 1367; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] 1368; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1369; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1370; GENERIC-NEXT: retq # sched: [1:1.00] 1371; 1372; SKX-LABEL: test_masked_z_16xi32_perm_mask3: 1373; SKX: # %bb.0: 1374; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] 1375; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1376; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1377; SKX-NEXT: retq # sched: [7:1.00] 1378 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12> 1379 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1380 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1381 ret <16 x i32> %res 1382} 1383define <16 x i32> 
@test_16xi32_perm_mem_mask0(<16 x i32>* %vp) { 1384; GENERIC-LABEL: test_16xi32_perm_mem_mask0: 1385; GENERIC: # %bb.0: 1386; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] 1387; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 1388; GENERIC-NEXT: retq # sched: [1:1.00] 1389; 1390; SKX-LABEL: test_16xi32_perm_mem_mask0: 1391; SKX: # %bb.0: 1392; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] 1393; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 1394; SKX-NEXT: retq # sched: [7:1.00] 1395 %vec = load <16 x i32>, <16 x i32>* %vp 1396 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6> 1397 ret <16 x i32> %res 1398} 1399define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 1400; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0: 1401; GENERIC: # %bb.0: 1402; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] 1403; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1404; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 1405; GENERIC-NEXT: retq # sched: [1:1.00] 1406; 1407; SKX-LABEL: test_masked_16xi32_perm_mem_mask0: 1408; SKX: # %bb.0: 1409; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] 1410; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1411; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 1412; SKX-NEXT: retq # sched: [7:1.00] 1413 %vec = load <16 x i32>, <16 x i32>* %vp 1414 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6> 1415 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1416 %res = select <16 x i1> %cmp, <16 
x i32> %shuf, <16 x i32> %vec2 1417 ret <16 x i32> %res 1418} 1419 1420define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { 1421; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0: 1422; GENERIC: # %bb.0: 1423; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] 1424; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 1425; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 1426; GENERIC-NEXT: retq # sched: [1:1.00] 1427; 1428; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0: 1429; SKX: # %bb.0: 1430; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] 1431; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 1432; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 1433; SKX-NEXT: retq # sched: [7:1.00] 1434 %vec = load <16 x i32>, <16 x i32>* %vp 1435 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6> 1436 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1437 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1438 ret <16 x i32> %res 1439} 1440 1441define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 1442; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1: 1443; GENERIC: # %bb.0: 1444; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50] 1445; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1446; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 1447; GENERIC-NEXT: retq # sched: [1:1.00] 1448; 1449; SKX-LABEL: test_masked_16xi32_perm_mem_mask1: 1450; SKX: # %bb.0: 1451; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50] 1452; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 
1453; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 1454; SKX-NEXT: retq # sched: [7:1.00] 1455 %vec = load <16 x i32>, <16 x i32>* %vp 1456 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3> 1457 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1458 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1459 ret <16 x i32> %res 1460} 1461 1462define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { 1463; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1: 1464; GENERIC: # %bb.0: 1465; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50] 1466; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 1467; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 1468; GENERIC-NEXT: retq # sched: [1:1.00] 1469; 1470; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1: 1471; SKX: # %bb.0: 1472; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50] 1473; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 1474; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 1475; SKX-NEXT: retq # sched: [7:1.00] 1476 %vec = load <16 x i32>, <16 x i32>* %vp 1477 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3> 1478 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1479 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1480 ret <16 x i32> %res 1481} 1482 1483define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 1484; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2: 1485; GENERIC: # %bb.0: 1486; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50] 1487; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1488; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 1489; GENERIC-NEXT: retq # sched: [1:1.00] 1490; 1491; SKX-LABEL: test_masked_16xi32_perm_mem_mask2: 1492; SKX: # %bb.0: 1493; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50] 1494; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1495; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 1496; SKX-NEXT: retq # sched: [7:1.00] 1497 %vec = load <16 x i32>, <16 x i32>* %vp 1498 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2> 1499 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1500 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1501 ret <16 x i32> %res 1502} 1503 1504define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { 1505; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2: 1506; GENERIC: # %bb.0: 1507; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50] 1508; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 1509; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 1510; GENERIC-NEXT: retq # sched: [1:1.00] 1511; 1512; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2: 1513; SKX: # %bb.0: 1514; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50] 1515; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 1516; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 1517; SKX-NEXT: retq # sched: [7:1.00] 1518 %vec = load <16 x i32>, <16 x i32>* %vp 1519 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, 
i32 12, i32 10, i32 8, i32 2> 1520 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1521 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1522 ret <16 x i32> %res 1523} 1524 1525define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) { 1526; GENERIC-LABEL: test_16xi32_perm_mem_mask3: 1527; GENERIC: # %bb.0: 1528; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] 1529; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 1530; GENERIC-NEXT: retq # sched: [1:1.00] 1531; 1532; SKX-LABEL: test_16xi32_perm_mem_mask3: 1533; SKX: # %bb.0: 1534; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] 1535; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 1536; SKX-NEXT: retq # sched: [7:1.00] 1537 %vec = load <16 x i32>, <16 x i32>* %vp 1538 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1> 1539 ret <16 x i32> %res 1540} 1541define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 1542; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3: 1543; GENERIC: # %bb.0: 1544; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] 1545; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1546; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 1547; GENERIC-NEXT: retq # sched: [1:1.00] 1548; 1549; SKX-LABEL: test_masked_16xi32_perm_mem_mask3: 1550; SKX: # %bb.0: 1551; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] 1552; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1553; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 1554; SKX-NEXT: retq # sched: [7:1.00] 1555 %vec = load <16 x i32>, <16 x i32>* %vp 1556 %shuf = shufflevector <16 x i32> %vec, 
<16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1> 1557 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1558 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1559 ret <16 x i32> %res 1560} 1561 1562define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { 1563; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3: 1564; GENERIC: # %bb.0: 1565; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] 1566; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 1567; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 1568; GENERIC-NEXT: retq # sched: [1:1.00] 1569; 1570; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3: 1571; SKX: # %bb.0: 1572; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] 1573; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 1574; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 1575; SKX-NEXT: retq # sched: [7:1.00] 1576 %vec = load <16 x i32>, <16 x i32>* %vp 1577 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1> 1578 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1579 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1580 ret <16 x i32> %res 1581} 1582 1583define <4 x i64> @test_4xi64_perm_mask0(<4 x i64> %vec) { 1584; GENERIC-LABEL: test_4xi64_perm_mask0: 1585; GENERIC: # %bb.0: 1586; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [1:1.00] 1587; GENERIC-NEXT: retq # sched: [1:1.00] 1588; 1589; SKX-LABEL: test_4xi64_perm_mask0: 1590; SKX: # %bb.0: 1591; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [3:1.00] 1592; SKX-NEXT: retq # sched: [7:1.00] 1593 %res = shufflevector <4 x i64> %vec, <4 
x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1> 1594 ret <4 x i64> %res 1595} 1596define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1597; GENERIC-LABEL: test_masked_4xi64_perm_mask0: 1598; GENERIC: # %bb.0: 1599; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 1600; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [1:1.00] 1601; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 1602; GENERIC-NEXT: retq # sched: [1:1.00] 1603; 1604; SKX-LABEL: test_masked_4xi64_perm_mask0: 1605; SKX: # %bb.0: 1606; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 1607; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [3:1.00] 1608; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 1609; SKX-NEXT: retq # sched: [7:1.00] 1610 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1> 1611 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1612 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1613 ret <4 x i64> %res 1614} 1615 1616define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) { 1617; GENERIC-LABEL: test_masked_z_4xi64_perm_mask0: 1618; GENERIC: # %bb.0: 1619; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1620; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [1:1.00] 1621; GENERIC-NEXT: retq # sched: [1:1.00] 1622; 1623; SKX-LABEL: test_masked_z_4xi64_perm_mask0: 1624; SKX: # %bb.0: 1625; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1626; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [3:1.00] 1627; SKX-NEXT: retq # sched: [7:1.00] 1628 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1> 1629 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1630 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1631 ret <4 x i64> %res 1632} 1633define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> 
%vec, <4 x i64> %vec2, <4 x i64> %mask) { 1634; GENERIC-LABEL: test_masked_4xi64_perm_mask1: 1635; GENERIC: # %bb.0: 1636; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 1637; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [1:1.00] 1638; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 1639; GENERIC-NEXT: retq # sched: [1:1.00] 1640; 1641; SKX-LABEL: test_masked_4xi64_perm_mask1: 1642; SKX: # %bb.0: 1643; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 1644; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [3:1.00] 1645; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 1646; SKX-NEXT: retq # sched: [7:1.00] 1647 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3> 1648 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1649 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1650 ret <4 x i64> %res 1651} 1652 1653define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) { 1654; GENERIC-LABEL: test_masked_z_4xi64_perm_mask1: 1655; GENERIC: # %bb.0: 1656; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1657; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [1:1.00] 1658; GENERIC-NEXT: retq # sched: [1:1.00] 1659; 1660; SKX-LABEL: test_masked_z_4xi64_perm_mask1: 1661; SKX: # %bb.0: 1662; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1663; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [3:1.00] 1664; SKX-NEXT: retq # sched: [7:1.00] 1665 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3> 1666 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1667 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1668 ret <4 x i64> %res 1669} 1670define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1671; GENERIC-LABEL: test_masked_4xi64_perm_mask2: 1672; GENERIC: # %bb.0: 1673; GENERIC-NEXT: 
vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 1674; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [1:1.00] 1675; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 1676; GENERIC-NEXT: retq # sched: [1:1.00] 1677; 1678; SKX-LABEL: test_masked_4xi64_perm_mask2: 1679; SKX: # %bb.0: 1680; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 1681; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [3:1.00] 1682; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 1683; SKX-NEXT: retq # sched: [7:1.00] 1684 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1> 1685 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1686 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1687 ret <4 x i64> %res 1688} 1689 1690define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %mask) { 1691; GENERIC-LABEL: test_masked_z_4xi64_perm_mask2: 1692; GENERIC: # %bb.0: 1693; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1694; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [1:1.00] 1695; GENERIC-NEXT: retq # sched: [1:1.00] 1696; 1697; SKX-LABEL: test_masked_z_4xi64_perm_mask2: 1698; SKX: # %bb.0: 1699; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1700; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [3:1.00] 1701; SKX-NEXT: retq # sched: [7:1.00] 1702 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1> 1703 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1704 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1705 ret <4 x i64> %res 1706} 1707define <4 x i64> @test_4xi64_perm_mask3(<4 x i64> %vec) { 1708; GENERIC-LABEL: test_4xi64_perm_mask3: 1709; GENERIC: # %bb.0: 1710; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [1:1.00] 1711; GENERIC-NEXT: retq # sched: [1:1.00] 1712; 1713; SKX-LABEL: test_4xi64_perm_mask3: 1714; SKX: # %bb.0: 1715; SKX-NEXT: vpermpd 
{{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [3:1.00] 1716; SKX-NEXT: retq # sched: [7:1.00] 1717 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3> 1718 ret <4 x i64> %res 1719} 1720define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1721; GENERIC-LABEL: test_masked_4xi64_perm_mask3: 1722; GENERIC: # %bb.0: 1723; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 1724; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [1:1.00] 1725; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 1726; GENERIC-NEXT: retq # sched: [1:1.00] 1727; 1728; SKX-LABEL: test_masked_4xi64_perm_mask3: 1729; SKX: # %bb.0: 1730; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 1731; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [3:1.00] 1732; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 1733; SKX-NEXT: retq # sched: [7:1.00] 1734 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3> 1735 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1736 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1737 ret <4 x i64> %res 1738} 1739 1740define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) { 1741; GENERIC-LABEL: test_masked_z_4xi64_perm_mask3: 1742; GENERIC: # %bb.0: 1743; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1744; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [1:1.00] 1745; GENERIC-NEXT: retq # sched: [1:1.00] 1746; 1747; SKX-LABEL: test_masked_z_4xi64_perm_mask3: 1748; SKX: # %bb.0: 1749; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1750; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [3:1.00] 1751; SKX-NEXT: retq # sched: [7:1.00] 1752 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3> 1753 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1754 %res = select <4 x i1> %cmp, <4 x 
i64> %shuf, <4 x i64> zeroinitializer 1755 ret <4 x i64> %res 1756} 1757define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) { 1758; GENERIC-LABEL: test_4xi64_perm_mem_mask0: 1759; GENERIC: # %bb.0: 1760; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [8:1.00] 1761; GENERIC-NEXT: retq # sched: [1:1.00] 1762; 1763; SKX-LABEL: test_4xi64_perm_mem_mask0: 1764; SKX: # %bb.0: 1765; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [10:1.00] 1766; SKX-NEXT: retq # sched: [7:1.00] 1767 %vec = load <4 x i64>, <4 x i64>* %vp 1768 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0> 1769 ret <4 x i64> %res 1770} 1771define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1772; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0: 1773; GENERIC: # %bb.0: 1774; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1775; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [8:1.00] 1776; GENERIC-NEXT: retq # sched: [1:1.00] 1777; 1778; SKX-LABEL: test_masked_4xi64_perm_mem_mask0: 1779; SKX: # %bb.0: 1780; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1781; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [10:1.00] 1782; SKX-NEXT: retq # sched: [7:1.00] 1783 %vec = load <4 x i64>, <4 x i64>* %vp 1784 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0> 1785 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1786 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1787 ret <4 x i64> %res 1788} 1789 1790define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) { 1791; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0: 1792; GENERIC: # %bb.0: 1793; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 1794; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [8:1.00] 1795; GENERIC-NEXT: retq # sched: [1:1.00] 1796; 1797; SKX-LABEL: 
test_masked_z_4xi64_perm_mem_mask0: 1798; SKX: # %bb.0: 1799; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 1800; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [10:1.00] 1801; SKX-NEXT: retq # sched: [7:1.00] 1802 %vec = load <4 x i64>, <4 x i64>* %vp 1803 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0> 1804 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1805 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1806 ret <4 x i64> %res 1807} 1808 1809define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1810; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1: 1811; GENERIC: # %bb.0: 1812; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1813; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [8:1.00] 1814; GENERIC-NEXT: retq # sched: [1:1.00] 1815; 1816; SKX-LABEL: test_masked_4xi64_perm_mem_mask1: 1817; SKX: # %bb.0: 1818; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1819; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [10:1.00] 1820; SKX-NEXT: retq # sched: [7:1.00] 1821 %vec = load <4 x i64>, <4 x i64>* %vp 1822 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 1823 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1824 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1825 ret <4 x i64> %res 1826} 1827 1828define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) { 1829; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1: 1830; GENERIC: # %bb.0: 1831; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 1832; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [8:1.00] 1833; GENERIC-NEXT: retq # sched: [1:1.00] 1834; 1835; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask1: 1836; SKX: # %bb.0: 1837; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 1838; SKX-NEXT: vpermq 
{{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [10:1.00] 1839; SKX-NEXT: retq # sched: [7:1.00] 1840 %vec = load <4 x i64>, <4 x i64>* %vp 1841 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 1842 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1843 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1844 ret <4 x i64> %res 1845} 1846 1847define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1848; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2: 1849; GENERIC: # %bb.0: 1850; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1851; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [8:1.00] 1852; GENERIC-NEXT: retq # sched: [1:1.00] 1853; 1854; SKX-LABEL: test_masked_4xi64_perm_mem_mask2: 1855; SKX: # %bb.0: 1856; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1857; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [10:1.00] 1858; SKX-NEXT: retq # sched: [7:1.00] 1859 %vec = load <4 x i64>, <4 x i64>* %vp 1860 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0> 1861 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1862 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1863 ret <4 x i64> %res 1864} 1865 1866define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %mask) { 1867; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2: 1868; GENERIC: # %bb.0: 1869; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 1870; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [8:1.00] 1871; GENERIC-NEXT: retq # sched: [1:1.00] 1872; 1873; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask2: 1874; SKX: # %bb.0: 1875; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 1876; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [10:1.00] 1877; SKX-NEXT: retq # sched: [7:1.00] 1878 %vec = load <4 x i64>, <4 x i64>* %vp 1879 
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0> 1880 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1881 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1882 ret <4 x i64> %res 1883} 1884 1885define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) { 1886; GENERIC-LABEL: test_4xi64_perm_mem_mask3: 1887; GENERIC: # %bb.0: 1888; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [8:1.00] 1889; GENERIC-NEXT: retq # sched: [1:1.00] 1890; 1891; SKX-LABEL: test_4xi64_perm_mem_mask3: 1892; SKX: # %bb.0: 1893; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [10:1.00] 1894; SKX-NEXT: retq # sched: [7:1.00] 1895 %vec = load <4 x i64>, <4 x i64>* %vp 1896 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3> 1897 ret <4 x i64> %res 1898} 1899define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1900; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3: 1901; GENERIC: # %bb.0: 1902; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1903; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [8:1.00] 1904; GENERIC-NEXT: retq # sched: [1:1.00] 1905; 1906; SKX-LABEL: test_masked_4xi64_perm_mem_mask3: 1907; SKX: # %bb.0: 1908; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1909; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [10:1.00] 1910; SKX-NEXT: retq # sched: [7:1.00] 1911 %vec = load <4 x i64>, <4 x i64>* %vp 1912 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3> 1913 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1914 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1915 ret <4 x i64> %res 1916} 1917 1918define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %mask) { 1919; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3: 1920; GENERIC: # %bb.0: 1921; GENERIC-NEXT: vptestnmq %ymm0, 
%ymm0, %k1 # sched: [1:0.33] 1922; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [8:1.00] 1923; GENERIC-NEXT: retq # sched: [1:1.00] 1924; 1925; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask3: 1926; SKX: # %bb.0: 1927; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 1928; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [10:1.00] 1929; SKX-NEXT: retq # sched: [7:1.00] 1930 %vec = load <4 x i64>, <4 x i64>* %vp 1931 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3> 1932 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1933 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1934 ret <4 x i64> %res 1935} 1936 1937define <8 x i64> @test_8xi64_perm_mask0(<8 x i64> %vec) { 1938; GENERIC-LABEL: test_8xi64_perm_mask0: 1939; GENERIC: # %bb.0: 1940; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [7:0.50] 1941; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 1942; GENERIC-NEXT: retq # sched: [1:1.00] 1943; 1944; SKX-LABEL: test_8xi64_perm_mask0: 1945; SKX: # %bb.0: 1946; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [8:0.50] 1947; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 1948; SKX-NEXT: retq # sched: [7:1.00] 1949 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6> 1950 ret <8 x i64> %res 1951} 1952define <8 x i64> @test_masked_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1953; GENERIC-LABEL: test_masked_8xi64_perm_mask0: 1954; GENERIC: # %bb.0: 1955; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [7:0.50] 1956; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 1957; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1958; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1959; GENERIC-NEXT: retq # sched: [1:1.00] 1960; 1961; SKX-LABEL: test_masked_8xi64_perm_mask0: 1962; SKX: # 
%bb.0: 1963; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [8:0.50] 1964; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 1965; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1966; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1967; SKX-NEXT: retq # sched: [7:1.00] 1968 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6> 1969 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1970 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1971 ret <8 x i64> %res 1972} 1973 1974define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask) { 1975; GENERIC-LABEL: test_masked_z_8xi64_perm_mask0: 1976; GENERIC: # %bb.0: 1977; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [7:0.50] 1978; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 1979; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1980; GENERIC-NEXT: retq # sched: [1:1.00] 1981; 1982; SKX-LABEL: test_masked_z_8xi64_perm_mask0: 1983; SKX: # %bb.0: 1984; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [8:0.50] 1985; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 1986; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1987; SKX-NEXT: retq # sched: [7:1.00] 1988 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6> 1989 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1990 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1991 ret <8 x i64> %res 1992} 1993define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1994; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask1: 1995; GENERIC: # %bb.0: 1996; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 1997; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] 1998; GENERIC-NEXT: vmovdqa64 
%zmm1, %zmm0 # sched: [1:0.50] 1999; GENERIC-NEXT: retq # sched: [1:1.00] 2000; 2001; SKX-LABEL: test_masked_8xi64_perm_imm_mask1: 2002; SKX: # %bb.0: 2003; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 2004; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00] 2005; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 2006; SKX-NEXT: retq # sched: [7:1.00] 2007 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5> 2008 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2009 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2010 ret <8 x i64> %res 2011} 2012 2013define <8 x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) { 2014; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask1: 2015; GENERIC: # %bb.0: 2016; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2017; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] 2018; GENERIC-NEXT: retq # sched: [1:1.00] 2019; 2020; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask1: 2021; SKX: # %bb.0: 2022; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2023; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00] 2024; SKX-NEXT: retq # sched: [7:1.00] 2025 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5> 2026 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2027 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2028 ret <8 x i64> %res 2029} 2030define <8 x i64> @test_masked_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 2031; GENERIC-LABEL: test_masked_8xi64_perm_mask2: 2032; GENERIC: # %bb.0: 2033; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [7:0.50] 2034; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 2035; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 
2036; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 2037; GENERIC-NEXT: retq # sched: [1:1.00] 2038; 2039; SKX-LABEL: test_masked_8xi64_perm_mask2: 2040; SKX: # %bb.0: 2041; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [8:0.50] 2042; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 2043; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 2044; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 2045; SKX-NEXT: retq # sched: [7:1.00] 2046 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1> 2047 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2048 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2049 ret <8 x i64> %res 2050} 2051 2052define <8 x i64> @test_masked_z_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %mask) { 2053; GENERIC-LABEL: test_masked_z_8xi64_perm_mask2: 2054; GENERIC: # %bb.0: 2055; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [7:0.50] 2056; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2057; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 2058; GENERIC-NEXT: retq # sched: [1:1.00] 2059; 2060; SKX-LABEL: test_masked_z_8xi64_perm_mask2: 2061; SKX: # %bb.0: 2062; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [8:0.50] 2063; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2064; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 2065; SKX-NEXT: retq # sched: [7:1.00] 2066 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1> 2067 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2068 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2069 ret <8 x i64> %res 2070} 2071define <8 x i64> @test_8xi64_perm_imm_mask3(<8 x i64> %vec) { 2072; GENERIC-LABEL: test_8xi64_perm_imm_mask3: 2073; GENERIC: # %bb.0: 2074; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = 
zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] 2075; GENERIC-NEXT: retq # sched: [1:1.00] 2076; 2077; SKX-LABEL: test_8xi64_perm_imm_mask3: 2078; SKX: # %bb.0: 2079; SKX-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00] 2080; SKX-NEXT: retq # sched: [7:1.00] 2081 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5> 2082 ret <8 x i64> %res 2083} 2084define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 2085; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask3: 2086; GENERIC: # %bb.0: 2087; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 2088; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] 2089; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 2090; GENERIC-NEXT: retq # sched: [1:1.00] 2091; 2092; SKX-LABEL: test_masked_8xi64_perm_imm_mask3: 2093; SKX: # %bb.0: 2094; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 2095; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00] 2096; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 2097; SKX-NEXT: retq # sched: [7:1.00] 2098 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5> 2099 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2100 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2101 ret <8 x i64> %res 2102} 2103 2104define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) { 2105; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask3: 2106; GENERIC: # %bb.0: 2107; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2108; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] 2109; GENERIC-NEXT: retq # sched: [1:1.00] 2110; 2111; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask3: 2112; SKX: # %bb.0: 2113; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2114; SKX-NEXT: 
vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00] 2115; SKX-NEXT: retq # sched: [7:1.00] 2116 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5> 2117 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2118 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2119 ret <8 x i64> %res 2120} 2121define <8 x i64> @test_masked_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 2122; GENERIC-LABEL: test_masked_8xi64_perm_mask4: 2123; GENERIC: # %bb.0: 2124; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [7:0.50] 2125; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 2126; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 2127; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 2128; GENERIC-NEXT: retq # sched: [1:1.00] 2129; 2130; SKX-LABEL: test_masked_8xi64_perm_mask4: 2131; SKX: # %bb.0: 2132; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [8:0.50] 2133; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 2134; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 2135; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 2136; SKX-NEXT: retq # sched: [7:1.00] 2137 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3> 2138 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2139 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2140 ret <8 x i64> %res 2141} 2142 2143define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask) { 2144; GENERIC-LABEL: test_masked_z_8xi64_perm_mask4: 2145; GENERIC: # %bb.0: 2146; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [7:0.50] 2147; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2148; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 2149; GENERIC-NEXT: retq # sched: [1:1.00] 2150; 2151; 
SKX-LABEL: test_masked_z_8xi64_perm_mask4: 2152; SKX: # %bb.0: 2153; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [8:0.50] 2154; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2155; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 2156; SKX-NEXT: retq # sched: [7:1.00] 2157 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3> 2158 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2159 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2160 ret <8 x i64> %res 2161} 2162define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 2163; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask5: 2164; GENERIC: # %bb.0: 2165; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 2166; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] 2167; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 2168; GENERIC-NEXT: retq # sched: [1:1.00] 2169; 2170; SKX-LABEL: test_masked_8xi64_perm_imm_mask5: 2171; SKX: # %bb.0: 2172; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 2173; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00] 2174; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 2175; SKX-NEXT: retq # sched: [7:1.00] 2176 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 2177 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2178 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2179 ret <8 x i64> %res 2180} 2181 2182define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) { 2183; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask5: 2184; GENERIC: # %bb.0: 2185; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2186; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] 2187; GENERIC-NEXT: retq # 
sched: [1:1.00] 2188; 2189; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask5: 2190; SKX: # %bb.0: 2191; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2192; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00] 2193; SKX-NEXT: retq # sched: [7:1.00] 2194 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 2195 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2196 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2197 ret <8 x i64> %res 2198} 2199define <8 x i64> @test_8xi64_perm_mask6(<8 x i64> %vec) { 2200; GENERIC-LABEL: test_8xi64_perm_mask6: 2201; GENERIC: # %bb.0: 2202; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [7:0.50] 2203; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 2204; GENERIC-NEXT: retq # sched: [1:1.00] 2205; 2206; SKX-LABEL: test_8xi64_perm_mask6: 2207; SKX: # %bb.0: 2208; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [8:0.50] 2209; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 2210; SKX-NEXT: retq # sched: [7:1.00] 2211 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7> 2212 ret <8 x i64> %res 2213} 2214define <8 x i64> @test_masked_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 2215; GENERIC-LABEL: test_masked_8xi64_perm_mask6: 2216; GENERIC: # %bb.0: 2217; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [7:0.50] 2218; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 2219; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 2220; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 2221; GENERIC-NEXT: retq # sched: [1:1.00] 2222; 2223; SKX-LABEL: test_masked_8xi64_perm_mask6: 2224; SKX: # %bb.0: 2225; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [8:0.50] 2226; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 2227; 
SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 2228; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 2229; SKX-NEXT: retq # sched: [7:1.00] 2230 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7> 2231 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2232 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2233 ret <8 x i64> %res 2234} 2235 2236define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask) { 2237; GENERIC-LABEL: test_masked_z_8xi64_perm_mask6: 2238; GENERIC: # %bb.0: 2239; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [7:0.50] 2240; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2241; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 2242; GENERIC-NEXT: retq # sched: [1:1.00] 2243; 2244; SKX-LABEL: test_masked_z_8xi64_perm_mask6: 2245; SKX: # %bb.0: 2246; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [8:0.50] 2247; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2248; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 2249; SKX-NEXT: retq # sched: [7:1.00] 2250 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7> 2251 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2252 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2253 ret <8 x i64> %res 2254} 2255define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 2256; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask7: 2257; GENERIC: # %bb.0: 2258; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 2259; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] 2260; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 2261; GENERIC-NEXT: retq # sched: [1:1.00] 2262; 2263; SKX-LABEL: test_masked_8xi64_perm_imm_mask7: 2264; SKX: # %bb.0: 
2265; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 2266; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00] 2267; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 2268; SKX-NEXT: retq # sched: [7:1.00] 2269 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7> 2270 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2271 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2272 ret <8 x i64> %res 2273} 2274 2275define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) { 2276; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask7: 2277; GENERIC: # %bb.0: 2278; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2279; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] 2280; GENERIC-NEXT: retq # sched: [1:1.00] 2281; 2282; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask7: 2283; SKX: # %bb.0: 2284; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2285; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00] 2286; SKX-NEXT: retq # sched: [7:1.00] 2287 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7> 2288 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2289 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2290 ret <8 x i64> %res 2291} 2292define <8 x i64> @test_8xi64_perm_mem_mask0(<8 x i64>* %vp) { 2293; GENERIC-LABEL: test_8xi64_perm_mem_mask0: 2294; GENERIC: # %bb.0: 2295; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [7:0.50] 2296; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 2297; GENERIC-NEXT: retq # sched: [1:1.00] 2298; 2299; SKX-LABEL: test_8xi64_perm_mem_mask0: 2300; SKX: # %bb.0: 2301; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [8:0.50] 2302; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 2303; SKX-NEXT: 
retq # sched: [7:1.00] 2304 %vec = load <8 x i64>, <8 x i64>* %vp 2305 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3> 2306 ret <8 x i64> %res 2307} 2308define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2309; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask0: 2310; GENERIC: # %bb.0: 2311; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [7:0.50] 2312; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2313; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 2314; GENERIC-NEXT: retq # sched: [1:1.00] 2315; 2316; SKX-LABEL: test_masked_8xi64_perm_mem_mask0: 2317; SKX: # %bb.0: 2318; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [8:0.50] 2319; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2320; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 2321; SKX-NEXT: retq # sched: [7:1.00] 2322 %vec = load <8 x i64>, <8 x i64>* %vp 2323 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3> 2324 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2325 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2326 ret <8 x i64> %res 2327} 2328 2329define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %mask) { 2330; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask0: 2331; GENERIC: # %bb.0: 2332; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [7:0.50] 2333; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2334; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 2335; GENERIC-NEXT: retq # sched: [1:1.00] 2336; 2337; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask0: 2338; SKX: # %bb.0: 2339; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [8:0.50] 2340; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2341; SKX-NEXT: vpermq 
(%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 2342; SKX-NEXT: retq # sched: [7:1.00] 2343 %vec = load <8 x i64>, <8 x i64>* %vp 2344 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3> 2345 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2346 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2347 ret <8 x i64> %res 2348} 2349 2350define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2351; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1: 2352; GENERIC: # %bb.0: 2353; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2354; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] 2355; GENERIC-NEXT: retq # sched: [1:1.00] 2356; 2357; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask1: 2358; SKX: # %bb.0: 2359; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2360; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [10:1.00] 2361; SKX-NEXT: retq # sched: [7:1.00] 2362 %vec = load <8 x i64>, <8 x i64>* %vp 2363 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 0, i32 5, i32 5, i32 5, i32 4> 2364 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2365 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2366 ret <8 x i64> %res 2367} 2368 2369define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) { 2370; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: 2371; GENERIC: # %bb.0: 2372; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2373; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] 2374; GENERIC-NEXT: retq # sched: [1:1.00] 2375; 2376; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: 2377; SKX: # %bb.0: 2378; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2379; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] 
sched: [10:1.00] 2380; SKX-NEXT: retq # sched: [7:1.00] 2381 %vec = load <8 x i64>, <8 x i64>* %vp 2382 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 0, i32 5, i32 5, i32 5, i32 4> 2383 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2384 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2385 ret <8 x i64> %res 2386} 2387 2388define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2389; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask2: 2390; GENERIC: # %bb.0: 2391; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [7:0.50] 2392; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2393; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 2394; GENERIC-NEXT: retq # sched: [1:1.00] 2395; 2396; SKX-LABEL: test_masked_8xi64_perm_mem_mask2: 2397; SKX: # %bb.0: 2398; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [8:0.50] 2399; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2400; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 2401; SKX-NEXT: retq # sched: [7:1.00] 2402 %vec = load <8 x i64>, <8 x i64>* %vp 2403 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5> 2404 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2405 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2406 ret <8 x i64> %res 2407} 2408 2409define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %mask) { 2410; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask2: 2411; GENERIC: # %bb.0: 2412; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [7:0.50] 2413; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2414; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 2415; GENERIC-NEXT: retq # sched: [1:1.00] 2416; 2417; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask2: 2418; SKX: # 
%bb.0: 2419; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [8:0.50] 2420; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2421; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 2422; SKX-NEXT: retq # sched: [7:1.00] 2423 %vec = load <8 x i64>, <8 x i64>* %vp 2424 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5> 2425 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2426 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2427 ret <8 x i64> %res 2428} 2429 2430define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) { 2431; GENERIC-LABEL: test_8xi64_perm_imm_mem_mask3: 2432; GENERIC: # %bb.0: 2433; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] 2434; GENERIC-NEXT: retq # sched: [1:1.00] 2435; 2436; SKX-LABEL: test_8xi64_perm_imm_mem_mask3: 2437; SKX: # %bb.0: 2438; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] 2439; SKX-NEXT: retq # sched: [7:1.00] 2440 %vec = load <8 x i64>, <8 x i64>* %vp 2441 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5> 2442 ret <8 x i64> %res 2443} 2444define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2445; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3: 2446; GENERIC: # %bb.0: 2447; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2448; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] 2449; GENERIC-NEXT: retq # sched: [1:1.00] 2450; 2451; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask3: 2452; SKX: # %bb.0: 2453; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2454; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] 2455; SKX-NEXT: retq # sched: [7:1.00] 2456 %vec = load <8 x i64>, <8 x i64>* %vp 2457 %shuf = shufflevector <8 x i64> %vec, <8 x 
i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5> 2458 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2459 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2460 ret <8 x i64> %res 2461} 2462 2463define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) { 2464; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: 2465; GENERIC: # %bb.0: 2466; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2467; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] 2468; GENERIC-NEXT: retq # sched: [1:1.00] 2469; 2470; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: 2471; SKX: # %bb.0: 2472; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2473; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] 2474; SKX-NEXT: retq # sched: [7:1.00] 2475 %vec = load <8 x i64>, <8 x i64>* %vp 2476 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5> 2477 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2478 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2479 ret <8 x i64> %res 2480} 2481 2482define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2483; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask4: 2484; GENERIC: # %bb.0: 2485; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [7:0.50] 2486; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2487; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 2488; GENERIC-NEXT: retq # sched: [1:1.00] 2489; 2490; SKX-LABEL: test_masked_8xi64_perm_mem_mask4: 2491; SKX: # %bb.0: 2492; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [8:0.50] 2493; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2494; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 2495; SKX-NEXT: retq # sched: [7:1.00] 2496 %vec 
= load <8 x i64>, <8 x i64>* %vp 2497 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6> 2498 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2499 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2500 ret <8 x i64> %res 2501} 2502 2503define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %mask) { 2504; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask4: 2505; GENERIC: # %bb.0: 2506; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [7:0.50] 2507; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2508; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 2509; GENERIC-NEXT: retq # sched: [1:1.00] 2510; 2511; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask4: 2512; SKX: # %bb.0: 2513; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [8:0.50] 2514; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2515; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 2516; SKX-NEXT: retq # sched: [7:1.00] 2517 %vec = load <8 x i64>, <8 x i64>* %vp 2518 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6> 2519 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2520 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2521 ret <8 x i64> %res 2522} 2523 2524define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2525; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5: 2526; GENERIC: # %bb.0: 2527; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2528; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] 2529; GENERIC-NEXT: retq # sched: [1:1.00] 2530; 2531; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask5: 2532; SKX: # %bb.0: 2533; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2534; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = 
mem[3,1,0,0,7,5,4,4] sched: [10:1.00] 2535; SKX-NEXT: retq # sched: [7:1.00] 2536 %vec = load <8 x i64>, <8 x i64>* %vp 2537 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4> 2538 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2539 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2540 ret <8 x i64> %res 2541} 2542 2543define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) { 2544; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: 2545; GENERIC: # %bb.0: 2546; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2547; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] 2548; GENERIC-NEXT: retq # sched: [1:1.00] 2549; 2550; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: 2551; SKX: # %bb.0: 2552; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2553; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [10:1.00] 2554; SKX-NEXT: retq # sched: [7:1.00] 2555 %vec = load <8 x i64>, <8 x i64>* %vp 2556 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4> 2557 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2558 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2559 ret <8 x i64> %res 2560} 2561 2562define <8 x i64> @test_8xi64_perm_mem_mask6(<8 x i64>* %vp) { 2563; GENERIC-LABEL: test_8xi64_perm_mem_mask6: 2564; GENERIC: # %bb.0: 2565; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [7:0.50] 2566; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 2567; GENERIC-NEXT: retq # sched: [1:1.00] 2568; 2569; SKX-LABEL: test_8xi64_perm_mem_mask6: 2570; SKX: # %bb.0: 2571; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [8:0.50] 2572; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 2573; SKX-NEXT: retq # sched: [7:1.00] 2574 %vec = load <8 x i64>, <8 x i64>* 
%vp 2575 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6> 2576 ret <8 x i64> %res 2577} 2578define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2579; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask6: 2580; GENERIC: # %bb.0: 2581; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [7:0.50] 2582; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2583; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 2584; GENERIC-NEXT: retq # sched: [1:1.00] 2585; 2586; SKX-LABEL: test_masked_8xi64_perm_mem_mask6: 2587; SKX: # %bb.0: 2588; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [8:0.50] 2589; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2590; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 2591; SKX-NEXT: retq # sched: [7:1.00] 2592 %vec = load <8 x i64>, <8 x i64>* %vp 2593 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6> 2594 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2595 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2596 ret <8 x i64> %res 2597} 2598 2599define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %mask) { 2600; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask6: 2601; GENERIC: # %bb.0: 2602; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [7:0.50] 2603; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2604; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 2605; GENERIC-NEXT: retq # sched: [1:1.00] 2606; 2607; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask6: 2608; SKX: # %bb.0: 2609; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [8:0.50] 2610; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2611; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 2612; SKX-NEXT: 
retq # sched: [7:1.00] 2613 %vec = load <8 x i64>, <8 x i64>* %vp 2614 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6> 2615 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2616 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2617 ret <8 x i64> %res 2618} 2619 2620define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2621; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7: 2622; GENERIC: # %bb.0: 2623; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2624; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] 2625; GENERIC-NEXT: retq # sched: [1:1.00] 2626; 2627; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask7: 2628; SKX: # %bb.0: 2629; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2630; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00] 2631; SKX-NEXT: retq # sched: [7:1.00] 2632 %vec = load <8 x i64>, <8 x i64>* %vp 2633 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5> 2634 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2635 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2636 ret <8 x i64> %res 2637} 2638 2639define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) { 2640; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: 2641; GENERIC: # %bb.0: 2642; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2643; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] 2644; GENERIC-NEXT: retq # sched: [1:1.00] 2645; 2646; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: 2647; SKX: # %bb.0: 2648; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2649; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00] 2650; SKX-NEXT: retq # sched: [7:1.00] 2651 %vec = 
load <8 x i64>, <8 x i64>* %vp 2652 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5> 2653 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2654 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2655 ret <8 x i64> %res 2656} 2657 2658define <8 x float> @test_8xfloat_perm_mask0(<8 x float> %vec) { 2659; GENERIC-LABEL: test_8xfloat_perm_mask0: 2660; GENERIC: # %bb.0: 2661; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2662; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 2663; GENERIC-NEXT: retq # sched: [1:1.00] 2664; 2665; SKX-LABEL: test_8xfloat_perm_mask0: 2666; SKX: # %bb.0: 2667; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2668; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2669; SKX-NEXT: retq # sched: [7:1.00] 2670 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4> 2671 ret <8 x float> %res 2672} 2673define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { 2674; GENERIC-LABEL: test_masked_8xfloat_perm_mask0: 2675; GENERIC: # %bb.0: 2676; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2677; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 2678; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 2679; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 2680; GENERIC-NEXT: retq # sched: [1:1.00] 2681; 2682; SKX-LABEL: test_masked_8xfloat_perm_mask0: 2683; SKX: # %bb.0: 2684; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2685; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 2686; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 2687; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 2688; SKX-NEXT: retq # sched: [7:1.00] 2689 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 
x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4> 2690 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2691 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2692 ret <8 x float> %res 2693} 2694 2695define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x i32> %mask) { 2696; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask0: 2697; GENERIC: # %bb.0: 2698; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2699; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2700; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 2701; GENERIC-NEXT: retq # sched: [1:1.00] 2702; 2703; SKX-LABEL: test_masked_z_8xfloat_perm_mask0: 2704; SKX: # %bb.0: 2705; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2706; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2707; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 2708; SKX-NEXT: retq # sched: [7:1.00] 2709 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4> 2710 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2711 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2712 ret <8 x float> %res 2713} 2714define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { 2715; GENERIC-LABEL: test_masked_8xfloat_perm_mask1: 2716; GENERIC: # %bb.0: 2717; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50] 2718; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 2719; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 2720; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 2721; GENERIC-NEXT: retq # sched: [1:1.00] 2722; 2723; SKX-LABEL: test_masked_8xfloat_perm_mask1: 2724; SKX: # %bb.0: 2725; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50] 2726; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 
2727; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 2728; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 2729; SKX-NEXT: retq # sched: [7:1.00] 2730 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1> 2731 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2732 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2733 ret <8 x float> %res 2734} 2735 2736define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec, <8 x i64> %mask) { 2737; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask1: 2738; GENERIC: # %bb.0: 2739; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] 2740; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2741; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 2742; GENERIC-NEXT: retq # sched: [1:1.00] 2743; 2744; SKX-LABEL: test_masked_z_8xfloat_perm_mask1: 2745; SKX: # %bb.0: 2746; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] 2747; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2748; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 2749; SKX-NEXT: retq # sched: [7:1.00] 2750 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1> 2751 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2752 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2753 ret <8 x float> %res 2754} 2755define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { 2756; GENERIC-LABEL: test_masked_8xfloat_perm_mask2: 2757; GENERIC: # %bb.0: 2758; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50] 2759; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 2760; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 2761; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 2762; GENERIC-NEXT: retq 
# sched: [1:1.00] 2763; 2764; SKX-LABEL: test_masked_8xfloat_perm_mask2: 2765; SKX: # %bb.0: 2766; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50] 2767; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 2768; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 2769; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 2770; SKX-NEXT: retq # sched: [7:1.00] 2771 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5> 2772 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2773 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2774 ret <8 x float> %res 2775} 2776 2777define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x i32> %mask) { 2778; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask2: 2779; GENERIC: # %bb.0: 2780; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50] 2781; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2782; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 2783; GENERIC-NEXT: retq # sched: [1:1.00] 2784; 2785; SKX-LABEL: test_masked_z_8xfloat_perm_mask2: 2786; SKX: # %bb.0: 2787; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50] 2788; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2789; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 2790; SKX-NEXT: retq # sched: [7:1.00] 2791 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5> 2792 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2793 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2794 ret <8 x float> %res 2795} 2796define <8 x float> @test_8xfloat_perm_mask3(<8 x float> %vec) { 2797; GENERIC-LABEL: test_8xfloat_perm_mask3: 2798; GENERIC: # %bb.0: 2799; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2800; GENERIC-NEXT: vpermps %ymm0, %ymm1, 
%ymm0 # sched: [1:1.00] 2801; GENERIC-NEXT: retq # sched: [1:1.00] 2802; 2803; SKX-LABEL: test_8xfloat_perm_mask3: 2804; SKX: # %bb.0: 2805; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2806; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2807; SKX-NEXT: retq # sched: [7:1.00] 2808 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6> 2809 ret <8 x float> %res 2810} 2811define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { 2812; GENERIC-LABEL: test_masked_8xfloat_perm_mask3: 2813; GENERIC: # %bb.0: 2814; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2815; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 2816; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 2817; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 2818; GENERIC-NEXT: retq # sched: [1:1.00] 2819; 2820; SKX-LABEL: test_masked_8xfloat_perm_mask3: 2821; SKX: # %bb.0: 2822; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2823; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 2824; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 2825; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 2826; SKX-NEXT: retq # sched: [7:1.00] 2827 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6> 2828 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2829 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2830 ret <8 x float> %res 2831} 2832 2833define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec, <8 x i32> %mask) { 2834; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask3: 2835; GENERIC: # %bb.0: 2836; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2837; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2838; GENERIC-NEXT: vpermps %ymm0, 
%ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 2839; GENERIC-NEXT: retq # sched: [1:1.00] 2840; 2841; SKX-LABEL: test_masked_z_8xfloat_perm_mask3: 2842; SKX: # %bb.0: 2843; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2844; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2845; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 2846; SKX-NEXT: retq # sched: [7:1.00] 2847 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6> 2848 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2849 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2850 ret <8 x float> %res 2851} 2852define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) { 2853; GENERIC-LABEL: test_8xfloat_perm_mem_mask0: 2854; GENERIC: # %bb.0: 2855; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2856; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 2857; GENERIC-NEXT: retq # sched: [1:1.00] 2858; 2859; SKX-LABEL: test_8xfloat_perm_mem_mask0: 2860; SKX: # %bb.0: 2861; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2862; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2863; SKX-NEXT: retq # sched: [7:1.00] 2864 %vec = load <8 x float>, <8 x float>* %vp 2865 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0> 2866 ret <8 x float> %res 2867} 2868define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { 2869; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask0: 2870; GENERIC: # %bb.0: 2871; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2872; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2873; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 2874; GENERIC-NEXT: retq # sched: [1:1.00] 2875; 2876; SKX-LABEL: 
test_masked_8xfloat_perm_mem_mask0: 2877; SKX: # %bb.0: 2878; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2879; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2880; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 2881; SKX-NEXT: retq # sched: [7:1.00] 2882 %vec = load <8 x float>, <8 x float>* %vp 2883 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0> 2884 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2885 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2886 ret <8 x float> %res 2887} 2888 2889define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x i32> %mask) { 2890; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask0: 2891; GENERIC: # %bb.0: 2892; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2893; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 2894; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 2895; GENERIC-NEXT: retq # sched: [1:1.00] 2896; 2897; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask0: 2898; SKX: # %bb.0: 2899; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2900; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 2901; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 2902; SKX-NEXT: retq # sched: [7:1.00] 2903 %vec = load <8 x float>, <8 x float>* %vp 2904 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0> 2905 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2906 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2907 ret <8 x float> %res 2908} 2909 2910define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { 2911; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask1: 2912; GENERIC: # %bb.0: 2913; GENERIC-NEXT: vmovaps {{.*#+}} 
ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50] 2914; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2915; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 2916; GENERIC-NEXT: retq # sched: [1:1.00] 2917; 2918; SKX-LABEL: test_masked_8xfloat_perm_mem_mask1: 2919; SKX: # %bb.0: 2920; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50] 2921; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2922; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 2923; SKX-NEXT: retq # sched: [7:1.00] 2924 %vec = load <8 x float>, <8 x float>* %vp 2925 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6> 2926 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2927 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2928 ret <8 x float> %res 2929} 2930 2931define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x i32> %mask) { 2932; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask1: 2933; GENERIC: # %bb.0: 2934; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] 2935; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 2936; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 2937; GENERIC-NEXT: retq # sched: [1:1.00] 2938; 2939; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask1: 2940; SKX: # %bb.0: 2941; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] 2942; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 2943; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 2944; SKX-NEXT: retq # sched: [7:1.00] 2945 %vec = load <8 x float>, <8 x float>* %vp 2946 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6> 2947 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2948 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2949 ret <8 x float> %res 
2950} 2951 2952define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { 2953; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask2: 2954; GENERIC: # %bb.0: 2955; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] 2956; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2957; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 2958; GENERIC-NEXT: retq # sched: [1:1.00] 2959; 2960; SKX-LABEL: test_masked_8xfloat_perm_mem_mask2: 2961; SKX: # %bb.0: 2962; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] 2963; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2964; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 2965; SKX-NEXT: retq # sched: [7:1.00] 2966 %vec = load <8 x float>, <8 x float>* %vp 2967 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4> 2968 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2969 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2970 ret <8 x float> %res 2971} 2972 2973define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x i32> %mask) { 2974; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask2: 2975; GENERIC: # %bb.0: 2976; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] 2977; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 2978; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 2979; GENERIC-NEXT: retq # sched: [1:1.00] 2980; 2981; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask2: 2982; SKX: # %bb.0: 2983; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] 2984; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 2985; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 2986; SKX-NEXT: retq # sched: [7:1.00] 2987 %vec = load <8 x float>, <8 x float>* %vp 2988 %shuf = shufflevector <8 x float> %vec, <8 
x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4> 2989 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2990 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2991 ret <8 x float> %res 2992} 2993 2994define <8 x float> @test_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mask) { 2995; GENERIC-LABEL: test_8xfloat_perm_mem_mask3: 2996; GENERIC: # %bb.0: 2997; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 2998; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 2999; GENERIC-NEXT: retq # sched: [1:1.00] 3000; 3001; SKX-LABEL: test_8xfloat_perm_mem_mask3: 3002; SKX: # %bb.0: 3003; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3004; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 3005; SKX-NEXT: retq # sched: [7:1.00] 3006 %vec = load <8 x float>, <8 x float>* %vp 3007 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0> 3008 ret <8 x float> %res 3009} 3010define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { 3011; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask3: 3012; GENERIC: # %bb.0: 3013; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3014; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 3015; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 3016; GENERIC-NEXT: retq # sched: [1:1.00] 3017; 3018; SKX-LABEL: test_masked_8xfloat_perm_mem_mask3: 3019; SKX: # %bb.0: 3020; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3021; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 3022; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 3023; SKX-NEXT: retq # sched: [7:1.00] 3024 %vec = load <8 x float>, <8 x float>* %vp 3025 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, 
i32 2, i32 3, i32 0> 3026 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 3027 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 3028 ret <8 x float> %res 3029} 3030 3031define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mask) { 3032; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask3: 3033; GENERIC: # %bb.0: 3034; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3035; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 3036; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 3037; GENERIC-NEXT: retq # sched: [1:1.00] 3038; 3039; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask3: 3040; SKX: # %bb.0: 3041; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3042; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 3043; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 3044; SKX-NEXT: retq # sched: [7:1.00] 3045 %vec = load <8 x float>, <8 x float>* %vp 3046 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0> 3047 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 3048 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 3049 ret <8 x float> %res 3050} 3051 3052define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) { 3053; GENERIC-LABEL: test_16xfloat_perm_mask0: 3054; GENERIC: # %bb.0: 3055; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] 3056; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 3057; GENERIC-NEXT: retq # sched: [1:1.00] 3058; 3059; SKX-LABEL: test_16xfloat_perm_mask0: 3060; SKX: # %bb.0: 3061; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] 3062; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 3063; SKX-NEXT: retq # sched: [7:1.00] 3064 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x 
i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7> 3065 ret <16 x float> %res 3066} 3067define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { 3068; GENERIC-LABEL: test_masked_16xfloat_perm_mask0: 3069; GENERIC: # %bb.0: 3070; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] 3071; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 3072; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3073; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 3074; GENERIC-NEXT: retq # sched: [1:1.00] 3075; 3076; SKX-LABEL: test_masked_16xfloat_perm_mask0: 3077; SKX: # %bb.0: 3078; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] 3079; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 3080; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3081; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 3082; SKX-NEXT: retq # sched: [7:1.00] 3083 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7> 3084 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3085 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3086 ret <16 x float> %res 3087} 3088 3089define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x i32> %mask) { 3090; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask0: 3091; GENERIC: # %bb.0: 3092; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] 3093; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3094; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3095; GENERIC-NEXT: retq # sched: [1:1.00] 3096; 3097; SKX-LABEL: test_masked_z_16xfloat_perm_mask0: 3098; SKX: # %bb.0: 
3099; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] 3100; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3101; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3102; SKX-NEXT: retq # sched: [7:1.00] 3103 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7> 3104 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3105 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3106 ret <16 x float> %res 3107} 3108define <16 x float> @test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { 3109; GENERIC-LABEL: test_masked_16xfloat_perm_mask1: 3110; GENERIC: # %bb.0: 3111; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50] 3112; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 3113; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3114; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 3115; GENERIC-NEXT: retq # sched: [1:1.00] 3116; 3117; SKX-LABEL: test_masked_16xfloat_perm_mask1: 3118; SKX: # %bb.0: 3119; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50] 3120; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 3121; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3122; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 3123; SKX-NEXT: retq # sched: [7:1.00] 3124 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1> 3125 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3126 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3127 ret <16 x float> %res 3128} 3129 3130define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x 
float> %vec, <16 x i32> %mask) { 3131; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask1: 3132; GENERIC: # %bb.0: 3133; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50] 3134; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3135; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3136; GENERIC-NEXT: retq # sched: [1:1.00] 3137; 3138; SKX-LABEL: test_masked_z_16xfloat_perm_mask1: 3139; SKX: # %bb.0: 3140; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50] 3141; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3142; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3143; SKX-NEXT: retq # sched: [7:1.00] 3144 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1> 3145 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3146 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3147 ret <16 x float> %res 3148} 3149define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { 3150; GENERIC-LABEL: test_masked_16xfloat_perm_mask2: 3151; GENERIC: # %bb.0: 3152; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50] 3153; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 3154; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3155; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 3156; GENERIC-NEXT: retq # sched: [1:1.00] 3157; 3158; SKX-LABEL: test_masked_16xfloat_perm_mask2: 3159; SKX: # %bb.0: 3160; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50] 3161; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 3162; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3163; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: 
[1:0.33] 3164; SKX-NEXT: retq # sched: [7:1.00] 3165 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11> 3166 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3167 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3168 ret <16 x float> %res 3169} 3170 3171define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x i32> %mask) { 3172; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask2: 3173; GENERIC: # %bb.0: 3174; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50] 3175; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3176; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3177; GENERIC-NEXT: retq # sched: [1:1.00] 3178; 3179; SKX-LABEL: test_masked_z_16xfloat_perm_mask2: 3180; SKX: # %bb.0: 3181; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50] 3182; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3183; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3184; SKX-NEXT: retq # sched: [7:1.00] 3185 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11> 3186 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3187 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3188 ret <16 x float> %res 3189} 3190define <16 x float> @test_16xfloat_perm_mask3(<16 x float> %vec) { 3191; GENERIC-LABEL: test_16xfloat_perm_mask3: 3192; GENERIC: # %bb.0: 3193; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] 3194; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 3195; GENERIC-NEXT: retq # sched: [1:1.00] 3196; 3197; SKX-LABEL: test_16xfloat_perm_mask3: 3198; SKX: # 
%bb.0: 3199; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] 3200; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 3201; SKX-NEXT: retq # sched: [7:1.00] 3202 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3> 3203 ret <16 x float> %res 3204} 3205define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { 3206; GENERIC-LABEL: test_masked_16xfloat_perm_mask3: 3207; GENERIC: # %bb.0: 3208; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] 3209; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 3210; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3211; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 3212; GENERIC-NEXT: retq # sched: [1:1.00] 3213; 3214; SKX-LABEL: test_masked_16xfloat_perm_mask3: 3215; SKX: # %bb.0: 3216; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] 3217; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 3218; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3219; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 3220; SKX-NEXT: retq # sched: [7:1.00] 3221 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3> 3222 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3223 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3224 ret <16 x float> %res 3225} 3226 3227define <16 x float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec, <16 x i32> %mask) { 3228; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask3: 3229; GENERIC: # %bb.0: 3230; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: 
[7:0.50] 3231; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3232; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3233; GENERIC-NEXT: retq # sched: [1:1.00] 3234; 3235; SKX-LABEL: test_masked_z_16xfloat_perm_mask3: 3236; SKX: # %bb.0: 3237; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] 3238; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3239; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3240; SKX-NEXT: retq # sched: [7:1.00] 3241 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3> 3242 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3243 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3244 ret <16 x float> %res 3245} 3246define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) { 3247; GENERIC-LABEL: test_16xfloat_perm_mem_mask0: 3248; GENERIC: # %bb.0: 3249; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] 3250; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 3251; GENERIC-NEXT: retq # sched: [1:1.00] 3252; 3253; SKX-LABEL: test_16xfloat_perm_mem_mask0: 3254; SKX: # %bb.0: 3255; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] 3256; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 3257; SKX-NEXT: retq # sched: [7:1.00] 3258 %vec = load <16 x float>, <16 x float>* %vp 3259 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1> 3260 ret <16 x float> %res 3261} 3262define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { 3263; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask0: 3264; 
GENERIC: # %bb.0: 3265; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] 3266; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3267; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 3268; GENERIC-NEXT: retq # sched: [1:1.00] 3269; 3270; SKX-LABEL: test_masked_16xfloat_perm_mem_mask0: 3271; SKX: # %bb.0: 3272; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] 3273; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3274; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 3275; SKX-NEXT: retq # sched: [7:1.00] 3276 %vec = load <16 x float>, <16 x float>* %vp 3277 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1> 3278 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3279 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3280 ret <16 x float> %res 3281} 3282 3283define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x i32> %mask) { 3284; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask0: 3285; GENERIC: # %bb.0: 3286; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] 3287; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 3288; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 3289; GENERIC-NEXT: retq # sched: [1:1.00] 3290; 3291; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask0: 3292; SKX: # %bb.0: 3293; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] 3294; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 3295; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 3296; SKX-NEXT: retq # sched: [7:1.00] 3297 %vec = load <16 x float>, <16 x float>* %vp 3298 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x 
i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1> 3299 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3300 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3301 ret <16 x float> %res 3302} 3303 3304define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { 3305; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask1: 3306; GENERIC: # %bb.0: 3307; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50] 3308; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3309; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 3310; GENERIC-NEXT: retq # sched: [1:1.00] 3311; 3312; SKX-LABEL: test_masked_16xfloat_perm_mem_mask1: 3313; SKX: # %bb.0: 3314; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50] 3315; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3316; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 3317; SKX-NEXT: retq # sched: [7:1.00] 3318 %vec = load <16 x float>, <16 x float>* %vp 3319 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4> 3320 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3321 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3322 ret <16 x float> %res 3323} 3324 3325define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x i32> %mask) { 3326; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask1: 3327; GENERIC: # %bb.0: 3328; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50] 3329; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 3330; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 3331; GENERIC-NEXT: retq # sched: 
[1:1.00] 3332; 3333; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask1: 3334; SKX: # %bb.0: 3335; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50] 3336; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 3337; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 3338; SKX-NEXT: retq # sched: [7:1.00] 3339 %vec = load <16 x float>, <16 x float>* %vp 3340 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4> 3341 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3342 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3343 ret <16 x float> %res 3344} 3345 3346define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { 3347; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask2: 3348; GENERIC: # %bb.0: 3349; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50] 3350; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3351; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 3352; GENERIC-NEXT: retq # sched: [1:1.00] 3353; 3354; SKX-LABEL: test_masked_16xfloat_perm_mem_mask2: 3355; SKX: # %bb.0: 3356; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50] 3357; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3358; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 3359; SKX-NEXT: retq # sched: [7:1.00] 3360 %vec = load <16 x float>, <16 x float>* %vp 3361 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5> 3362 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3363 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3364 ret <16 x 
float> %res 3365} 3366 3367define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x i32> %mask) { 3368; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask2: 3369; GENERIC: # %bb.0: 3370; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50] 3371; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 3372; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 3373; GENERIC-NEXT: retq # sched: [1:1.00] 3374; 3375; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask2: 3376; SKX: # %bb.0: 3377; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50] 3378; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 3379; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 3380; SKX-NEXT: retq # sched: [7:1.00] 3381 %vec = load <16 x float>, <16 x float>* %vp 3382 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5> 3383 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3384 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3385 ret <16 x float> %res 3386} 3387 3388define <16 x float> @test_16xfloat_perm_mem_mask3(<16 x float>* %vp) { 3389; GENERIC-LABEL: test_16xfloat_perm_mem_mask3: 3390; GENERIC: # %bb.0: 3391; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] 3392; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 3393; GENERIC-NEXT: retq # sched: [1:1.00] 3394; 3395; SKX-LABEL: test_16xfloat_perm_mem_mask3: 3396; SKX: # %bb.0: 3397; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] 3398; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 3399; SKX-NEXT: retq # sched: [7:1.00] 3400 %vec = load <16 x float>, <16 x float>* %vp 3401 %res = shufflevector <16 x float> 
%vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0> 3402 ret <16 x float> %res 3403} 3404define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { 3405; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask3: 3406; GENERIC: # %bb.0: 3407; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] 3408; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3409; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 3410; GENERIC-NEXT: retq # sched: [1:1.00] 3411; 3412; SKX-LABEL: test_masked_16xfloat_perm_mem_mask3: 3413; SKX: # %bb.0: 3414; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] 3415; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3416; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 3417; SKX-NEXT: retq # sched: [7:1.00] 3418 %vec = load <16 x float>, <16 x float>* %vp 3419 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0> 3420 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3421 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3422 ret <16 x float> %res 3423} 3424 3425define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x i32> %mask) { 3426; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask3: 3427; GENERIC: # %bb.0: 3428; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] 3429; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 3430; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 3431; GENERIC-NEXT: retq # sched: [1:1.00] 3432; 3433; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask3: 3434; SKX: # %bb.0: 
3435; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] 3436; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 3437; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 3438; SKX-NEXT: retq # sched: [7:1.00] 3439 %vec = load <16 x float>, <16 x float>* %vp 3440 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0> 3441 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3442 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3443 ret <16 x float> %res 3444} 3445 3446define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) { 3447; GENERIC-LABEL: test_4xdouble_perm_mask0: 3448; GENERIC: # %bb.0: 3449; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] sched: [1:1.00] 3450; GENERIC-NEXT: retq # sched: [1:1.00] 3451; 3452; SKX-LABEL: test_4xdouble_perm_mask0: 3453; SKX: # %bb.0: 3454; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] sched: [3:1.00] 3455; SKX-NEXT: retq # sched: [7:1.00] 3456 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2> 3457 ret <4 x double> %res 3458} 3459define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { 3460; GENERIC-LABEL: test_masked_4xdouble_perm_mask0: 3461; GENERIC: # %bb.0: 3462; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 3463; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [1:1.00] 3464; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 3465; GENERIC-NEXT: retq # sched: [1:1.00] 3466; 3467; SKX-LABEL: test_masked_4xdouble_perm_mask0: 3468; SKX: # %bb.0: 3469; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 3470; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [3:1.00] 3471; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 3472; SKX-NEXT: 
retq # sched: [7:1.00] 3473 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2> 3474 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3475 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3476 ret <4 x double> %res 3477} 3478 3479define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x i64> %mask) { 3480; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask0: 3481; GENERIC: # %bb.0: 3482; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3483; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [1:1.00] 3484; GENERIC-NEXT: retq # sched: [1:1.00] 3485; 3486; SKX-LABEL: test_masked_z_4xdouble_perm_mask0: 3487; SKX: # %bb.0: 3488; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3489; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [3:1.00] 3490; SKX-NEXT: retq # sched: [7:1.00] 3491 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2> 3492 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3493 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3494 ret <4 x double> %res 3495} 3496define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { 3497; GENERIC-LABEL: test_masked_4xdouble_perm_mask1: 3498; GENERIC: # %bb.0: 3499; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 3500; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [1:1.00] 3501; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 3502; GENERIC-NEXT: retq # sched: [1:1.00] 3503; 3504; SKX-LABEL: test_masked_4xdouble_perm_mask1: 3505; SKX: # %bb.0: 3506; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 3507; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [3:1.00] 3508; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 3509; SKX-NEXT: retq # sched: [7:1.00] 3510 %shuf = shufflevector <4 x double> %vec, <4 x 
double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 3511 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3512 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3513 ret <4 x double> %res 3514} 3515 3516define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x i64> %mask) { 3517; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask1: 3518; GENERIC: # %bb.0: 3519; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3520; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [1:1.00] 3521; GENERIC-NEXT: retq # sched: [1:1.00] 3522; 3523; SKX-LABEL: test_masked_z_4xdouble_perm_mask1: 3524; SKX: # %bb.0: 3525; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3526; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [3:1.00] 3527; SKX-NEXT: retq # sched: [7:1.00] 3528 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 3529 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3530 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3531 ret <4 x double> %res 3532} 3533define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { 3534; GENERIC-LABEL: test_masked_4xdouble_perm_mask2: 3535; GENERIC: # %bb.0: 3536; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 3537; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [1:1.00] 3538; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 3539; GENERIC-NEXT: retq # sched: [1:1.00] 3540; 3541; SKX-LABEL: test_masked_4xdouble_perm_mask2: 3542; SKX: # %bb.0: 3543; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 3544; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [3:1.00] 3545; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 3546; SKX-NEXT: retq # sched: [7:1.00] 3547 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1> 3548 %cmp = icmp eq <4 x 
i64> %mask, zeroinitializer 3549 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3550 ret <4 x double> %res 3551} 3552 3553define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x i64> %mask) { 3554; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask2: 3555; GENERIC: # %bb.0: 3556; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3557; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [1:1.00] 3558; GENERIC-NEXT: retq # sched: [1:1.00] 3559; 3560; SKX-LABEL: test_masked_z_4xdouble_perm_mask2: 3561; SKX: # %bb.0: 3562; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3563; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [3:1.00] 3564; SKX-NEXT: retq # sched: [7:1.00] 3565 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1> 3566 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3567 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3568 ret <4 x double> %res 3569} 3570define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) { 3571; GENERIC-LABEL: test_4xdouble_perm_mask3: 3572; GENERIC: # %bb.0: 3573; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] sched: [1:1.00] 3574; GENERIC-NEXT: retq # sched: [1:1.00] 3575; 3576; SKX-LABEL: test_4xdouble_perm_mask3: 3577; SKX: # %bb.0: 3578; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] sched: [3:1.00] 3579; SKX-NEXT: retq # sched: [7:1.00] 3580 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2> 3581 ret <4 x double> %res 3582} 3583define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { 3584; GENERIC-LABEL: test_masked_4xdouble_perm_mask3: 3585; GENERIC: # %bb.0: 3586; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 3587; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [1:1.00] 3588; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # 
sched: [1:1.00] 3589; GENERIC-NEXT: retq # sched: [1:1.00] 3590; 3591; SKX-LABEL: test_masked_4xdouble_perm_mask3: 3592; SKX: # %bb.0: 3593; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 3594; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [3:1.00] 3595; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 3596; SKX-NEXT: retq # sched: [7:1.00] 3597 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2> 3598 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3599 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3600 ret <4 x double> %res 3601} 3602 3603define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x i64> %mask) { 3604; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask3: 3605; GENERIC: # %bb.0: 3606; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3607; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [1:1.00] 3608; GENERIC-NEXT: retq # sched: [1:1.00] 3609; 3610; SKX-LABEL: test_masked_z_4xdouble_perm_mask3: 3611; SKX: # %bb.0: 3612; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3613; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [3:1.00] 3614; SKX-NEXT: retq # sched: [7:1.00] 3615 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2> 3616 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3617 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3618 ret <4 x double> %res 3619} 3620define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { 3621; GENERIC-LABEL: test_4xdouble_perm_mem_mask0: 3622; GENERIC: # %bb.0: 3623; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [8:1.00] 3624; GENERIC-NEXT: retq # sched: [1:1.00] 3625; 3626; SKX-LABEL: test_4xdouble_perm_mem_mask0: 3627; SKX: # %bb.0: 3628; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [10:1.00] 3629; SKX-NEXT: retq # sched: [7:1.00] 3630 %vec = 
load <4 x double>, <4 x double>* %vp 3631 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0> 3632 ret <4 x double> %res 3633} 3634define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { 3635; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0: 3636; GENERIC: # %bb.0: 3637; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3638; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [8:1.00] 3639; GENERIC-NEXT: retq # sched: [1:1.00] 3640; 3641; SKX-LABEL: test_masked_4xdouble_perm_mem_mask0: 3642; SKX: # %bb.0: 3643; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3644; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [10:1.00] 3645; SKX-NEXT: retq # sched: [7:1.00] 3646 %vec = load <4 x double>, <4 x double>* %vp 3647 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0> 3648 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3649 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3650 ret <4 x double> %res 3651} 3652 3653define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x i64> %mask) { 3654; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0: 3655; GENERIC: # %bb.0: 3656; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 3657; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [8:1.00] 3658; GENERIC-NEXT: retq # sched: [1:1.00] 3659; 3660; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask0: 3661; SKX: # %bb.0: 3662; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 3663; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [10:1.00] 3664; SKX-NEXT: retq # sched: [7:1.00] 3665 %vec = load <4 x double>, <4 x double>* %vp 3666 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0> 3667 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3668 %res = 
select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3669 ret <4 x double> %res 3670} 3671 3672define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { 3673; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1: 3674; GENERIC: # %bb.0: 3675; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3676; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [8:1.00] 3677; GENERIC-NEXT: retq # sched: [1:1.00] 3678; 3679; SKX-LABEL: test_masked_4xdouble_perm_mem_mask1: 3680; SKX: # %bb.0: 3681; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3682; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [10:1.00] 3683; SKX-NEXT: retq # sched: [7:1.00] 3684 %vec = load <4 x double>, <4 x double>* %vp 3685 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2> 3686 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3687 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3688 ret <4 x double> %res 3689} 3690 3691define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x i64> %mask) { 3692; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1: 3693; GENERIC: # %bb.0: 3694; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 3695; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [8:1.00] 3696; GENERIC-NEXT: retq # sched: [1:1.00] 3697; 3698; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask1: 3699; SKX: # %bb.0: 3700; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 3701; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [10:1.00] 3702; SKX-NEXT: retq # sched: [7:1.00] 3703 %vec = load <4 x double>, <4 x double>* %vp 3704 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2> 3705 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3706 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> 
zeroinitializer 3707 ret <4 x double> %res 3708} 3709 3710define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { 3711; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2: 3712; GENERIC: # %bb.0: 3713; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3714; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [8:1.00] 3715; GENERIC-NEXT: retq # sched: [1:1.00] 3716; 3717; SKX-LABEL: test_masked_4xdouble_perm_mem_mask2: 3718; SKX: # %bb.0: 3719; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3720; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [10:1.00] 3721; SKX-NEXT: retq # sched: [7:1.00] 3722 %vec = load <4 x double>, <4 x double>* %vp 3723 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 3724 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3725 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3726 ret <4 x double> %res 3727} 3728 3729define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x i64> %mask) { 3730; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2: 3731; GENERIC: # %bb.0: 3732; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 3733; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [8:1.00] 3734; GENERIC-NEXT: retq # sched: [1:1.00] 3735; 3736; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask2: 3737; SKX: # %bb.0: 3738; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 3739; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [10:1.00] 3740; SKX-NEXT: retq # sched: [7:1.00] 3741 %vec = load <4 x double>, <4 x double>* %vp 3742 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 3743 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3744 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3745 ret <4 x double> %res 3746} 3747 3748define <4 
x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { 3749; GENERIC-LABEL: test_4xdouble_perm_mem_mask3: 3750; GENERIC: # %bb.0: 3751; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [8:1.00] 3752; GENERIC-NEXT: retq # sched: [1:1.00] 3753; 3754; SKX-LABEL: test_4xdouble_perm_mem_mask3: 3755; SKX: # %bb.0: 3756; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [10:1.00] 3757; SKX-NEXT: retq # sched: [7:1.00] 3758 %vec = load <4 x double>, <4 x double>* %vp 3759 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2> 3760 ret <4 x double> %res 3761} 3762define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { 3763; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3: 3764; GENERIC: # %bb.0: 3765; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3766; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [8:1.00] 3767; GENERIC-NEXT: retq # sched: [1:1.00] 3768; 3769; SKX-LABEL: test_masked_4xdouble_perm_mem_mask3: 3770; SKX: # %bb.0: 3771; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3772; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [10:1.00] 3773; SKX-NEXT: retq # sched: [7:1.00] 3774 %vec = load <4 x double>, <4 x double>* %vp 3775 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2> 3776 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3777 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3778 ret <4 x double> %res 3779} 3780 3781define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x i64> %mask) { 3782; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3: 3783; GENERIC: # %bb.0: 3784; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 3785; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [8:1.00] 3786; GENERIC-NEXT: retq # sched: [1:1.00] 3787; 3788; SKX-LABEL: 
test_masked_z_4xdouble_perm_mem_mask3: 3789; SKX: # %bb.0: 3790; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 3791; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [10:1.00] 3792; SKX-NEXT: retq # sched: [7:1.00] 3793 %vec = load <4 x double>, <4 x double>* %vp 3794 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2> 3795 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3796 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3797 ret <4 x double> %res 3798} 3799 3800define <8 x double> @test_8xdouble_perm_mask0(<8 x double> %vec) { 3801; GENERIC-LABEL: test_8xdouble_perm_mask0: 3802; GENERIC: # %bb.0: 3803; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [7:0.50] 3804; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 3805; GENERIC-NEXT: retq # sched: [1:1.00] 3806; 3807; SKX-LABEL: test_8xdouble_perm_mask0: 3808; SKX: # %bb.0: 3809; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [8:0.50] 3810; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 3811; SKX-NEXT: retq # sched: [7:1.00] 3812 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4> 3813 ret <8 x double> %res 3814} 3815define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3816; GENERIC-LABEL: test_masked_8xdouble_perm_mask0: 3817; GENERIC: # %bb.0: 3818; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [7:0.50] 3819; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3820; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3821; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3822; GENERIC-NEXT: retq # sched: [1:1.00] 3823; 3824; SKX-LABEL: test_masked_8xdouble_perm_mask0: 3825; SKX: # %bb.0: 3826; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [8:0.50] 3827; SKX-NEXT: vptestnmq %zmm2, 
%zmm2, %k1 # sched: [3:1.00] 3828; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3829; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3830; SKX-NEXT: retq # sched: [7:1.00] 3831 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4> 3832 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3833 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 3834 ret <8 x double> %res 3835} 3836 3837define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x i64> %mask) { 3838; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask0: 3839; GENERIC: # %bb.0: 3840; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [7:0.50] 3841; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 3842; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3843; GENERIC-NEXT: retq # sched: [1:1.00] 3844; 3845; SKX-LABEL: test_masked_z_8xdouble_perm_mask0: 3846; SKX: # %bb.0: 3847; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [8:0.50] 3848; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 3849; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3850; SKX-NEXT: retq # sched: [7:1.00] 3851 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4> 3852 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3853 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 3854 ret <8 x double> %res 3855} 3856define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3857; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask1: 3858; GENERIC: # %bb.0: 3859; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3860; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] 3861; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3862; GENERIC-NEXT: retq # 
sched: [1:1.00] 3863; 3864; SKX-LABEL: test_masked_8xdouble_perm_imm_mask1: 3865; SKX: # %bb.0: 3866; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 3867; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00] 3868; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3869; SKX-NEXT: retq # sched: [7:1.00] 3870 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 7, i32 4, i32 4, i32 6> 3871 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3872 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 3873 ret <8 x double> %res 3874} 3875 3876define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x i64> %mask) { 3877; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask1: 3878; GENERIC: # %bb.0: 3879; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 3880; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] 3881; GENERIC-NEXT: retq # sched: [1:1.00] 3882; 3883; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask1: 3884; SKX: # %bb.0: 3885; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 3886; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00] 3887; SKX-NEXT: retq # sched: [7:1.00] 3888 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 7, i32 4, i32 4, i32 6> 3889 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3890 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 3891 ret <8 x double> %res 3892} 3893define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3894; GENERIC-LABEL: test_masked_8xdouble_perm_mask2: 3895; GENERIC: # %bb.0: 3896; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [7:0.50] 3897; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3898; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: 
[1:1.00] 3899; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3900; GENERIC-NEXT: retq # sched: [1:1.00] 3901; 3902; SKX-LABEL: test_masked_8xdouble_perm_mask2: 3903; SKX: # %bb.0: 3904; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [8:0.50] 3905; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 3906; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3907; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3908; SKX-NEXT: retq # sched: [7:1.00] 3909 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7> 3910 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3911 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 3912 ret <8 x double> %res 3913} 3914 3915define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x i64> %mask) { 3916; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask2: 3917; GENERIC: # %bb.0: 3918; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [7:0.50] 3919; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 3920; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3921; GENERIC-NEXT: retq # sched: [1:1.00] 3922; 3923; SKX-LABEL: test_masked_z_8xdouble_perm_mask2: 3924; SKX: # %bb.0: 3925; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [8:0.50] 3926; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 3927; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3928; SKX-NEXT: retq # sched: [7:1.00] 3929 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7> 3930 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3931 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 3932 ret <8 x double> %res 3933} 3934define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) { 3935; GENERIC-LABEL: test_8xdouble_perm_imm_mask3: 3936; GENERIC: # %bb.0: 
3937; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] 3938; GENERIC-NEXT: retq # sched: [1:1.00] 3939; 3940; SKX-LABEL: test_8xdouble_perm_imm_mask3: 3941; SKX: # %bb.0: 3942; SKX-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] 3943; SKX-NEXT: retq # sched: [7:1.00] 3944 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4> 3945 ret <8 x double> %res 3946} 3947define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3948; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask3: 3949; GENERIC: # %bb.0: 3950; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3951; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] 3952; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3953; GENERIC-NEXT: retq # sched: [1:1.00] 3954; 3955; SKX-LABEL: test_masked_8xdouble_perm_imm_mask3: 3956; SKX: # %bb.0: 3957; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 3958; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] 3959; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3960; SKX-NEXT: retq # sched: [7:1.00] 3961 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4> 3962 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3963 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 3964 ret <8 x double> %res 3965} 3966 3967define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x i64> %mask) { 3968; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask3: 3969; GENERIC: # %bb.0: 3970; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 3971; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] 3972; GENERIC-NEXT: retq # sched: [1:1.00] 3973; 3974; SKX-LABEL: 
test_masked_z_8xdouble_perm_imm_mask3: 3975; SKX: # %bb.0: 3976; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 3977; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] 3978; SKX-NEXT: retq # sched: [7:1.00] 3979 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4> 3980 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3981 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 3982 ret <8 x double> %res 3983} 3984define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3985; GENERIC-LABEL: test_masked_8xdouble_perm_mask4: 3986; GENERIC: # %bb.0: 3987; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [7:0.50] 3988; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3989; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3990; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3991; GENERIC-NEXT: retq # sched: [1:1.00] 3992; 3993; SKX-LABEL: test_masked_8xdouble_perm_mask4: 3994; SKX: # %bb.0: 3995; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [8:0.50] 3996; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 3997; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3998; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3999; SKX-NEXT: retq # sched: [7:1.00] 4000 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1> 4001 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4002 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4003 ret <8 x double> %res 4004} 4005 4006define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x i64> %mask) { 4007; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask4: 4008; GENERIC: # %bb.0: 4009; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [7:0.50] 4010; 
GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4011; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 4012; GENERIC-NEXT: retq # sched: [1:1.00] 4013; 4014; SKX-LABEL: test_masked_z_8xdouble_perm_mask4: 4015; SKX: # %bb.0: 4016; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [8:0.50] 4017; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4018; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 4019; SKX-NEXT: retq # sched: [7:1.00] 4020 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1> 4021 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4022 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4023 ret <8 x double> %res 4024} 4025define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 4026; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask5: 4027; GENERIC: # %bb.0: 4028; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 4029; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] 4030; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 4031; GENERIC-NEXT: retq # sched: [1:1.00] 4032; 4033; SKX-LABEL: test_masked_8xdouble_perm_imm_mask5: 4034; SKX: # %bb.0: 4035; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 4036; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00] 4037; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 4038; SKX-NEXT: retq # sched: [7:1.00] 4039 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7> 4040 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4041 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4042 ret <8 x double> %res 4043} 4044 4045define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x i64> %mask) { 4046; 
GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask5: 4047; GENERIC: # %bb.0: 4048; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4049; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] 4050; GENERIC-NEXT: retq # sched: [1:1.00] 4051; 4052; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask5: 4053; SKX: # %bb.0: 4054; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4055; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00] 4056; SKX-NEXT: retq # sched: [7:1.00] 4057 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7> 4058 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4059 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4060 ret <8 x double> %res 4061} 4062define <8 x double> @test_8xdouble_perm_mask6(<8 x double> %vec) { 4063; GENERIC-LABEL: test_8xdouble_perm_mask6: 4064; GENERIC: # %bb.0: 4065; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [7:0.50] 4066; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 4067; GENERIC-NEXT: retq # sched: [1:1.00] 4068; 4069; SKX-LABEL: test_8xdouble_perm_mask6: 4070; SKX: # %bb.0: 4071; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [8:0.50] 4072; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 4073; SKX-NEXT: retq # sched: [7:1.00] 4074 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2> 4075 ret <8 x double> %res 4076} 4077define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 4078; GENERIC-LABEL: test_masked_8xdouble_perm_mask6: 4079; GENERIC: # %bb.0: 4080; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [7:0.50] 4081; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 4082; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 
4083; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 4084; GENERIC-NEXT: retq # sched: [1:1.00] 4085; 4086; SKX-LABEL: test_masked_8xdouble_perm_mask6: 4087; SKX: # %bb.0: 4088; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [8:0.50] 4089; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 4090; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 4091; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 4092; SKX-NEXT: retq # sched: [7:1.00] 4093 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2> 4094 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4095 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4096 ret <8 x double> %res 4097} 4098 4099define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x i64> %mask) { 4100; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask6: 4101; GENERIC: # %bb.0: 4102; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [7:0.50] 4103; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4104; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 4105; GENERIC-NEXT: retq # sched: [1:1.00] 4106; 4107; SKX-LABEL: test_masked_z_8xdouble_perm_mask6: 4108; SKX: # %bb.0: 4109; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [8:0.50] 4110; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4111; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 4112; SKX-NEXT: retq # sched: [7:1.00] 4113 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2> 4114 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4115 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4116 ret <8 x double> %res 4117} 4118define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 4119; GENERIC-LABEL: 
test_masked_8xdouble_perm_imm_mask7: 4120; GENERIC: # %bb.0: 4121; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 4122; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] 4123; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 4124; GENERIC-NEXT: retq # sched: [1:1.00] 4125; 4126; SKX-LABEL: test_masked_8xdouble_perm_imm_mask7: 4127; SKX: # %bb.0: 4128; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 4129; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00] 4130; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 4131; SKX-NEXT: retq # sched: [7:1.00] 4132 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6> 4133 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4134 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4135 ret <8 x double> %res 4136} 4137 4138define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x i64> %mask) { 4139; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask7: 4140; GENERIC: # %bb.0: 4141; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4142; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] 4143; GENERIC-NEXT: retq # sched: [1:1.00] 4144; 4145; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask7: 4146; SKX: # %bb.0: 4147; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4148; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00] 4149; SKX-NEXT: retq # sched: [7:1.00] 4150 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6> 4151 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4152 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4153 ret <8 x double> %res 4154} 4155define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) { 4156; GENERIC-LABEL: 
test_8xdouble_perm_mem_mask0: 4157; GENERIC: # %bb.0: 4158; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [7:0.50] 4159; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 4160; GENERIC-NEXT: retq # sched: [1:1.00] 4161; 4162; SKX-LABEL: test_8xdouble_perm_mem_mask0: 4163; SKX: # %bb.0: 4164; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [8:0.50] 4165; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 4166; SKX-NEXT: retq # sched: [7:1.00] 4167 %vec = load <8 x double>, <8 x double>* %vp 4168 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1> 4169 ret <8 x double> %res 4170} 4171define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4172; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask0: 4173; GENERIC: # %bb.0: 4174; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [7:0.50] 4175; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4176; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 4177; GENERIC-NEXT: retq # sched: [1:1.00] 4178; 4179; SKX-LABEL: test_masked_8xdouble_perm_mem_mask0: 4180; SKX: # %bb.0: 4181; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [8:0.50] 4182; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4183; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 4184; SKX-NEXT: retq # sched: [7:1.00] 4185 %vec = load <8 x double>, <8 x double>* %vp 4186 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1> 4187 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4188 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4189 ret <8 x double> %res 4190} 4191 4192define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x i64> %mask) { 4193; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask0: 
4194; GENERIC: # %bb.0: 4195; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [7:0.50] 4196; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4197; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 4198; GENERIC-NEXT: retq # sched: [1:1.00] 4199; 4200; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask0: 4201; SKX: # %bb.0: 4202; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [8:0.50] 4203; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4204; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 4205; SKX-NEXT: retq # sched: [7:1.00] 4206 %vec = load <8 x double>, <8 x double>* %vp 4207 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1> 4208 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4209 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4210 ret <8 x double> %res 4211} 4212 4213define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4214; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: 4215; GENERIC: # %bb.0: 4216; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4217; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] 4218; GENERIC-NEXT: retq # sched: [1:1.00] 4219; 4220; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: 4221; SKX: # %bb.0: 4222; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4223; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00] 4224; SKX-NEXT: retq # sched: [7:1.00] 4225 %vec = load <8 x double>, <8 x double>* %vp 4226 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7> 4227 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4228 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4229 ret <8 x double> %res 4230} 4231 
4232define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x i64> %mask) { 4233; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: 4234; GENERIC: # %bb.0: 4235; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4236; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] 4237; GENERIC-NEXT: retq # sched: [1:1.00] 4238; 4239; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: 4240; SKX: # %bb.0: 4241; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4242; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00] 4243; SKX-NEXT: retq # sched: [7:1.00] 4244 %vec = load <8 x double>, <8 x double>* %vp 4245 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7> 4246 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4247 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4248 ret <8 x double> %res 4249} 4250 4251define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4252; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask2: 4253; GENERIC: # %bb.0: 4254; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [7:0.50] 4255; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4256; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 4257; GENERIC-NEXT: retq # sched: [1:1.00] 4258; 4259; SKX-LABEL: test_masked_8xdouble_perm_mem_mask2: 4260; SKX: # %bb.0: 4261; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [8:0.50] 4262; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4263; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 4264; SKX-NEXT: retq # sched: [7:1.00] 4265 %vec = load <8 x double>, <8 x double>* %vp 4266 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5> 4267 
%cmp = icmp eq <8 x i64> %mask, zeroinitializer 4268 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4269 ret <8 x double> %res 4270} 4271 4272define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x i64> %mask) { 4273; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask2: 4274; GENERIC: # %bb.0: 4275; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [7:0.50] 4276; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4277; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 4278; GENERIC-NEXT: retq # sched: [1:1.00] 4279; 4280; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask2: 4281; SKX: # %bb.0: 4282; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [8:0.50] 4283; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4284; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 4285; SKX-NEXT: retq # sched: [7:1.00] 4286 %vec = load <8 x double>, <8 x double>* %vp 4287 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5> 4288 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4289 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4290 ret <8 x double> %res 4291} 4292 4293define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { 4294; GENERIC-LABEL: test_8xdouble_perm_imm_mem_mask3: 4295; GENERIC: # %bb.0: 4296; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] 4297; GENERIC-NEXT: retq # sched: [1:1.00] 4298; 4299; SKX-LABEL: test_8xdouble_perm_imm_mem_mask3: 4300; SKX: # %bb.0: 4301; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] 4302; SKX-NEXT: retq # sched: [7:1.00] 4303 %vec = load <8 x double>, <8 x double>* %vp 4304 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4> 4305 ret <8 x double> %res 4306} 4307define <8 
x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4308; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: 4309; GENERIC: # %bb.0: 4310; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4311; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] 4312; GENERIC-NEXT: retq # sched: [1:1.00] 4313; 4314; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: 4315; SKX: # %bb.0: 4316; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4317; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] 4318; SKX-NEXT: retq # sched: [7:1.00] 4319 %vec = load <8 x double>, <8 x double>* %vp 4320 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4> 4321 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4322 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4323 ret <8 x double> %res 4324} 4325 4326define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x i64> %mask) { 4327; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: 4328; GENERIC: # %bb.0: 4329; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4330; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] 4331; GENERIC-NEXT: retq # sched: [1:1.00] 4332; 4333; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: 4334; SKX: # %bb.0: 4335; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4336; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] 4337; SKX-NEXT: retq # sched: [7:1.00] 4338 %vec = load <8 x double>, <8 x double>* %vp 4339 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4> 4340 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4341 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4342 
ret <8 x double> %res 4343} 4344 4345define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4346; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask4: 4347; GENERIC: # %bb.0: 4348; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [7:0.50] 4349; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4350; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 4351; GENERIC-NEXT: retq # sched: [1:1.00] 4352; 4353; SKX-LABEL: test_masked_8xdouble_perm_mem_mask4: 4354; SKX: # %bb.0: 4355; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [8:0.50] 4356; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4357; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 4358; SKX-NEXT: retq # sched: [7:1.00] 4359 %vec = load <8 x double>, <8 x double>* %vp 4360 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0> 4361 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4362 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4363 ret <8 x double> %res 4364} 4365 4366define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x i64> %mask) { 4367; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask4: 4368; GENERIC: # %bb.0: 4369; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [7:0.50] 4370; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4371; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 4372; GENERIC-NEXT: retq # sched: [1:1.00] 4373; 4374; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask4: 4375; SKX: # %bb.0: 4376; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [8:0.50] 4377; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4378; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 4379; SKX-NEXT: retq # sched: [7:1.00] 4380 %vec = load <8 x double>, <8 x double>* %vp 4381 
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0> 4382 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4383 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4384 ret <8 x double> %res 4385} 4386 4387define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4388; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: 4389; GENERIC: # %bb.0: 4390; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4391; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] 4392; GENERIC-NEXT: retq # sched: [1:1.00] 4393; 4394; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: 4395; SKX: # %bb.0: 4396; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4397; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00] 4398; SKX-NEXT: retq # sched: [7:1.00] 4399 %vec = load <8 x double>, <8 x double>* %vp 4400 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7> 4401 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4402 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4403 ret <8 x double> %res 4404} 4405 4406define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x i64> %mask) { 4407; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: 4408; GENERIC: # %bb.0: 4409; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4410; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] 4411; GENERIC-NEXT: retq # sched: [1:1.00] 4412; 4413; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: 4414; SKX: # %bb.0: 4415; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4416; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00] 4417; SKX-NEXT: retq # sched: [7:1.00] 4418 %vec 
= load <8 x double>, <8 x double>* %vp 4419 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7> 4420 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4421 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4422 ret <8 x double> %res 4423} 4424 4425define <8 x double> @test_8xdouble_perm_mem_mask6(<8 x double>* %vp) { 4426; GENERIC-LABEL: test_8xdouble_perm_mem_mask6: 4427; GENERIC: # %bb.0: 4428; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [7:0.50] 4429; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 4430; GENERIC-NEXT: retq # sched: [1:1.00] 4431; 4432; SKX-LABEL: test_8xdouble_perm_mem_mask6: 4433; SKX: # %bb.0: 4434; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [8:0.50] 4435; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 4436; SKX-NEXT: retq # sched: [7:1.00] 4437 %vec = load <8 x double>, <8 x double>* %vp 4438 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5> 4439 ret <8 x double> %res 4440} 4441define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4442; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask6: 4443; GENERIC: # %bb.0: 4444; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [7:0.50] 4445; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4446; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 4447; GENERIC-NEXT: retq # sched: [1:1.00] 4448; 4449; SKX-LABEL: test_masked_8xdouble_perm_mem_mask6: 4450; SKX: # %bb.0: 4451; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [8:0.50] 4452; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4453; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 4454; SKX-NEXT: retq # sched: [7:1.00] 4455 %vec = load <8 x double>, <8 x double>* %vp 4456 %shuf = 
shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5> 4457 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4458 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4459 ret <8 x double> %res 4460} 4461 4462define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x i64> %mask) { 4463; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask6: 4464; GENERIC: # %bb.0: 4465; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [7:0.50] 4466; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4467; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 4468; GENERIC-NEXT: retq # sched: [1:1.00] 4469; 4470; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask6: 4471; SKX: # %bb.0: 4472; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [8:0.50] 4473; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4474; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 4475; SKX-NEXT: retq # sched: [7:1.00] 4476 %vec = load <8 x double>, <8 x double>* %vp 4477 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5> 4478 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4479 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4480 ret <8 x double> %res 4481} 4482 4483define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4484; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: 4485; GENERIC: # %bb.0: 4486; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4487; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 4488; GENERIC-NEXT: retq # sched: [1:1.00] 4489; 4490; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: 4491; SKX: # %bb.0: 4492; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4493; SKX-NEXT: vpermpd 
{{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00] 4494; SKX-NEXT: retq # sched: [7:1.00] 4495 %vec = load <8 x double>, <8 x double>* %vp 4496 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 4497 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4498 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4499 ret <8 x double> %res 4500} 4501 4502define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x i64> %mask) { 4503; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: 4504; GENERIC: # %bb.0: 4505; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4506; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 4507; GENERIC-NEXT: retq # sched: [1:1.00] 4508; 4509; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: 4510; SKX: # %bb.0: 4511; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4512; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00] 4513; SKX-NEXT: retq # sched: [7:1.00] 4514 %vec = load <8 x double>, <8 x double>* %vp 4515 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 4516 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4517 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4518 ret <8 x double> %res 4519} 4520 4521define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) { 4522; GENERIC-LABEL: test_16xi8_perm_mask0: 4523; GENERIC: # %bb.0: 4524; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 4525; GENERIC-NEXT: retq # sched: [1:1.00] 4526; 4527; SKX-LABEL: test_16xi8_perm_mask0: 4528; SKX: # %bb.0: 4529; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] 4530; SKX-NEXT: retq # sched: [7:1.00] 4531 %res = shufflevector <16 x i8> %vec, <16 
x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 4532 ret <16 x i8> %res 4533} 4534define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 4535; GENERIC-LABEL: test_masked_16xi8_perm_mask0: 4536; GENERIC: # %bb.0: 4537; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] 4538; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 4539; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4540; GENERIC-NEXT: retq # sched: [1:1.00] 4541; 4542; SKX-LABEL: test_masked_16xi8_perm_mask0: 4543; SKX: # %bb.0: 4544; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] 4545; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] 4546; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4547; SKX-NEXT: retq # sched: [7:1.00] 4548 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 4549 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4550 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4551 ret <16 x i8> %res 4552} 4553 4554define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) { 4555; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0: 4556; GENERIC: # %bb.0: 4557; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4558; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 4559; GENERIC-NEXT: retq # sched: [1:1.00] 4560; 4561; SKX-LABEL: test_masked_z_16xi8_perm_mask0: 4562; SKX: # %bb.0: 4563; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4564; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] 4565; SKX-NEXT: retq # sched: [7:1.00] 4566 
%shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 4567 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4568 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4569 ret <16 x i8> %res 4570} 4571define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 4572; GENERIC-LABEL: test_masked_16xi8_perm_mask1: 4573; GENERIC: # %bb.0: 4574; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] 4575; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 4576; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4577; GENERIC-NEXT: retq # sched: [1:1.00] 4578; 4579; SKX-LABEL: test_masked_16xi8_perm_mask1: 4580; SKX: # %bb.0: 4581; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] 4582; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00] 4583; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4584; SKX-NEXT: retq # sched: [7:1.00] 4585 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 4586 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4587 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4588 ret <16 x i8> %res 4589} 4590 4591define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) { 4592; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1: 4593; GENERIC: # %bb.0: 4594; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4595; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 4596; GENERIC-NEXT: retq # sched: [1:1.00] 4597; 4598; SKX-LABEL: test_masked_z_16xi8_perm_mask1: 4599; SKX: # %bb.0: 4600; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # 
sched: [3:1.00] 4601; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00] 4602; SKX-NEXT: retq # sched: [7:1.00] 4603 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 4604 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4605 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4606 ret <16 x i8> %res 4607} 4608define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 4609; GENERIC-LABEL: test_masked_16xi8_perm_mask2: 4610; GENERIC: # %bb.0: 4611; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] 4612; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 4613; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4614; GENERIC-NEXT: retq # sched: [1:1.00] 4615; 4616; SKX-LABEL: test_masked_16xi8_perm_mask2: 4617; SKX: # %bb.0: 4618; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] 4619; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00] 4620; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4621; SKX-NEXT: retq # sched: [7:1.00] 4622 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 4623 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4624 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4625 ret <16 x i8> %res 4626} 4627 4628define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) { 4629; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2: 4630; GENERIC: # %bb.0: 4631; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4632; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: 
[7:0.50] 4633; GENERIC-NEXT: retq # sched: [1:1.00] 4634; 4635; SKX-LABEL: test_masked_z_16xi8_perm_mask2: 4636; SKX: # %bb.0: 4637; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4638; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00] 4639; SKX-NEXT: retq # sched: [7:1.00] 4640 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 4641 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4642 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4643 ret <16 x i8> %res 4644} 4645define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) { 4646; GENERIC-LABEL: test_16xi8_perm_mask3: 4647; GENERIC: # %bb.0: 4648; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 4649; GENERIC-NEXT: retq # sched: [1:1.00] 4650; 4651; SKX-LABEL: test_16xi8_perm_mask3: 4652; SKX: # %bb.0: 4653; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] 4654; SKX-NEXT: retq # sched: [7:1.00] 4655 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 4656 ret <16 x i8> %res 4657} 4658define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 4659; GENERIC-LABEL: test_masked_16xi8_perm_mask3: 4660; GENERIC: # %bb.0: 4661; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] 4662; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 4663; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4664; GENERIC-NEXT: retq # sched: [1:1.00] 4665; 4666; SKX-LABEL: test_masked_16xi8_perm_mask3: 4667; SKX: # %bb.0: 4668; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] 4669; SKX-NEXT: vpshufb 
{{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] 4670; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4671; SKX-NEXT: retq # sched: [7:1.00] 4672 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 4673 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4674 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4675 ret <16 x i8> %res 4676} 4677 4678define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) { 4679; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3: 4680; GENERIC: # %bb.0: 4681; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4682; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 4683; GENERIC-NEXT: retq # sched: [1:1.00] 4684; 4685; SKX-LABEL: test_masked_z_16xi8_perm_mask3: 4686; SKX: # %bb.0: 4687; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4688; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] 4689; SKX-NEXT: retq # sched: [7:1.00] 4690 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 4691 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4692 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4693 ret <16 x i8> %res 4694} 4695define <16 x i8> @test_16xi8_perm_mem_mask0(<16 x i8>* %vp) { 4696; GENERIC-LABEL: test_16xi8_perm_mem_mask0: 4697; GENERIC: # %bb.0: 4698; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4699; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 4700; GENERIC-NEXT: retq # sched: [1:1.00] 4701; 4702; SKX-LABEL: test_16xi8_perm_mem_mask0: 4703; SKX: # %bb.0: 4704; SKX-NEXT: vmovdqa (%rdi), %xmm0 # 
sched: [6:0.50] 4705; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] 4706; SKX-NEXT: retq # sched: [7:1.00] 4707 %vec = load <16 x i8>, <16 x i8>* %vp 4708 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 4709 ret <16 x i8> %res 4710} 4711define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 4712; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask0: 4713; GENERIC: # %bb.0: 4714; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4715; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4716; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 4717; GENERIC-NEXT: retq # sched: [1:1.00] 4718; 4719; SKX-LABEL: test_masked_16xi8_perm_mem_mask0: 4720; SKX: # %bb.0: 4721; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4722; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4723; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] 4724; SKX-NEXT: retq # sched: [7:1.00] 4725 %vec = load <16 x i8>, <16 x i8>* %vp 4726 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 4727 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4728 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4729 ret <16 x i8> %res 4730} 4731 4732define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %mask) { 4733; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask0: 4734; GENERIC: # %bb.0: 4735; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4736; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] 4737; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = 
xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 4738; GENERIC-NEXT: retq # sched: [1:1.00] 4739; 4740; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask0: 4741; SKX: # %bb.0: 4742; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4743; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] 4744; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] 4745; SKX-NEXT: retq # sched: [7:1.00] 4746 %vec = load <16 x i8>, <16 x i8>* %vp 4747 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 4748 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4749 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4750 ret <16 x i8> %res 4751} 4752 4753define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 4754; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask1: 4755; GENERIC: # %bb.0: 4756; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4757; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4758; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 4759; GENERIC-NEXT: retq # sched: [1:1.00] 4760; 4761; SKX-LABEL: test_masked_16xi8_perm_mem_mask1: 4762; SKX: # %bb.0: 4763; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4764; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4765; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00] 4766; SKX-NEXT: retq # sched: [7:1.00] 4767 %vec = load <16 x i8>, <16 x i8>* %vp 4768 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 4769 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4770 %res = select <16 x i1> %cmp, <16 
x i8> %shuf, <16 x i8> %vec2 4771 ret <16 x i8> %res 4772} 4773 4774define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %mask) { 4775; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask1: 4776; GENERIC: # %bb.0: 4777; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4778; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] 4779; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 4780; GENERIC-NEXT: retq # sched: [1:1.00] 4781; 4782; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask1: 4783; SKX: # %bb.0: 4784; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4785; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] 4786; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00] 4787; SKX-NEXT: retq # sched: [7:1.00] 4788 %vec = load <16 x i8>, <16 x i8>* %vp 4789 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 4790 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4791 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4792 ret <16 x i8> %res 4793} 4794 4795define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 4796; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask2: 4797; GENERIC: # %bb.0: 4798; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4799; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4800; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 4801; GENERIC-NEXT: retq # sched: [1:1.00] 4802; 4803; SKX-LABEL: test_masked_16xi8_perm_mem_mask2: 4804; SKX: # %bb.0: 4805; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4806; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4807; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = 
xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00] 4808; SKX-NEXT: retq # sched: [7:1.00] 4809 %vec = load <16 x i8>, <16 x i8>* %vp 4810 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 4811 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4812 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4813 ret <16 x i8> %res 4814} 4815 4816define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %mask) { 4817; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask2: 4818; GENERIC: # %bb.0: 4819; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4820; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] 4821; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 4822; GENERIC-NEXT: retq # sched: [1:1.00] 4823; 4824; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask2: 4825; SKX: # %bb.0: 4826; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4827; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] 4828; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00] 4829; SKX-NEXT: retq # sched: [7:1.00] 4830 %vec = load <16 x i8>, <16 x i8>* %vp 4831 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 4832 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4833 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4834 ret <16 x i8> %res 4835} 4836 4837define <16 x i8> @test_16xi8_perm_mem_mask3(<16 x i8>* %vp) { 4838; GENERIC-LABEL: test_16xi8_perm_mem_mask3: 4839; GENERIC: # %bb.0: 4840; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4841; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 4842; 
GENERIC-NEXT: retq # sched: [1:1.00] 4843; 4844; SKX-LABEL: test_16xi8_perm_mem_mask3: 4845; SKX: # %bb.0: 4846; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4847; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] 4848; SKX-NEXT: retq # sched: [7:1.00] 4849 %vec = load <16 x i8>, <16 x i8>* %vp 4850 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 4851 ret <16 x i8> %res 4852} 4853define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 4854; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask3: 4855; GENERIC: # %bb.0: 4856; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4857; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4858; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 4859; GENERIC-NEXT: retq # sched: [1:1.00] 4860; 4861; SKX-LABEL: test_masked_16xi8_perm_mem_mask3: 4862; SKX: # %bb.0: 4863; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4864; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4865; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] 4866; SKX-NEXT: retq # sched: [7:1.00] 4867 %vec = load <16 x i8>, <16 x i8>* %vp 4868 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 4869 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4870 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4871 ret <16 x i8> %res 4872} 4873 4874define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %mask) { 4875; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask3: 4876; GENERIC: # %bb.0: 4877; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4878; 
GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] 4879; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 4880; GENERIC-NEXT: retq # sched: [1:1.00] 4881; 4882; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask3: 4883; SKX: # %bb.0: 4884; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4885; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] 4886; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] 4887; SKX-NEXT: retq # sched: [7:1.00] 4888 %vec = load <16 x i8>, <16 x i8>* %vp 4889 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 4890 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4891 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4892 ret <16 x i8> %res 4893} 4894 4895define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { 4896; GENERIC-LABEL: test_32xi8_perm_mask0: 4897; GENERIC: # %bb.0: 4898; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50] 4899; GENERIC-NEXT: retq # sched: [1:1.00] 4900; 4901; SKX-LABEL: test_32xi8_perm_mask0: 4902; SKX: # %bb.0: 4903; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] 4904; SKX-NEXT: retq # sched: [7:1.00] 4905 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 4906 ret <32 x i8> %res 4907} 4908define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 4909; 
GENERIC-LABEL: test_masked_32xi8_perm_mask0: 4910; GENERIC: # %bb.0: 4911; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] 4912; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50] 4913; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 4914; GENERIC-NEXT: retq # sched: [1:1.00] 4915; 4916; SKX-LABEL: test_masked_32xi8_perm_mask0: 4917; SKX: # %bb.0: 4918; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] 4919; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] 4920; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 4921; SKX-NEXT: retq # sched: [7:1.00] 4922 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 4923 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 4924 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 4925 ret <32 x i8> %res 4926} 4927 4928define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) { 4929; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0: 4930; GENERIC: # %bb.0: 4931; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 4932; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50] 4933; GENERIC-NEXT: retq # sched: [1:1.00] 4934; 4935; SKX-LABEL: test_masked_z_32xi8_perm_mask0: 4936; SKX: # %bb.0: 4937; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 4938; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] 4939; 
SKX-NEXT: retq # sched: [7:1.00] 4940 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 4941 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 4942 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 4943 ret <32 x i8> %res 4944} 4945define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 4946; GENERIC-LABEL: test_masked_32xi8_perm_mask1: 4947; GENERIC: # %bb.0: 4948; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] 4949; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:0.50] 4950; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 4951; GENERIC-NEXT: retq # sched: [1:1.00] 4952; 4953; SKX-LABEL: test_masked_32xi8_perm_mask1: 4954; SKX: # %bb.0: 4955; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] 4956; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00] 4957; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 4958; SKX-NEXT: retq # sched: [7:1.00] 4959 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24> 4960 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 4961 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 4962 ret <32 x i8> %res 4963} 4964 4965define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) { 4966; GENERIC-LABEL: 
test_masked_z_32xi8_perm_mask1: 4967; GENERIC: # %bb.0: 4968; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 4969; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:0.50] 4970; GENERIC-NEXT: retq # sched: [1:1.00] 4971; 4972; SKX-LABEL: test_masked_z_32xi8_perm_mask1: 4973; SKX: # %bb.0: 4974; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 4975; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00] 4976; SKX-NEXT: retq # sched: [7:1.00] 4977 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24> 4978 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 4979 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 4980 ret <32 x i8> %res 4981} 4982define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 4983; GENERIC-LABEL: test_masked_32xi8_perm_mask2: 4984; GENERIC: # %bb.0: 4985; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] 4986; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:0.50] 4987; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 4988; GENERIC-NEXT: retq # sched: [1:1.00] 4989; 4990; SKX-LABEL: test_masked_32xi8_perm_mask2: 4991; SKX: # %bb.0: 4992; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] 4993; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00] 4994; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 4995; 
SKX-NEXT: retq # sched: [7:1.00] 4996 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29> 4997 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 4998 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 4999 ret <32 x i8> %res 5000} 5001 5002define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) { 5003; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2: 5004; GENERIC: # %bb.0: 5005; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5006; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:0.50] 5007; GENERIC-NEXT: retq # sched: [1:1.00] 5008; 5009; SKX-LABEL: test_masked_z_32xi8_perm_mask2: 5010; SKX: # %bb.0: 5011; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5012; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00] 5013; SKX-NEXT: retq # sched: [7:1.00] 5014 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29> 5015 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5016 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5017 ret <32 x i8> %res 5018} 5019define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { 5020; GENERIC-LABEL: test_32xi8_perm_mask3: 5021; GENERIC: # %bb.0: 5022; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50] 5023; GENERIC-NEXT: retq # sched: [1:1.00] 5024; 5025; SKX-LABEL: test_32xi8_perm_mask3: 5026; SKX: # %bb.0: 5027; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] 5028; SKX-NEXT: retq # sched: [7:1.00] 5029 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 5030 ret <32 x i8> %res 5031} 5032define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 5033; GENERIC-LABEL: test_masked_32xi8_perm_mask3: 5034; GENERIC: # %bb.0: 5035; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] 5036; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50] 5037; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 5038; GENERIC-NEXT: retq # sched: [1:1.00] 5039; 5040; SKX-LABEL: test_masked_32xi8_perm_mask3: 5041; SKX: # %bb.0: 5042; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] 5043; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] 5044; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 5045; SKX-NEXT: retq # sched: [7:1.00] 5046 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 5047 %cmp = icmp eq <32 x i8> %mask, 
zeroinitializer 5048 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5049 ret <32 x i8> %res 5050} 5051 5052define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) { 5053; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3: 5054; GENERIC: # %bb.0: 5055; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5056; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50] 5057; GENERIC-NEXT: retq # sched: [1:1.00] 5058; 5059; SKX-LABEL: test_masked_z_32xi8_perm_mask3: 5060; SKX: # %bb.0: 5061; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5062; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] 5063; SKX-NEXT: retq # sched: [7:1.00] 5064 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 5065 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5066 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5067 ret <32 x i8> %res 5068} 5069define <32 x i8> @test_32xi8_perm_mem_mask0(<32 x i8>* %vp) { 5070; GENERIC-LABEL: test_32xi8_perm_mem_mask0: 5071; GENERIC: # %bb.0: 5072; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] 5073; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50] 5074; GENERIC-NEXT: retq # sched: [1:1.00] 5075; 5076; SKX-LABEL: test_32xi8_perm_mem_mask0: 5077; SKX: # %bb.0: 5078; SKX-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] 5079; SKX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] 5080; SKX-NEXT: retq # sched: [7:1.00] 5081 %vec = load <32 x i8>, <32 x i8>* %vp 5082 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 5083 ret <32 x i8> %res 5084} 5085define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 5086; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask0: 5087; GENERIC: # %bb.0: 5088; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5089; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5090; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50] 5091; GENERIC-NEXT: retq # sched: [1:1.00] 5092; 5093; SKX-LABEL: test_masked_32xi8_perm_mem_mask0: 5094; SKX: # %bb.0: 5095; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5096; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5097; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] 5098; SKX-NEXT: retq # sched: [7:1.00] 5099 %vec = load <32 x i8>, <32 x i8>* %vp 5100 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 5101 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5102 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5103 ret <32 x i8> %res 5104} 5105 5106define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* 
%vp, <32 x i8> %mask) { 5107; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask0: 5108; GENERIC: # %bb.0: 5109; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5110; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] 5111; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50] 5112; GENERIC-NEXT: retq # sched: [1:1.00] 5113; 5114; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask0: 5115; SKX: # %bb.0: 5116; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5117; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] 5118; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] 5119; SKX-NEXT: retq # sched: [7:1.00] 5120 %vec = load <32 x i8>, <32 x i8>* %vp 5121 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 5122 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5123 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5124 ret <32 x i8> %res 5125} 5126 5127define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 5128; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask1: 5129; GENERIC: # %bb.0: 5130; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5131; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5132; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:0.50] 5133; GENERIC-NEXT: retq # sched: [1:1.00] 5134; 5135; SKX-LABEL: test_masked_32xi8_perm_mem_mask1: 5136; SKX: # %bb.0: 5137; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5138; 
SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5139; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00] 5140; SKX-NEXT: retq # sched: [7:1.00] 5141 %vec = load <32 x i8>, <32 x i8>* %vp 5142 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19> 5143 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5144 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5145 ret <32 x i8> %res 5146} 5147 5148define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %mask) { 5149; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask1: 5150; GENERIC: # %bb.0: 5151; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5152; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] 5153; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:0.50] 5154; GENERIC-NEXT: retq # sched: [1:1.00] 5155; 5156; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask1: 5157; SKX: # %bb.0: 5158; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5159; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] 5160; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00] 5161; SKX-NEXT: retq # sched: [7:1.00] 5162 %vec = load <32 x i8>, <32 x i8>* %vp 5163 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 
16, i32 31, i32 19> 5164 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5165 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5166 ret <32 x i8> %res 5167} 5168 5169define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 5170; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask2: 5171; GENERIC: # %bb.0: 5172; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5173; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5174; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:0.50] 5175; GENERIC-NEXT: retq # sched: [1:1.00] 5176; 5177; SKX-LABEL: test_masked_32xi8_perm_mem_mask2: 5178; SKX: # %bb.0: 5179; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5180; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5181; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00] 5182; SKX-NEXT: retq # sched: [7:1.00] 5183 %vec = load <32 x i8>, <32 x i8>* %vp 5184 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28> 5185 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5186 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5187 ret <32 x i8> %res 5188} 5189 5190define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %mask) { 5191; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask2: 5192; GENERIC: # %bb.0: 5193; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5194; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] 5195; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = 
ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:0.50] 5196; GENERIC-NEXT: retq # sched: [1:1.00] 5197; 5198; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask2: 5199; SKX: # %bb.0: 5200; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5201; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] 5202; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00] 5203; SKX-NEXT: retq # sched: [7:1.00] 5204 %vec = load <32 x i8>, <32 x i8>* %vp 5205 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28> 5206 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5207 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5208 ret <32 x i8> %res 5209} 5210 5211define <32 x i8> @test_32xi8_perm_mem_mask3(<32 x i8>* %vp) { 5212; GENERIC-LABEL: test_32xi8_perm_mem_mask3: 5213; GENERIC: # %bb.0: 5214; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] 5215; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50] 5216; GENERIC-NEXT: retq # sched: [1:1.00] 5217; 5218; SKX-LABEL: test_32xi8_perm_mem_mask3: 5219; SKX: # %bb.0: 5220; SKX-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] 5221; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] 5222; SKX-NEXT: retq # sched: [7:1.00] 5223 %vec = load <32 x i8>, <32 x i8>* %vp 5224 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, 
i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 5225 ret <32 x i8> %res 5226} 5227define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 5228; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask3: 5229; GENERIC: # %bb.0: 5230; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5231; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5232; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50] 5233; GENERIC-NEXT: retq # sched: [1:1.00] 5234; 5235; SKX-LABEL: test_masked_32xi8_perm_mem_mask3: 5236; SKX: # %bb.0: 5237; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5238; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5239; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] 5240; SKX-NEXT: retq # sched: [7:1.00] 5241 %vec = load <32 x i8>, <32 x i8>* %vp 5242 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 5243 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5244 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5245 ret <32 x i8> %res 5246} 5247 5248define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %mask) { 5249; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask3: 5250; GENERIC: # %bb.0: 5251; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5252; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] 5253; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = 
ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50] 5254; GENERIC-NEXT: retq # sched: [1:1.00] 5255; 5256; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask3: 5257; SKX: # %bb.0: 5258; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5259; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] 5260; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] 5261; SKX-NEXT: retq # sched: [7:1.00] 5262 %vec = load <32 x i8>, <32 x i8>* %vp 5263 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 5264 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5265 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5266 ret <32 x i8> %res 5267} 5268 5269define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { 5270; GENERIC-LABEL: test_64xi8_perm_mask0: 5271; GENERIC: # %bb.0: 5272; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] 5273; GENERIC-NEXT: retq # sched: [1:1.00] 5274; 5275; SKX-LABEL: test_64xi8_perm_mask0: 5276; SKX: # %bb.0: 5277; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] 5278; SKX-NEXT: retq # sched: [7:1.00] 5279 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 
13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 5280 ret <64 x i8> %res 5281} 5282define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 5283; GENERIC-LABEL: test_masked_64xi8_perm_mask0: 5284; GENERIC: # %bb.0: 5285; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] 5286; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] 5287; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 5288; GENERIC-NEXT: retq # sched: [1:1.00] 5289; 5290; SKX-LABEL: test_masked_64xi8_perm_mask0: 5291; SKX: # %bb.0: 5292; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] 5293; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] 5294; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 5295; SKX-NEXT: retq # sched: [7:1.00] 5296 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, 
i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 5297 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5298 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5299 ret <64 x i8> %res 5300} 5301 5302define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) { 5303; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0: 5304; GENERIC: # %bb.0: 5305; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5306; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] 5307; GENERIC-NEXT: retq # sched: [1:1.00] 5308; 5309; SKX-LABEL: test_masked_z_64xi8_perm_mask0: 5310; SKX: # %bb.0: 5311; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5312; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] 5313; SKX-NEXT: retq # sched: [7:1.00] 5314 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 5315 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5316 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5317 ret <64 x i8> %res 5318} 5319define <64 x i8> 
@test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 5320; GENERIC-LABEL: test_masked_64xi8_perm_mask1: 5321; GENERIC: # %bb.0: 5322; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] 5323; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50] 5324; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 5325; GENERIC-NEXT: retq # sched: [1:1.00] 5326; 5327; SKX-LABEL: test_masked_64xi8_perm_mask1: 5328; SKX: # %bb.0: 5329; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] 5330; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00] 5331; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 5332; SKX-NEXT: retq # sched: [7:1.00] 5333 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49> 5334 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5335 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5336 ret <64 x i8> %res 5337} 5338 5339define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) { 5340; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1: 5341; GENERIC: # %bb.0: 5342; GENERIC-NEXT: vptestnmb %zmm1, 
%zmm1, %k1 # sched: [1:0.33] 5343; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50] 5344; GENERIC-NEXT: retq # sched: [1:1.00] 5345; 5346; SKX-LABEL: test_masked_z_64xi8_perm_mask1: 5347; SKX: # %bb.0: 5348; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5349; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00] 5350; SKX-NEXT: retq # sched: [7:1.00] 5351 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49> 5352 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5353 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5354 ret <64 x i8> %res 5355} 5356define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 5357; GENERIC-LABEL: test_masked_64xi8_perm_mask2: 5358; GENERIC: # %bb.0: 5359; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] 5360; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50] 5361; 
GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 5362; GENERIC-NEXT: retq # sched: [1:1.00] 5363; 5364; SKX-LABEL: test_masked_64xi8_perm_mask2: 5365; SKX: # %bb.0: 5366; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] 5367; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00] 5368; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 5369; SKX-NEXT: retq # sched: [7:1.00] 5370 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60> 5371 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5372 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5373 ret <64 x i8> %res 5374} 5375 5376define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) { 5377; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2: 5378; GENERIC: # %bb.0: 5379; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5380; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50] 5381; GENERIC-NEXT: retq # sched: [1:1.00] 5382; 5383; SKX-LABEL: test_masked_z_64xi8_perm_mask2: 5384; SKX: # %bb.0: 5385; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5386; SKX-NEXT: vpshufb 
{{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00] 5387; SKX-NEXT: retq # sched: [7:1.00] 5388 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60> 5389 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5390 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5391 ret <64 x i8> %res 5392} 5393define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { 5394; GENERIC-LABEL: test_64xi8_perm_mask3: 5395; GENERIC: # %bb.0: 5396; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] 5397; GENERIC-NEXT: retq # sched: [1:1.00] 5398; 5399; SKX-LABEL: test_64xi8_perm_mask3: 5400; SKX: # %bb.0: 5401; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] 5402; SKX-NEXT: retq # sched: [7:1.00] 5403 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 
24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61> 5404 ret <64 x i8> %res 5405} 5406define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 5407; GENERIC-LABEL: test_masked_64xi8_perm_mask3: 5408; GENERIC: # %bb.0: 5409; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] 5410; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] 5411; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 5412; GENERIC-NEXT: retq # sched: [1:1.00] 5413; 5414; SKX-LABEL: test_masked_64xi8_perm_mask3: 5415; SKX: # %bb.0: 5416; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] 5417; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] 5418; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 5419; SKX-NEXT: retq # sched: [7:1.00] 5420 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, 
i32 56, i32 49, i32 61> 5421 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5422 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5423 ret <64 x i8> %res 5424} 5425 5426define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) { 5427; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3: 5428; GENERIC: # %bb.0: 5429; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5430; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] 5431; GENERIC-NEXT: retq # sched: [1:1.00] 5432; 5433; SKX-LABEL: test_masked_z_64xi8_perm_mask3: 5434; SKX: # %bb.0: 5435; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5436; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] 5437; SKX-NEXT: retq # sched: [7:1.00] 5438 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61> 5439 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5440 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5441 ret <64 x i8> %res 5442} 5443define <64 x i8> @test_64xi8_perm_mem_mask0(<64 x i8>* %vp) { 5444; GENERIC-LABEL: test_64xi8_perm_mem_mask0: 5445; 
GENERIC: # %bb.0: 5446; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50] 5447; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] 5448; GENERIC-NEXT: retq # sched: [1:1.00] 5449; 5450; SKX-LABEL: test_64xi8_perm_mem_mask0: 5451; SKX: # %bb.0: 5452; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] 5453; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] 5454; SKX-NEXT: retq # sched: [7:1.00] 5455 %vec = load <64 x i8>, <64 x i8>* %vp 5456 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58> 5457 ret <64 x i8> %res 5458} 5459define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 5460; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask0: 5461; GENERIC: # %bb.0: 5462; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] 5463; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5464; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] 
5465; GENERIC-NEXT: retq # sched: [1:1.00] 5466; 5467; SKX-LABEL: test_masked_64xi8_perm_mem_mask0: 5468; SKX: # %bb.0: 5469; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] 5470; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5471; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] 5472; SKX-NEXT: retq # sched: [7:1.00] 5473 %vec = load <64 x i8>, <64 x i8>* %vp 5474 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58> 5475 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5476 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5477 ret <64 x i8> %res 5478} 5479 5480define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %mask) { 5481; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask0: 5482; GENERIC: # %bb.0: 5483; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] 5484; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] 5485; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] 5486; GENERIC-NEXT: retq # sched: [1:1.00] 5487; 5488; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask0: 5489; SKX: # %bb.0: 5490; SKX-NEXT: 
vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] 5491; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] 5492; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] 5493; SKX-NEXT: retq # sched: [7:1.00] 5494 %vec = load <64 x i8>, <64 x i8>* %vp 5495 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58> 5496 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5497 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5498 ret <64 x i8> %res 5499} 5500 5501define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 5502; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask1: 5503; GENERIC: # %bb.0: 5504; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] 5505; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5506; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50] 5507; GENERIC-NEXT: retq # sched: [1:1.00] 5508; 5509; SKX-LABEL: test_masked_64xi8_perm_mem_mask1: 5510; SKX: # %bb.0: 5511; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] 5512; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5513; 
SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00] 5514; SKX-NEXT: retq # sched: [7:1.00] 5515 %vec = load <64 x i8>, <64 x i8>* %vp 5516 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49> 5517 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5518 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5519 ret <64 x i8> %res 5520} 5521 5522define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %mask) { 5523; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask1: 5524; GENERIC: # %bb.0: 5525; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] 5526; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] 5527; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50] 5528; GENERIC-NEXT: retq # sched: [1:1.00] 5529; 5530; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask1: 5531; SKX: # %bb.0: 5532; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] 5533; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] 5534; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00] 5535; SKX-NEXT: retq # sched: [7:1.00] 5536 %vec = load <64 x i8>, <64 x i8>* %vp 5537 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49> 5538 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5539 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5540 ret <64 x i8> %res 5541} 5542 5543define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 5544; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask2: 5545; GENERIC: # %bb.0: 5546; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] 5547; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5548; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50] 5549; GENERIC-NEXT: retq # sched: [1:1.00] 5550; 5551; SKX-LABEL: test_masked_64xi8_perm_mem_mask2: 5552; SKX: # %bb.0: 5553; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] 5554; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5555; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = 
zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00] 5556; SKX-NEXT: retq # sched: [7:1.00] 5557 %vec = load <64 x i8>, <64 x i8>* %vp 5558 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61> 5559 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5560 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5561 ret <64 x i8> %res 5562} 5563 5564define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %mask) { 5565; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask2: 5566; GENERIC: # %bb.0: 5567; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] 5568; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] 5569; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50] 5570; GENERIC-NEXT: retq # sched: [1:1.00] 5571; 5572; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask2: 5573; SKX: # %bb.0: 5574; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] 5575; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] 5576; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00] 5577; SKX-NEXT: retq # sched: [7:1.00] 5578 %vec = load <64 x i8>, <64 x i8>* %vp 5579 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61> 5580 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5581 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5582 ret <64 x i8> %res 5583} 5584 5585define <64 x i8> @test_64xi8_perm_mem_mask3(<64 x i8>* %vp) { 5586; GENERIC-LABEL: test_64xi8_perm_mem_mask3: 5587; GENERIC: # %bb.0: 5588; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50] 5589; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] 5590; GENERIC-NEXT: retq # sched: [1:1.00] 5591; 5592; SKX-LABEL: test_64xi8_perm_mem_mask3: 5593; SKX: # %bb.0: 5594; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] 5595; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] 5596; SKX-NEXT: retq # sched: [7:1.00] 5597 %vec = load <64 x i8>, <64 x i8>* %vp 5598 %res = 
shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60> 5599 ret <64 x i8> %res 5600} 5601define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 5602; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask3: 5603; GENERIC: # %bb.0: 5604; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] 5605; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5606; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] 5607; GENERIC-NEXT: retq # sched: [1:1.00] 5608; 5609; SKX-LABEL: test_masked_64xi8_perm_mem_mask3: 5610; SKX: # %bb.0: 5611; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] 5612; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5613; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] 5614; SKX-NEXT: retq # sched: [7:1.00] 5615 %vec = load <64 x i8>, <64 x i8>* %vp 5616 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 
16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60> 5617 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5618 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5619 ret <64 x i8> %res 5620} 5621 5622define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %mask) { 5623; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask3: 5624; GENERIC: # %bb.0: 5625; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] 5626; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] 5627; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] 5628; GENERIC-NEXT: retq # sched: [1:1.00] 5629; 5630; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask3: 5631; SKX: # %bb.0: 5632; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] 5633; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] 5634; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] 5635; SKX-NEXT: retq # sched: [7:1.00] 5636 %vec = load <64 x i8>, <64 x i8>* %vp 5637 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, 
i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60> 5638 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5639 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5640 ret <64 x i8> %res 5641} 5642 5643define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) { 5644; GENERIC-LABEL: test_8xi16_perm_high_mask0: 5645; GENERIC: # %bb.0: 5646; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] 5647; GENERIC-NEXT: retq # sched: [1:1.00] 5648; 5649; SKX-LABEL: test_8xi16_perm_high_mask0: 5650; SKX: # %bb.0: 5651; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] 5652; SKX-NEXT: retq # sched: [7:1.00] 5653 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 5654 ret <8 x i16> %res 5655} 5656define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5657; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0: 5658; GENERIC: # %bb.0: 5659; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5660; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] 5661; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5662; GENERIC-NEXT: retq # sched: [1:1.00] 5663; 5664; SKX-LABEL: test_masked_8xi16_perm_high_mask0: 5665; SKX: # %bb.0: 5666; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5667; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] 5668; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5669; SKX-NEXT: retq # sched: [7:1.00] 5670 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 5671 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5672 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5673 ret <8 x i16> 
%res 5674} 5675 5676define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) { 5677; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0: 5678; GENERIC: # %bb.0: 5679; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5680; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] 5681; GENERIC-NEXT: retq # sched: [1:1.00] 5682; 5683; SKX-LABEL: test_masked_z_8xi16_perm_high_mask0: 5684; SKX: # %bb.0: 5685; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5686; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] 5687; SKX-NEXT: retq # sched: [7:1.00] 5688 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 5689 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5690 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5691 ret <8 x i16> %res 5692} 5693define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5694; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1: 5695; GENERIC: # %bb.0: 5696; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5697; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50] 5698; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5699; GENERIC-NEXT: retq # sched: [1:1.00] 5700; 5701; SKX-LABEL: test_masked_8xi16_perm_low_mask1: 5702; SKX: # %bb.0: 5703; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5704; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] 5705; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5706; SKX-NEXT: retq # sched: [7:1.00] 5707 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 5708 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5709 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5710 ret <8 x i16> %res 5711} 5712 
5713define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) { 5714; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1: 5715; GENERIC: # %bb.0: 5716; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5717; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50] 5718; GENERIC-NEXT: retq # sched: [1:1.00] 5719; 5720; SKX-LABEL: test_masked_z_8xi16_perm_low_mask1: 5721; SKX: # %bb.0: 5722; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5723; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] 5724; SKX-NEXT: retq # sched: [7:1.00] 5725 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 5726 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5727 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5728 ret <8 x i16> %res 5729} 5730define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5731; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2: 5732; GENERIC: # %bb.0: 5733; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5734; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50] 5735; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5736; GENERIC-NEXT: retq # sched: [1:1.00] 5737; 5738; SKX-LABEL: test_masked_8xi16_perm_high_mask2: 5739; SKX: # %bb.0: 5740; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5741; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] 5742; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5743; SKX-NEXT: retq # sched: [7:1.00] 5744 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5> 5745 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5746 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5747 ret <8 x i16> %res 5748} 5749 5750define <8 x i16> 
@test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) { 5751; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2: 5752; GENERIC: # %bb.0: 5753; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5754; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50] 5755; GENERIC-NEXT: retq # sched: [1:1.00] 5756; 5757; SKX-LABEL: test_masked_z_8xi16_perm_high_mask2: 5758; SKX: # %bb.0: 5759; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5760; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] 5761; SKX-NEXT: retq # sched: [7:1.00] 5762 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5> 5763 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5764 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5765 ret <8 x i16> %res 5766} 5767define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) { 5768; GENERIC-LABEL: test_8xi16_perm_low_mask3: 5769; GENERIC: # %bb.0: 5770; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] 5771; GENERIC-NEXT: retq # sched: [1:1.00] 5772; 5773; SKX-LABEL: test_8xi16_perm_low_mask3: 5774; SKX: # %bb.0: 5775; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] 5776; SKX-NEXT: retq # sched: [7:1.00] 5777 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 5778 ret <8 x i16> %res 5779} 5780define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5781; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3: 5782; GENERIC: # %bb.0: 5783; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5784; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] 5785; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5786; GENERIC-NEXT: retq # sched: [1:1.00] 5787; 5788; SKX-LABEL: 
test_masked_8xi16_perm_low_mask3: 5789; SKX: # %bb.0: 5790; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5791; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] 5792; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5793; SKX-NEXT: retq # sched: [7:1.00] 5794 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 5795 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5796 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5797 ret <8 x i16> %res 5798} 5799 5800define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) { 5801; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3: 5802; GENERIC: # %bb.0: 5803; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5804; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] 5805; GENERIC-NEXT: retq # sched: [1:1.00] 5806; 5807; SKX-LABEL: test_masked_z_8xi16_perm_low_mask3: 5808; SKX: # %bb.0: 5809; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5810; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] 5811; SKX-NEXT: retq # sched: [7:1.00] 5812 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 5813 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5814 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5815 ret <8 x i16> %res 5816} 5817define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5818; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4: 5819; GENERIC: # %bb.0: 5820; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5821; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50] 5822; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5823; GENERIC-NEXT: retq # sched: [1:1.00] 5824; 5825; SKX-LABEL: 
test_masked_8xi16_perm_high_mask4: 5826; SKX: # %bb.0: 5827; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5828; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] 5829; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5830; SKX-NEXT: retq # sched: [7:1.00] 5831 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6> 5832 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5833 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5834 ret <8 x i16> %res 5835} 5836 5837define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) { 5838; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4: 5839; GENERIC: # %bb.0: 5840; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5841; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50] 5842; GENERIC-NEXT: retq # sched: [1:1.00] 5843; 5844; SKX-LABEL: test_masked_z_8xi16_perm_high_mask4: 5845; SKX: # %bb.0: 5846; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5847; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] 5848; SKX-NEXT: retq # sched: [7:1.00] 5849 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6> 5850 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5851 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5852 ret <8 x i16> %res 5853} 5854define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5855; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5: 5856; GENERIC: # %bb.0: 5857; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5858; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50] 5859; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5860; GENERIC-NEXT: retq # sched: [1:1.00] 5861; 5862; SKX-LABEL: 
test_masked_8xi16_perm_low_mask5: 5863; SKX: # %bb.0: 5864; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5865; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] 5866; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5867; SKX-NEXT: retq # sched: [7:1.00] 5868 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7> 5869 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5870 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5871 ret <8 x i16> %res 5872} 5873 5874define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) { 5875; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5: 5876; GENERIC: # %bb.0: 5877; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5878; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50] 5879; GENERIC-NEXT: retq # sched: [1:1.00] 5880; 5881; SKX-LABEL: test_masked_z_8xi16_perm_low_mask5: 5882; SKX: # %bb.0: 5883; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5884; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] 5885; SKX-NEXT: retq # sched: [7:1.00] 5886 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7> 5887 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5888 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5889 ret <8 x i16> %res 5890} 5891define <8 x i16> @test_8xi16_perm_high_mask6(<8 x i16> %vec) { 5892; GENERIC-LABEL: test_8xi16_perm_high_mask6: 5893; GENERIC: # %bb.0: 5894; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] 5895; GENERIC-NEXT: retq # sched: [1:1.00] 5896; 5897; SKX-LABEL: test_8xi16_perm_high_mask6: 5898; SKX: # %bb.0: 5899; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] 5900; SKX-NEXT: retq # sched: [7:1.00] 5901 %res = shufflevector <8 x i16> 
%vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 5902 ret <8 x i16> %res 5903} 5904define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5905; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6: 5906; GENERIC: # %bb.0: 5907; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5908; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] 5909; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5910; GENERIC-NEXT: retq # sched: [1:1.00] 5911; 5912; SKX-LABEL: test_masked_8xi16_perm_high_mask6: 5913; SKX: # %bb.0: 5914; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5915; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] 5916; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5917; SKX-NEXT: retq # sched: [7:1.00] 5918 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 5919 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5920 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5921 ret <8 x i16> %res 5922} 5923 5924define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) { 5925; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6: 5926; GENERIC: # %bb.0: 5927; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5928; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] 5929; GENERIC-NEXT: retq # sched: [1:1.00] 5930; 5931; SKX-LABEL: test_masked_z_8xi16_perm_high_mask6: 5932; SKX: # %bb.0: 5933; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5934; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] 5935; SKX-NEXT: retq # sched: [7:1.00] 5936 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 5937 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5938 
%res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5939 ret <8 x i16> %res 5940} 5941define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5942; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7: 5943; GENERIC: # %bb.0: 5944; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5945; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50] 5946; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5947; GENERIC-NEXT: retq # sched: [1:1.00] 5948; 5949; SKX-LABEL: test_masked_8xi16_perm_low_mask7: 5950; SKX: # %bb.0: 5951; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5952; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] 5953; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5954; SKX-NEXT: retq # sched: [7:1.00] 5955 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 5956 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5957 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5958 ret <8 x i16> %res 5959} 5960 5961define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) { 5962; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7: 5963; GENERIC: # %bb.0: 5964; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5965; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50] 5966; GENERIC-NEXT: retq # sched: [1:1.00] 5967; 5968; SKX-LABEL: test_masked_z_8xi16_perm_low_mask7: 5969; SKX: # %bb.0: 5970; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5971; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] 5972; SKX-NEXT: retq # sched: [7:1.00] 5973 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 5974 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5975 %res = select <8 x i1> 
%cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5976 ret <8 x i16> %res 5977} 5978define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) { 5979; GENERIC-LABEL: test_8xi16_perm_high_mem_mask0: 5980; GENERIC: # %bb.0: 5981; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] 5982; GENERIC-NEXT: retq # sched: [1:1.00] 5983; 5984; SKX-LABEL: test_8xi16_perm_high_mem_mask0: 5985; SKX: # %bb.0: 5986; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] 5987; SKX-NEXT: retq # sched: [7:1.00] 5988 %vec = load <8 x i16>, <8 x i16>* %vp 5989 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 5990 ret <8 x i16> %res 5991} 5992define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 5993; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0: 5994; GENERIC: # %bb.0: 5995; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5996; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] 5997; GENERIC-NEXT: retq # sched: [1:1.00] 5998; 5999; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask0: 6000; SKX: # %bb.0: 6001; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6002; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] 6003; SKX-NEXT: retq # sched: [7:1.00] 6004 %vec = load <8 x i16>, <8 x i16>* %vp 6005 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 6006 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6007 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6008 ret <8 x i16> %res 6009} 6010 6011define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) { 6012; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: 6013; GENERIC: # %bb.0: 6014; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6015; GENERIC-NEXT: 
vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] 6016; GENERIC-NEXT: retq # sched: [1:1.00] 6017; 6018; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: 6019; SKX: # %bb.0: 6020; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6021; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] 6022; SKX-NEXT: retq # sched: [7:1.00] 6023 %vec = load <8 x i16>, <8 x i16>* %vp 6024 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 6025 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6026 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6027 ret <8 x i16> %res 6028} 6029 6030define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6031; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1: 6032; GENERIC: # %bb.0: 6033; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6034; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50] 6035; GENERIC-NEXT: retq # sched: [1:1.00] 6036; 6037; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask1: 6038; SKX: # %bb.0: 6039; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6040; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00] 6041; SKX-NEXT: retq # sched: [7:1.00] 6042 %vec = load <8 x i16>, <8 x i16>* %vp 6043 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 6044 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6045 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6046 ret <8 x i16> %res 6047} 6048 6049define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) { 6050; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: 6051; GENERIC: # %bb.0: 6052; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6053; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 
{%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50] 6054; GENERIC-NEXT: retq # sched: [1:1.00] 6055; 6056; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: 6057; SKX: # %bb.0: 6058; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6059; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00] 6060; SKX-NEXT: retq # sched: [7:1.00] 6061 %vec = load <8 x i16>, <8 x i16>* %vp 6062 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 6063 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6064 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6065 ret <8 x i16> %res 6066} 6067 6068define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6069; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2: 6070; GENERIC: # %bb.0: 6071; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6072; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50] 6073; GENERIC-NEXT: retq # sched: [1:1.00] 6074; 6075; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask2: 6076; SKX: # %bb.0: 6077; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6078; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00] 6079; SKX-NEXT: retq # sched: [7:1.00] 6080 %vec = load <8 x i16>, <8 x i16>* %vp 6081 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7> 6082 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6083 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6084 ret <8 x i16> %res 6085} 6086 6087define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) { 6088; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: 6089; GENERIC: # %bb.0: 6090; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6091; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = 
mem[0,1,2,3,6,6,5,7] sched: [7:0.50] 6092; GENERIC-NEXT: retq # sched: [1:1.00] 6093; 6094; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: 6095; SKX: # %bb.0: 6096; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6097; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00] 6098; SKX-NEXT: retq # sched: [7:1.00] 6099 %vec = load <8 x i16>, <8 x i16>* %vp 6100 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7> 6101 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6102 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6103 ret <8 x i16> %res 6104} 6105 6106define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) { 6107; GENERIC-LABEL: test_8xi16_perm_low_mem_mask3: 6108; GENERIC: # %bb.0: 6109; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] 6110; GENERIC-NEXT: retq # sched: [1:1.00] 6111; 6112; SKX-LABEL: test_8xi16_perm_low_mem_mask3: 6113; SKX: # %bb.0: 6114; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] 6115; SKX-NEXT: retq # sched: [7:1.00] 6116 %vec = load <8 x i16>, <8 x i16>* %vp 6117 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 6118 ret <8 x i16> %res 6119} 6120define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6121; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3: 6122; GENERIC: # %bb.0: 6123; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6124; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] 6125; GENERIC-NEXT: retq # sched: [1:1.00] 6126; 6127; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask3: 6128; SKX: # %bb.0: 6129; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6130; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] 6131; SKX-NEXT: retq # sched: 
[7:1.00] 6132 %vec = load <8 x i16>, <8 x i16>* %vp 6133 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 6134 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6135 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6136 ret <8 x i16> %res 6137} 6138 6139define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) { 6140; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: 6141; GENERIC: # %bb.0: 6142; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6143; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] 6144; GENERIC-NEXT: retq # sched: [1:1.00] 6145; 6146; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: 6147; SKX: # %bb.0: 6148; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6149; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] 6150; SKX-NEXT: retq # sched: [7:1.00] 6151 %vec = load <8 x i16>, <8 x i16>* %vp 6152 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 6153 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6154 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6155 ret <8 x i16> %res 6156} 6157 6158define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6159; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4: 6160; GENERIC: # %bb.0: 6161; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6162; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50] 6163; GENERIC-NEXT: retq # sched: [1:1.00] 6164; 6165; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask4: 6166; SKX: # %bb.0: 6167; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6168; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00] 6169; SKX-NEXT: retq # sched: [7:1.00] 6170 %vec = load 
<8 x i16>, <8 x i16>* %vp 6171 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5> 6172 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6173 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6174 ret <8 x i16> %res 6175} 6176 6177define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) { 6178; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: 6179; GENERIC: # %bb.0: 6180; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6181; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50] 6182; GENERIC-NEXT: retq # sched: [1:1.00] 6183; 6184; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: 6185; SKX: # %bb.0: 6186; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6187; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00] 6188; SKX-NEXT: retq # sched: [7:1.00] 6189 %vec = load <8 x i16>, <8 x i16>* %vp 6190 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5> 6191 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6192 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6193 ret <8 x i16> %res 6194} 6195 6196define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6197; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5: 6198; GENERIC: # %bb.0: 6199; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6200; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50] 6201; GENERIC-NEXT: retq # sched: [1:1.00] 6202; 6203; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask5: 6204; SKX: # %bb.0: 6205; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6206; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00] 6207; SKX-NEXT: retq # sched: [7:1.00] 6208 %vec = load <8 x i16>, <8 x i16>* %vp 
6209 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 6210 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6211 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6212 ret <8 x i16> %res 6213} 6214 6215define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) { 6216; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: 6217; GENERIC: # %bb.0: 6218; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6219; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50] 6220; GENERIC-NEXT: retq # sched: [1:1.00] 6221; 6222; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: 6223; SKX: # %bb.0: 6224; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6225; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00] 6226; SKX-NEXT: retq # sched: [7:1.00] 6227 %vec = load <8 x i16>, <8 x i16>* %vp 6228 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 6229 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6230 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6231 ret <8 x i16> %res 6232} 6233 6234define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) { 6235; GENERIC-LABEL: test_8xi16_perm_high_mem_mask6: 6236; GENERIC: # %bb.0: 6237; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] 6238; GENERIC-NEXT: retq # sched: [1:1.00] 6239; 6240; SKX-LABEL: test_8xi16_perm_high_mem_mask6: 6241; SKX: # %bb.0: 6242; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] 6243; SKX-NEXT: retq # sched: [7:1.00] 6244 %vec = load <8 x i16>, <8 x i16>* %vp 6245 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 6246 ret <8 x i16> %res 6247} 6248define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x 
i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6249; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6: 6250; GENERIC: # %bb.0: 6251; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6252; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] 6253; GENERIC-NEXT: retq # sched: [1:1.00] 6254; 6255; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask6: 6256; SKX: # %bb.0: 6257; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6258; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] 6259; SKX-NEXT: retq # sched: [7:1.00] 6260 %vec = load <8 x i16>, <8 x i16>* %vp 6261 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 6262 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6263 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6264 ret <8 x i16> %res 6265} 6266 6267define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) { 6268; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: 6269; GENERIC: # %bb.0: 6270; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6271; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] 6272; GENERIC-NEXT: retq # sched: [1:1.00] 6273; 6274; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: 6275; SKX: # %bb.0: 6276; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6277; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] 6278; SKX-NEXT: retq # sched: [7:1.00] 6279 %vec = load <8 x i16>, <8 x i16>* %vp 6280 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 6281 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6282 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6283 ret <8 x i16> %res 6284} 6285 6286define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> 
%vec2, <8 x i16> %mask) { 6287; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7: 6288; GENERIC: # %bb.0: 6289; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6290; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50] 6291; GENERIC-NEXT: retq # sched: [1:1.00] 6292; 6293; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask7: 6294; SKX: # %bb.0: 6295; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6296; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00] 6297; SKX-NEXT: retq # sched: [7:1.00] 6298 %vec = load <8 x i16>, <8 x i16>* %vp 6299 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7> 6300 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6301 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6302 ret <8 x i16> %res 6303} 6304 6305define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) { 6306; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: 6307; GENERIC: # %bb.0: 6308; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6309; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50] 6310; GENERIC-NEXT: retq # sched: [1:1.00] 6311; 6312; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: 6313; SKX: # %bb.0: 6314; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6315; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00] 6316; SKX-NEXT: retq # sched: [7:1.00] 6317 %vec = load <8 x i16>, <8 x i16>* %vp 6318 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7> 6319 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6320 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6321 ret <8 x i16> %res 6322} 6323 6324define <16 x i16> @test_16xi16_perm_high_mask0(<16 x i16> %vec) { 6325; GENERIC-LABEL: 
test_16xi16_perm_high_mask0: 6326; GENERIC: # %bb.0: 6327; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6328; GENERIC-NEXT: retq # sched: [1:1.00] 6329; 6330; SKX-LABEL: test_16xi16_perm_high_mask0: 6331; SKX: # %bb.0: 6332; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6333; SKX-NEXT: retq # sched: [7:1.00] 6334 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 6335 ret <16 x i16> %res 6336} 6337define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6338; GENERIC-LABEL: test_masked_16xi16_perm_high_mask0: 6339; GENERIC: # %bb.0: 6340; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6341; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6342; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6343; GENERIC-NEXT: retq # sched: [1:1.00] 6344; 6345; SKX-LABEL: test_masked_16xi16_perm_high_mask0: 6346; SKX: # %bb.0: 6347; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6348; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6349; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6350; SKX-NEXT: retq # sched: [7:1.00] 6351 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 6352 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6353 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6354 ret <16 x i16> %res 6355} 6356 6357define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) { 6358; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask0: 6359; GENERIC: # %bb.0: 6360; 
GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6361; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6362; GENERIC-NEXT: retq # sched: [1:1.00] 6363; 6364; SKX-LABEL: test_masked_z_16xi16_perm_high_mask0: 6365; SKX: # %bb.0: 6366; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6367; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6368; SKX-NEXT: retq # sched: [7:1.00] 6369 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 6370 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6371 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6372 ret <16 x i16> %res 6373} 6374define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6375; GENERIC-LABEL: test_masked_16xi16_perm_low_mask1: 6376; GENERIC: # %bb.0: 6377; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6378; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] 6379; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6380; GENERIC-NEXT: retq # sched: [1:1.00] 6381; 6382; SKX-LABEL: test_masked_16xi16_perm_low_mask1: 6383; SKX: # %bb.0: 6384; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6385; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] 6386; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6387; SKX-NEXT: retq # sched: [7:1.00] 6388 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6389 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6390 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 
6391 ret <16 x i16> %res 6392} 6393 6394define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) { 6395; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask1: 6396; GENERIC: # %bb.0: 6397; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6398; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] 6399; GENERIC-NEXT: retq # sched: [1:1.00] 6400; 6401; SKX-LABEL: test_masked_z_16xi16_perm_low_mask1: 6402; SKX: # %bb.0: 6403; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6404; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] 6405; SKX-NEXT: retq # sched: [7:1.00] 6406 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6407 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6408 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6409 ret <16 x i16> %res 6410} 6411define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6412; GENERIC-LABEL: test_masked_16xi16_perm_high_mask2: 6413; GENERIC: # %bb.0: 6414; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6415; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] 6416; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6417; GENERIC-NEXT: retq # sched: [1:1.00] 6418; 6419; SKX-LABEL: test_masked_16xi16_perm_high_mask2: 6420; SKX: # %bb.0: 6421; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6422; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] 6423; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6424; SKX-NEXT: retq # sched: [7:1.00] 6425 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, 
i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13> 6426 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6427 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6428 ret <16 x i16> %res 6429} 6430 6431define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) { 6432; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask2: 6433; GENERIC: # %bb.0: 6434; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6435; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] 6436; GENERIC-NEXT: retq # sched: [1:1.00] 6437; 6438; SKX-LABEL: test_masked_z_16xi16_perm_high_mask2: 6439; SKX: # %bb.0: 6440; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6441; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] 6442; SKX-NEXT: retq # sched: [7:1.00] 6443 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13> 6444 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6445 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6446 ret <16 x i16> %res 6447} 6448define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) { 6449; GENERIC-LABEL: test_16xi16_perm_low_mask3: 6450; GENERIC: # %bb.0: 6451; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6452; GENERIC-NEXT: retq # sched: [1:1.00] 6453; 6454; SKX-LABEL: test_16xi16_perm_low_mask3: 6455; SKX: # %bb.0: 6456; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6457; SKX-NEXT: retq # sched: [7:1.00] 6458 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 
14, i32 15> 6459 ret <16 x i16> %res 6460} 6461define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6462; GENERIC-LABEL: test_masked_16xi16_perm_low_mask3: 6463; GENERIC: # %bb.0: 6464; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6465; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6466; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6467; GENERIC-NEXT: retq # sched: [1:1.00] 6468; 6469; SKX-LABEL: test_masked_16xi16_perm_low_mask3: 6470; SKX: # %bb.0: 6471; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6472; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6473; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6474; SKX-NEXT: retq # sched: [7:1.00] 6475 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6476 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6477 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6478 ret <16 x i16> %res 6479} 6480 6481define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) { 6482; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask3: 6483; GENERIC: # %bb.0: 6484; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6485; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6486; GENERIC-NEXT: retq # sched: [1:1.00] 6487; 6488; SKX-LABEL: test_masked_z_16xi16_perm_low_mask3: 6489; SKX: # %bb.0: 6490; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6491; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6492; SKX-NEXT: retq # sched: [7:1.00] 6493 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, 
i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6494 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6495 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6496 ret <16 x i16> %res 6497} 6498define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6499; GENERIC-LABEL: test_masked_16xi16_perm_high_mask4: 6500; GENERIC: # %bb.0: 6501; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6502; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] 6503; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6504; GENERIC-NEXT: retq # sched: [1:1.00] 6505; 6506; SKX-LABEL: test_masked_16xi16_perm_high_mask4: 6507; SKX: # %bb.0: 6508; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6509; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] 6510; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6511; SKX-NEXT: retq # sched: [7:1.00] 6512 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15> 6513 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6514 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6515 ret <16 x i16> %res 6516} 6517 6518define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) { 6519; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask4: 6520; GENERIC: # %bb.0: 6521; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6522; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] 6523; GENERIC-NEXT: retq # sched: [1:1.00] 6524; 6525; SKX-LABEL: test_masked_z_16xi16_perm_high_mask4: 6526; SKX: # %bb.0: 6527; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6528; 
SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] 6529; SKX-NEXT: retq # sched: [7:1.00] 6530 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15> 6531 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6532 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6533 ret <16 x i16> %res 6534} 6535define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6536; GENERIC-LABEL: test_masked_16xi16_perm_low_mask5: 6537; GENERIC: # %bb.0: 6538; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6539; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] 6540; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6541; GENERIC-NEXT: retq # sched: [1:1.00] 6542; 6543; SKX-LABEL: test_masked_16xi16_perm_low_mask5: 6544; SKX: # %bb.0: 6545; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6546; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] 6547; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6548; SKX-NEXT: retq # sched: [7:1.00] 6549 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6550 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6551 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6552 ret <16 x i16> %res 6553} 6554 6555define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) { 6556; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask5: 6557; GENERIC: # %bb.0: 6558; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6559; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = 
ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] 6560; GENERIC-NEXT: retq # sched: [1:1.00] 6561; 6562; SKX-LABEL: test_masked_z_16xi16_perm_low_mask5: 6563; SKX: # %bb.0: 6564; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6565; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] 6566; SKX-NEXT: retq # sched: [7:1.00] 6567 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6568 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6569 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6570 ret <16 x i16> %res 6571} 6572define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) { 6573; GENERIC-LABEL: test_16xi16_perm_high_mask6: 6574; GENERIC: # %bb.0: 6575; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6576; GENERIC-NEXT: retq # sched: [1:1.00] 6577; 6578; SKX-LABEL: test_16xi16_perm_high_mask6: 6579; SKX: # %bb.0: 6580; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6581; SKX-NEXT: retq # sched: [7:1.00] 6582 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 6583 ret <16 x i16> %res 6584} 6585define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6586; GENERIC-LABEL: test_masked_16xi16_perm_high_mask6: 6587; GENERIC: # %bb.0: 6588; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6589; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6590; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6591; GENERIC-NEXT: retq # sched: [1:1.00] 6592; 6593; SKX-LABEL: 
test_masked_16xi16_perm_high_mask6: 6594; SKX: # %bb.0: 6595; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6596; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6597; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6598; SKX-NEXT: retq # sched: [7:1.00] 6599 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 6600 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6601 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6602 ret <16 x i16> %res 6603} 6604 6605define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) { 6606; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask6: 6607; GENERIC: # %bb.0: 6608; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6609; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6610; GENERIC-NEXT: retq # sched: [1:1.00] 6611; 6612; SKX-LABEL: test_masked_z_16xi16_perm_high_mask6: 6613; SKX: # %bb.0: 6614; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6615; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6616; SKX-NEXT: retq # sched: [7:1.00] 6617 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 6618 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6619 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6620 ret <16 x i16> %res 6621} 6622define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6623; GENERIC-LABEL: test_masked_16xi16_perm_low_mask7: 6624; GENERIC: # %bb.0: 6625; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6626; 
GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] 6627; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6628; GENERIC-NEXT: retq # sched: [1:1.00] 6629; 6630; SKX-LABEL: test_masked_16xi16_perm_low_mask7: 6631; SKX: # %bb.0: 6632; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6633; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] 6634; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6635; SKX-NEXT: retq # sched: [7:1.00] 6636 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15> 6637 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6638 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6639 ret <16 x i16> %res 6640} 6641 6642define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) { 6643; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask7: 6644; GENERIC: # %bb.0: 6645; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6646; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] 6647; GENERIC-NEXT: retq # sched: [1:1.00] 6648; 6649; SKX-LABEL: test_masked_z_16xi16_perm_low_mask7: 6650; SKX: # %bb.0: 6651; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6652; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] 6653; SKX-NEXT: retq # sched: [7:1.00] 6654 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15> 6655 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6656 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6657 ret <16 x i16> %res 6658} 6659define <16 x i16> 
@test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { 6660; GENERIC-LABEL: test_16xi16_perm_high_mem_mask0: 6661; GENERIC: # %bb.0: 6662; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6663; GENERIC-NEXT: retq # sched: [1:1.00] 6664; 6665; SKX-LABEL: test_16xi16_perm_high_mem_mask0: 6666; SKX: # %bb.0: 6667; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6668; SKX-NEXT: retq # sched: [7:1.00] 6669 %vec = load <16 x i16>, <16 x i16>* %vp 6670 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 6671 ret <16 x i16> %res 6672} 6673define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6674; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0: 6675; GENERIC: # %bb.0: 6676; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6677; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6678; GENERIC-NEXT: retq # sched: [1:1.00] 6679; 6680; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask0: 6681; SKX: # %bb.0: 6682; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6683; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6684; SKX-NEXT: retq # sched: [7:1.00] 6685 %vec = load <16 x i16>, <16 x i16>* %vp 6686 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 6687 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6688 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6689 ret <16 x i16> %res 6690} 6691 6692define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { 6693; GENERIC-LABEL: 
test_masked_z_16xi16_perm_high_mem_mask0: 6694; GENERIC: # %bb.0: 6695; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6696; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6697; GENERIC-NEXT: retq # sched: [1:1.00] 6698; 6699; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: 6700; SKX: # %bb.0: 6701; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6702; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6703; SKX-NEXT: retq # sched: [7:1.00] 6704 %vec = load <16 x i16>, <16 x i16>* %vp 6705 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 6706 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6707 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6708 ret <16 x i16> %res 6709} 6710 6711define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6712; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1: 6713; GENERIC: # %bb.0: 6714; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6715; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] 6716; GENERIC-NEXT: retq # sched: [1:1.00] 6717; 6718; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask1: 6719; SKX: # %bb.0: 6720; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6721; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] 6722; SKX-NEXT: retq # sched: [7:1.00] 6723 %vec = load <16 x i16>, <16 x i16>* %vp 6724 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6725 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6726 
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6727 ret <16 x i16> %res 6728} 6729 6730define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { 6731; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: 6732; GENERIC: # %bb.0: 6733; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6734; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] 6735; GENERIC-NEXT: retq # sched: [1:1.00] 6736; 6737; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: 6738; SKX: # %bb.0: 6739; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6740; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] 6741; SKX-NEXT: retq # sched: [7:1.00] 6742 %vec = load <16 x i16>, <16 x i16>* %vp 6743 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6744 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6745 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6746 ret <16 x i16> %res 6747} 6748 6749define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6750; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2: 6751; GENERIC: # %bb.0: 6752; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6753; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] 6754; GENERIC-NEXT: retq # sched: [1:1.00] 6755; 6756; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask2: 6757; SKX: # %bb.0: 6758; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6759; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] 6760; SKX-NEXT: retq # sched: [7:1.00] 6761 %vec = load <16 x i16>, <16 x i16>* %vp 6762 %shuf = shufflevector <16 
x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14> 6763 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6764 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6765 ret <16 x i16> %res 6766} 6767 6768define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { 6769; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: 6770; GENERIC: # %bb.0: 6771; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6772; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] 6773; GENERIC-NEXT: retq # sched: [1:1.00] 6774; 6775; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: 6776; SKX: # %bb.0: 6777; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6778; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] 6779; SKX-NEXT: retq # sched: [7:1.00] 6780 %vec = load <16 x i16>, <16 x i16>* %vp 6781 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14> 6782 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6783 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6784 ret <16 x i16> %res 6785} 6786 6787define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { 6788; GENERIC-LABEL: test_16xi16_perm_low_mem_mask3: 6789; GENERIC: # %bb.0: 6790; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6791; GENERIC-NEXT: retq # sched: [1:1.00] 6792; 6793; SKX-LABEL: test_16xi16_perm_low_mem_mask3: 6794; SKX: # %bb.0: 6795; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6796; SKX-NEXT: retq # sched: [7:1.00] 6797 %vec = load <16 x i16>, <16 x i16>* %vp 6798 
%res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6799 ret <16 x i16> %res 6800} 6801define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6802; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3: 6803; GENERIC: # %bb.0: 6804; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6805; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6806; GENERIC-NEXT: retq # sched: [1:1.00] 6807; 6808; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask3: 6809; SKX: # %bb.0: 6810; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6811; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6812; SKX-NEXT: retq # sched: [7:1.00] 6813 %vec = load <16 x i16>, <16 x i16>* %vp 6814 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6815 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6816 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6817 ret <16 x i16> %res 6818} 6819 6820define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { 6821; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: 6822; GENERIC: # %bb.0: 6823; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6824; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6825; GENERIC-NEXT: retq # sched: [1:1.00] 6826; 6827; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: 6828; SKX: # %bb.0: 6829; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6830; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6831; 
SKX-NEXT: retq # sched: [7:1.00] 6832 %vec = load <16 x i16>, <16 x i16>* %vp 6833 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6834 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6835 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6836 ret <16 x i16> %res 6837} 6838 6839define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6840; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4: 6841; GENERIC: # %bb.0: 6842; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6843; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] 6844; GENERIC-NEXT: retq # sched: [1:1.00] 6845; 6846; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask4: 6847; SKX: # %bb.0: 6848; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6849; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] 6850; SKX-NEXT: retq # sched: [7:1.00] 6851 %vec = load <16 x i16>, <16 x i16>* %vp 6852 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15> 6853 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6854 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6855 ret <16 x i16> %res 6856} 6857 6858define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) { 6859; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: 6860; GENERIC: # %bb.0: 6861; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6862; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] 6863; GENERIC-NEXT: retq # sched: [1:1.00] 6864; 6865; SKX-LABEL: 
test_masked_z_16xi16_perm_high_mem_mask4: 6866; SKX: # %bb.0: 6867; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6868; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] 6869; SKX-NEXT: retq # sched: [7:1.00] 6870 %vec = load <16 x i16>, <16 x i16>* %vp 6871 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15> 6872 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6873 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6874 ret <16 x i16> %res 6875} 6876 6877define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6878; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5: 6879; GENERIC: # %bb.0: 6880; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6881; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] 6882; GENERIC-NEXT: retq # sched: [1:1.00] 6883; 6884; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask5: 6885; SKX: # %bb.0: 6886; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6887; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] 6888; SKX-NEXT: retq # sched: [7:1.00] 6889 %vec = load <16 x i16>, <16 x i16>* %vp 6890 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6891 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6892 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6893 ret <16 x i16> %res 6894} 6895 6896define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) { 6897; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: 6898; GENERIC: # %bb.0: 6899; GENERIC-NEXT: 
vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6900; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] 6901; GENERIC-NEXT: retq # sched: [1:1.00] 6902; 6903; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: 6904; SKX: # %bb.0: 6905; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6906; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] 6907; SKX-NEXT: retq # sched: [7:1.00] 6908 %vec = load <16 x i16>, <16 x i16>* %vp 6909 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6910 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6911 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6912 ret <16 x i16> %res 6913} 6914 6915define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) { 6916; GENERIC-LABEL: test_16xi16_perm_high_mem_mask6: 6917; GENERIC: # %bb.0: 6918; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6919; GENERIC-NEXT: retq # sched: [1:1.00] 6920; 6921; SKX-LABEL: test_16xi16_perm_high_mem_mask6: 6922; SKX: # %bb.0: 6923; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6924; SKX-NEXT: retq # sched: [7:1.00] 6925 %vec = load <16 x i16>, <16 x i16>* %vp 6926 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 6927 ret <16 x i16> %res 6928} 6929define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6930; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6: 6931; GENERIC: # %bb.0: 6932; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6933; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 
{%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6934; GENERIC-NEXT: retq # sched: [1:1.00] 6935; 6936; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask6: 6937; SKX: # %bb.0: 6938; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6939; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6940; SKX-NEXT: retq # sched: [7:1.00] 6941 %vec = load <16 x i16>, <16 x i16>* %vp 6942 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 6943 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6944 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6945 ret <16 x i16> %res 6946} 6947 6948define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) { 6949; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: 6950; GENERIC: # %bb.0: 6951; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6952; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6953; GENERIC-NEXT: retq # sched: [1:1.00] 6954; 6955; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: 6956; SKX: # %bb.0: 6957; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6958; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6959; SKX-NEXT: retq # sched: [7:1.00] 6960 %vec = load <16 x i16>, <16 x i16>* %vp 6961 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 6962 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6963 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6964 ret <16 x i16> %res 6965} 6966 6967define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> 
%vec2, <16 x i16> %mask) { 6968; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7: 6969; GENERIC: # %bb.0: 6970; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6971; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] 6972; GENERIC-NEXT: retq # sched: [1:1.00] 6973; 6974; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask7: 6975; SKX: # %bb.0: 6976; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6977; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] 6978; SKX-NEXT: retq # sched: [7:1.00] 6979 %vec = load <16 x i16>, <16 x i16>* %vp 6980 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6981 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6982 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6983 ret <16 x i16> %res 6984} 6985 6986define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) { 6987; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: 6988; GENERIC: # %bb.0: 6989; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6990; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] 6991; GENERIC-NEXT: retq # sched: [1:1.00] 6992; 6993; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: 6994; SKX: # %bb.0: 6995; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6996; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] 6997; SKX-NEXT: retq # sched: [7:1.00] 6998 %vec = load <16 x i16>, <16 x i16>* %vp 6999 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 7000 %cmp = icmp eq <16 x i16> 
%mask, zeroinitializer 7001 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 7002 ret <16 x i16> %res 7003} 7004 7005define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) { 7006; GENERIC-LABEL: test_32xi16_perm_high_mask0: 7007; GENERIC: # %bb.0: 7008; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7009; GENERIC-NEXT: retq # sched: [1:1.00] 7010; 7011; SKX-LABEL: test_32xi16_perm_high_mask0: 7012; SKX: # %bb.0: 7013; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7014; SKX-NEXT: retq # sched: [7:1.00] 7015 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 7016 ret <32 x i16> %res 7017} 7018define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7019; GENERIC-LABEL: test_masked_32xi16_perm_high_mask0: 7020; GENERIC: # %bb.0: 7021; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7022; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7023; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7024; GENERIC-NEXT: retq # sched: [1:1.00] 7025; 7026; SKX-LABEL: test_masked_32xi16_perm_high_mask0: 7027; SKX: # %bb.0: 7028; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7029; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7030; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7031; SKX-NEXT: retq # sched: 
[7:1.00] 7032 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 7033 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7034 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7035 ret <32 x i16> %res 7036} 7037 7038define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) { 7039; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask0: 7040; GENERIC: # %bb.0: 7041; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7042; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7043; GENERIC-NEXT: retq # sched: [1:1.00] 7044; 7045; SKX-LABEL: test_masked_z_32xi16_perm_high_mask0: 7046; SKX: # %bb.0: 7047; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7048; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7049; SKX-NEXT: retq # sched: [7:1.00] 7050 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 7051 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7052 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7053 ret <32 x i16> %res 7054} 7055define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7056; GENERIC-LABEL: test_masked_32xi16_perm_low_mask1: 7057; GENERIC: # %bb.0: 7058; GENERIC-NEXT: vptestnmw %zmm2, 
%zmm2, %k1 # sched: [1:0.33] 7059; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] 7060; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7061; GENERIC-NEXT: retq # sched: [1:1.00] 7062; 7063; SKX-LABEL: test_masked_32xi16_perm_low_mask1: 7064; SKX: # %bb.0: 7065; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7066; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] 7067; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7068; SKX-NEXT: retq # sched: [7:1.00] 7069 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31> 7070 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7071 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7072 ret <32 x i16> %res 7073} 7074 7075define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) { 7076; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask1: 7077; GENERIC: # %bb.0: 7078; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7079; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] 7080; GENERIC-NEXT: retq # sched: [1:1.00] 7081; 7082; SKX-LABEL: test_masked_z_32xi16_perm_low_mask1: 7083; SKX: # %bb.0: 7084; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7085; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] 7086; SKX-NEXT: retq # sched: [7:1.00] 7087 %shuf = shufflevector <32 x i16> %vec, <32 x 
i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31> 7088 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7089 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7090 ret <32 x i16> %res 7091} 7092define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7093; GENERIC-LABEL: test_masked_32xi16_perm_high_mask2: 7094; GENERIC: # %bb.0: 7095; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7096; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] 7097; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7098; GENERIC-NEXT: retq # sched: [1:1.00] 7099; 7100; SKX-LABEL: test_masked_32xi16_perm_high_mask2: 7101; SKX: # %bb.0: 7102; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7103; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] 7104; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7105; SKX-NEXT: retq # sched: [7:1.00] 7106 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31> 7107 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7108 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7109 ret <32 x i16> %res 7110} 7111 7112define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) { 7113; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask2: 7114; GENERIC: 
# %bb.0: 7115; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7116; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] 7117; GENERIC-NEXT: retq # sched: [1:1.00] 7118; 7119; SKX-LABEL: test_masked_z_32xi16_perm_high_mask2: 7120; SKX: # %bb.0: 7121; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7122; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] 7123; SKX-NEXT: retq # sched: [7:1.00] 7124 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31> 7125 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7126 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7127 ret <32 x i16> %res 7128} 7129define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) { 7130; GENERIC-LABEL: test_32xi16_perm_low_mask3: 7131; GENERIC: # %bb.0: 7132; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7133; GENERIC-NEXT: retq # sched: [1:1.00] 7134; 7135; SKX-LABEL: test_32xi16_perm_low_mask3: 7136; SKX: # %bb.0: 7137; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7138; SKX-NEXT: retq # sched: [7:1.00] 7139 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 
28, i32 29, i32 30, i32 31> 7140 ret <32 x i16> %res 7141} 7142define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7143; GENERIC-LABEL: test_masked_32xi16_perm_low_mask3: 7144; GENERIC: # %bb.0: 7145; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7146; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7147; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7148; GENERIC-NEXT: retq # sched: [1:1.00] 7149; 7150; SKX-LABEL: test_masked_32xi16_perm_low_mask3: 7151; SKX: # %bb.0: 7152; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7153; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7154; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7155; SKX-NEXT: retq # sched: [7:1.00] 7156 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 7157 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7158 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7159 ret <32 x i16> %res 7160} 7161 7162define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) { 7163; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask3: 7164; GENERIC: # %bb.0: 7165; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7166; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7167; GENERIC-NEXT: retq # sched: [1:1.00] 7168; 7169; SKX-LABEL: test_masked_z_32xi16_perm_low_mask3: 7170; SKX: # %bb.0: 7171; 
SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7172; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7173; SKX-NEXT: retq # sched: [7:1.00] 7174 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 7175 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7176 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7177 ret <32 x i16> %res 7178} 7179define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7180; GENERIC-LABEL: test_masked_32xi16_perm_high_mask4: 7181; GENERIC: # %bb.0: 7182; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7183; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] 7184; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7185; GENERIC-NEXT: retq # sched: [1:1.00] 7186; 7187; SKX-LABEL: test_masked_32xi16_perm_high_mask4: 7188; SKX: # %bb.0: 7189; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7190; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] 7191; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7192; SKX-NEXT: retq # sched: [7:1.00] 7193 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30> 7194 %cmp = icmp eq <32 
x i16> %mask, zeroinitializer 7195 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7196 ret <32 x i16> %res 7197} 7198 7199define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) { 7200; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask4: 7201; GENERIC: # %bb.0: 7202; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7203; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] 7204; GENERIC-NEXT: retq # sched: [1:1.00] 7205; 7206; SKX-LABEL: test_masked_z_32xi16_perm_high_mask4: 7207; SKX: # %bb.0: 7208; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7209; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] 7210; SKX-NEXT: retq # sched: [7:1.00] 7211 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30> 7212 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7213 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7214 ret <32 x i16> %res 7215} 7216define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7217; GENERIC-LABEL: test_masked_32xi16_perm_low_mask5: 7218; GENERIC: # %bb.0: 7219; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7220; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] 7221; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7222; GENERIC-NEXT: retq # sched: [1:1.00] 7223; 7224; SKX-LABEL: test_masked_32xi16_perm_low_mask5: 7225; 
SKX: # %bb.0: 7226; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7227; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] 7228; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7229; SKX-NEXT: retq # sched: [7:1.00] 7230 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31> 7231 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7232 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7233 ret <32 x i16> %res 7234} 7235 7236define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) { 7237; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask5: 7238; GENERIC: # %bb.0: 7239; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7240; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] 7241; GENERIC-NEXT: retq # sched: [1:1.00] 7242; 7243; SKX-LABEL: test_masked_z_32xi16_perm_low_mask5: 7244; SKX: # %bb.0: 7245; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7246; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] 7247; SKX-NEXT: retq # sched: [7:1.00] 7248 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31> 7249 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7250 %res = select <32 x i1> 
%cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7251 ret <32 x i16> %res 7252} 7253define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) { 7254; GENERIC-LABEL: test_32xi16_perm_high_mask6: 7255; GENERIC: # %bb.0: 7256; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7257; GENERIC-NEXT: retq # sched: [1:1.00] 7258; 7259; SKX-LABEL: test_32xi16_perm_high_mask6: 7260; SKX: # %bb.0: 7261; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7262; SKX-NEXT: retq # sched: [7:1.00] 7263 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 7264 ret <32 x i16> %res 7265} 7266define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7267; GENERIC-LABEL: test_masked_32xi16_perm_high_mask6: 7268; GENERIC: # %bb.0: 7269; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7270; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7271; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7272; GENERIC-NEXT: retq # sched: [1:1.00] 7273; 7274; SKX-LABEL: test_masked_32xi16_perm_high_mask6: 7275; SKX: # %bb.0: 7276; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7277; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7278; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7279; SKX-NEXT: retq # sched: [7:1.00] 7280 %shuf = shufflevector <32 x i16> %vec, <32 x 
i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 7281 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7282 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7283 ret <32 x i16> %res 7284} 7285 7286define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) { 7287; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask6: 7288; GENERIC: # %bb.0: 7289; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7290; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7291; GENERIC-NEXT: retq # sched: [1:1.00] 7292; 7293; SKX-LABEL: test_masked_z_32xi16_perm_high_mask6: 7294; SKX: # %bb.0: 7295; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7296; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7297; SKX-NEXT: retq # sched: [7:1.00] 7298 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 7299 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7300 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7301 ret <32 x i16> %res 7302} 7303define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7304; GENERIC-LABEL: test_masked_32xi16_perm_low_mask7: 7305; GENERIC: # %bb.0: 7306; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7307; GENERIC-NEXT: vpshuflw 
{{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] 7308; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7309; GENERIC-NEXT: retq # sched: [1:1.00] 7310; 7311; SKX-LABEL: test_masked_32xi16_perm_low_mask7: 7312; SKX: # %bb.0: 7313; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7314; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] 7315; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7316; SKX-NEXT: retq # sched: [7:1.00] 7317 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 7318 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7319 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7320 ret <32 x i16> %res 7321} 7322 7323define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) { 7324; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask7: 7325; GENERIC: # %bb.0: 7326; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7327; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] 7328; GENERIC-NEXT: retq # sched: [1:1.00] 7329; 7330; SKX-LABEL: test_masked_z_32xi16_perm_low_mask7: 7331; SKX: # %bb.0: 7332; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7333; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] 7334; SKX-NEXT: retq # sched: [7:1.00] 7335 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, 
i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 7336 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7337 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7338 ret <32 x i16> %res 7339} 7340define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) { 7341; GENERIC-LABEL: test_32xi16_perm_high_mem_mask0: 7342; GENERIC: # %bb.0: 7343; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] 7344; GENERIC-NEXT: retq # sched: [1:1.00] 7345; 7346; SKX-LABEL: test_32xi16_perm_high_mem_mask0: 7347; SKX: # %bb.0: 7348; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] 7349; SKX-NEXT: retq # sched: [7:1.00] 7350 %vec = load <32 x i16>, <32 x i16>* %vp 7351 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30> 7352 ret <32 x i16> %res 7353} 7354define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 7355; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0: 7356; GENERIC: # %bb.0: 7357; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7358; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] 7359; GENERIC-NEXT: retq # sched: [1:1.00] 7360; 7361; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask0: 7362; SKX: # %bb.0: 7363; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: 
[3:1.00] 7364; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] 7365; SKX-NEXT: retq # sched: [7:1.00] 7366 %vec = load <32 x i16>, <32 x i16>* %vp 7367 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30> 7368 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7369 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7370 ret <32 x i16> %res 7371} 7372 7373define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { 7374; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: 7375; GENERIC: # %bb.0: 7376; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 7377; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] 7378; GENERIC-NEXT: retq # sched: [1:1.00] 7379; 7380; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: 7381; SKX: # %bb.0: 7382; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 7383; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] 7384; SKX-NEXT: retq # sched: [7:1.00] 7385 %vec = load <32 x i16>, <32 x i16>* %vp 7386 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30> 7387 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7388 %res = select <32 x i1> %cmp, <32 x i16> 
%shuf, <32 x i16> zeroinitializer 7389 ret <32 x i16> %res 7390} 7391 7392define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 7393; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1: 7394; GENERIC: # %bb.0: 7395; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7396; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] 7397; GENERIC-NEXT: retq # sched: [1:1.00] 7398; 7399; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask1: 7400; SKX: # %bb.0: 7401; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7402; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] 7403; SKX-NEXT: retq # sched: [7:1.00] 7404 %vec = load <32 x i16>, <32 x i16>* %vp 7405 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31> 7406 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7407 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7408 ret <32 x i16> %res 7409} 7410 7411define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { 7412; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: 7413; GENERIC: # %bb.0: 7414; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 7415; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] 7416; GENERIC-NEXT: retq # sched: [1:1.00] 7417; 7418; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: 7419; SKX: # %bb.0: 7420; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: 
[3:1.00] 7421; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] 7422; SKX-NEXT: retq # sched: [7:1.00] 7423 %vec = load <32 x i16>, <32 x i16>* %vp 7424 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31> 7425 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7426 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7427 ret <32 x i16> %res 7428} 7429 7430define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 7431; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2: 7432; GENERIC: # %bb.0: 7433; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7434; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] 7435; GENERIC-NEXT: retq # sched: [1:1.00] 7436; 7437; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask2: 7438; SKX: # %bb.0: 7439; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7440; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] 7441; SKX-NEXT: retq # sched: [7:1.00] 7442 %vec = load <32 x i16>, <32 x i16>* %vp 7443 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28> 7444 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7445 %res = select <32 x i1> 
%cmp, <32 x i16> %shuf, <32 x i16> %vec2 7446 ret <32 x i16> %res 7447} 7448 7449define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { 7450; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: 7451; GENERIC: # %bb.0: 7452; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 7453; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] 7454; GENERIC-NEXT: retq # sched: [1:1.00] 7455; 7456; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: 7457; SKX: # %bb.0: 7458; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 7459; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] 7460; SKX-NEXT: retq # sched: [7:1.00] 7461 %vec = load <32 x i16>, <32 x i16>* %vp 7462 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28> 7463 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7464 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7465 ret <32 x i16> %res 7466} 7467 7468define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) { 7469; GENERIC-LABEL: test_32xi16_perm_low_mem_mask3: 7470; GENERIC: # %bb.0: 7471; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] 7472; GENERIC-NEXT: retq # sched: [1:1.00] 7473; 7474; SKX-LABEL: test_32xi16_perm_low_mem_mask3: 7475; SKX: # %bb.0: 7476; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] 7477; 
SKX-NEXT: retq # sched: [7:1.00] 7478 %vec = load <32 x i16>, <32 x i16>* %vp 7479 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31> 7480 ret <32 x i16> %res 7481} 7482define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 7483; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3: 7484; GENERIC: # %bb.0: 7485; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7486; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] 7487; GENERIC-NEXT: retq # sched: [1:1.00] 7488; 7489; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask3: 7490; SKX: # %bb.0: 7491; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7492; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] 7493; SKX-NEXT: retq # sched: [7:1.00] 7494 %vec = load <32 x i16>, <32 x i16>* %vp 7495 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31> 7496 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7497 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7498 ret <32 x i16> %res 7499} 7500 7501define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { 7502; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: 7503; GENERIC: # %bb.0: 7504; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # 
sched: [1:0.33] 7505; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] 7506; GENERIC-NEXT: retq # sched: [1:1.00] 7507; 7508; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: 7509; SKX: # %bb.0: 7510; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 7511; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] 7512; SKX-NEXT: retq # sched: [7:1.00] 7513 %vec = load <32 x i16>, <32 x i16>* %vp 7514 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31> 7515 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7516 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7517 ret <32 x i16> %res 7518} 7519 7520define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 7521; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4: 7522; GENERIC: # %bb.0: 7523; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7524; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] 7525; GENERIC-NEXT: retq # sched: [1:1.00] 7526; 7527; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask4: 7528; SKX: # %bb.0: 7529; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7530; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] 7531; SKX-NEXT: retq # sched: [7:1.00] 7532 %vec = load <32 x i16>, <32 x i16>* %vp 7533 %shuf = shufflevector <32 x i16> %vec, 
<32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29> 7534 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7535 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7536 ret <32 x i16> %res 7537} 7538 7539define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) { 7540; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: 7541; GENERIC: # %bb.0: 7542; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 7543; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] 7544; GENERIC-NEXT: retq # sched: [1:1.00] 7545; 7546; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: 7547; SKX: # %bb.0: 7548; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 7549; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] 7550; SKX-NEXT: retq # sched: [7:1.00] 7551 %vec = load <32 x i16>, <32 x i16>* %vp 7552 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29> 7553 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7554 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7555 ret <32 x i16> %res 7556} 7557 7558define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 7559; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5: 7560; GENERIC: # %bb.0: 7561; GENERIC-NEXT: 
vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] 7562; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7563; GENERIC-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50] 7564; GENERIC-NEXT: retq # sched: [1:1.00] 7565; 7566; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask5: 7567; SKX: # %bb.0: 7568; SKX-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] 7569; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7570; SKX-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.33] 7571; SKX-NEXT: retq # sched: [7:1.00] 7572 %vec = load <32 x i16>, <32 x i16>* %vp 7573 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31> 7574 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7575 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7576 ret <32 x i16> %res 7577} 7578 7579define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) { 7580; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: 7581; GENERIC: # %bb.0: 7582; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] 7583; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 7584; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] 7585; GENERIC-NEXT: retq # sched: [1:1.00] 7586; 7587; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: 7588; SKX: # %bb.0: 7589; SKX-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] 7590; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 7591; SKX-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] 7592; SKX-NEXT: retq # sched: [7:1.00] 7593 %vec = load <32 x i16>, <32 x i16>* %vp 7594 
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31> 7595 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7596 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7597 ret <32 x i16> %res 7598} 7599 7600define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) { 7601; GENERIC-LABEL: test_32xi16_perm_high_mem_mask6: 7602; GENERIC: # %bb.0: 7603; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] 7604; GENERIC-NEXT: retq # sched: [1:1.00] 7605; 7606; SKX-LABEL: test_32xi16_perm_high_mem_mask6: 7607; SKX: # %bb.0: 7608; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] 7609; SKX-NEXT: retq # sched: [7:1.00] 7610 %vec = load <32 x i16>, <32 x i16>* %vp 7611 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30> 7612 ret <32 x i16> %res 7613} 7614define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 7615; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6: 7616; GENERIC: # %bb.0: 7617; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7618; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] 7619; GENERIC-NEXT: retq # sched: [1:1.00] 7620; 7621; SKX-LABEL: 
test_masked_32xi16_perm_high_mem_mask6: 7622; SKX: # %bb.0: 7623; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7624; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] 7625; SKX-NEXT: retq # sched: [7:1.00] 7626 %vec = load <32 x i16>, <32 x i16>* %vp 7627 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30> 7628 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7629 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7630 ret <32 x i16> %res 7631} 7632 7633define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) { 7634; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: 7635; GENERIC: # %bb.0: 7636; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 7637; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] 7638; GENERIC-NEXT: retq # sched: [1:1.00] 7639; 7640; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: 7641; SKX: # %bb.0: 7642; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 7643; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] 7644; SKX-NEXT: retq # sched: [7:1.00] 7645 %vec = load <32 x i16>, <32 x i16>* %vp 7646 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, 
i32 30> 7647 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7648 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7649 ret <32 x i16> %res 7650} 7651 7652define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 7653; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7: 7654; GENERIC: # %bb.0: 7655; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7656; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] 7657; GENERIC-NEXT: retq # sched: [1:1.00] 7658; 7659; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask7: 7660; SKX: # %bb.0: 7661; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7662; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] 7663; SKX-NEXT: retq # sched: [7:1.00] 7664 %vec = load <32 x i16>, <32 x i16>* %vp 7665 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 7666 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7667 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7668 ret <32 x i16> %res 7669} 7670 7671define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) { 7672; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: 7673; GENERIC: # %bb.0: 7674; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 7675; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] 7676; GENERIC-NEXT: retq # sched: [1:1.00] 7677; 7678; SKX-LABEL: 
test_masked_z_32xi16_perm_low_mem_mask7: 7679; SKX: # %bb.0: 7680; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 7681; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] 7682; SKX-NEXT: retq # sched: [7:1.00] 7683 %vec = load <32 x i16>, <32 x i16>* %vp 7684 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 7685 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7686 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7687 ret <32 x i16> %res 7688} 7689 7690define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) { 7691; GENERIC-LABEL: test_4xi32_perm_mask0: 7692; GENERIC: # %bb.0: 7693; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] sched: [1:1.00] 7694; GENERIC-NEXT: retq # sched: [1:1.00] 7695; 7696; SKX-LABEL: test_4xi32_perm_mask0: 7697; SKX: # %bb.0: 7698; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] sched: [1:1.00] 7699; SKX-NEXT: retq # sched: [7:1.00] 7700 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0> 7701 ret <4 x i32> %res 7702} 7703define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 7704; GENERIC-LABEL: test_masked_4xi32_perm_mask0: 7705; GENERIC: # %bb.0: 7706; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 7707; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50] 7708; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 7709; GENERIC-NEXT: retq # sched: [1:1.00] 7710; 7711; SKX-LABEL: test_masked_4xi32_perm_mask0: 7712; SKX: # %bb.0: 7713; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 7714; SKX-NEXT: vpshufd 
{{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00] 7715; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 7716; SKX-NEXT: retq # sched: [7:1.00] 7717 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0> 7718 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7719 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 7720 ret <4 x i32> %res 7721} 7722 7723define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) { 7724; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0: 7725; GENERIC: # %bb.0: 7726; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 7727; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:0.50] 7728; GENERIC-NEXT: retq # sched: [1:1.00] 7729; 7730; SKX-LABEL: test_masked_z_4xi32_perm_mask0: 7731; SKX: # %bb.0: 7732; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 7733; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00] 7734; SKX-NEXT: retq # sched: [7:1.00] 7735 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0> 7736 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7737 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 7738 ret <4 x i32> %res 7739} 7740define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 7741; GENERIC-LABEL: test_masked_4xi32_perm_mask1: 7742; GENERIC: # %bb.0: 7743; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 7744; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50] 7745; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 7746; GENERIC-NEXT: retq # sched: [1:1.00] 7747; 7748; SKX-LABEL: test_masked_4xi32_perm_mask1: 7749; SKX: # %bb.0: 7750; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 7751; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00] 7752; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 7753; SKX-NEXT: retq # sched: 
[7:1.00] 7754 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0> 7755 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7756 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 7757 ret <4 x i32> %res 7758} 7759 7760define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) { 7761; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1: 7762; GENERIC: # %bb.0: 7763; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 7764; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:0.50] 7765; GENERIC-NEXT: retq # sched: [1:1.00] 7766; 7767; SKX-LABEL: test_masked_z_4xi32_perm_mask1: 7768; SKX: # %bb.0: 7769; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 7770; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00] 7771; SKX-NEXT: retq # sched: [7:1.00] 7772 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0> 7773 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7774 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 7775 ret <4 x i32> %res 7776} 7777define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 7778; GENERIC-LABEL: test_masked_4xi32_perm_mask2: 7779; GENERIC: # %bb.0: 7780; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 7781; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50] 7782; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 7783; GENERIC-NEXT: retq # sched: [1:1.00] 7784; 7785; SKX-LABEL: test_masked_4xi32_perm_mask2: 7786; SKX: # %bb.0: 7787; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 7788; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00] 7789; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 7790; SKX-NEXT: retq # sched: [7:1.00] 7791 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0> 7792 %cmp = icmp eq <4 x i32> 
%mask, zeroinitializer 7793 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 7794 ret <4 x i32> %res 7795} 7796 7797define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) { 7798; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2: 7799; GENERIC: # %bb.0: 7800; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 7801; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:0.50] 7802; GENERIC-NEXT: retq # sched: [1:1.00] 7803; 7804; SKX-LABEL: test_masked_z_4xi32_perm_mask2: 7805; SKX: # %bb.0: 7806; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 7807; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00] 7808; SKX-NEXT: retq # sched: [7:1.00] 7809 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0> 7810 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7811 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 7812 ret <4 x i32> %res 7813} 7814define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) { 7815; GENERIC-LABEL: test_4xi32_perm_mask3: 7816; GENERIC: # %bb.0: 7817; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] sched: [1:1.00] 7818; GENERIC-NEXT: retq # sched: [1:1.00] 7819; 7820; SKX-LABEL: test_4xi32_perm_mask3: 7821; SKX: # %bb.0: 7822; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] sched: [1:1.00] 7823; SKX-NEXT: retq # sched: [7:1.00] 7824 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 7825 ret <4 x i32> %res 7826} 7827define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 7828; GENERIC-LABEL: test_masked_4xi32_perm_mask3: 7829; GENERIC: # %bb.0: 7830; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 7831; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50] 7832; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 7833; GENERIC-NEXT: retq # sched: [1:1.00] 7834; 7835; 
SKX-LABEL: test_masked_4xi32_perm_mask3: 7836; SKX: # %bb.0: 7837; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 7838; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00] 7839; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 7840; SKX-NEXT: retq # sched: [7:1.00] 7841 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 7842 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7843 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 7844 ret <4 x i32> %res 7845} 7846 7847define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) { 7848; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3: 7849; GENERIC: # %bb.0: 7850; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 7851; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:0.50] 7852; GENERIC-NEXT: retq # sched: [1:1.00] 7853; 7854; SKX-LABEL: test_masked_z_4xi32_perm_mask3: 7855; SKX: # %bb.0: 7856; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 7857; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00] 7858; SKX-NEXT: retq # sched: [7:1.00] 7859 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 7860 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7861 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 7862 ret <4 x i32> %res 7863} 7864define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) { 7865; GENERIC-LABEL: test_4xi32_perm_mem_mask0: 7866; GENERIC: # %bb.0: 7867; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [7:1.00] 7868; GENERIC-NEXT: retq # sched: [1:1.00] 7869; 7870; SKX-LABEL: test_4xi32_perm_mem_mask0: 7871; SKX: # %bb.0: 7872; SKX-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [7:1.00] 7873; SKX-NEXT: retq # sched: [7:1.00] 7874 %vec = load <4 x i32>, <4 x i32>* %vp 7875 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> 
7876 ret <4 x i32> %res 7877} 7878define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 7879; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0: 7880; GENERIC: # %bb.0: 7881; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 7882; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:0.50] 7883; GENERIC-NEXT: retq # sched: [1:1.00] 7884; 7885; SKX-LABEL: test_masked_4xi32_perm_mem_mask0: 7886; SKX: # %bb.0: 7887; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 7888; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:1.00] 7889; SKX-NEXT: retq # sched: [7:1.00] 7890 %vec = load <4 x i32>, <4 x i32>* %vp 7891 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> 7892 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7893 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 7894 ret <4 x i32> %res 7895} 7896 7897define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) { 7898; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0: 7899; GENERIC: # %bb.0: 7900; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] 7901; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:0.50] 7902; GENERIC-NEXT: retq # sched: [1:1.00] 7903; 7904; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask0: 7905; SKX: # %bb.0: 7906; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] 7907; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:1.00] 7908; SKX-NEXT: retq # sched: [7:1.00] 7909 %vec = load <4 x i32>, <4 x i32>* %vp 7910 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> 7911 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7912 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 7913 ret <4 x i32> %res 7914} 7915 7916define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 7917; 
GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1: 7918; GENERIC: # %bb.0: 7919; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 7920; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:0.50] 7921; GENERIC-NEXT: retq # sched: [1:1.00] 7922; 7923; SKX-LABEL: test_masked_4xi32_perm_mem_mask1: 7924; SKX: # %bb.0: 7925; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 7926; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:1.00] 7927; SKX-NEXT: retq # sched: [7:1.00] 7928 %vec = load <4 x i32>, <4 x i32>* %vp 7929 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1> 7930 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7931 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 7932 ret <4 x i32> %res 7933} 7934 7935define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) { 7936; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1: 7937; GENERIC: # %bb.0: 7938; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] 7939; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:0.50] 7940; GENERIC-NEXT: retq # sched: [1:1.00] 7941; 7942; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask1: 7943; SKX: # %bb.0: 7944; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] 7945; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:1.00] 7946; SKX-NEXT: retq # sched: [7:1.00] 7947 %vec = load <4 x i32>, <4 x i32>* %vp 7948 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1> 7949 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7950 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 7951 ret <4 x i32> %res 7952} 7953 7954define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 7955; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2: 7956; GENERIC: # %bb.0: 7957; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 7958; 
GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:0.50] 7959; GENERIC-NEXT: retq # sched: [1:1.00] 7960; 7961; SKX-LABEL: test_masked_4xi32_perm_mem_mask2: 7962; SKX: # %bb.0: 7963; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 7964; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:1.00] 7965; SKX-NEXT: retq # sched: [7:1.00] 7966 %vec = load <4 x i32>, <4 x i32>* %vp 7967 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1> 7968 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7969 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 7970 ret <4 x i32> %res 7971} 7972 7973define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) { 7974; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2: 7975; GENERIC: # %bb.0: 7976; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] 7977; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:0.50] 7978; GENERIC-NEXT: retq # sched: [1:1.00] 7979; 7980; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask2: 7981; SKX: # %bb.0: 7982; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] 7983; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:1.00] 7984; SKX-NEXT: retq # sched: [7:1.00] 7985 %vec = load <4 x i32>, <4 x i32>* %vp 7986 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1> 7987 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7988 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 7989 ret <4 x i32> %res 7990} 7991 7992define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) { 7993; GENERIC-LABEL: test_4xi32_perm_mem_mask3: 7994; GENERIC: # %bb.0: 7995; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [7:1.00] 7996; GENERIC-NEXT: retq # sched: [1:1.00] 7997; 7998; SKX-LABEL: test_4xi32_perm_mem_mask3: 7999; SKX: # %bb.0: 8000; SKX-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [7:1.00] 8001; 
SKX-NEXT: retq # sched: [7:1.00] 8002 %vec = load <4 x i32>, <4 x i32>* %vp 8003 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 8004 ret <4 x i32> %res 8005} 8006define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 8007; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3: 8008; GENERIC: # %bb.0: 8009; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 8010; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:0.50] 8011; GENERIC-NEXT: retq # sched: [1:1.00] 8012; 8013; SKX-LABEL: test_masked_4xi32_perm_mem_mask3: 8014; SKX: # %bb.0: 8015; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 8016; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:1.00] 8017; SKX-NEXT: retq # sched: [7:1.00] 8018 %vec = load <4 x i32>, <4 x i32>* %vp 8019 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 8020 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 8021 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 8022 ret <4 x i32> %res 8023} 8024 8025define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) { 8026; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3: 8027; GENERIC: # %bb.0: 8028; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] 8029; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:0.50] 8030; GENERIC-NEXT: retq # sched: [1:1.00] 8031; 8032; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask3: 8033; SKX: # %bb.0: 8034; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] 8035; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:1.00] 8036; SKX-NEXT: retq # sched: [7:1.00] 8037 %vec = load <4 x i32>, <4 x i32>* %vp 8038 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 8039 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 8040 %res = select <4 x i1> %cmp, <4 x i32> %shuf, 
<4 x i32> zeroinitializer 8041 ret <4 x i32> %res 8042} 8043 8044define <8 x i32> @test2_8xi32_perm_mask0(<8 x i32> %vec) { 8045; GENERIC-LABEL: test2_8xi32_perm_mask0: 8046; GENERIC: # %bb.0: 8047; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8048; GENERIC-NEXT: retq # sched: [1:1.00] 8049; 8050; SKX-LABEL: test2_8xi32_perm_mask0: 8051; SKX: # %bb.0: 8052; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8053; SKX-NEXT: retq # sched: [7:1.00] 8054 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 8055 ret <8 x i32> %res 8056} 8057define <8 x i32> @test2_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 8058; GENERIC-LABEL: test2_masked_8xi32_perm_mask0: 8059; GENERIC: # %bb.0: 8060; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8061; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8062; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 8063; GENERIC-NEXT: retq # sched: [1:1.00] 8064; 8065; SKX-LABEL: test2_masked_8xi32_perm_mask0: 8066; SKX: # %bb.0: 8067; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8068; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8069; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 8070; SKX-NEXT: retq # sched: [7:1.00] 8071 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 8072 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8073 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8074 ret <8 x i32> %res 8075} 8076 8077define <8 x i32> @test2_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { 8078; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask0: 8079; GENERIC: # %bb.0: 8080; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8081; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = 
ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8082; GENERIC-NEXT: retq # sched: [1:1.00] 8083; 8084; SKX-LABEL: test2_masked_z_8xi32_perm_mask0: 8085; SKX: # %bb.0: 8086; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8087; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8088; SKX-NEXT: retq # sched: [7:1.00] 8089 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 8090 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8091 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8092 ret <8 x i32> %res 8093} 8094define <8 x i32> @test2_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 8095; GENERIC-LABEL: test2_masked_8xi32_perm_mask1: 8096; GENERIC: # %bb.0: 8097; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8098; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] 8099; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 8100; GENERIC-NEXT: retq # sched: [1:1.00] 8101; 8102; SKX-LABEL: test2_masked_8xi32_perm_mask1: 8103; SKX: # %bb.0: 8104; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8105; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] 8106; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 8107; SKX-NEXT: retq # sched: [7:1.00] 8108 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7> 8109 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8110 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8111 ret <8 x i32> %res 8112} 8113 8114define <8 x i32> @test2_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { 8115; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask1: 8116; GENERIC: # %bb.0: 8117; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8118; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] 8119; 
GENERIC-NEXT: retq # sched: [1:1.00] 8120; 8121; SKX-LABEL: test2_masked_z_8xi32_perm_mask1: 8122; SKX: # %bb.0: 8123; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8124; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] 8125; SKX-NEXT: retq # sched: [7:1.00] 8126 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7> 8127 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8128 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8129 ret <8 x i32> %res 8130} 8131define <8 x i32> @test2_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 8132; GENERIC-LABEL: test2_masked_8xi32_perm_mask2: 8133; GENERIC: # %bb.0: 8134; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8135; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] 8136; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 8137; GENERIC-NEXT: retq # sched: [1:1.00] 8138; 8139; SKX-LABEL: test2_masked_8xi32_perm_mask2: 8140; SKX: # %bb.0: 8141; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8142; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] 8143; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 8144; SKX-NEXT: retq # sched: [7:1.00] 8145 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7> 8146 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8147 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8148 ret <8 x i32> %res 8149} 8150 8151define <8 x i32> @test2_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { 8152; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask2: 8153; GENERIC: # %bb.0: 8154; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8155; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] 8156; GENERIC-NEXT: retq # sched: [1:1.00] 8157; 
8158; SKX-LABEL: test2_masked_z_8xi32_perm_mask2: 8159; SKX: # %bb.0: 8160; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8161; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] 8162; SKX-NEXT: retq # sched: [7:1.00] 8163 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7> 8164 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8165 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8166 ret <8 x i32> %res 8167} 8168define <8 x i32> @test2_8xi32_perm_mask3(<8 x i32> %vec) { 8169; GENERIC-LABEL: test2_8xi32_perm_mask3: 8170; GENERIC: # %bb.0: 8171; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8172; GENERIC-NEXT: retq # sched: [1:1.00] 8173; 8174; SKX-LABEL: test2_8xi32_perm_mask3: 8175; SKX: # %bb.0: 8176; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8177; SKX-NEXT: retq # sched: [7:1.00] 8178 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 8179 ret <8 x i32> %res 8180} 8181define <8 x i32> @test2_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 8182; GENERIC-LABEL: test2_masked_8xi32_perm_mask3: 8183; GENERIC: # %bb.0: 8184; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8185; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8186; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 8187; GENERIC-NEXT: retq # sched: [1:1.00] 8188; 8189; SKX-LABEL: test2_masked_8xi32_perm_mask3: 8190; SKX: # %bb.0: 8191; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8192; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8193; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 8194; SKX-NEXT: retq # sched: [7:1.00] 8195 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 
5, i32 7, i32 5, i32 4> 8196 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8197 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8198 ret <8 x i32> %res 8199} 8200 8201define <8 x i32> @test2_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { 8202; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask3: 8203; GENERIC: # %bb.0: 8204; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8205; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8206; GENERIC-NEXT: retq # sched: [1:1.00] 8207; 8208; SKX-LABEL: test2_masked_z_8xi32_perm_mask3: 8209; SKX: # %bb.0: 8210; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8211; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8212; SKX-NEXT: retq # sched: [7:1.00] 8213 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 8214 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8215 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8216 ret <8 x i32> %res 8217} 8218define <8 x i32> @test2_8xi32_perm_mem_mask0(<8 x i32>* %vp) { 8219; GENERIC-LABEL: test2_8xi32_perm_mem_mask0: 8220; GENERIC: # %bb.0: 8221; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8222; GENERIC-NEXT: retq # sched: [1:1.00] 8223; 8224; SKX-LABEL: test2_8xi32_perm_mem_mask0: 8225; SKX: # %bb.0: 8226; SKX-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8227; SKX-NEXT: retq # sched: [7:1.00] 8228 %vec = load <8 x i32>, <8 x i32>* %vp 8229 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 8230 ret <8 x i32> %res 8231} 8232define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 8233; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0: 8234; GENERIC: # %bb.0: 8235; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: 
[1:0.33] 8236; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8237; GENERIC-NEXT: retq # sched: [1:1.00] 8238; 8239; SKX-LABEL: test2_masked_8xi32_perm_mem_mask0: 8240; SKX: # %bb.0: 8241; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8242; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8243; SKX-NEXT: retq # sched: [7:1.00] 8244 %vec = load <8 x i32>, <8 x i32>* %vp 8245 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 8246 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8247 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8248 ret <8 x i32> %res 8249} 8250 8251define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { 8252; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0: 8253; GENERIC: # %bb.0: 8254; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8255; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8256; GENERIC-NEXT: retq # sched: [1:1.00] 8257; 8258; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask0: 8259; SKX: # %bb.0: 8260; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8261; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8262; SKX-NEXT: retq # sched: [7:1.00] 8263 %vec = load <8 x i32>, <8 x i32>* %vp 8264 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 8265 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8266 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8267 ret <8 x i32> %res 8268} 8269 8270define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 8271; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1: 8272; GENERIC: # %bb.0: 8273; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8274; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 
{%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 8275; GENERIC-NEXT: retq # sched: [1:1.00] 8276; 8277; SKX-LABEL: test2_masked_8xi32_perm_mem_mask1: 8278; SKX: # %bb.0: 8279; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8280; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 8281; SKX-NEXT: retq # sched: [7:1.00] 8282 %vec = load <8 x i32>, <8 x i32>* %vp 8283 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 8284 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8285 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8286 ret <8 x i32> %res 8287} 8288 8289define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { 8290; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1: 8291; GENERIC: # %bb.0: 8292; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8293; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 8294; GENERIC-NEXT: retq # sched: [1:1.00] 8295; 8296; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask1: 8297; SKX: # %bb.0: 8298; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8299; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 8300; SKX-NEXT: retq # sched: [7:1.00] 8301 %vec = load <8 x i32>, <8 x i32>* %vp 8302 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 8303 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8304 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8305 ret <8 x i32> %res 8306} 8307 8308define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 8309; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2: 8310; GENERIC: # %bb.0: 8311; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8312; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] 8313; 
GENERIC-NEXT: retq # sched: [1:1.00] 8314; 8315; SKX-LABEL: test2_masked_8xi32_perm_mem_mask2: 8316; SKX: # %bb.0: 8317; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8318; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] 8319; SKX-NEXT: retq # sched: [7:1.00] 8320 %vec = load <8 x i32>, <8 x i32>* %vp 8321 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5> 8322 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8323 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8324 ret <8 x i32> %res 8325} 8326 8327define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { 8328; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2: 8329; GENERIC: # %bb.0: 8330; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8331; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] 8332; GENERIC-NEXT: retq # sched: [1:1.00] 8333; 8334; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask2: 8335; SKX: # %bb.0: 8336; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8337; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] 8338; SKX-NEXT: retq # sched: [7:1.00] 8339 %vec = load <8 x i32>, <8 x i32>* %vp 8340 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5> 8341 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8342 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8343 ret <8 x i32> %res 8344} 8345 8346define <8 x i32> @test2_8xi32_perm_mem_mask3(<8 x i32>* %vp) { 8347; GENERIC-LABEL: test2_8xi32_perm_mem_mask3: 8348; GENERIC: # %bb.0: 8349; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8350; GENERIC-NEXT: retq # sched: [1:1.00] 8351; 8352; SKX-LABEL: test2_8xi32_perm_mem_mask3: 8353; SKX: # %bb.0: 8354; SKX-NEXT: vpermilps {{.*#+}} ymm0 = 
mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8355; SKX-NEXT: retq # sched: [7:1.00] 8356 %vec = load <8 x i32>, <8 x i32>* %vp 8357 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 8358 ret <8 x i32> %res 8359} 8360define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 8361; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3: 8362; GENERIC: # %bb.0: 8363; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8364; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8365; GENERIC-NEXT: retq # sched: [1:1.00] 8366; 8367; SKX-LABEL: test2_masked_8xi32_perm_mem_mask3: 8368; SKX: # %bb.0: 8369; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8370; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8371; SKX-NEXT: retq # sched: [7:1.00] 8372 %vec = load <8 x i32>, <8 x i32>* %vp 8373 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 8374 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8375 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8376 ret <8 x i32> %res 8377} 8378 8379define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { 8380; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3: 8381; GENERIC: # %bb.0: 8382; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8383; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8384; GENERIC-NEXT: retq # sched: [1:1.00] 8385; 8386; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask3: 8387; SKX: # %bb.0: 8388; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8389; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8390; SKX-NEXT: retq # sched: [7:1.00] 8391 %vec = load <8 x i32>, <8 x i32>* %vp 8392 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x 
i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 8393 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8394 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8395 ret <8 x i32> %res 8396} 8397 8398define <16 x i32> @test2_16xi32_perm_mask0(<16 x i32> %vec) { 8399; GENERIC-LABEL: test2_16xi32_perm_mask0: 8400; GENERIC: # %bb.0: 8401; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8402; GENERIC-NEXT: retq # sched: [1:1.00] 8403; 8404; SKX-LABEL: test2_16xi32_perm_mask0: 8405; SKX: # %bb.0: 8406; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8407; SKX-NEXT: retq # sched: [7:1.00] 8408 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12> 8409 ret <16 x i32> %res 8410} 8411define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 8412; GENERIC-LABEL: test2_masked_16xi32_perm_mask0: 8413; GENERIC: # %bb.0: 8414; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8415; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8416; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 8417; GENERIC-NEXT: retq # sched: [1:1.00] 8418; 8419; SKX-LABEL: test2_masked_16xi32_perm_mask0: 8420; SKX: # %bb.0: 8421; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8422; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8423; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 8424; SKX-NEXT: retq # sched: [7:1.00] 8425 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12> 8426 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8427 
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8428 ret <16 x i32> %res 8429} 8430 8431define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { 8432; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask0: 8433; GENERIC: # %bb.0: 8434; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8435; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8436; GENERIC-NEXT: retq # sched: [1:1.00] 8437; 8438; SKX-LABEL: test2_masked_z_16xi32_perm_mask0: 8439; SKX: # %bb.0: 8440; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8441; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8442; SKX-NEXT: retq # sched: [7:1.00] 8443 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12> 8444 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8445 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8446 ret <16 x i32> %res 8447} 8448define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 8449; GENERIC-LABEL: test2_masked_16xi32_perm_mask1: 8450; GENERIC: # %bb.0: 8451; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8452; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] 8453; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 8454; GENERIC-NEXT: retq # sched: [1:1.00] 8455; 8456; SKX-LABEL: test2_masked_16xi32_perm_mask1: 8457; SKX: # %bb.0: 8458; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8459; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] 8460; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 8461; SKX-NEXT: retq # sched: [7:1.00] 8462 %shuf = shufflevector <16 x i32> %vec, <16 x 
i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12> 8463 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8464 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8465 ret <16 x i32> %res 8466} 8467 8468define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { 8469; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask1: 8470; GENERIC: # %bb.0: 8471; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8472; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] 8473; GENERIC-NEXT: retq # sched: [1:1.00] 8474; 8475; SKX-LABEL: test2_masked_z_16xi32_perm_mask1: 8476; SKX: # %bb.0: 8477; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8478; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] 8479; SKX-NEXT: retq # sched: [7:1.00] 8480 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12> 8481 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8482 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8483 ret <16 x i32> %res 8484} 8485define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 8486; GENERIC-LABEL: test2_masked_16xi32_perm_mask2: 8487; GENERIC: # %bb.0: 8488; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8489; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] 8490; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 8491; GENERIC-NEXT: retq # sched: [1:1.00] 8492; 8493; SKX-LABEL: test2_masked_16xi32_perm_mask2: 8494; SKX: # %bb.0: 8495; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8496; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = 
zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] 8497; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 8498; SKX-NEXT: retq # sched: [7:1.00] 8499 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12> 8500 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8501 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8502 ret <16 x i32> %res 8503} 8504 8505define <16 x i32> @test2_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { 8506; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask2: 8507; GENERIC: # %bb.0: 8508; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8509; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] 8510; GENERIC-NEXT: retq # sched: [1:1.00] 8511; 8512; SKX-LABEL: test2_masked_z_16xi32_perm_mask2: 8513; SKX: # %bb.0: 8514; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8515; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] 8516; SKX-NEXT: retq # sched: [7:1.00] 8517 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12> 8518 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8519 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8520 ret <16 x i32> %res 8521} 8522define <16 x i32> @test2_16xi32_perm_mask3(<16 x i32> %vec) { 8523; GENERIC-LABEL: test2_16xi32_perm_mask3: 8524; GENERIC: # %bb.0: 8525; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8526; GENERIC-NEXT: retq # sched: [1:1.00] 8527; 8528; SKX-LABEL: test2_16xi32_perm_mask3: 8529; SKX: # %bb.0: 8530; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: 
[1:1.00] 8531; SKX-NEXT: retq # sched: [7:1.00] 8532 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15> 8533 ret <16 x i32> %res 8534} 8535define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 8536; GENERIC-LABEL: test2_masked_16xi32_perm_mask3: 8537; GENERIC: # %bb.0: 8538; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8539; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8540; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 8541; GENERIC-NEXT: retq # sched: [1:1.00] 8542; 8543; SKX-LABEL: test2_masked_16xi32_perm_mask3: 8544; SKX: # %bb.0: 8545; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8546; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8547; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 8548; SKX-NEXT: retq # sched: [7:1.00] 8549 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15> 8550 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8551 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8552 ret <16 x i32> %res 8553} 8554 8555define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { 8556; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask3: 8557; GENERIC: # %bb.0: 8558; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8559; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8560; GENERIC-NEXT: retq # sched: [1:1.00] 8561; 8562; SKX-LABEL: test2_masked_z_16xi32_perm_mask3: 8563; SKX: # %bb.0: 8564; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8565; SKX-NEXT: vpshufd {{.*#+}} 
zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8566; SKX-NEXT: retq # sched: [7:1.00] 8567 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15> 8568 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8569 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8570 ret <16 x i32> %res 8571} 8572define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) { 8573; GENERIC-LABEL: test2_16xi32_perm_mem_mask0: 8574; GENERIC: # %bb.0: 8575; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8576; GENERIC-NEXT: retq # sched: [1:1.00] 8577; 8578; SKX-LABEL: test2_16xi32_perm_mem_mask0: 8579; SKX: # %bb.0: 8580; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8581; SKX-NEXT: retq # sched: [7:1.00] 8582 %vec = load <16 x i32>, <16 x i32>* %vp 8583 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15> 8584 ret <16 x i32> %res 8585} 8586define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 8587; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0: 8588; GENERIC: # %bb.0: 8589; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8590; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8591; GENERIC-NEXT: retq # sched: [1:1.00] 8592; 8593; SKX-LABEL: test2_masked_16xi32_perm_mem_mask0: 8594; SKX: # %bb.0: 8595; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8596; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8597; SKX-NEXT: retq # sched: [7:1.00] 8598 %vec = load <16 x i32>, <16 x i32>* %vp 8599 %shuf = 
shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15> 8600 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8601 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8602 ret <16 x i32> %res 8603} 8604 8605define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { 8606; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0: 8607; GENERIC: # %bb.0: 8608; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 8609; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8610; GENERIC-NEXT: retq # sched: [1:1.00] 8611; 8612; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask0: 8613; SKX: # %bb.0: 8614; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8615; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8616; SKX-NEXT: retq # sched: [7:1.00] 8617 %vec = load <16 x i32>, <16 x i32>* %vp 8618 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15> 8619 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8620 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8621 ret <16 x i32> %res 8622} 8623 8624define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 8625; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1: 8626; GENERIC: # %bb.0: 8627; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8628; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] 8629; GENERIC-NEXT: retq # sched: [1:1.00] 8630; 8631; SKX-LABEL: test2_masked_16xi32_perm_mem_mask1: 8632; SKX: # %bb.0: 8633; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8634; 
SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] 8635; SKX-NEXT: retq # sched: [7:1.00] 8636 %vec = load <16 x i32>, <16 x i32>* %vp 8637 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14> 8638 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8639 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8640 ret <16 x i32> %res 8641} 8642 8643define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { 8644; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1: 8645; GENERIC: # %bb.0: 8646; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 8647; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] 8648; GENERIC-NEXT: retq # sched: [1:1.00] 8649; 8650; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask1: 8651; SKX: # %bb.0: 8652; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8653; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] 8654; SKX-NEXT: retq # sched: [7:1.00] 8655 %vec = load <16 x i32>, <16 x i32>* %vp 8656 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14> 8657 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8658 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8659 ret <16 x i32> %res 8660} 8661 8662define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 8663; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2: 8664; GENERIC: # %bb.0: 8665; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8666; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] 
8667; GENERIC-NEXT: retq # sched: [1:1.00] 8668; 8669; SKX-LABEL: test2_masked_16xi32_perm_mem_mask2: 8670; SKX: # %bb.0: 8671; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8672; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] 8673; SKX-NEXT: retq # sched: [7:1.00] 8674 %vec = load <16 x i32>, <16 x i32>* %vp 8675 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14> 8676 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8677 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8678 ret <16 x i32> %res 8679} 8680 8681define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { 8682; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2: 8683; GENERIC: # %bb.0: 8684; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 8685; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] 8686; GENERIC-NEXT: retq # sched: [1:1.00] 8687; 8688; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask2: 8689; SKX: # %bb.0: 8690; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8691; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] 8692; SKX-NEXT: retq # sched: [7:1.00] 8693 %vec = load <16 x i32>, <16 x i32>* %vp 8694 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14> 8695 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8696 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8697 ret <16 x i32> %res 8698} 8699 8700define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) { 8701; GENERIC-LABEL: test2_16xi32_perm_mem_mask3: 8702; GENERIC: # %bb.0: 8703; GENERIC-NEXT: vpermilps 
{{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8704; GENERIC-NEXT: retq # sched: [1:1.00] 8705; 8706; SKX-LABEL: test2_16xi32_perm_mem_mask3: 8707; SKX: # %bb.0: 8708; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8709; SKX-NEXT: retq # sched: [7:1.00] 8710 %vec = load <16 x i32>, <16 x i32>* %vp 8711 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13> 8712 ret <16 x i32> %res 8713} 8714define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 8715; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3: 8716; GENERIC: # %bb.0: 8717; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8718; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8719; GENERIC-NEXT: retq # sched: [1:1.00] 8720; 8721; SKX-LABEL: test2_masked_16xi32_perm_mem_mask3: 8722; SKX: # %bb.0: 8723; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8724; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8725; SKX-NEXT: retq # sched: [7:1.00] 8726 %vec = load <16 x i32>, <16 x i32>* %vp 8727 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13> 8728 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8729 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8730 ret <16 x i32> %res 8731} 8732 8733define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { 8734; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3: 8735; GENERIC: # %bb.0: 8736; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 8737; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = 
mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8738; GENERIC-NEXT: retq # sched: [1:1.00] 8739; 8740; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask3: 8741; SKX: # %bb.0: 8742; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8743; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8744; SKX-NEXT: retq # sched: [7:1.00] 8745 %vec = load <16 x i32>, <16 x i32>* %vp 8746 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13> 8747 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8748 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8749 ret <16 x i32> %res 8750} 8751 8752define <8 x float> @test2_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) { 8753; GENERIC-LABEL: test2_8xfloat_shuff_mask0: 8754; GENERIC: # %bb.0: 8755; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 8756; GENERIC-NEXT: retq # sched: [1:1.00] 8757; 8758; SKX-LABEL: test2_8xfloat_shuff_mask0: 8759; SKX: # %bb.0: 8760; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 8761; SKX-NEXT: retq # sched: [7:1.00] 8762 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8763 ret <8 x float> %res 8764} 8765define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 8766; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask0: 8767; GENERIC: # %bb.0: 8768; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 8769; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8770; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 8771; GENERIC-NEXT: retq # sched: [1:1.00] 8772; 8773; SKX-LABEL: test2_8xfloat_masked_shuff_mask0: 8774; SKX: # %bb.0: 8775; 
SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 8776; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8777; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 8778; SKX-NEXT: retq # sched: [7:1.00] 8779 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8780 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8781 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8782 ret <8 x float> %res 8783} 8784 8785define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 8786; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask0: 8787; GENERIC: # %bb.0: 8788; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8789; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8790; GENERIC-NEXT: retq # sched: [1:1.00] 8791; 8792; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask0: 8793; SKX: # %bb.0: 8794; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8795; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8796; SKX-NEXT: retq # sched: [7:1.00] 8797 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8798 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8799 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8800 ret <8 x float> %res 8801} 8802define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 8803; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask1: 8804; GENERIC: # %bb.0: 8805; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 8806; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8807; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 8808; GENERIC-NEXT: retq # sched: 
[1:1.00] 8809; 8810; SKX-LABEL: test2_8xfloat_masked_shuff_mask1: 8811; SKX: # %bb.0: 8812; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 8813; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8814; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 8815; SKX-NEXT: retq # sched: [7:1.00] 8816 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8817 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8818 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8819 ret <8 x float> %res 8820} 8821 8822define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 8823; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask1: 8824; GENERIC: # %bb.0: 8825; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8826; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8827; GENERIC-NEXT: retq # sched: [1:1.00] 8828; 8829; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask1: 8830; SKX: # %bb.0: 8831; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8832; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8833; SKX-NEXT: retq # sched: [7:1.00] 8834 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8835 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8836 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8837 ret <8 x float> %res 8838} 8839define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 8840; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask2: 8841; GENERIC: # %bb.0: 8842; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 8843; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 
8844; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 8845; GENERIC-NEXT: retq # sched: [1:1.00] 8846; 8847; SKX-LABEL: test2_8xfloat_masked_shuff_mask2: 8848; SKX: # %bb.0: 8849; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 8850; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 8851; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 8852; SKX-NEXT: retq # sched: [7:1.00] 8853 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8854 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8855 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8856 ret <8 x float> %res 8857} 8858 8859define <8 x float> @test2_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 8860; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask2: 8861; GENERIC: # %bb.0: 8862; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8863; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 8864; GENERIC-NEXT: retq # sched: [1:1.00] 8865; 8866; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask2: 8867; SKX: # %bb.0: 8868; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8869; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 8870; SKX-NEXT: retq # sched: [7:1.00] 8871 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8872 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8873 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8874 ret <8 x float> %res 8875} 8876define <8 x float> @test2_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) { 8877; GENERIC-LABEL: test2_8xfloat_shuff_mask3: 8878; GENERIC: # %bb.0: 8879; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 8880; GENERIC-NEXT: retq # sched: 
[1:1.00] 8881; 8882; SKX-LABEL: test2_8xfloat_shuff_mask3: 8883; SKX: # %bb.0: 8884; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 8885; SKX-NEXT: retq # sched: [7:1.00] 8886 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8887 ret <8 x float> %res 8888} 8889define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 8890; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask3: 8891; GENERIC: # %bb.0: 8892; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 8893; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8894; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 8895; GENERIC-NEXT: retq # sched: [1:1.00] 8896; 8897; SKX-LABEL: test2_8xfloat_masked_shuff_mask3: 8898; SKX: # %bb.0: 8899; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 8900; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8901; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 8902; SKX-NEXT: retq # sched: [7:1.00] 8903 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8904 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8905 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8906 ret <8 x float> %res 8907} 8908 8909define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 8910; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mask3: 8911; GENERIC: # %bb.0: 8912; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8913; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8914; GENERIC-NEXT: retq # sched: [1:1.00] 8915; 8916; SKX-LABEL: test_8xfloat_zero_masked_shuff_mask3: 8917; SKX: # %bb.0: 8918; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # 
sched: [3:1.00] 8919; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8920; SKX-NEXT: retq # sched: [7:1.00] 8921 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8922 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8923 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8924 ret <8 x float> %res 8925} 8926define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { 8927; GENERIC-LABEL: test_8xfloat_shuff_mem_mask0: 8928; GENERIC: # %bb.0: 8929; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 8930; GENERIC-NEXT: retq # sched: [1:1.00] 8931; 8932; SKX-LABEL: test_8xfloat_shuff_mem_mask0: 8933; SKX: # %bb.0: 8934; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 8935; SKX-NEXT: retq # sched: [7:1.00] 8936 %vec2 = load <8 x float>, <8 x float>* %vec2p 8937 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8938 ret <8 x float> %res 8939} 8940define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 8941; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0: 8942; GENERIC: # %bb.0: 8943; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8944; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 8945; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 8946; GENERIC-NEXT: retq # sched: [1:1.00] 8947; 8948; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask0: 8949; SKX: # %bb.0: 8950; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8951; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 8952; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 8953; SKX-NEXT: retq # sched: [7:1.00] 8954 %vec2 = load <8 x float>, 
<8 x float>* %vec2p 8955 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8956 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8957 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8958 ret <8 x float> %res 8959} 8960 8961define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 8962; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: 8963; GENERIC: # %bb.0: 8964; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8965; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 8966; GENERIC-NEXT: retq # sched: [1:1.00] 8967; 8968; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: 8969; SKX: # %bb.0: 8970; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8971; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 8972; SKX-NEXT: retq # sched: [7:1.00] 8973 %vec2 = load <8 x float>, <8 x float>* %vec2p 8974 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8975 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8976 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8977 ret <8 x float> %res 8978} 8979 8980define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 8981; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1: 8982; GENERIC: # %bb.0: 8983; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8984; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 8985; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 8986; GENERIC-NEXT: retq # sched: [1:1.00] 8987; 8988; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask1: 8989; SKX: # %bb.0: 8990; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: 
[3:1.00] 8991; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 8992; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 8993; SKX-NEXT: retq # sched: [7:1.00] 8994 %vec2 = load <8 x float>, <8 x float>* %vec2p 8995 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8996 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8997 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8998 ret <8 x float> %res 8999} 9000 9001define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 9002; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: 9003; GENERIC: # %bb.0: 9004; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 9005; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 9006; GENERIC-NEXT: retq # sched: [1:1.00] 9007; 9008; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: 9009; SKX: # %bb.0: 9010; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 9011; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 9012; SKX-NEXT: retq # sched: [7:1.00] 9013 %vec2 = load <8 x float>, <8 x float>* %vec2p 9014 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 9015 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 9016 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 9017 ret <8 x float> %res 9018} 9019 9020define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 9021; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2: 9022; GENERIC: # %bb.0: 9023; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 9024; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 9025; 
GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 9026; GENERIC-NEXT: retq # sched: [1:1.00] 9027; 9028; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask2: 9029; SKX: # %bb.0: 9030; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 9031; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 9032; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 9033; SKX-NEXT: retq # sched: [7:1.00] 9034 %vec2 = load <8 x float>, <8 x float>* %vec2p 9035 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9036 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 9037 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 9038 ret <8 x float> %res 9039} 9040 9041define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 9042; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: 9043; GENERIC: # %bb.0: 9044; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 9045; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 9046; GENERIC-NEXT: retq # sched: [1:1.00] 9047; 9048; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: 9049; SKX: # %bb.0: 9050; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 9051; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 9052; SKX-NEXT: retq # sched: [7:1.00] 9053 %vec2 = load <8 x float>, <8 x float>* %vec2p 9054 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9055 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 9056 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 9057 ret <8 x float> %res 9058} 9059 9060define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { 9061; GENERIC-LABEL: test_8xfloat_shuff_mem_mask3: 9062; GENERIC: # %bb.0: 
9063; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] 9064; GENERIC-NEXT: retq # sched: [1:1.00] 9065; 9066; SKX-LABEL: test_8xfloat_shuff_mem_mask3: 9067; SKX: # %bb.0: 9068; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 9069; SKX-NEXT: retq # sched: [7:1.00] 9070 %vec2 = load <8 x float>, <8 x float>* %vec2p 9071 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9072 ret <8 x float> %res 9073} 9074define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 9075; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3: 9076; GENERIC: # %bb.0: 9077; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 9078; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 9079; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 9080; GENERIC-NEXT: retq # sched: [1:1.00] 9081; 9082; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask3: 9083; SKX: # %bb.0: 9084; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 9085; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 9086; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 9087; SKX-NEXT: retq # sched: [7:1.00] 9088 %vec2 = load <8 x float>, <8 x float>* %vec2p 9089 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9090 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 9091 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 9092 ret <8 x float> %res 9093} 9094 9095define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 9096; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: 9097; GENERIC: # %bb.0: 9098; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 9099; GENERIC-NEXT: vshuff32x4 
{{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 9100; GENERIC-NEXT: retq # sched: [1:1.00] 9101; 9102; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: 9103; SKX: # %bb.0: 9104; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 9105; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 9106; SKX-NEXT: retq # sched: [7:1.00] 9107 %vec2 = load <8 x float>, <8 x float>* %vec2p 9108 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9109 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 9110 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 9111 ret <8 x float> %res 9112} 9113 9114define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9115; GENERIC-LABEL: test_16xfloat_shuff_mask0: 9116; GENERIC: # %bb.0: 9117; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 9118; GENERIC-NEXT: retq # sched: [1:1.00] 9119; 9120; SKX-LABEL: test_16xfloat_shuff_mask0: 9121; SKX: # %bb.0: 9122; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 9123; SKX-NEXT: retq # sched: [7:1.00] 9124 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 9125 ret <16 x float> %res 9126} 9127define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 9128; GENERIC-LABEL: test_16xfloat_masked_shuff_mask0: 9129; GENERIC: # %bb.0: 9130; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 9131; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 9132; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # 
sched: [1:1.00] 9133; GENERIC-NEXT: retq # sched: [1:1.00] 9134; 9135; SKX-LABEL: test_16xfloat_masked_shuff_mask0: 9136; SKX: # %bb.0: 9137; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 9138; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 9139; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 9140; SKX-NEXT: retq # sched: [7:1.00] 9141 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 9142 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9143 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9144 ret <16 x float> %res 9145} 9146 9147define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9148; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask0: 9149; GENERIC: # %bb.0: 9150; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9151; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 9152; GENERIC-NEXT: retq # sched: [1:1.00] 9153; 9154; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask0: 9155; SKX: # %bb.0: 9156; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9157; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 9158; SKX-NEXT: retq # sched: [7:1.00] 9159 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 9160 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9161 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9162 ret <16 x float> %res 9163} 9164define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 
x float> %vec3, <16 x i32> %mask) { 9165; GENERIC-LABEL: test_16xfloat_masked_shuff_mask1: 9166; GENERIC: # %bb.0: 9167; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 9168; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] 9169; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 9170; GENERIC-NEXT: retq # sched: [1:1.00] 9171; 9172; SKX-LABEL: test_16xfloat_masked_shuff_mask1: 9173; SKX: # %bb.0: 9174; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 9175; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00] 9176; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 9177; SKX-NEXT: retq # sched: [7:1.00] 9178 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31> 9179 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9180 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9181 ret <16 x float> %res 9182} 9183 9184define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9185; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask1: 9186; GENERIC: # %bb.0: 9187; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9188; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] 9189; GENERIC-NEXT: retq # sched: [1:1.00] 9190; 9191; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask1: 9192; SKX: # %bb.0: 9193; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9194; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00] 9195; SKX-NEXT: retq # sched: [7:1.00] 9196 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 
16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31> 9197 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9198 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9199 ret <16 x float> %res 9200} 9201define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 9202; GENERIC-LABEL: test_16xfloat_masked_shuff_mask2: 9203; GENERIC: # %bb.0: 9204; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 9205; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] 9206; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 9207; GENERIC-NEXT: retq # sched: [1:1.00] 9208; 9209; SKX-LABEL: test_16xfloat_masked_shuff_mask2: 9210; SKX: # %bb.0: 9211; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 9212; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00] 9213; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 9214; SKX-NEXT: retq # sched: [7:1.00] 9215 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 9216 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9217 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9218 ret <16 x float> %res 9219} 9220 9221define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9222; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask2: 9223; GENERIC: # %bb.0: 9224; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9225; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] 9226; GENERIC-NEXT: retq # sched: [1:1.00] 9227; 9228; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask2: 9229; SKX: # %bb.0: 9230; SKX-NEXT: vptestnmd %zmm2, 
%zmm2, %k1 # sched: [3:1.00] 9231; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00] 9232; SKX-NEXT: retq # sched: [7:1.00] 9233 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 9234 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9235 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9236 ret <16 x float> %res 9237} 9238define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) { 9239; GENERIC-LABEL: test_16xfloat_shuff_mask3: 9240; GENERIC: # %bb.0: 9241; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] 9242; GENERIC-NEXT: retq # sched: [1:1.00] 9243; 9244; SKX-LABEL: test_16xfloat_shuff_mask3: 9245; SKX: # %bb.0: 9246; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] 9247; SKX-NEXT: retq # sched: [7:1.00] 9248 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 9249 ret <16 x float> %res 9250} 9251define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 9252; GENERIC-LABEL: test_16xfloat_masked_shuff_mask3: 9253; GENERIC: # %bb.0: 9254; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 9255; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] 9256; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 9257; GENERIC-NEXT: retq # sched: [1:1.00] 9258; 9259; SKX-LABEL: test_16xfloat_masked_shuff_mask3: 9260; SKX: # %bb.0: 9261; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 9262; SKX-NEXT: 
vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] 9263; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 9264; SKX-NEXT: retq # sched: [7:1.00] 9265 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 9266 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9267 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9268 ret <16 x float> %res 9269} 9270 9271define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9272; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask3: 9273; GENERIC: # %bb.0: 9274; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9275; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] 9276; GENERIC-NEXT: retq # sched: [1:1.00] 9277; 9278; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask3: 9279; SKX: # %bb.0: 9280; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9281; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] 9282; SKX-NEXT: retq # sched: [7:1.00] 9283 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 9284 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9285 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9286 ret <16 x float> %res 9287} 9288define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { 9289; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0: 9290; GENERIC: # %bb.0: 9291; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9292; GENERIC-NEXT: 
retq # sched: [1:1.00] 9293; 9294; SKX-LABEL: test_16xfloat_shuff_mem_mask0: 9295; SKX: # %bb.0: 9296; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9297; SKX-NEXT: retq # sched: [7:1.00] 9298 %vec2 = load <16 x float>, <16 x float>* %vec2p 9299 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 9300 ret <16 x float> %res 9301} 9302define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 9303; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0: 9304; GENERIC: # %bb.0: 9305; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9306; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9307; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 9308; GENERIC-NEXT: retq # sched: [1:1.00] 9309; 9310; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask0: 9311; SKX: # %bb.0: 9312; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9313; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9314; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 9315; SKX-NEXT: retq # sched: [7:1.00] 9316 %vec2 = load <16 x float>, <16 x float>* %vec2p 9317 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 9318 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9319 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9320 ret <16 x float> %res 9321} 9322 9323define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 9324; GENERIC-LABEL: 
test_16xfloat_zero_masked_shuff_mem_mask0: 9325; GENERIC: # %bb.0: 9326; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 9327; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9328; GENERIC-NEXT: retq # sched: [1:1.00] 9329; 9330; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: 9331; SKX: # %bb.0: 9332; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 9333; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9334; SKX-NEXT: retq # sched: [7:1.00] 9335 %vec2 = load <16 x float>, <16 x float>* %vec2p 9336 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 9337 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9338 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9339 ret <16 x float> %res 9340} 9341 9342define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 9343; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1: 9344; GENERIC: # %bb.0: 9345; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9346; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9347; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 9348; GENERIC-NEXT: retq # sched: [1:1.00] 9349; 9350; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask1: 9351; SKX: # %bb.0: 9352; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9353; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9354; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 9355; SKX-NEXT: retq # sched: [7:1.00] 9356 %vec2 = load <16 x float>, <16 x float>* %vec2p 9357 %shuf = shufflevector <16 x float> 
%vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 9358 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9359 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9360 ret <16 x float> %res 9361} 9362 9363define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 9364; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: 9365; GENERIC: # %bb.0: 9366; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 9367; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9368; GENERIC-NEXT: retq # sched: [1:1.00] 9369; 9370; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: 9371; SKX: # %bb.0: 9372; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 9373; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9374; SKX-NEXT: retq # sched: [7:1.00] 9375 %vec2 = load <16 x float>, <16 x float>* %vec2p 9376 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 9377 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9378 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9379 ret <16 x float> %res 9380} 9381 9382define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 9383; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2: 9384; GENERIC: # %bb.0: 9385; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9386; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] 9387; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 9388; GENERIC-NEXT: retq 
# sched: [1:1.00] 9389; 9390; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask2: 9391; SKX: # %bb.0: 9392; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9393; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00] 9394; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 9395; SKX-NEXT: retq # sched: [7:1.00] 9396 %vec2 = load <16 x float>, <16 x float>* %vec2p 9397 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27> 9398 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9399 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9400 ret <16 x float> %res 9401} 9402 9403define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 9404; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: 9405; GENERIC: # %bb.0: 9406; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 9407; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] 9408; GENERIC-NEXT: retq # sched: [1:1.00] 9409; 9410; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: 9411; SKX: # %bb.0: 9412; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 9413; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00] 9414; SKX-NEXT: retq # sched: [7:1.00] 9415 %vec2 = load <16 x float>, <16 x float>* %vec2p 9416 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27> 9417 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9418 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9419 ret <16 x float> %res 9420} 9421 9422define <16 x float> 
@test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { 9423; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3: 9424; GENERIC: # %bb.0: 9425; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 9426; GENERIC-NEXT: retq # sched: [1:1.00] 9427; 9428; SKX-LABEL: test_16xfloat_shuff_mem_mask3: 9429; SKX: # %bb.0: 9430; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 9431; SKX-NEXT: retq # sched: [7:1.00] 9432 %vec2 = load <16 x float>, <16 x float>* %vec2p 9433 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 9434 ret <16 x float> %res 9435} 9436define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 9437; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3: 9438; GENERIC: # %bb.0: 9439; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9440; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 9441; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 9442; GENERIC-NEXT: retq # sched: [1:1.00] 9443; 9444; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask3: 9445; SKX: # %bb.0: 9446; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9447; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 9448; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 9449; SKX-NEXT: retq # sched: [7:1.00] 9450 %vec2 = load <16 x float>, <16 x float>* %vec2p 9451 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 9452 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9453 
%res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9454 ret <16 x float> %res 9455} 9456 9457define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 9458; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: 9459; GENERIC: # %bb.0: 9460; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 9461; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 9462; GENERIC-NEXT: retq # sched: [1:1.00] 9463; 9464; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: 9465; SKX: # %bb.0: 9466; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 9467; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 9468; SKX-NEXT: retq # sched: [7:1.00] 9469 %vec2 = load <16 x float>, <16 x float>* %vec2p 9470 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 9471 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9472 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9473 ret <16 x float> %res 9474} 9475 9476define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) { 9477; GENERIC-LABEL: test_4xdouble_shuff_mask0: 9478; GENERIC: # %bb.0: 9479; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9480; GENERIC-NEXT: retq # sched: [1:1.00] 9481; 9482; SKX-LABEL: test_4xdouble_shuff_mask0: 9483; SKX: # %bb.0: 9484; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9485; SKX-NEXT: retq # sched: [7:1.00] 9486 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9487 ret <4 x double> %res 9488} 9489define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x 
double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 9490; GENERIC-LABEL: test_4xdouble_masked_shuff_mask0: 9491; GENERIC: # %bb.0: 9492; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 9493; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9494; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 9495; GENERIC-NEXT: retq # sched: [1:1.00] 9496; 9497; SKX-LABEL: test_4xdouble_masked_shuff_mask0: 9498; SKX: # %bb.0: 9499; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 9500; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9501; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 9502; SKX-NEXT: retq # sched: [7:1.00] 9503 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9504 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9505 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9506 ret <4 x double> %res 9507} 9508 9509define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 9510; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask0: 9511; GENERIC: # %bb.0: 9512; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9513; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9514; GENERIC-NEXT: retq # sched: [1:1.00] 9515; 9516; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask0: 9517; SKX: # %bb.0: 9518; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9519; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9520; SKX-NEXT: retq # sched: [7:1.00] 9521 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9522 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9523 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9524 ret <4 x double> %res 9525} 9526define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x 
double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 9527; GENERIC-LABEL: test_4xdouble_masked_shuff_mask1: 9528; GENERIC: # %bb.0: 9529; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 9530; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9531; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 9532; GENERIC-NEXT: retq # sched: [1:1.00] 9533; 9534; SKX-LABEL: test_4xdouble_masked_shuff_mask1: 9535; SKX: # %bb.0: 9536; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 9537; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9538; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 9539; SKX-NEXT: retq # sched: [7:1.00] 9540 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9541 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9542 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9543 ret <4 x double> %res 9544} 9545 9546define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 9547; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask1: 9548; GENERIC: # %bb.0: 9549; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9550; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9551; GENERIC-NEXT: retq # sched: [1:1.00] 9552; 9553; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask1: 9554; SKX: # %bb.0: 9555; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9556; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9557; SKX-NEXT: retq # sched: [7:1.00] 9558 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9559 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9560 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9561 ret <4 x double> %res 9562} 9563define <4 x double> 
@test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 9564; GENERIC-LABEL: test_4xdouble_masked_shuff_mask2: 9565; GENERIC: # %bb.0: 9566; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 9567; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9568; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 9569; GENERIC-NEXT: retq # sched: [1:1.00] 9570; 9571; SKX-LABEL: test_4xdouble_masked_shuff_mask2: 9572; SKX: # %bb.0: 9573; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 9574; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9575; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 9576; SKX-NEXT: retq # sched: [7:1.00] 9577 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9578 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9579 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9580 ret <4 x double> %res 9581} 9582 9583define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 9584; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask2: 9585; GENERIC: # %bb.0: 9586; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9587; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9588; GENERIC-NEXT: retq # sched: [1:1.00] 9589; 9590; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask2: 9591; SKX: # %bb.0: 9592; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9593; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9594; SKX-NEXT: retq # sched: [7:1.00] 9595 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9596 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9597 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9598 ret <4 x double> %res 9599} 
9600define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) { 9601; GENERIC-LABEL: test_4xdouble_shuff_mask3: 9602; GENERIC: # %bb.0: 9603; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9604; GENERIC-NEXT: retq # sched: [1:1.00] 9605; 9606; SKX-LABEL: test_4xdouble_shuff_mask3: 9607; SKX: # %bb.0: 9608; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9609; SKX-NEXT: retq # sched: [7:1.00] 9610 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9611 ret <4 x double> %res 9612} 9613define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 9614; GENERIC-LABEL: test_4xdouble_masked_shuff_mask3: 9615; GENERIC: # %bb.0: 9616; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 9617; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9618; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 9619; GENERIC-NEXT: retq # sched: [1:1.00] 9620; 9621; SKX-LABEL: test_4xdouble_masked_shuff_mask3: 9622; SKX: # %bb.0: 9623; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 9624; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9625; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 9626; SKX-NEXT: retq # sched: [7:1.00] 9627 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9628 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9629 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9630 ret <4 x double> %res 9631} 9632 9633define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 9634; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask3: 9635; GENERIC: # %bb.0: 9636; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9637; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 
{%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9638; GENERIC-NEXT: retq # sched: [1:1.00] 9639; 9640; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask3: 9641; SKX: # %bb.0: 9642; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9643; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9644; SKX-NEXT: retq # sched: [7:1.00] 9645 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9646 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9647 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9648 ret <4 x double> %res 9649} 9650define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { 9651; GENERIC-LABEL: test_4xdouble_shuff_mem_mask0: 9652; GENERIC: # %bb.0: 9653; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 9654; GENERIC-NEXT: retq # sched: [1:1.00] 9655; 9656; SKX-LABEL: test_4xdouble_shuff_mem_mask0: 9657; SKX: # %bb.0: 9658; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 9659; SKX-NEXT: retq # sched: [7:1.00] 9660 %vec2 = load <4 x double>, <4 x double>* %vec2p 9661 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9662 ret <4 x double> %res 9663} 9664define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 9665; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0: 9666; GENERIC: # %bb.0: 9667; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9668; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] 9669; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 9670; GENERIC-NEXT: retq # sched: [1:1.00] 9671; 9672; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask0: 9673; SKX: # %bb.0: 9674; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9675; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = 
ymm0[2,3],mem[2,3] sched: [10:1.00] 9676; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 9677; SKX-NEXT: retq # sched: [7:1.00] 9678 %vec2 = load <4 x double>, <4 x double>* %vec2p 9679 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9680 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9681 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9682 ret <4 x double> %res 9683} 9684 9685define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 9686; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: 9687; GENERIC: # %bb.0: 9688; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 9689; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] 9690; GENERIC-NEXT: retq # sched: [1:1.00] 9691; 9692; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: 9693; SKX: # %bb.0: 9694; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 9695; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] 9696; SKX-NEXT: retq # sched: [7:1.00] 9697 %vec2 = load <4 x double>, <4 x double>* %vec2p 9698 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9699 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9700 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9701 ret <4 x double> %res 9702} 9703 9704define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 9705; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1: 9706; GENERIC: # %bb.0: 9707; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9708; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] 9709; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 9710; GENERIC-NEXT: retq # sched: [1:1.00] 9711; 9712; SKX-LABEL: 
test_4xdouble_masked_shuff_mem_mask1: 9713; SKX: # %bb.0: 9714; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9715; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] 9716; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 9717; SKX-NEXT: retq # sched: [7:1.00] 9718 %vec2 = load <4 x double>, <4 x double>* %vec2p 9719 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9720 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9721 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9722 ret <4 x double> %res 9723} 9724 9725define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 9726; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: 9727; GENERIC: # %bb.0: 9728; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 9729; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] 9730; GENERIC-NEXT: retq # sched: [1:1.00] 9731; 9732; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: 9733; SKX: # %bb.0: 9734; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 9735; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] 9736; SKX-NEXT: retq # sched: [7:1.00] 9737 %vec2 = load <4 x double>, <4 x double>* %vec2p 9738 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9739 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9740 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9741 ret <4 x double> %res 9742} 9743 9744define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 9745; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2: 9746; GENERIC: # %bb.0: 9747; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9748; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = 
ymm0[2,3],mem[0,1] sched: [8:1.00] 9749; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 9750; GENERIC-NEXT: retq # sched: [1:1.00] 9751; 9752; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask2: 9753; SKX: # %bb.0: 9754; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9755; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] 9756; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 9757; SKX-NEXT: retq # sched: [7:1.00] 9758 %vec2 = load <4 x double>, <4 x double>* %vec2p 9759 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9760 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9761 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9762 ret <4 x double> %res 9763} 9764 9765define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 9766; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: 9767; GENERIC: # %bb.0: 9768; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 9769; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] 9770; GENERIC-NEXT: retq # sched: [1:1.00] 9771; 9772; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: 9773; SKX: # %bb.0: 9774; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 9775; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] 9776; SKX-NEXT: retq # sched: [7:1.00] 9777 %vec2 = load <4 x double>, <4 x double>* %vec2p 9778 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9779 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9780 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9781 ret <4 x double> %res 9782} 9783 9784define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { 9785; GENERIC-LABEL: test_4xdouble_shuff_mem_mask3: 9786; GENERIC: # %bb.0: 9787; 
GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 9788; GENERIC-NEXT: retq # sched: [1:1.00] 9789; 9790; SKX-LABEL: test_4xdouble_shuff_mem_mask3: 9791; SKX: # %bb.0: 9792; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 9793; SKX-NEXT: retq # sched: [7:1.00] 9794 %vec2 = load <4 x double>, <4 x double>* %vec2p 9795 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9796 ret <4 x double> %res 9797} 9798define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 9799; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3: 9800; GENERIC: # %bb.0: 9801; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9802; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] 9803; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 9804; GENERIC-NEXT: retq # sched: [1:1.00] 9805; 9806; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask3: 9807; SKX: # %bb.0: 9808; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9809; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] 9810; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 9811; SKX-NEXT: retq # sched: [7:1.00] 9812 %vec2 = load <4 x double>, <4 x double>* %vec2p 9813 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9814 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9815 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9816 ret <4 x double> %res 9817} 9818 9819define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 9820; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: 9821; GENERIC: # %bb.0: 9822; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 9823; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: 
[8:1.00] 9824; GENERIC-NEXT: retq # sched: [1:1.00] 9825; 9826; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: 9827; SKX: # %bb.0: 9828; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 9829; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] 9830; SKX-NEXT: retq # sched: [7:1.00] 9831 %vec2 = load <4 x double>, <4 x double>* %vec2p 9832 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9833 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9834 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9835 ret <4 x double> %res 9836} 9837 9838define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) { 9839; GENERIC-LABEL: test_8xdouble_shuff_mask0: 9840; GENERIC: # %bb.0: 9841; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] 9842; GENERIC-NEXT: retq # sched: [1:1.00] 9843; 9844; SKX-LABEL: test_8xdouble_shuff_mask0: 9845; SKX: # %bb.0: 9846; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] 9847; SKX-NEXT: retq # sched: [7:1.00] 9848 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9> 9849 ret <8 x double> %res 9850} 9851define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 9852; GENERIC-LABEL: test_8xdouble_masked_shuff_mask0: 9853; GENERIC: # %bb.0: 9854; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 9855; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] 9856; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 9857; GENERIC-NEXT: retq # sched: [1:1.00] 9858; 9859; SKX-LABEL: test_8xdouble_masked_shuff_mask0: 9860; SKX: # %bb.0: 9861; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 9862; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = 
zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] 9863; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 9864; SKX-NEXT: retq # sched: [7:1.00] 9865 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9> 9866 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9867 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 9868 ret <8 x double> %res 9869} 9870 9871define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 9872; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask0: 9873; GENERIC: # %bb.0: 9874; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 9875; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] 9876; GENERIC-NEXT: retq # sched: [1:1.00] 9877; 9878; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask0: 9879; SKX: # %bb.0: 9880; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 9881; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] 9882; SKX-NEXT: retq # sched: [7:1.00] 9883 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9> 9884 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9885 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 9886 ret <8 x double> %res 9887} 9888define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 9889; GENERIC-LABEL: test_8xdouble_masked_shuff_mask1: 9890; GENERIC: # %bb.0: 9891; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 9892; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] 9893; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 9894; GENERIC-NEXT: retq # sched: [1:1.00] 9895; 9896; SKX-LABEL: test_8xdouble_masked_shuff_mask1: 9897; SKX: # %bb.0: 
9898; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 9899; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00] 9900; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 9901; SKX-NEXT: retq # sched: [7:1.00] 9902 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13> 9903 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9904 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 9905 ret <8 x double> %res 9906} 9907 9908define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 9909; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask1: 9910; GENERIC: # %bb.0: 9911; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 9912; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] 9913; GENERIC-NEXT: retq # sched: [1:1.00] 9914; 9915; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask1: 9916; SKX: # %bb.0: 9917; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 9918; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00] 9919; SKX-NEXT: retq # sched: [7:1.00] 9920 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13> 9921 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9922 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 9923 ret <8 x double> %res 9924} 9925define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 9926; GENERIC-LABEL: test_8xdouble_masked_shuff_mask2: 9927; GENERIC: # %bb.0: 9928; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 9929; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] 9930; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 9931; 
GENERIC-NEXT: retq # sched: [1:1.00] 9932; 9933; SKX-LABEL: test_8xdouble_masked_shuff_mask2: 9934; SKX: # %bb.0: 9935; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 9936; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00] 9937; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 9938; SKX-NEXT: retq # sched: [7:1.00] 9939 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9> 9940 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9941 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 9942 ret <8 x double> %res 9943} 9944 9945define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 9946; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask2: 9947; GENERIC: # %bb.0: 9948; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 9949; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] 9950; GENERIC-NEXT: retq # sched: [1:1.00] 9951; 9952; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask2: 9953; SKX: # %bb.0: 9954; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 9955; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00] 9956; SKX-NEXT: retq # sched: [7:1.00] 9957 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9> 9958 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9959 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 9960 ret <8 x double> %res 9961} 9962define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) { 9963; GENERIC-LABEL: test_8xdouble_shuff_mask3: 9964; GENERIC: # %bb.0: 9965; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] 9966; GENERIC-NEXT: retq # sched: [1:1.00] 9967; 9968; SKX-LABEL: 
test_8xdouble_shuff_mask3: 9969; SKX: # %bb.0: 9970; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] 9971; SKX-NEXT: retq # sched: [7:1.00] 9972 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11> 9973 ret <8 x double> %res 9974} 9975define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 9976; GENERIC-LABEL: test_8xdouble_masked_shuff_mask3: 9977; GENERIC: # %bb.0: 9978; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 9979; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] 9980; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 9981; GENERIC-NEXT: retq # sched: [1:1.00] 9982; 9983; SKX-LABEL: test_8xdouble_masked_shuff_mask3: 9984; SKX: # %bb.0: 9985; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 9986; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] 9987; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 9988; SKX-NEXT: retq # sched: [7:1.00] 9989 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11> 9990 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9991 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 9992 ret <8 x double> %res 9993} 9994 9995define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 9996; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask3: 9997; GENERIC: # %bb.0: 9998; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 9999; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] 10000; GENERIC-NEXT: retq # sched: [1:1.00] 10001; 10002; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask3: 10003; SKX: # %bb.0: 10004; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 
# sched: [3:1.00] 10005; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] 10006; SKX-NEXT: retq # sched: [7:1.00] 10007 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11> 10008 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10009 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 10010 ret <8 x double> %res 10011} 10012define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { 10013; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0: 10014; GENERIC: # %bb.0: 10015; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] 10016; GENERIC-NEXT: retq # sched: [1:1.00] 10017; 10018; SKX-LABEL: test_8xdouble_shuff_mem_mask0: 10019; SKX: # %bb.0: 10020; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] 10021; SKX-NEXT: retq # sched: [7:1.00] 10022 %vec2 = load <8 x double>, <8 x double>* %vec2p 10023 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 10024 ret <8 x double> %res 10025} 10026define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 10027; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0: 10028; GENERIC: # %bb.0: 10029; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 10030; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] 10031; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 10032; GENERIC-NEXT: retq # sched: [1:1.00] 10033; 10034; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask0: 10035; SKX: # %bb.0: 10036; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 10037; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] 10038; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: 
[1:0.33] 10039; SKX-NEXT: retq # sched: [7:1.00] 10040 %vec2 = load <8 x double>, <8 x double>* %vec2p 10041 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 10042 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10043 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 10044 ret <8 x double> %res 10045} 10046 10047define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 10048; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: 10049; GENERIC: # %bb.0: 10050; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 10051; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] 10052; GENERIC-NEXT: retq # sched: [1:1.00] 10053; 10054; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: 10055; SKX: # %bb.0: 10056; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 10057; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] 10058; SKX-NEXT: retq # sched: [7:1.00] 10059 %vec2 = load <8 x double>, <8 x double>* %vec2p 10060 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 10061 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10062 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 10063 ret <8 x double> %res 10064} 10065 10066define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 10067; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1: 10068; GENERIC: # %bb.0: 10069; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 10070; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] 10071; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 10072; GENERIC-NEXT: retq # sched: [1:1.00] 10073; 
10074; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask1: 10075; SKX: # %bb.0: 10076; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 10077; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00] 10078; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 10079; SKX-NEXT: retq # sched: [7:1.00] 10080 %vec2 = load <8 x double>, <8 x double>* %vec2p 10081 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10082 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10083 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 10084 ret <8 x double> %res 10085} 10086 10087define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 10088; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: 10089; GENERIC: # %bb.0: 10090; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 10091; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] 10092; GENERIC-NEXT: retq # sched: [1:1.00] 10093; 10094; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: 10095; SKX: # %bb.0: 10096; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 10097; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00] 10098; SKX-NEXT: retq # sched: [7:1.00] 10099 %vec2 = load <8 x double>, <8 x double>* %vec2p 10100 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10101 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10102 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 10103 ret <8 x double> %res 10104} 10105 10106define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 10107; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2: 10108; 
GENERIC: # %bb.0: 10109; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 10110; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] 10111; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 10112; GENERIC-NEXT: retq # sched: [1:1.00] 10113; 10114; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask2: 10115; SKX: # %bb.0: 10116; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 10117; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00] 10118; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 10119; SKX-NEXT: retq # sched: [7:1.00] 10120 %vec2 = load <8 x double>, <8 x double>* %vec2p 10121 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13> 10122 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10123 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 10124 ret <8 x double> %res 10125} 10126 10127define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 10128; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: 10129; GENERIC: # %bb.0: 10130; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 10131; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] 10132; GENERIC-NEXT: retq # sched: [1:1.00] 10133; 10134; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: 10135; SKX: # %bb.0: 10136; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 10137; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00] 10138; SKX-NEXT: retq # sched: [7:1.00] 10139 %vec2 = load <8 x double>, <8 x double>* %vec2p 10140 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13> 10141 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10142 %res = select <8 x i1> %cmp, <8 x double> 
%shuf, <8 x double> zeroinitializer 10143 ret <8 x double> %res 10144} 10145 10146define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { 10147; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3: 10148; GENERIC: # %bb.0: 10149; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] 10150; GENERIC-NEXT: retq # sched: [1:1.00] 10151; 10152; SKX-LABEL: test_8xdouble_shuff_mem_mask3: 10153; SKX: # %bb.0: 10154; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] 10155; SKX-NEXT: retq # sched: [7:1.00] 10156 %vec2 = load <8 x double>, <8 x double>* %vec2p 10157 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9> 10158 ret <8 x double> %res 10159} 10160define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 10161; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3: 10162; GENERIC: # %bb.0: 10163; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 10164; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] 10165; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 10166; GENERIC-NEXT: retq # sched: [1:1.00] 10167; 10168; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask3: 10169; SKX: # %bb.0: 10170; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 10171; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] 10172; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 10173; SKX-NEXT: retq # sched: [7:1.00] 10174 %vec2 = load <8 x double>, <8 x double>* %vec2p 10175 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9> 10176 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10177 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 10178 ret <8 x double> 
%res 10179} 10180 10181define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 10182; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: 10183; GENERIC: # %bb.0: 10184; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 10185; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] 10186; GENERIC-NEXT: retq # sched: [1:1.00] 10187; 10188; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: 10189; SKX: # %bb.0: 10190; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 10191; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] 10192; SKX-NEXT: retq # sched: [7:1.00] 10193 %vec2 = load <8 x double>, <8 x double>* %vec2p 10194 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9> 10195 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10196 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 10197 ret <8 x double> %res 10198} 10199 10200define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) { 10201; GENERIC-LABEL: test_8xi32_shuff_mask0: 10202; GENERIC: # %bb.0: 10203; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 10204; GENERIC-NEXT: retq # sched: [1:1.00] 10205; 10206; SKX-LABEL: test_8xi32_shuff_mask0: 10207; SKX: # %bb.0: 10208; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 10209; SKX-NEXT: retq # sched: [7:1.00] 10210 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10211 ret <8 x i32> %res 10212} 10213define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { 10214; GENERIC-LABEL: test_8xi32_masked_shuff_mask0: 10215; GENERIC: # %bb.0: 10216; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # 
sched: [1:0.33] 10217; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 10218; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10219; GENERIC-NEXT: retq # sched: [1:1.00] 10220; 10221; SKX-LABEL: test_8xi32_masked_shuff_mask0: 10222; SKX: # %bb.0: 10223; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 10224; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 10225; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10226; SKX-NEXT: retq # sched: [7:1.00] 10227 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10228 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10229 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10230 ret <8 x i32> %res 10231} 10232 10233define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { 10234; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask0: 10235; GENERIC: # %bb.0: 10236; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10237; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 10238; GENERIC-NEXT: retq # sched: [1:1.00] 10239; 10240; SKX-LABEL: test_8xi32_zero_masked_shuff_mask0: 10241; SKX: # %bb.0: 10242; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10243; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 10244; SKX-NEXT: retq # sched: [7:1.00] 10245 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10246 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10247 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10248 ret <8 x i32> %res 10249} 10250define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { 10251; GENERIC-LABEL: 
test_8xi32_masked_shuff_mask1: 10252; GENERIC: # %bb.0: 10253; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 10254; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 10255; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10256; GENERIC-NEXT: retq # sched: [1:1.00] 10257; 10258; SKX-LABEL: test_8xi32_masked_shuff_mask1: 10259; SKX: # %bb.0: 10260; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 10261; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 10262; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10263; SKX-NEXT: retq # sched: [7:1.00] 10264 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10265 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10266 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10267 ret <8 x i32> %res 10268} 10269 10270define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { 10271; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask1: 10272; GENERIC: # %bb.0: 10273; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10274; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 10275; GENERIC-NEXT: retq # sched: [1:1.00] 10276; 10277; SKX-LABEL: test_8xi32_zero_masked_shuff_mask1: 10278; SKX: # %bb.0: 10279; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10280; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 10281; SKX-NEXT: retq # sched: [7:1.00] 10282 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10283 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10284 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10285 ret <8 x i32> %res 10286} 10287define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> 
%vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { 10288; GENERIC-LABEL: test_8xi32_masked_shuff_mask2: 10289; GENERIC: # %bb.0: 10290; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 10291; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 10292; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10293; GENERIC-NEXT: retq # sched: [1:1.00] 10294; 10295; SKX-LABEL: test_8xi32_masked_shuff_mask2: 10296; SKX: # %bb.0: 10297; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 10298; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 10299; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10300; SKX-NEXT: retq # sched: [7:1.00] 10301 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10302 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10303 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10304 ret <8 x i32> %res 10305} 10306 10307define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { 10308; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask2: 10309; GENERIC: # %bb.0: 10310; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10311; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 10312; GENERIC-NEXT: retq # sched: [1:1.00] 10313; 10314; SKX-LABEL: test_8xi32_zero_masked_shuff_mask2: 10315; SKX: # %bb.0: 10316; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10317; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 10318; SKX-NEXT: retq # sched: [7:1.00] 10319 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10320 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10321 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10322 ret <8 x 
i32> %res 10323} 10324define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) { 10325; GENERIC-LABEL: test_8xi32_shuff_mask3: 10326; GENERIC: # %bb.0: 10327; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 10328; GENERIC-NEXT: retq # sched: [1:1.00] 10329; 10330; SKX-LABEL: test_8xi32_shuff_mask3: 10331; SKX: # %bb.0: 10332; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 10333; SKX-NEXT: retq # sched: [7:1.00] 10334 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10335 ret <8 x i32> %res 10336} 10337define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { 10338; GENERIC-LABEL: test_8xi32_masked_shuff_mask3: 10339; GENERIC: # %bb.0: 10340; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 10341; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 10342; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10343; GENERIC-NEXT: retq # sched: [1:1.00] 10344; 10345; SKX-LABEL: test_8xi32_masked_shuff_mask3: 10346; SKX: # %bb.0: 10347; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 10348; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 10349; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10350; SKX-NEXT: retq # sched: [7:1.00] 10351 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10352 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10353 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10354 ret <8 x i32> %res 10355} 10356 10357define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { 10358; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask3: 10359; GENERIC: # %bb.0: 10360; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: 
[1:0.33] 10361; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 10362; GENERIC-NEXT: retq # sched: [1:1.00] 10363; 10364; SKX-LABEL: test_8xi32_zero_masked_shuff_mask3: 10365; SKX: # %bb.0: 10366; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10367; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 10368; SKX-NEXT: retq # sched: [7:1.00] 10369 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10370 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10371 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10372 ret <8 x i32> %res 10373} 10374define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) { 10375; GENERIC-LABEL: test_8xi32_shuff_mem_mask0: 10376; GENERIC: # %bb.0: 10377; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 10378; GENERIC-NEXT: retq # sched: [1:1.00] 10379; 10380; SKX-LABEL: test_8xi32_shuff_mem_mask0: 10381; SKX: # %bb.0: 10382; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 10383; SKX-NEXT: retq # sched: [7:1.00] 10384 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10385 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10386 ret <8 x i32> %res 10387} 10388define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 10389; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0: 10390; GENERIC: # %bb.0: 10391; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10392; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 10393; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 10394; GENERIC-NEXT: retq # sched: [1:1.00] 10395; 10396; SKX-LABEL: test_8xi32_masked_shuff_mem_mask0: 10397; SKX: # %bb.0: 10398; 
SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10399; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 10400; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 10401; SKX-NEXT: retq # sched: [7:1.00] 10402 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10403 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10404 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10405 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10406 ret <8 x i32> %res 10407} 10408 10409define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 10410; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: 10411; GENERIC: # %bb.0: 10412; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 10413; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 10414; GENERIC-NEXT: retq # sched: [1:1.00] 10415; 10416; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: 10417; SKX: # %bb.0: 10418; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 10419; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 10420; SKX-NEXT: retq # sched: [7:1.00] 10421 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10422 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10423 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10424 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10425 ret <8 x i32> %res 10426} 10427 10428define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 10429; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1: 10430; GENERIC: # %bb.0: 10431; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10432; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] 
sched: [8:1.00] 10433; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 10434; GENERIC-NEXT: retq # sched: [1:1.00] 10435; 10436; SKX-LABEL: test_8xi32_masked_shuff_mem_mask1: 10437; SKX: # %bb.0: 10438; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10439; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10440; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 10441; SKX-NEXT: retq # sched: [7:1.00] 10442 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10443 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10444 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10445 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10446 ret <8 x i32> %res 10447} 10448 10449define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 10450; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: 10451; GENERIC: # %bb.0: 10452; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 10453; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10454; GENERIC-NEXT: retq # sched: [1:1.00] 10455; 10456; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: 10457; SKX: # %bb.0: 10458; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 10459; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10460; SKX-NEXT: retq # sched: [7:1.00] 10461 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10462 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10463 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10464 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10465 ret <8 x i32> %res 10466} 10467 10468define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 10469; GENERIC-LABEL: 
test_8xi32_masked_shuff_mem_mask2: 10470; GENERIC: # %bb.0: 10471; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10472; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10473; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 10474; GENERIC-NEXT: retq # sched: [1:1.00] 10475; 10476; SKX-LABEL: test_8xi32_masked_shuff_mem_mask2: 10477; SKX: # %bb.0: 10478; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10479; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10480; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 10481; SKX-NEXT: retq # sched: [7:1.00] 10482 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10483 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10484 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10485 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10486 ret <8 x i32> %res 10487} 10488 10489define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 10490; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: 10491; GENERIC: # %bb.0: 10492; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 10493; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10494; GENERIC-NEXT: retq # sched: [1:1.00] 10495; 10496; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: 10497; SKX: # %bb.0: 10498; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 10499; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10500; SKX-NEXT: retq # sched: [7:1.00] 10501 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10502 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10503 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10504 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> 
zeroinitializer 10505 ret <8 x i32> %res 10506} 10507 10508define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) { 10509; GENERIC-LABEL: test_8xi32_shuff_mem_mask3: 10510; GENERIC: # %bb.0: 10511; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] 10512; GENERIC-NEXT: retq # sched: [1:1.00] 10513; 10514; SKX-LABEL: test_8xi32_shuff_mem_mask3: 10515; SKX: # %bb.0: 10516; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 10517; SKX-NEXT: retq # sched: [7:1.00] 10518 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10519 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10520 ret <8 x i32> %res 10521} 10522define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 10523; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3: 10524; GENERIC: # %bb.0: 10525; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10526; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10527; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 10528; GENERIC-NEXT: retq # sched: [1:1.00] 10529; 10530; SKX-LABEL: test_8xi32_masked_shuff_mem_mask3: 10531; SKX: # %bb.0: 10532; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10533; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10534; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 10535; SKX-NEXT: retq # sched: [7:1.00] 10536 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10537 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10538 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10539 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10540 ret <8 x i32> %res 10541} 10542 10543define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, 
<8 x i32> %mask) { 10544; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: 10545; GENERIC: # %bb.0: 10546; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 10547; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10548; GENERIC-NEXT: retq # sched: [1:1.00] 10549; 10550; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: 10551; SKX: # %bb.0: 10552; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 10553; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10554; SKX-NEXT: retq # sched: [7:1.00] 10555 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10556 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10557 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10558 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10559 ret <8 x i32> %res 10560} 10561 10562define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) { 10563; GENERIC-LABEL: test_16xi32_shuff_mask0: 10564; GENERIC: # %bb.0: 10565; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 10566; GENERIC-NEXT: retq # sched: [1:1.00] 10567; 10568; SKX-LABEL: test_16xi32_shuff_mask0: 10569; SKX: # %bb.0: 10570; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 10571; SKX-NEXT: retq # sched: [7:1.00] 10572 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10573 ret <16 x i32> %res 10574} 10575define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 10576; GENERIC-LABEL: test_16xi32_masked_shuff_mask0: 10577; GENERIC: # %bb.0: 10578; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 10579; 
GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 10580; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 10581; GENERIC-NEXT: retq # sched: [1:1.00] 10582; 10583; SKX-LABEL: test_16xi32_masked_shuff_mask0: 10584; SKX: # %bb.0: 10585; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 10586; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 10587; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 10588; SKX-NEXT: retq # sched: [7:1.00] 10589 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10590 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10591 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10592 ret <16 x i32> %res 10593} 10594 10595define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 10596; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask0: 10597; GENERIC: # %bb.0: 10598; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10599; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 10600; GENERIC-NEXT: retq # sched: [1:1.00] 10601; 10602; SKX-LABEL: test_16xi32_zero_masked_shuff_mask0: 10603; SKX: # %bb.0: 10604; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10605; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 10606; SKX-NEXT: retq # sched: [7:1.00] 10607 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10608 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10609 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 
10610 ret <16 x i32> %res 10611} 10612define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 10613; GENERIC-LABEL: test_16xi32_masked_shuff_mask1: 10614; GENERIC: # %bb.0: 10615; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 10616; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10617; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 10618; GENERIC-NEXT: retq # sched: [1:1.00] 10619; 10620; SKX-LABEL: test_16xi32_masked_shuff_mask1: 10621; SKX: # %bb.0: 10622; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 10623; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10624; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 10625; SKX-NEXT: retq # sched: [7:1.00] 10626 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10627 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10628 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10629 ret <16 x i32> %res 10630} 10631 10632define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 10633; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask1: 10634; GENERIC: # %bb.0: 10635; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10636; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10637; GENERIC-NEXT: retq # sched: [1:1.00] 10638; 10639; SKX-LABEL: test_16xi32_zero_masked_shuff_mask1: 10640; SKX: # %bb.0: 10641; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10642; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10643; SKX-NEXT: retq # sched: [7:1.00] 
10644 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10645 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10646 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10647 ret <16 x i32> %res 10648} 10649define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 10650; GENERIC-LABEL: test_16xi32_masked_shuff_mask2: 10651; GENERIC: # %bb.0: 10652; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 10653; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] 10654; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 10655; GENERIC-NEXT: retq # sched: [1:1.00] 10656; 10657; SKX-LABEL: test_16xi32_masked_shuff_mask2: 10658; SKX: # %bb.0: 10659; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 10660; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00] 10661; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 10662; SKX-NEXT: retq # sched: [7:1.00] 10663 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19> 10664 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10665 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10666 ret <16 x i32> %res 10667} 10668 10669define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 10670; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask2: 10671; GENERIC: # %bb.0: 10672; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10673; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] 10674; GENERIC-NEXT: retq # 
sched: [1:1.00] 10675; 10676; SKX-LABEL: test_16xi32_zero_masked_shuff_mask2: 10677; SKX: # %bb.0: 10678; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10679; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00] 10680; SKX-NEXT: retq # sched: [7:1.00] 10681 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19> 10682 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10683 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10684 ret <16 x i32> %res 10685} 10686define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) { 10687; GENERIC-LABEL: test_16xi32_shuff_mask3: 10688; GENERIC: # %bb.0: 10689; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10690; GENERIC-NEXT: retq # sched: [1:1.00] 10691; 10692; SKX-LABEL: test_16xi32_shuff_mask3: 10693; SKX: # %bb.0: 10694; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10695; SKX-NEXT: retq # sched: [7:1.00] 10696 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10697 ret <16 x i32> %res 10698} 10699define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 10700; GENERIC-LABEL: test_16xi32_masked_shuff_mask3: 10701; GENERIC: # %bb.0: 10702; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 10703; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10704; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 10705; GENERIC-NEXT: retq # sched: [1:1.00] 10706; 10707; SKX-LABEL: test_16xi32_masked_shuff_mask3: 
10708; SKX: # %bb.0: 10709; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 10710; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10711; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 10712; SKX-NEXT: retq # sched: [7:1.00] 10713 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10714 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10715 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10716 ret <16 x i32> %res 10717} 10718 10719define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 10720; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask3: 10721; GENERIC: # %bb.0: 10722; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10723; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10724; GENERIC-NEXT: retq # sched: [1:1.00] 10725; 10726; SKX-LABEL: test_16xi32_zero_masked_shuff_mask3: 10727; SKX: # %bb.0: 10728; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10729; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10730; SKX-NEXT: retq # sched: [7:1.00] 10731 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10732 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10733 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10734 ret <16 x i32> %res 10735} 10736define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) { 10737; GENERIC-LABEL: test_16xi32_shuff_mem_mask0: 10738; GENERIC: # %bb.0: 10739; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = 
zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] 10740; GENERIC-NEXT: retq # sched: [1:1.00] 10741; 10742; SKX-LABEL: test_16xi32_shuff_mem_mask0: 10743; SKX: # %bb.0: 10744; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] 10745; SKX-NEXT: retq # sched: [7:1.00] 10746 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10747 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19> 10748 ret <16 x i32> %res 10749} 10750define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 10751; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0: 10752; GENERIC: # %bb.0: 10753; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10754; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] 10755; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 10756; GENERIC-NEXT: retq # sched: [1:1.00] 10757; 10758; SKX-LABEL: test_16xi32_masked_shuff_mem_mask0: 10759; SKX: # %bb.0: 10760; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10761; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] 10762; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 10763; SKX-NEXT: retq # sched: [7:1.00] 10764 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10765 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19> 10766 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10767 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10768 ret <16 x i32> %res 10769} 10770 10771define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x 
i32> %mask) { 10772; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: 10773; GENERIC: # %bb.0: 10774; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 10775; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] 10776; GENERIC-NEXT: retq # sched: [1:1.00] 10777; 10778; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: 10779; SKX: # %bb.0: 10780; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 10781; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] 10782; SKX-NEXT: retq # sched: [7:1.00] 10783 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10784 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19> 10785 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10786 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10787 ret <16 x i32> %res 10788} 10789 10790define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 10791; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1: 10792; GENERIC: # %bb.0: 10793; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10794; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] 10795; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 10796; GENERIC-NEXT: retq # sched: [1:1.00] 10797; 10798; SKX-LABEL: test_16xi32_masked_shuff_mem_mask1: 10799; SKX: # %bb.0: 10800; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10801; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00] 10802; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 10803; SKX-NEXT: retq # sched: [7:1.00] 10804 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10805 %shuf = 
shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 10806 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10807 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10808 ret <16 x i32> %res 10809} 10810 10811define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 10812; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: 10813; GENERIC: # %bb.0: 10814; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 10815; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] 10816; GENERIC-NEXT: retq # sched: [1:1.00] 10817; 10818; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: 10819; SKX: # %bb.0: 10820; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 10821; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00] 10822; SKX-NEXT: retq # sched: [7:1.00] 10823 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10824 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 10825 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10826 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10827 ret <16 x i32> %res 10828} 10829 10830define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 10831; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2: 10832; GENERIC: # %bb.0: 10833; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10834; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 10835; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 10836; 
GENERIC-NEXT: retq # sched: [1:1.00] 10837; 10838; SKX-LABEL: test_16xi32_masked_shuff_mem_mask2: 10839; SKX: # %bb.0: 10840; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10841; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 10842; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 10843; SKX-NEXT: retq # sched: [7:1.00] 10844 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10845 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 10846 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10847 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10848 ret <16 x i32> %res 10849} 10850 10851define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 10852; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: 10853; GENERIC: # %bb.0: 10854; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 10855; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 10856; GENERIC-NEXT: retq # sched: [1:1.00] 10857; 10858; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: 10859; SKX: # %bb.0: 10860; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 10861; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 10862; SKX-NEXT: retq # sched: [7:1.00] 10863 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10864 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 10865 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10866 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10867 ret <16 x i32> %res 10868} 10869 
10870define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) { 10871; GENERIC-LABEL: test_16xi32_shuff_mem_mask3: 10872; GENERIC: # %bb.0: 10873; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] 10874; GENERIC-NEXT: retq # sched: [1:1.00] 10875; 10876; SKX-LABEL: test_16xi32_shuff_mem_mask3: 10877; SKX: # %bb.0: 10878; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] 10879; SKX-NEXT: retq # sched: [7:1.00] 10880 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10881 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10882 ret <16 x i32> %res 10883} 10884define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 10885; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3: 10886; GENERIC: # %bb.0: 10887; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10888; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] 10889; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 10890; GENERIC-NEXT: retq # sched: [1:1.00] 10891; 10892; SKX-LABEL: test_16xi32_masked_shuff_mem_mask3: 10893; SKX: # %bb.0: 10894; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10895; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] 10896; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 10897; SKX-NEXT: retq # sched: [7:1.00] 10898 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10899 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10900 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10901 
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10902 ret <16 x i32> %res 10903} 10904 10905define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 10906; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: 10907; GENERIC: # %bb.0: 10908; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 10909; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] 10910; GENERIC-NEXT: retq # sched: [1:1.00] 10911; 10912; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: 10913; SKX: # %bb.0: 10914; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 10915; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] 10916; SKX-NEXT: retq # sched: [7:1.00] 10917 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10918 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10919 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10920 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10921 ret <16 x i32> %res 10922} 10923 10924define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) { 10925; GENERIC-LABEL: test_4xi64_shuff_mask0: 10926; GENERIC: # %bb.0: 10927; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 10928; GENERIC-NEXT: retq # sched: [1:1.00] 10929; 10930; SKX-LABEL: test_4xi64_shuff_mask0: 10931; SKX: # %bb.0: 10932; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 10933; SKX-NEXT: retq # sched: [7:1.00] 10934 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 10935 ret <4 x i64> %res 10936} 10937define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) 
{ 10938; GENERIC-LABEL: test_4xi64_masked_shuff_mask0: 10939; GENERIC: # %bb.0: 10940; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 10941; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 10942; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10943; GENERIC-NEXT: retq # sched: [1:1.00] 10944; 10945; SKX-LABEL: test_4xi64_masked_shuff_mask0: 10946; SKX: # %bb.0: 10947; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 10948; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 10949; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10950; SKX-NEXT: retq # sched: [7:1.00] 10951 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 10952 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 10953 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 10954 ret <4 x i64> %res 10955} 10956 10957define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 10958; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask0: 10959; GENERIC: # %bb.0: 10960; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 10961; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 10962; GENERIC-NEXT: retq # sched: [1:1.00] 10963; 10964; SKX-LABEL: test_4xi64_zero_masked_shuff_mask0: 10965; SKX: # %bb.0: 10966; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 10967; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 10968; SKX-NEXT: retq # sched: [7:1.00] 10969 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 10970 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 10971 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 10972 ret <4 x i64> %res 10973} 10974define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 10975; 
GENERIC-LABEL: test_4xi64_masked_shuff_mask1: 10976; GENERIC: # %bb.0: 10977; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 10978; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 10979; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10980; GENERIC-NEXT: retq # sched: [1:1.00] 10981; 10982; SKX-LABEL: test_4xi64_masked_shuff_mask1: 10983; SKX: # %bb.0: 10984; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 10985; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 10986; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10987; SKX-NEXT: retq # sched: [7:1.00] 10988 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 10989 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 10990 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 10991 ret <4 x i64> %res 10992} 10993 10994define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 10995; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask1: 10996; GENERIC: # %bb.0: 10997; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 10998; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 10999; GENERIC-NEXT: retq # sched: [1:1.00] 11000; 11001; SKX-LABEL: test_4xi64_zero_masked_shuff_mask1: 11002; SKX: # %bb.0: 11003; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11004; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 11005; SKX-NEXT: retq # sched: [7:1.00] 11006 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11007 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11008 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11009 ret <4 x i64> %res 11010} 11011define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 11012; GENERIC-LABEL: 
test_4xi64_masked_shuff_mask2: 11013; GENERIC: # %bb.0: 11014; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 11015; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 11016; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 11017; GENERIC-NEXT: retq # sched: [1:1.00] 11018; 11019; SKX-LABEL: test_4xi64_masked_shuff_mask2: 11020; SKX: # %bb.0: 11021; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 11022; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 11023; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 11024; SKX-NEXT: retq # sched: [7:1.00] 11025 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11026 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11027 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11028 ret <4 x i64> %res 11029} 11030 11031define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 11032; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask2: 11033; GENERIC: # %bb.0: 11034; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11035; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 11036; GENERIC-NEXT: retq # sched: [1:1.00] 11037; 11038; SKX-LABEL: test_4xi64_zero_masked_shuff_mask2: 11039; SKX: # %bb.0: 11040; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11041; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 11042; SKX-NEXT: retq # sched: [7:1.00] 11043 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11044 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11045 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11046 ret <4 x i64> %res 11047} 11048define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) { 11049; GENERIC-LABEL: test_4xi64_shuff_mask3: 11050; GENERIC: # %bb.0: 11051; 
GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 11052; GENERIC-NEXT: retq # sched: [1:1.00] 11053; 11054; SKX-LABEL: test_4xi64_shuff_mask3: 11055; SKX: # %bb.0: 11056; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 11057; SKX-NEXT: retq # sched: [7:1.00] 11058 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11059 ret <4 x i64> %res 11060} 11061define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 11062; GENERIC-LABEL: test_4xi64_masked_shuff_mask3: 11063; GENERIC: # %bb.0: 11064; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 11065; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 11066; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 11067; GENERIC-NEXT: retq # sched: [1:1.00] 11068; 11069; SKX-LABEL: test_4xi64_masked_shuff_mask3: 11070; SKX: # %bb.0: 11071; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 11072; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 11073; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 11074; SKX-NEXT: retq # sched: [7:1.00] 11075 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11076 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11077 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11078 ret <4 x i64> %res 11079} 11080 11081define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 11082; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask3: 11083; GENERIC: # %bb.0: 11084; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11085; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 11086; GENERIC-NEXT: retq # sched: [1:1.00] 11087; 11088; SKX-LABEL: test_4xi64_zero_masked_shuff_mask3: 11089; SKX: # %bb.0: 11090; SKX-NEXT: vptestnmq 
%ymm2, %ymm2, %k1 # sched: [3:1.00] 11091; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 11092; SKX-NEXT: retq # sched: [7:1.00] 11093 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11094 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11095 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11096 ret <4 x i64> %res 11097} 11098define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) { 11099; GENERIC-LABEL: test_4xi64_shuff_mem_mask0: 11100; GENERIC: # %bb.0: 11101; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 11102; GENERIC-NEXT: retq # sched: [1:1.00] 11103; 11104; SKX-LABEL: test_4xi64_shuff_mem_mask0: 11105; SKX: # %bb.0: 11106; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 11107; SKX-NEXT: retq # sched: [7:1.00] 11108 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11109 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11110 ret <4 x i64> %res 11111} 11112define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 11113; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0: 11114; GENERIC: # %bb.0: 11115; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11116; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] 11117; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 11118; GENERIC-NEXT: retq # sched: [1:1.00] 11119; 11120; SKX-LABEL: test_4xi64_masked_shuff_mem_mask0: 11121; SKX: # %bb.0: 11122; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11123; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] 11124; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 11125; SKX-NEXT: retq # sched: [7:1.00] 11126 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11127 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> 
%vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11128 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11129 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11130 ret <4 x i64> %res 11131} 11132 11133define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 11134; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: 11135; GENERIC: # %bb.0: 11136; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 11137; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] 11138; GENERIC-NEXT: retq # sched: [1:1.00] 11139; 11140; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: 11141; SKX: # %bb.0: 11142; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 11143; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] 11144; SKX-NEXT: retq # sched: [7:1.00] 11145 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11146 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11147 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11148 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11149 ret <4 x i64> %res 11150} 11151 11152define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 11153; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1: 11154; GENERIC: # %bb.0: 11155; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11156; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] 11157; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 11158; GENERIC-NEXT: retq # sched: [1:1.00] 11159; 11160; SKX-LABEL: test_4xi64_masked_shuff_mem_mask1: 11161; SKX: # %bb.0: 11162; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11163; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] 11164; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 11165; SKX-NEXT: retq # sched: 
[7:1.00] 11166 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11167 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11168 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11169 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11170 ret <4 x i64> %res 11171} 11172 11173define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 11174; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: 11175; GENERIC: # %bb.0: 11176; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 11177; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] 11178; GENERIC-NEXT: retq # sched: [1:1.00] 11179; 11180; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: 11181; SKX: # %bb.0: 11182; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 11183; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] 11184; SKX-NEXT: retq # sched: [7:1.00] 11185 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11186 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11187 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11188 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11189 ret <4 x i64> %res 11190} 11191 11192define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 11193; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2: 11194; GENERIC: # %bb.0: 11195; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11196; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] 11197; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 11198; GENERIC-NEXT: retq # sched: [1:1.00] 11199; 11200; SKX-LABEL: test_4xi64_masked_shuff_mem_mask2: 11201; SKX: # %bb.0: 11202; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11203; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = 
ymm0[2,3],mem[0,1] sched: [10:1.00] 11204; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 11205; SKX-NEXT: retq # sched: [7:1.00] 11206 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11207 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11208 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11209 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11210 ret <4 x i64> %res 11211} 11212 11213define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 11214; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: 11215; GENERIC: # %bb.0: 11216; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 11217; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] 11218; GENERIC-NEXT: retq # sched: [1:1.00] 11219; 11220; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: 11221; SKX: # %bb.0: 11222; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 11223; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] 11224; SKX-NEXT: retq # sched: [7:1.00] 11225 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11226 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11227 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11228 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11229 ret <4 x i64> %res 11230} 11231 11232define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) { 11233; GENERIC-LABEL: test_4xi64_shuff_mem_mask3: 11234; GENERIC: # %bb.0: 11235; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 11236; GENERIC-NEXT: retq # sched: [1:1.00] 11237; 11238; SKX-LABEL: test_4xi64_shuff_mem_mask3: 11239; SKX: # %bb.0: 11240; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 11241; SKX-NEXT: retq # sched: [7:1.00] 11242 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11243 %res = 
shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11244 ret <4 x i64> %res 11245} 11246define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 11247; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3: 11248; GENERIC: # %bb.0: 11249; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11250; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] 11251; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 11252; GENERIC-NEXT: retq # sched: [1:1.00] 11253; 11254; SKX-LABEL: test_4xi64_masked_shuff_mem_mask3: 11255; SKX: # %bb.0: 11256; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11257; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] 11258; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 11259; SKX-NEXT: retq # sched: [7:1.00] 11260 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11261 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11262 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11263 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11264 ret <4 x i64> %res 11265} 11266 11267define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 11268; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: 11269; GENERIC: # %bb.0: 11270; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 11271; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] 11272; GENERIC-NEXT: retq # sched: [1:1.00] 11273; 11274; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: 11275; SKX: # %bb.0: 11276; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 11277; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] 11278; SKX-NEXT: retq # sched: [7:1.00] 11279 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11280 %shuf = shufflevector <4 x i64> 
%vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11281 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11282 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11283 ret <4 x i64> %res 11284} 11285 11286define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) { 11287; GENERIC-LABEL: test_8xi64_shuff_mask0: 11288; GENERIC: # %bb.0: 11289; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] 11290; GENERIC-NEXT: retq # sched: [1:1.00] 11291; 11292; SKX-LABEL: test_8xi64_shuff_mask0: 11293; SKX: # %bb.0: 11294; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] 11295; SKX-NEXT: retq # sched: [7:1.00] 11296 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> 11297 ret <8 x i64> %res 11298} 11299define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 11300; GENERIC-LABEL: test_8xi64_masked_shuff_mask0: 11301; GENERIC: # %bb.0: 11302; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 11303; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] 11304; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 11305; GENERIC-NEXT: retq # sched: [1:1.00] 11306; 11307; SKX-LABEL: test_8xi64_masked_shuff_mask0: 11308; SKX: # %bb.0: 11309; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 11310; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] 11311; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 11312; SKX-NEXT: retq # sched: [7:1.00] 11313 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> 11314 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11315 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11316 ret <8 x i64> %res 11317} 11318 11319define <8 x 
i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { 11320; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask0: 11321; GENERIC: # %bb.0: 11322; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11323; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] 11324; GENERIC-NEXT: retq # sched: [1:1.00] 11325; 11326; SKX-LABEL: test_8xi64_zero_masked_shuff_mask0: 11327; SKX: # %bb.0: 11328; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11329; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] 11330; SKX-NEXT: retq # sched: [7:1.00] 11331 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> 11332 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11333 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11334 ret <8 x i64> %res 11335} 11336define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 11337; GENERIC-LABEL: test_8xi64_masked_shuff_mask1: 11338; GENERIC: # %bb.0: 11339; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 11340; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] 11341; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 11342; GENERIC-NEXT: retq # sched: [1:1.00] 11343; 11344; SKX-LABEL: test_8xi64_masked_shuff_mask1: 11345; SKX: # %bb.0: 11346; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 11347; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00] 11348; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 11349; SKX-NEXT: retq # sched: [7:1.00] 11350 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13> 11351 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11352 %res = select <8 x i1> %cmp, <8 
x i64> %shuf, <8 x i64> %vec3 11353 ret <8 x i64> %res 11354} 11355 11356define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { 11357; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask1: 11358; GENERIC: # %bb.0: 11359; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11360; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] 11361; GENERIC-NEXT: retq # sched: [1:1.00] 11362; 11363; SKX-LABEL: test_8xi64_zero_masked_shuff_mask1: 11364; SKX: # %bb.0: 11365; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11366; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00] 11367; SKX-NEXT: retq # sched: [7:1.00] 11368 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13> 11369 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11370 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11371 ret <8 x i64> %res 11372} 11373define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 11374; GENERIC-LABEL: test_8xi64_masked_shuff_mask2: 11375; GENERIC: # %bb.0: 11376; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 11377; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] 11378; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 11379; GENERIC-NEXT: retq # sched: [1:1.00] 11380; 11381; SKX-LABEL: test_8xi64_masked_shuff_mask2: 11382; SKX: # %bb.0: 11383; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 11384; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00] 11385; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 11386; SKX-NEXT: retq # sched: [7:1.00] 11387 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9> 11388 %cmp 
= icmp eq <8 x i64> %mask, zeroinitializer 11389 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11390 ret <8 x i64> %res 11391} 11392 11393define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { 11394; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask2: 11395; GENERIC: # %bb.0: 11396; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11397; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] 11398; GENERIC-NEXT: retq # sched: [1:1.00] 11399; 11400; SKX-LABEL: test_8xi64_zero_masked_shuff_mask2: 11401; SKX: # %bb.0: 11402; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11403; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00] 11404; SKX-NEXT: retq # sched: [7:1.00] 11405 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9> 11406 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11407 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11408 ret <8 x i64> %res 11409} 11410define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) { 11411; GENERIC-LABEL: test_8xi64_shuff_mask3: 11412; GENERIC: # %bb.0: 11413; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] 11414; GENERIC-NEXT: retq # sched: [1:1.00] 11415; 11416; SKX-LABEL: test_8xi64_shuff_mask3: 11417; SKX: # %bb.0: 11418; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] 11419; SKX-NEXT: retq # sched: [7:1.00] 11420 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11> 11421 ret <8 x i64> %res 11422} 11423define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 11424; GENERIC-LABEL: test_8xi64_masked_shuff_mask3: 11425; GENERIC: # %bb.0: 11426; 
GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 11427; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] 11428; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 11429; GENERIC-NEXT: retq # sched: [1:1.00] 11430; 11431; SKX-LABEL: test_8xi64_masked_shuff_mask3: 11432; SKX: # %bb.0: 11433; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 11434; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] 11435; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 11436; SKX-NEXT: retq # sched: [7:1.00] 11437 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11> 11438 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11439 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11440 ret <8 x i64> %res 11441} 11442 11443define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { 11444; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask3: 11445; GENERIC: # %bb.0: 11446; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11447; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] 11448; GENERIC-NEXT: retq # sched: [1:1.00] 11449; 11450; SKX-LABEL: test_8xi64_zero_masked_shuff_mask3: 11451; SKX: # %bb.0: 11452; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11453; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] 11454; SKX-NEXT: retq # sched: [7:1.00] 11455 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11> 11456 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11457 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11458 ret <8 x i64> %res 11459} 11460define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) { 11461; GENERIC-LABEL: 
test_8xi64_shuff_mem_mask0: 11462; GENERIC: # %bb.0: 11463; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] 11464; GENERIC-NEXT: retq # sched: [1:1.00] 11465; 11466; SKX-LABEL: test_8xi64_shuff_mem_mask0: 11467; SKX: # %bb.0: 11468; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] 11469; SKX-NEXT: retq # sched: [7:1.00] 11470 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11471 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11> 11472 ret <8 x i64> %res 11473} 11474define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { 11475; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0: 11476; GENERIC: # %bb.0: 11477; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11478; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] 11479; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 11480; GENERIC-NEXT: retq # sched: [1:1.00] 11481; 11482; SKX-LABEL: test_8xi64_masked_shuff_mem_mask0: 11483; SKX: # %bb.0: 11484; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11485; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] 11486; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 11487; SKX-NEXT: retq # sched: [7:1.00] 11488 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11489 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11> 11490 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11491 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11492 ret <8 x i64> %res 11493} 11494 11495define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { 11496; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: 11497; GENERIC: # %bb.0: 11498; GENERIC-NEXT: vptestnmq 
%zmm1, %zmm1, %k1 # sched: [1:0.33] 11499; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] 11500; GENERIC-NEXT: retq # sched: [1:1.00] 11501; 11502; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: 11503; SKX: # %bb.0: 11504; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 11505; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] 11506; SKX-NEXT: retq # sched: [7:1.00] 11507 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11508 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11> 11509 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11510 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11511 ret <8 x i64> %res 11512} 11513 11514define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { 11515; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1: 11516; GENERIC: # %bb.0: 11517; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11518; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] 11519; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 11520; GENERIC-NEXT: retq # sched: [1:1.00] 11521; 11522; SKX-LABEL: test_8xi64_masked_shuff_mem_mask1: 11523; SKX: # %bb.0: 11524; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11525; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00] 11526; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 11527; SKX-NEXT: retq # sched: [7:1.00] 11528 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11529 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 11530 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11531 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11532 ret <8 x i64> %res 11533} 11534 11535define <8 x i64> 
@test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { 11536; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: 11537; GENERIC: # %bb.0: 11538; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 11539; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] 11540; GENERIC-NEXT: retq # sched: [1:1.00] 11541; 11542; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: 11543; SKX: # %bb.0: 11544; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 11545; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00] 11546; SKX-NEXT: retq # sched: [7:1.00] 11547 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11548 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 11549 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11550 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11551 ret <8 x i64> %res 11552} 11553 11554define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { 11555; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2: 11556; GENERIC: # %bb.0: 11557; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11558; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] 11559; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 11560; GENERIC-NEXT: retq # sched: [1:1.00] 11561; 11562; SKX-LABEL: test_8xi64_masked_shuff_mem_mask2: 11563; SKX: # %bb.0: 11564; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11565; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00] 11566; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 11567; SKX-NEXT: retq # sched: [7:1.00] 11568 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11569 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 
10, i32 11, i32 10, i32 11> 11570 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11571 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11572 ret <8 x i64> %res 11573} 11574 11575define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { 11576; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: 11577; GENERIC: # %bb.0: 11578; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 11579; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] 11580; GENERIC-NEXT: retq # sched: [1:1.00] 11581; 11582; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: 11583; SKX: # %bb.0: 11584; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 11585; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00] 11586; SKX-NEXT: retq # sched: [7:1.00] 11587 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11588 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 10, i32 11, i32 10, i32 11> 11589 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11590 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11591 ret <8 x i64> %res 11592} 11593 11594define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) { 11595; GENERIC-LABEL: test_8xi64_shuff_mem_mask3: 11596; GENERIC: # %bb.0: 11597; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] 11598; GENERIC-NEXT: retq # sched: [1:1.00] 11599; 11600; SKX-LABEL: test_8xi64_shuff_mem_mask3: 11601; SKX: # %bb.0: 11602; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] 11603; SKX-NEXT: retq # sched: [7:1.00] 11604 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11605 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11> 11606 ret <8 x i64> %res 11607} 11608define <8 x i64> 
@test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { 11609; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3: 11610; GENERIC: # %bb.0: 11611; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11612; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] 11613; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 11614; GENERIC-NEXT: retq # sched: [1:1.00] 11615; 11616; SKX-LABEL: test_8xi64_masked_shuff_mem_mask3: 11617; SKX: # %bb.0: 11618; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11619; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] 11620; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 11621; SKX-NEXT: retq # sched: [7:1.00] 11622 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11623 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11> 11624 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11625 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11626 ret <8 x i64> %res 11627} 11628 11629define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { 11630; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: 11631; GENERIC: # %bb.0: 11632; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 11633; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] 11634; GENERIC-NEXT: retq # sched: [1:1.00] 11635; 11636; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: 11637; SKX: # %bb.0: 11638; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 11639; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] 11640; SKX-NEXT: retq # sched: [7:1.00] 11641 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11642 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 
15, i32 10, i32 11> 11643 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11644 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11645 ret <8 x i64> %res 11646} 11647 11648define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2) { 11649; GENERIC-LABEL: test_4xfloat_unpack_low_mask0: 11650; GENERIC: # %bb.0: 11651; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11652; GENERIC-NEXT: retq # sched: [1:1.00] 11653; 11654; SKX-LABEL: test_4xfloat_unpack_low_mask0: 11655; SKX: # %bb.0: 11656; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11657; SKX-NEXT: retq # sched: [7:1.00] 11658 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11659 ret <4 x float> %res 11660} 11661define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 11662; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask0: 11663; GENERIC: # %bb.0: 11664; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 11665; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11666; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 11667; GENERIC-NEXT: retq # sched: [1:1.00] 11668; 11669; SKX-LABEL: test_4xfloat_masked_unpack_low_mask0: 11670; SKX: # %bb.0: 11671; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 11672; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11673; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 11674; SKX-NEXT: retq # sched: [7:1.00] 11675 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11676 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11677 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11678 ret <4 x float> %res 11679} 11680 11681define <4 x float> 
@test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 11682; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: 11683; GENERIC: # %bb.0: 11684; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11685; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11686; GENERIC-NEXT: retq # sched: [1:1.00] 11687; 11688; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: 11689; SKX: # %bb.0: 11690; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11691; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11692; SKX-NEXT: retq # sched: [7:1.00] 11693 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11694 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11695 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11696 ret <4 x float> %res 11697} 11698define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 11699; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask1: 11700; GENERIC: # %bb.0: 11701; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 11702; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11703; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 11704; GENERIC-NEXT: retq # sched: [1:1.00] 11705; 11706; SKX-LABEL: test_4xfloat_masked_unpack_low_mask1: 11707; SKX: # %bb.0: 11708; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 11709; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11710; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 11711; SKX-NEXT: retq # sched: [7:1.00] 11712 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11713 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11714 %res = select <4 x i1> 
%cmp, <4 x float> %shuf, <4 x float> %vec3 11715 ret <4 x float> %res 11716} 11717 11718define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 11719; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: 11720; GENERIC: # %bb.0: 11721; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11722; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11723; GENERIC-NEXT: retq # sched: [1:1.00] 11724; 11725; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: 11726; SKX: # %bb.0: 11727; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11728; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11729; SKX-NEXT: retq # sched: [7:1.00] 11730 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11731 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11732 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11733 ret <4 x float> %res 11734} 11735define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 11736; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask2: 11737; GENERIC: # %bb.0: 11738; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 11739; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11740; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 11741; GENERIC-NEXT: retq # sched: [1:1.00] 11742; 11743; SKX-LABEL: test_4xfloat_masked_unpack_low_mask2: 11744; SKX: # %bb.0: 11745; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 11746; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11747; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 11748; SKX-NEXT: retq # sched: [7:1.00] 11749 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, 
i32 4, i32 1, i32 5> 11750 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11751 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11752 ret <4 x float> %res 11753} 11754 11755define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 11756; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: 11757; GENERIC: # %bb.0: 11758; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11759; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11760; GENERIC-NEXT: retq # sched: [1:1.00] 11761; 11762; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: 11763; SKX: # %bb.0: 11764; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11765; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11766; SKX-NEXT: retq # sched: [7:1.00] 11767 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11768 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11769 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11770 ret <4 x float> %res 11771} 11772define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2) { 11773; GENERIC-LABEL: test_4xfloat_unpack_low_mask3: 11774; GENERIC: # %bb.0: 11775; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11776; GENERIC-NEXT: retq # sched: [1:1.00] 11777; 11778; SKX-LABEL: test_4xfloat_unpack_low_mask3: 11779; SKX: # %bb.0: 11780; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11781; SKX-NEXT: retq # sched: [7:1.00] 11782 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11783 ret <4 x float> %res 11784} 11785define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 11786; 
GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask3: 11787; GENERIC: # %bb.0: 11788; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 11789; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11790; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 11791; GENERIC-NEXT: retq # sched: [1:1.00] 11792; 11793; SKX-LABEL: test_4xfloat_masked_unpack_low_mask3: 11794; SKX: # %bb.0: 11795; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 11796; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11797; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 11798; SKX-NEXT: retq # sched: [7:1.00] 11799 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11800 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11801 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11802 ret <4 x float> %res 11803} 11804 11805define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 11806; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: 11807; GENERIC: # %bb.0: 11808; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11809; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11810; GENERIC-NEXT: retq # sched: [1:1.00] 11811; 11812; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: 11813; SKX: # %bb.0: 11814; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11815; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11816; SKX-NEXT: retq # sched: [7:1.00] 11817 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11818 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11819 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11820 ret <4 x float> %res 11821} 11822define <4 x float> 
@test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) { 11823; GENERIC-LABEL: test_4xfloat_unpack_low_mem_mask0: 11824; GENERIC: # %bb.0: 11825; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11826; GENERIC-NEXT: retq # sched: [1:1.00] 11827; 11828; SKX-LABEL: test_4xfloat_unpack_low_mem_mask0: 11829; SKX: # %bb.0: 11830; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11831; SKX-NEXT: retq # sched: [7:1.00] 11832 %vec2 = load <4 x float>, <4 x float>* %vec2p 11833 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11834 ret <4 x float> %res 11835} 11836define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 11837; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: 11838; GENERIC: # %bb.0: 11839; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11840; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11841; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 11842; GENERIC-NEXT: retq # sched: [1:1.00] 11843; 11844; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: 11845; SKX: # %bb.0: 11846; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11847; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11848; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 11849; SKX-NEXT: retq # sched: [7:1.00] 11850 %vec2 = load <4 x float>, <4 x float>* %vec2p 11851 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11852 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11853 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11854 ret <4 x float> %res 11855} 11856 11857define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 
11858; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: 11859; GENERIC: # %bb.0: 11860; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 11861; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11862; GENERIC-NEXT: retq # sched: [1:1.00] 11863; 11864; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: 11865; SKX: # %bb.0: 11866; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 11867; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11868; SKX-NEXT: retq # sched: [7:1.00] 11869 %vec2 = load <4 x float>, <4 x float>* %vec2p 11870 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11871 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11872 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11873 ret <4 x float> %res 11874} 11875 11876define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 11877; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: 11878; GENERIC: # %bb.0: 11879; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11880; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11881; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 11882; GENERIC-NEXT: retq # sched: [1:1.00] 11883; 11884; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: 11885; SKX: # %bb.0: 11886; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11887; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11888; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 11889; SKX-NEXT: retq # sched: [7:1.00] 11890 %vec2 = load <4 x float>, <4 x float>* %vec2p 11891 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11892 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11893 
%res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11894 ret <4 x float> %res 11895} 11896 11897define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 11898; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: 11899; GENERIC: # %bb.0: 11900; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 11901; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11902; GENERIC-NEXT: retq # sched: [1:1.00] 11903; 11904; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: 11905; SKX: # %bb.0: 11906; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 11907; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11908; SKX-NEXT: retq # sched: [7:1.00] 11909 %vec2 = load <4 x float>, <4 x float>* %vec2p 11910 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11911 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11912 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11913 ret <4 x float> %res 11914} 11915 11916define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 11917; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: 11918; GENERIC: # %bb.0: 11919; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11920; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11921; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 11922; GENERIC-NEXT: retq # sched: [1:1.00] 11923; 11924; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: 11925; SKX: # %bb.0: 11926; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11927; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11928; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 11929; SKX-NEXT: retq 
# sched: [7:1.00] 11930 %vec2 = load <4 x float>, <4 x float>* %vec2p 11931 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11932 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11933 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11934 ret <4 x float> %res 11935} 11936 11937define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 11938; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: 11939; GENERIC: # %bb.0: 11940; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 11941; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11942; GENERIC-NEXT: retq # sched: [1:1.00] 11943; 11944; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: 11945; SKX: # %bb.0: 11946; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 11947; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11948; SKX-NEXT: retq # sched: [7:1.00] 11949 %vec2 = load <4 x float>, <4 x float>* %vec2p 11950 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11951 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11952 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11953 ret <4 x float> %res 11954} 11955 11956define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) { 11957; GENERIC-LABEL: test_4xfloat_unpack_low_mem_mask3: 11958; GENERIC: # %bb.0: 11959; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11960; GENERIC-NEXT: retq # sched: [1:1.00] 11961; 11962; SKX-LABEL: test_4xfloat_unpack_low_mem_mask3: 11963; SKX: # %bb.0: 11964; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11965; SKX-NEXT: retq # sched: [7:1.00] 11966 %vec2 = load <4 x float>, <4 x float>* %vec2p 11967 %res 
= shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11968 ret <4 x float> %res 11969} 11970define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 11971; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: 11972; GENERIC: # %bb.0: 11973; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11974; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11975; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 11976; GENERIC-NEXT: retq # sched: [1:1.00] 11977; 11978; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: 11979; SKX: # %bb.0: 11980; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11981; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11982; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 11983; SKX-NEXT: retq # sched: [7:1.00] 11984 %vec2 = load <4 x float>, <4 x float>* %vec2p 11985 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11986 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11987 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11988 ret <4 x float> %res 11989} 11990 11991define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 11992; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: 11993; GENERIC: # %bb.0: 11994; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 11995; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11996; GENERIC-NEXT: retq # sched: [1:1.00] 11997; 11998; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: 11999; SKX: # %bb.0: 12000; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 12001; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 12002; 
SKX-NEXT: retq # sched: [7:1.00] 12003 %vec2 = load <4 x float>, <4 x float>* %vec2p 12004 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 12005 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 12006 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 12007 ret <4 x float> %res 12008} 12009 12010define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2) { 12011; GENERIC-LABEL: test_8xfloat_unpack_low_mask0: 12012; GENERIC: # %bb.0: 12013; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12014; GENERIC-NEXT: retq # sched: [1:1.00] 12015; 12016; SKX-LABEL: test_8xfloat_unpack_low_mask0: 12017; SKX: # %bb.0: 12018; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12019; SKX-NEXT: retq # sched: [7:1.00] 12020 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12021 ret <8 x float> %res 12022} 12023define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 12024; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask0: 12025; GENERIC: # %bb.0: 12026; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 12027; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12028; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 12029; GENERIC-NEXT: retq # sched: [1:1.00] 12030; 12031; SKX-LABEL: test_8xfloat_masked_unpack_low_mask0: 12032; SKX: # %bb.0: 12033; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 12034; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12035; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 12036; SKX-NEXT: retq 
# sched: [7:1.00] 12037 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12038 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12039 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12040 ret <8 x float> %res 12041} 12042 12043define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 12044; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: 12045; GENERIC: # %bb.0: 12046; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12047; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12048; GENERIC-NEXT: retq # sched: [1:1.00] 12049; 12050; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: 12051; SKX: # %bb.0: 12052; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12053; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12054; SKX-NEXT: retq # sched: [7:1.00] 12055 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12056 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12057 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12058 ret <8 x float> %res 12059} 12060define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 12061; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask1: 12062; GENERIC: # %bb.0: 12063; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 12064; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12065; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 12066; GENERIC-NEXT: retq # sched: [1:1.00] 12067; 12068; SKX-LABEL: 
test_8xfloat_masked_unpack_low_mask1: 12069; SKX: # %bb.0: 12070; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 12071; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12072; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 12073; SKX-NEXT: retq # sched: [7:1.00] 12074 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12075 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12076 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12077 ret <8 x float> %res 12078} 12079 12080define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 12081; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: 12082; GENERIC: # %bb.0: 12083; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12084; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12085; GENERIC-NEXT: retq # sched: [1:1.00] 12086; 12087; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: 12088; SKX: # %bb.0: 12089; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12090; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12091; SKX-NEXT: retq # sched: [7:1.00] 12092 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12093 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12094 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12095 ret <8 x float> %res 12096} 12097define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 12098; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask2: 12099; GENERIC: # %bb.0: 12100; GENERIC-NEXT: vptestnmd 
%ymm3, %ymm3, %k1 # sched: [1:0.33] 12101; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12102; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 12103; GENERIC-NEXT: retq # sched: [1:1.00] 12104; 12105; SKX-LABEL: test_8xfloat_masked_unpack_low_mask2: 12106; SKX: # %bb.0: 12107; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 12108; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12109; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 12110; SKX-NEXT: retq # sched: [7:1.00] 12111 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12112 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12113 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12114 ret <8 x float> %res 12115} 12116 12117define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 12118; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: 12119; GENERIC: # %bb.0: 12120; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12121; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12122; GENERIC-NEXT: retq # sched: [1:1.00] 12123; 12124; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: 12125; SKX: # %bb.0: 12126; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12127; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12128; SKX-NEXT: retq # sched: [7:1.00] 12129 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12130 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12131 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> 
zeroinitializer 12132 ret <8 x float> %res 12133} 12134define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2) { 12135; GENERIC-LABEL: test_8xfloat_unpack_low_mask3: 12136; GENERIC: # %bb.0: 12137; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12138; GENERIC-NEXT: retq # sched: [1:1.00] 12139; 12140; SKX-LABEL: test_8xfloat_unpack_low_mask3: 12141; SKX: # %bb.0: 12142; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12143; SKX-NEXT: retq # sched: [7:1.00] 12144 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12145 ret <8 x float> %res 12146} 12147define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 12148; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask3: 12149; GENERIC: # %bb.0: 12150; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 12151; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12152; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 12153; GENERIC-NEXT: retq # sched: [1:1.00] 12154; 12155; SKX-LABEL: test_8xfloat_masked_unpack_low_mask3: 12156; SKX: # %bb.0: 12157; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 12158; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12159; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 12160; SKX-NEXT: retq # sched: [7:1.00] 12161 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12162 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12163 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12164 ret <8 x float> %res 12165} 

; Register form, zero-masking: interleave elements 0,1 and 4,5 of %vec1 and
; %vec2 (indices 0,8,1,9,4,12,5,13), keep the lanes whose %mask element is
; zero and clear the others. Both prefixes expect vptestnmd to build %k1,
; followed by a {z}-masked vunpcklps.
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %unpck = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %iszero = icmp eq <8 x i32> %mask, zeroinitializer
  %blend = select <8 x i1> %iszero, <8 x float> %unpck, <8 x float> zeroinitializer
  ret <8 x float> %blend
}

; Memory form, unmasked: the second shuffle operand is loaded from %vec2p.
; The expected assembly has no separate load; vunpcklps takes the mem[...]
; operand directly.
define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
; GENERIC-LABEL: test_8xfloat_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_unpack_low_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %unpck = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x float> %unpck
}

; Memory form, merge-masking: lanes whose %mask element is zero take the
; unpack-low result, the remaining lanes take %vec3. The expected assembly
; is a merge-masked vunpcklps into ymm1 followed by a vmovaps copy to ymm0.
define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %unpck = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %iszero = icmp eq <8 x i32> %mask, zeroinitializer
  %blend = select <8 x i1> %iszero, <8 x float> %unpck, <8 x float> %vec3
  ret <8 x float> %blend
}

; Memory form, zero-masking: same shuffle with the second operand loaded
; from %vec2p; lanes whose %mask element is non-zero are cleared. The
; expected assembly is vptestnmd plus a {z}-masked vunpcklps with a memory
; operand.
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %unpck = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %iszero = icmp eq <8 x i32> %mask, zeroinitializer
  %blend = select <8 x i1> %iszero, <8 x float> %unpck, <8 x float> zeroinitializer
  ret <8 x float> %blend
}

; Memory form, merge-masking (mask1 variant): the IR body and expected
; assembly match the mem_mask0 merge-masking test; only the function name
; differs.
define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %unpck = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %iszero = icmp eq <8 x i32> %mask, zeroinitializer
  %blend = select <8 x i1> %iszero, <8 x float> %unpck, <8 x float> %vec3
  ret <8 x float> %blend
}

; Memory form, zero-masking (mask1 variant): the IR body and expected
; assembly match the mem_mask0 zero-masking test; only the function name
; differs.
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %unpck = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %iszero = icmp eq <8 x i32> %mask, zeroinitializer
  %blend = select <8 x i1> %iszero, <8 x float> %unpck, <8 x float> zeroinitializer
  ret <8 x float> %blend
}

; Memory form, merge-masking (mask2 variant): the IR body and expected
; assembly match the mem_mask0 merge-masking test; only the function name
; differs.
define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %unpck = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %iszero = icmp eq <8 x i32> %mask, zeroinitializer
  %blend = select <8 x i1> %iszero, <8 x float> %unpck, <8 x float> %vec3
  ret <8 x float> %blend
}
12299define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 12300; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: 12301; GENERIC: # %bb.0: 12302; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 12303; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12304; GENERIC-NEXT: retq # sched: [1:1.00] 12305; 12306; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: 12307; SKX: # %bb.0: 12308; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 12309; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12310; SKX-NEXT: retq # sched: [7:1.00] 12311 %vec2 = load <8 x float>, <8 x float>* %vec2p 12312 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12313 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12314 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12315 ret <8 x float> %res 12316} 12317 12318define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { 12319; GENERIC-LABEL: test_8xfloat_unpack_low_mem_mask3: 12320; GENERIC: # %bb.0: 12321; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12322; GENERIC-NEXT: retq # sched: [1:1.00] 12323; 12324; SKX-LABEL: test_8xfloat_unpack_low_mem_mask3: 12325; SKX: # %bb.0: 12326; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12327; SKX-NEXT: retq # sched: [7:1.00] 12328 %vec2 = load <8 x float>, <8 x float>* %vec2p 12329 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12330 ret <8 x float> %res 12331} 12332define <8 x float> 
@test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 12333; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: 12334; GENERIC: # %bb.0: 12335; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12336; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12337; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 12338; GENERIC-NEXT: retq # sched: [1:1.00] 12339; 12340; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: 12341; SKX: # %bb.0: 12342; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12343; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12344; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 12345; SKX-NEXT: retq # sched: [7:1.00] 12346 %vec2 = load <8 x float>, <8 x float>* %vec2p 12347 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12348 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12349 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12350 ret <8 x float> %res 12351} 12352 12353define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 12354; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: 12355; GENERIC: # %bb.0: 12356; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 12357; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12358; GENERIC-NEXT: retq # sched: [1:1.00] 12359; 12360; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: 12361; SKX: # %bb.0: 12362; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 12363; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12364; 
SKX-NEXT: retq # sched: [7:1.00] 12365 %vec2 = load <8 x float>, <8 x float>* %vec2p 12366 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12367 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12368 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12369 ret <8 x float> %res 12370} 12371 12372define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) { 12373; GENERIC-LABEL: test_16xfloat_unpack_low_mask0: 12374; GENERIC: # %bb.0: 12375; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12376; GENERIC-NEXT: retq # sched: [1:1.00] 12377; 12378; SKX-LABEL: test_16xfloat_unpack_low_mask0: 12379; SKX: # %bb.0: 12380; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12381; SKX-NEXT: retq # sched: [7:1.00] 12382 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12383 ret <16 x float> %res 12384} 12385define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 12386; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask0: 12387; GENERIC: # %bb.0: 12388; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 12389; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12390; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 12391; GENERIC-NEXT: retq # sched: [1:1.00] 12392; 12393; SKX-LABEL: 
test_16xfloat_masked_unpack_low_mask0: 12394; SKX: # %bb.0: 12395; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 12396; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12397; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 12398; SKX-NEXT: retq # sched: [7:1.00] 12399 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12400 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12401 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12402 ret <16 x float> %res 12403} 12404 12405define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 12406; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: 12407; GENERIC: # %bb.0: 12408; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12409; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12410; GENERIC-NEXT: retq # sched: [1:1.00] 12411; 12412; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: 12413; SKX: # %bb.0: 12414; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12415; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12416; SKX-NEXT: retq # sched: [7:1.00] 12417 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12418 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12419 %res = select <16 
x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12420 ret <16 x float> %res 12421} 12422define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 12423; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask1: 12424; GENERIC: # %bb.0: 12425; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 12426; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12427; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 12428; GENERIC-NEXT: retq # sched: [1:1.00] 12429; 12430; SKX-LABEL: test_16xfloat_masked_unpack_low_mask1: 12431; SKX: # %bb.0: 12432; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 12433; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12434; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 12435; SKX-NEXT: retq # sched: [7:1.00] 12436 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12437 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12438 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12439 ret <16 x float> %res 12440} 12441 12442define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 12443; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: 12444; GENERIC: # %bb.0: 12445; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12446; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: 
[1:1.00] 12447; GENERIC-NEXT: retq # sched: [1:1.00] 12448; 12449; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: 12450; SKX: # %bb.0: 12451; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12452; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12453; SKX-NEXT: retq # sched: [7:1.00] 12454 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12455 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12456 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12457 ret <16 x float> %res 12458} 12459define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 12460; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask2: 12461; GENERIC: # %bb.0: 12462; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 12463; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12464; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 12465; GENERIC-NEXT: retq # sched: [1:1.00] 12466; 12467; SKX-LABEL: test_16xfloat_masked_unpack_low_mask2: 12468; SKX: # %bb.0: 12469; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 12470; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12471; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 12472; SKX-NEXT: retq # sched: [7:1.00] 12473 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, 
i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12474 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12475 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12476 ret <16 x float> %res 12477} 12478 12479define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 12480; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: 12481; GENERIC: # %bb.0: 12482; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12483; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12484; GENERIC-NEXT: retq # sched: [1:1.00] 12485; 12486; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: 12487; SKX: # %bb.0: 12488; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12489; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12490; SKX-NEXT: retq # sched: [7:1.00] 12491 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12492 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12493 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12494 ret <16 x float> %res 12495} 12496define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) { 12497; GENERIC-LABEL: test_16xfloat_unpack_low_mask3: 12498; GENERIC: # %bb.0: 12499; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12500; GENERIC-NEXT: retq # sched: [1:1.00] 12501; 12502; SKX-LABEL: 
test_16xfloat_unpack_low_mask3: 12503; SKX: # %bb.0: 12504; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12505; SKX-NEXT: retq # sched: [7:1.00] 12506 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12507 ret <16 x float> %res 12508} 12509define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 12510; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask3: 12511; GENERIC: # %bb.0: 12512; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 12513; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12514; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 12515; GENERIC-NEXT: retq # sched: [1:1.00] 12516; 12517; SKX-LABEL: test_16xfloat_masked_unpack_low_mask3: 12518; SKX: # %bb.0: 12519; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 12520; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12521; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 12522; SKX-NEXT: retq # sched: [7:1.00] 12523 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12524 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12525 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12526 ret <16 x float> %res 12527} 12528 12529define <16 x float> 
@test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 12530; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: 12531; GENERIC: # %bb.0: 12532; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12533; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12534; GENERIC-NEXT: retq # sched: [1:1.00] 12535; 12536; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: 12537; SKX: # %bb.0: 12538; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12539; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12540; SKX-NEXT: retq # sched: [7:1.00] 12541 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12542 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12543 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12544 ret <16 x float> %res 12545} 12546define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { 12547; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0: 12548; GENERIC: # %bb.0: 12549; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12550; GENERIC-NEXT: retq # sched: [1:1.00] 12551; 12552; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0: 12553; SKX: # %bb.0: 12554; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12555; SKX-NEXT: retq # sched: 
[7:1.00] 12556 %vec2 = load <16 x float>, <16 x float>* %vec2p 12557 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12558 ret <16 x float> %res 12559} 12560define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 12561; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: 12562; GENERIC: # %bb.0: 12563; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12564; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12565; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 12566; GENERIC-NEXT: retq # sched: [1:1.00] 12567; 12568; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: 12569; SKX: # %bb.0: 12570; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12571; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12572; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 12573; SKX-NEXT: retq # sched: [7:1.00] 12574 %vec2 = load <16 x float>, <16 x float>* %vec2p 12575 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12576 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12577 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12578 ret <16 x float> %res 12579} 12580 12581define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 12582; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: 12583; GENERIC: # %bb.0: 
12584; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 12585; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12586; GENERIC-NEXT: retq # sched: [1:1.00] 12587; 12588; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: 12589; SKX: # %bb.0: 12590; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 12591; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12592; SKX-NEXT: retq # sched: [7:1.00] 12593 %vec2 = load <16 x float>, <16 x float>* %vec2p 12594 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12595 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12596 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12597 ret <16 x float> %res 12598} 12599 12600define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 12601; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: 12602; GENERIC: # %bb.0: 12603; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12604; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12605; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 12606; GENERIC-NEXT: retq # sched: [1:1.00] 12607; 12608; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: 12609; SKX: # %bb.0: 12610; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12611; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = 
zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12612; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 12613; SKX-NEXT: retq # sched: [7:1.00] 12614 %vec2 = load <16 x float>, <16 x float>* %vec2p 12615 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12616 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12617 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12618 ret <16 x float> %res 12619} 12620 12621define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 12622; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: 12623; GENERIC: # %bb.0: 12624; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 12625; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12626; GENERIC-NEXT: retq # sched: [1:1.00] 12627; 12628; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: 12629; SKX: # %bb.0: 12630; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 12631; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12632; SKX-NEXT: retq # sched: [7:1.00] 12633 %vec2 = load <16 x float>, <16 x float>* %vec2p 12634 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12635 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12636 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12637 ret 
<16 x float> %res 12638} 12639 12640define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 12641; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: 12642; GENERIC: # %bb.0: 12643; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12644; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12645; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 12646; GENERIC-NEXT: retq # sched: [1:1.00] 12647; 12648; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: 12649; SKX: # %bb.0: 12650; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12651; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12652; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 12653; SKX-NEXT: retq # sched: [7:1.00] 12654 %vec2 = load <16 x float>, <16 x float>* %vec2p 12655 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12656 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12657 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12658 ret <16 x float> %res 12659} 12660 12661define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 12662; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: 12663; GENERIC: # %bb.0: 12664; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 12665; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12666; 
GENERIC-NEXT: retq # sched: [1:1.00] 12667; 12668; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: 12669; SKX: # %bb.0: 12670; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 12671; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12672; SKX-NEXT: retq # sched: [7:1.00] 12673 %vec2 = load <16 x float>, <16 x float>* %vec2p 12674 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12675 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12676 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12677 ret <16 x float> %res 12678} 12679 12680define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { 12681; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3: 12682; GENERIC: # %bb.0: 12683; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12684; GENERIC-NEXT: retq # sched: [1:1.00] 12685; 12686; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3: 12687; SKX: # %bb.0: 12688; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12689; SKX-NEXT: retq # sched: [7:1.00] 12690 %vec2 = load <16 x float>, <16 x float>* %vec2p 12691 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12692 ret <16 x float> %res 12693} 12694define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x 
float> %vec3, <16 x i32> %mask) { 12695; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: 12696; GENERIC: # %bb.0: 12697; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12698; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12699; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 12700; GENERIC-NEXT: retq # sched: [1:1.00] 12701; 12702; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: 12703; SKX: # %bb.0: 12704; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12705; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12706; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 12707; SKX-NEXT: retq # sched: [7:1.00] 12708 %vec2 = load <16 x float>, <16 x float>* %vec2p 12709 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12710 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12711 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12712 ret <16 x float> %res 12713} 12714 12715define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 12716; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: 12717; GENERIC: # %bb.0: 12718; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 12719; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12720; GENERIC-NEXT: retq # sched: [1:1.00] 12721; 12722; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: 12723; SKX: # %bb.0: 12724; 
SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 12725; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12726; SKX-NEXT: retq # sched: [7:1.00] 12727 %vec2 = load <16 x float>, <16 x float>* %vec2p 12728 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12729 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12730 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12731 ret <16 x float> %res 12732} 12733 12734define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) { 12735; GENERIC-LABEL: test_2xdouble_unpack_low_mask0: 12736; GENERIC: # %bb.0: 12737; GENERIC-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 12738; GENERIC-NEXT: retq # sched: [1:1.00] 12739; 12740; SKX-LABEL: test_2xdouble_unpack_low_mask0: 12741; SKX: # %bb.0: 12742; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 12743; SKX-NEXT: retq # sched: [7:1.00] 12744 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12745 ret <2 x double> %res 12746} 12747define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { 12748; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask0: 12749; GENERIC: # %bb.0: 12750; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] 12751; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] 12752; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 12753; GENERIC-NEXT: retq # sched: [1:1.00] 12754; 12755; SKX-LABEL: test_2xdouble_masked_unpack_low_mask0: 12756; SKX: # %bb.0: 12757; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] 12758; SKX-NEXT: 
vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] 12759; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 12760; SKX-NEXT: retq # sched: [7:1.00] 12761 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12762 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12763 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 12764 ret <2 x double> %res 12765} 12766 12767define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { 12768; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: 12769; GENERIC: # %bb.0: 12770; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 12771; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] 12772; GENERIC-NEXT: retq # sched: [1:1.00] 12773; 12774; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: 12775; SKX: # %bb.0: 12776; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 12777; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] 12778; SKX-NEXT: retq # sched: [7:1.00] 12779 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12780 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12781 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 12782 ret <2 x double> %res 12783} 12784define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { 12785; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask1: 12786; GENERIC: # %bb.0: 12787; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] 12788; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] 12789; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 12790; GENERIC-NEXT: retq # sched: [1:1.00] 12791; 12792; SKX-LABEL: test_2xdouble_masked_unpack_low_mask1: 12793; SKX: # %bb.0: 12794; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 
# sched: [3:1.00] 12795; SKX-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] 12796; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 12797; SKX-NEXT: retq # sched: [7:1.00] 12798 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12799 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12800 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 12801 ret <2 x double> %res 12802} 12803 12804define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { 12805; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: 12806; GENERIC: # %bb.0: 12807; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 12808; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] 12809; GENERIC-NEXT: retq # sched: [1:1.00] 12810; 12811; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: 12812; SKX: # %bb.0: 12813; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 12814; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] 12815; SKX-NEXT: retq # sched: [7:1.00] 12816 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12817 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12818 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 12819 ret <2 x double> %res 12820} 12821define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { 12822; GENERIC-LABEL: test_2xdouble_unpack_low_mem_mask0: 12823; GENERIC: # %bb.0: 12824; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] 12825; GENERIC-NEXT: retq # sched: [1:1.00] 12826; 12827; SKX-LABEL: test_2xdouble_unpack_low_mem_mask0: 12828; SKX: # %bb.0: 12829; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] 12830; SKX-NEXT: retq # sched: [7:1.00] 12831 %vec2 = load <2 x double>, <2 x double>* %vec2p 12832 %res = 
shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12833 ret <2 x double> %res 12834} 12835define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { 12836; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: 12837; GENERIC: # %bb.0: 12838; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 12839; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] 12840; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] 12841; GENERIC-NEXT: retq # sched: [1:1.00] 12842; 12843; SKX-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: 12844; SKX: # %bb.0: 12845; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 12846; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] 12847; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] 12848; SKX-NEXT: retq # sched: [7:1.00] 12849 %vec2 = load <2 x double>, <2 x double>* %vec2p 12850 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12851 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12852 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 12853 ret <2 x double> %res 12854} 12855 12856define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { 12857; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: 12858; GENERIC: # %bb.0: 12859; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] 12860; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] 12861; GENERIC-NEXT: retq # sched: [1:1.00] 12862; 12863; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: 12864; SKX: # %bb.0: 12865; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] 12866; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] 12867; SKX-NEXT: retq # sched: [7:1.00] 12868 %vec2 = load <2 x double>, <2 x 
double>* %vec2p 12869 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12870 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12871 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 12872 ret <2 x double> %res 12873} 12874 12875define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { 12876; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: 12877; GENERIC: # %bb.0: 12878; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 12879; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] 12880; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] 12881; GENERIC-NEXT: retq # sched: [1:1.00] 12882; 12883; SKX-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: 12884; SKX: # %bb.0: 12885; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 12886; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] 12887; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] 12888; SKX-NEXT: retq # sched: [7:1.00] 12889 %vec2 = load <2 x double>, <2 x double>* %vec2p 12890 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12891 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12892 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 12893 ret <2 x double> %res 12894} 12895 12896define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { 12897; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: 12898; GENERIC: # %bb.0: 12899; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] 12900; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] 12901; GENERIC-NEXT: retq # sched: [1:1.00] 12902; 12903; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: 12904; SKX: # %bb.0: 12905; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # 
sched: [3:1.00] 12906; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] 12907; SKX-NEXT: retq # sched: [7:1.00] 12908 %vec2 = load <2 x double>, <2 x double>* %vec2p 12909 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12910 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12911 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 12912 ret <2 x double> %res 12913} 12914; Unpack-low tests for <4 x double> with register operands (shuffle mask <0, 4, 2, 6>): the unmasked form comes first, followed by merge-masked variants (select falls back to %vec3) and zero-masked variants (select falls back to zeroinitializer). 12915define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2) { 12916; GENERIC-LABEL: test_4xdouble_unpack_low_mask0: 12917; GENERIC: # %bb.0: 12918; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12919; GENERIC-NEXT: retq # sched: [1:1.00] 12920; 12921; SKX-LABEL: test_4xdouble_unpack_low_mask0: 12922; SKX: # %bb.0: 12923; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12924; SKX-NEXT: retq # sched: [7:1.00] 12925 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12926 ret <4 x double> %res 12927} 12928define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 12929; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask0: 12930; GENERIC: # %bb.0: 12931; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 12932; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12933; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 12934; GENERIC-NEXT: retq # sched: [1:1.00] 12935; 12936; SKX-LABEL: test_4xdouble_masked_unpack_low_mask0: 12937; SKX: # %bb.0: 12938; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 12939; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12940; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 12941; SKX-NEXT: retq # sched: [7:1.00] 12942 %shuf = 
shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12943 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 12944 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 12945 ret <4 x double> %res 12946} 12947 12948define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 12949; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: 12950; GENERIC: # %bb.0: 12951; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 12952; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12953; GENERIC-NEXT: retq # sched: [1:1.00] 12954; 12955; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: 12956; SKX: # %bb.0: 12957; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 12958; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12959; SKX-NEXT: retq # sched: [7:1.00] 12960 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12961 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 12962 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 12963 ret <4 x double> %res 12964} 12965define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 12966; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask1: 12967; GENERIC: # %bb.0: 12968; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 12969; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12970; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 12971; GENERIC-NEXT: retq # sched: [1:1.00] 12972; 12973; SKX-LABEL: test_4xdouble_masked_unpack_low_mask1: 12974; SKX: # %bb.0: 12975; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 12976; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = 
ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12977; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 12978; SKX-NEXT: retq # sched: [7:1.00] 12979 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12980 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 12981 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 12982 ret <4 x double> %res 12983} 12984 12985define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 12986; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: 12987; GENERIC: # %bb.0: 12988; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 12989; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12990; GENERIC-NEXT: retq # sched: [1:1.00] 12991; 12992; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: 12993; SKX: # %bb.0: 12994; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 12995; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12996; SKX-NEXT: retq # sched: [7:1.00] 12997 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12998 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 12999 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13000 ret <4 x double> %res 13001} 13002define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 13003; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask2: 13004; GENERIC: # %bb.0: 13005; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 13006; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13007; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 13008; GENERIC-NEXT: retq # sched: [1:1.00] 13009; 13010; SKX-LABEL: test_4xdouble_masked_unpack_low_mask2: 
13011; SKX: # %bb.0: 13012; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 13013; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13014; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 13015; SKX-NEXT: retq # sched: [7:1.00] 13016 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13017 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13018 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13019 ret <4 x double> %res 13020} 13021 13022define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 13023; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: 13024; GENERIC: # %bb.0: 13025; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13026; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13027; GENERIC-NEXT: retq # sched: [1:1.00] 13028; 13029; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: 13030; SKX: # %bb.0: 13031; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13032; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13033; SKX-NEXT: retq # sched: [7:1.00] 13034 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13035 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13036 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13037 ret <4 x double> %res 13038} 13039define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2) { 13040; GENERIC-LABEL: test_4xdouble_unpack_low_mask3: 13041; GENERIC: # %bb.0: 13042; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13043; GENERIC-NEXT: retq # sched: [1:1.00] 13044; 13045; SKX-LABEL: test_4xdouble_unpack_low_mask3: 13046; SKX: # %bb.0: 13047; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13048; SKX-NEXT: retq # sched: [7:1.00] 13049 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13050 ret <4 x double> %res 13051} 13052define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 13053; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask3: 13054; GENERIC: # %bb.0: 13055; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 13056; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13057; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 13058; GENERIC-NEXT: retq # sched: [1:1.00] 13059; 13060; SKX-LABEL: test_4xdouble_masked_unpack_low_mask3: 13061; SKX: # %bb.0: 13062; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 13063; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13064; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 13065; SKX-NEXT: retq # sched: [7:1.00] 13066 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13067 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13068 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13069 ret <4 x double> %res 13070} 13071 13072define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 13073; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: 13074; GENERIC: # %bb.0: 13075; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13076; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13077; GENERIC-NEXT: retq # sched: [1:1.00] 13078; 13079; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: 13080; SKX: # %bb.0: 13081; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13082; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = 
ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13083; SKX-NEXT: retq # sched: [7:1.00] 13084 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13085 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13086 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13087 ret <4 x double> %res 13088} 13089define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { 13090; GENERIC-LABEL: test_4xdouble_unpack_low_mem_mask0: 13091; GENERIC: # %bb.0: 13092; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13093; GENERIC-NEXT: retq # sched: [1:1.00] 13094; 13095; SKX-LABEL: test_4xdouble_unpack_low_mem_mask0: 13096; SKX: # %bb.0: 13097; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13098; SKX-NEXT: retq # sched: [7:1.00] 13099 %vec2 = load <4 x double>, <4 x double>* %vec2p 13100 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13101 ret <4 x double> %res 13102} 13103define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 13104; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: 13105; GENERIC: # %bb.0: 13106; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13107; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13108; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 13109; GENERIC-NEXT: retq # sched: [1:1.00] 13110; 13111; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: 13112; SKX: # %bb.0: 13113; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13114; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13115; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 13116; SKX-NEXT: retq # sched: [7:1.00] 13117 %vec2 = load <4 x double>, <4 x double>* 
%vec2p 13118 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13119 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13120 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13121 ret <4 x double> %res 13122} 13123 13124define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 13125; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: 13126; GENERIC: # %bb.0: 13127; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 13128; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13129; GENERIC-NEXT: retq # sched: [1:1.00] 13130; 13131; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: 13132; SKX: # %bb.0: 13133; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 13134; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13135; SKX-NEXT: retq # sched: [7:1.00] 13136 %vec2 = load <4 x double>, <4 x double>* %vec2p 13137 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13138 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13139 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13140 ret <4 x double> %res 13141} 13142 13143define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 13144; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: 13145; GENERIC: # %bb.0: 13146; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13147; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13148; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 13149; GENERIC-NEXT: retq # sched: [1:1.00] 13150; 13151; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: 13152; SKX: # %bb.0: 13153; SKX-NEXT: vptestnmq %ymm2, 
%ymm2, %k1 # sched: [3:1.00] 13154; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13155; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 13156; SKX-NEXT: retq # sched: [7:1.00] 13157 %vec2 = load <4 x double>, <4 x double>* %vec2p 13158 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13159 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13160 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13161 ret <4 x double> %res 13162} 13163 13164define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 13165; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: 13166; GENERIC: # %bb.0: 13167; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 13168; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13169; GENERIC-NEXT: retq # sched: [1:1.00] 13170; 13171; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: 13172; SKX: # %bb.0: 13173; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 13174; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13175; SKX-NEXT: retq # sched: [7:1.00] 13176 %vec2 = load <4 x double>, <4 x double>* %vec2p 13177 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13178 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13179 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13180 ret <4 x double> %res 13181} 13182 13183define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 13184; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: 13185; GENERIC: # %bb.0: 13186; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13187; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = 
ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13188; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 13189; GENERIC-NEXT: retq # sched: [1:1.00] 13190; 13191; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: 13192; SKX: # %bb.0: 13193; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13194; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13195; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 13196; SKX-NEXT: retq # sched: [7:1.00] 13197 %vec2 = load <4 x double>, <4 x double>* %vec2p 13198 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13199 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13200 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13201 ret <4 x double> %res 13202} 13203 13204define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 13205; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: 13206; GENERIC: # %bb.0: 13207; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 13208; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13209; GENERIC-NEXT: retq # sched: [1:1.00] 13210; 13211; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: 13212; SKX: # %bb.0: 13213; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 13214; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13215; SKX-NEXT: retq # sched: [7:1.00] 13216 %vec2 = load <4 x double>, <4 x double>* %vec2p 13217 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13218 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13219 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13220 ret <4 x double> %res 13221} 13222 13223define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* 
%vec2p) { 13224; GENERIC-LABEL: test_4xdouble_unpack_low_mem_mask3: 13225; GENERIC: # %bb.0: 13226; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13227; GENERIC-NEXT: retq # sched: [1:1.00] 13228; 13229; SKX-LABEL: test_4xdouble_unpack_low_mem_mask3: 13230; SKX: # %bb.0: 13231; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13232; SKX-NEXT: retq # sched: [7:1.00] 13233 %vec2 = load <4 x double>, <4 x double>* %vec2p 13234 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13235 ret <4 x double> %res 13236} 13237define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 13238; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: 13239; GENERIC: # %bb.0: 13240; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13241; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13242; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 13243; GENERIC-NEXT: retq # sched: [1:1.00] 13244; 13245; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: 13246; SKX: # %bb.0: 13247; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13248; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13249; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 13250; SKX-NEXT: retq # sched: [7:1.00] 13251 %vec2 = load <4 x double>, <4 x double>* %vec2p 13252 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13253 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13254 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13255 ret <4 x double> %res 13256} 13257 13258define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 13259; GENERIC-LABEL: 
test_4xdouble_zero_masked_unpack_low_mem_mask3: 13260; GENERIC: # %bb.0: 13261; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 13262; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13263; GENERIC-NEXT: retq # sched: [1:1.00] 13264; 13265; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: 13266; SKX: # %bb.0: 13267; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 13268; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13269; SKX-NEXT: retq # sched: [7:1.00] 13270 %vec2 = load <4 x double>, <4 x double>* %vec2p 13271 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13272 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13273 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13274 ret <4 x double> %res 13275} 13276; Unpack-low tests for <8 x double> with register operands (shuffle mask <0, 8, 2, 10, 4, 12, 6, 14>): the unmasked form comes first, followed by merge-masked variants (select falls back to %vec3) and zero-masked variants (select falls back to zeroinitializer). 13277define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) { 13278; GENERIC-LABEL: test_8xdouble_unpack_low_mask0: 13279; GENERIC: # %bb.0: 13280; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13281; GENERIC-NEXT: retq # sched: [1:1.00] 13282; 13283; SKX-LABEL: test_8xdouble_unpack_low_mask0: 13284; SKX: # %bb.0: 13285; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13286; SKX-NEXT: retq # sched: [7:1.00] 13287 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13288 ret <8 x double> %res 13289} 13290define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 13291; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask0: 13292; GENERIC: # %bb.0: 13293; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 13294; 
GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13295; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 13296; GENERIC-NEXT: retq # sched: [1:1.00] 13297; 13298; SKX-LABEL: test_8xdouble_masked_unpack_low_mask0: 13299; SKX: # %bb.0: 13300; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 13301; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13302; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 13303; SKX-NEXT: retq # sched: [7:1.00] 13304 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13305 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13306 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13307 ret <8 x double> %res 13308} 13309 13310define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 13311; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: 13312; GENERIC: # %bb.0: 13313; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13314; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13315; GENERIC-NEXT: retq # sched: [1:1.00] 13316; 13317; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: 13318; SKX: # %bb.0: 13319; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13320; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13321; SKX-NEXT: retq # sched: [7:1.00] 13322 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13323 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13324 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13325 ret <8 x 
double> %res 13326} 13327define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 13328; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask1: 13329; GENERIC: # %bb.0: 13330; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 13331; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13332; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 13333; GENERIC-NEXT: retq # sched: [1:1.00] 13334; 13335; SKX-LABEL: test_8xdouble_masked_unpack_low_mask1: 13336; SKX: # %bb.0: 13337; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 13338; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13339; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 13340; SKX-NEXT: retq # sched: [7:1.00] 13341 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13342 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13343 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13344 ret <8 x double> %res 13345} 13346 13347define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 13348; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: 13349; GENERIC: # %bb.0: 13350; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13351; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13352; GENERIC-NEXT: retq # sched: [1:1.00] 13353; 13354; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: 13355; SKX: # %bb.0: 13356; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13357; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13358; 
SKX-NEXT: retq # sched: [7:1.00] 13359 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13360 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13361 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13362 ret <8 x double> %res 13363} 13364define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 13365; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask2: 13366; GENERIC: # %bb.0: 13367; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 13368; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13369; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 13370; GENERIC-NEXT: retq # sched: [1:1.00] 13371; 13372; SKX-LABEL: test_8xdouble_masked_unpack_low_mask2: 13373; SKX: # %bb.0: 13374; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 13375; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13376; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 13377; SKX-NEXT: retq # sched: [7:1.00] 13378 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13379 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13380 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13381 ret <8 x double> %res 13382} 13383 13384define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 13385; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: 13386; GENERIC: # %bb.0: 13387; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13388; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13389; 
GENERIC-NEXT: retq # sched: [1:1.00] 13390; 13391; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: 13392; SKX: # %bb.0: 13393; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13394; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13395; SKX-NEXT: retq # sched: [7:1.00] 13396 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13397 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13398 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13399 ret <8 x double> %res 13400} 13401define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) { 13402; GENERIC-LABEL: test_8xdouble_unpack_low_mask3: 13403; GENERIC: # %bb.0: 13404; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13405; GENERIC-NEXT: retq # sched: [1:1.00] 13406; 13407; SKX-LABEL: test_8xdouble_unpack_low_mask3: 13408; SKX: # %bb.0: 13409; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13410; SKX-NEXT: retq # sched: [7:1.00] 13411 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13412 ret <8 x double> %res 13413} 13414define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 13415; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask3: 13416; GENERIC: # %bb.0: 13417; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 13418; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13419; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 13420; GENERIC-NEXT: retq # sched: [1:1.00] 13421; 13422; SKX-LABEL: 
test_8xdouble_masked_unpack_low_mask3: 13423; SKX: # %bb.0: 13424; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 13425; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13426; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 13427; SKX-NEXT: retq # sched: [7:1.00] 13428 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13429 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13430 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13431 ret <8 x double> %res 13432} 13433 13434define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 13435; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: 13436; GENERIC: # %bb.0: 13437; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13438; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13439; GENERIC-NEXT: retq # sched: [1:1.00] 13440; 13441; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: 13442; SKX: # %bb.0: 13443; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13444; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13445; SKX-NEXT: retq # sched: [7:1.00] 13446 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13447 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13448 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13449 ret <8 x double> %res 13450} 13451define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { 13452; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0: 13453; GENERIC: # %bb.0: 13454; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = 
zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13455; GENERIC-NEXT: retq # sched: [1:1.00] 13456; 13457; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0: 13458; SKX: # %bb.0: 13459; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13460; SKX-NEXT: retq # sched: [7:1.00] 13461 %vec2 = load <8 x double>, <8 x double>* %vec2p 13462 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13463 ret <8 x double> %res 13464} 13465define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 13466; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: 13467; GENERIC: # %bb.0: 13468; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13469; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13470; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 13471; GENERIC-NEXT: retq # sched: [1:1.00] 13472; 13473; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: 13474; SKX: # %bb.0: 13475; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13476; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13477; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 13478; SKX-NEXT: retq # sched: [7:1.00] 13479 %vec2 = load <8 x double>, <8 x double>* %vec2p 13480 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13481 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13482 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13483 ret <8 x double> %res 13484} 13485 13486define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 
13487; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: 13488; GENERIC: # %bb.0: 13489; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 13490; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13491; GENERIC-NEXT: retq # sched: [1:1.00] 13492; 13493; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: 13494; SKX: # %bb.0: 13495; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 13496; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13497; SKX-NEXT: retq # sched: [7:1.00] 13498 %vec2 = load <8 x double>, <8 x double>* %vec2p 13499 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13500 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13501 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13502 ret <8 x double> %res 13503} 13504 13505define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 13506; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: 13507; GENERIC: # %bb.0: 13508; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13509; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13510; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 13511; GENERIC-NEXT: retq # sched: [1:1.00] 13512; 13513; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: 13514; SKX: # %bb.0: 13515; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13516; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13517; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 13518; SKX-NEXT: retq # sched: [7:1.00] 13519 %vec2 = load <8 x double>, <8 x double>* 
%vec2p 13520 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13521 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13522 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13523 ret <8 x double> %res 13524} 13525 13526define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 13527; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: 13528; GENERIC: # %bb.0: 13529; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 13530; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13531; GENERIC-NEXT: retq # sched: [1:1.00] 13532; 13533; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: 13534; SKX: # %bb.0: 13535; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 13536; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13537; SKX-NEXT: retq # sched: [7:1.00] 13538 %vec2 = load <8 x double>, <8 x double>* %vec2p 13539 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13540 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13541 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13542 ret <8 x double> %res 13543} 13544 13545define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 13546; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: 13547; GENERIC: # %bb.0: 13548; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13549; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13550; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 13551; GENERIC-NEXT: 
retq # sched: [1:1.00] 13552; 13553; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: 13554; SKX: # %bb.0: 13555; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13556; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13557; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 13558; SKX-NEXT: retq # sched: [7:1.00] 13559 %vec2 = load <8 x double>, <8 x double>* %vec2p 13560 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13561 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13562 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13563 ret <8 x double> %res 13564} 13565 13566define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 13567; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: 13568; GENERIC: # %bb.0: 13569; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 13570; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13571; GENERIC-NEXT: retq # sched: [1:1.00] 13572; 13573; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: 13574; SKX: # %bb.0: 13575; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 13576; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13577; SKX-NEXT: retq # sched: [7:1.00] 13578 %vec2 = load <8 x double>, <8 x double>* %vec2p 13579 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13580 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13581 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13582 ret <8 x double> %res 13583} 13584 13585define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x 
double> %vec1, <8 x double>* %vec2p) { 13586; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3: 13587; GENERIC: # %bb.0: 13588; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13589; GENERIC-NEXT: retq # sched: [1:1.00] 13590; 13591; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3: 13592; SKX: # %bb.0: 13593; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13594; SKX-NEXT: retq # sched: [7:1.00] 13595 %vec2 = load <8 x double>, <8 x double>* %vec2p 13596 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13597 ret <8 x double> %res 13598} 13599define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 13600; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: 13601; GENERIC: # %bb.0: 13602; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13603; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13604; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 13605; GENERIC-NEXT: retq # sched: [1:1.00] 13606; 13607; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: 13608; SKX: # %bb.0: 13609; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13610; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13611; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 13612; SKX-NEXT: retq # sched: [7:1.00] 13613 %vec2 = load <8 x double>, <8 x double>* %vec2p 13614 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13615 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13616 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13617 ret 
<8 x double> %res 13618} 13619 13620define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 13621; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: 13622; GENERIC: # %bb.0: 13623; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 13624; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13625; GENERIC-NEXT: retq # sched: [1:1.00] 13626; 13627; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: 13628; SKX: # %bb.0: 13629; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 13630; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13631; SKX-NEXT: retq # sched: [7:1.00] 13632 %vec2 = load <8 x double>, <8 x double>* %vec2p 13633 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13634 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13635 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13636 ret <8 x double> %res 13637} 13638 13639define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2) { 13640; GENERIC-LABEL: test_4xfloat_unpack_high_mask0: 13641; GENERIC: # %bb.0: 13642; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13643; GENERIC-NEXT: retq # sched: [1:1.00] 13644; 13645; SKX-LABEL: test_4xfloat_unpack_high_mask0: 13646; SKX: # %bb.0: 13647; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13648; SKX-NEXT: retq # sched: [7:1.00] 13649 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13650 ret <4 x float> %res 13651} 13652define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) 
{ 13653; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask0: 13654; GENERIC: # %bb.0: 13655; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 13656; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13657; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 13658; GENERIC-NEXT: retq # sched: [1:1.00] 13659; 13660; SKX-LABEL: test_4xfloat_masked_unpack_high_mask0: 13661; SKX: # %bb.0: 13662; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 13663; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13664; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 13665; SKX-NEXT: retq # sched: [7:1.00] 13666 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13667 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13668 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13669 ret <4 x float> %res 13670} 13671 13672define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 13673; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: 13674; GENERIC: # %bb.0: 13675; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13676; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13677; GENERIC-NEXT: retq # sched: [1:1.00] 13678; 13679; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: 13680; SKX: # %bb.0: 13681; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13682; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13683; SKX-NEXT: retq # sched: [7:1.00] 13684 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13685 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13686 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13687 ret <4 x float> %res 13688} 13689define <4 x float> 
@test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 13690; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask1: 13691; GENERIC: # %bb.0: 13692; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 13693; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13694; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 13695; GENERIC-NEXT: retq # sched: [1:1.00] 13696; 13697; SKX-LABEL: test_4xfloat_masked_unpack_high_mask1: 13698; SKX: # %bb.0: 13699; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 13700; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13701; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 13702; SKX-NEXT: retq # sched: [7:1.00] 13703 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13704 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13705 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13706 ret <4 x float> %res 13707} 13708 13709define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 13710; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: 13711; GENERIC: # %bb.0: 13712; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13713; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13714; GENERIC-NEXT: retq # sched: [1:1.00] 13715; 13716; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: 13717; SKX: # %bb.0: 13718; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13719; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13720; SKX-NEXT: retq # sched: [7:1.00] 13721 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13722 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13723 %res = select <4 x i1> 
%cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13724 ret <4 x float> %res 13725} 13726define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 13727; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask2: 13728; GENERIC: # %bb.0: 13729; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 13730; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13731; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 13732; GENERIC-NEXT: retq # sched: [1:1.00] 13733; 13734; SKX-LABEL: test_4xfloat_masked_unpack_high_mask2: 13735; SKX: # %bb.0: 13736; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 13737; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13738; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 13739; SKX-NEXT: retq # sched: [7:1.00] 13740 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13741 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13742 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13743 ret <4 x float> %res 13744} 13745 13746define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 13747; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: 13748; GENERIC: # %bb.0: 13749; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13750; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13751; GENERIC-NEXT: retq # sched: [1:1.00] 13752; 13753; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: 13754; SKX: # %bb.0: 13755; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13756; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13757; SKX-NEXT: retq # sched: [7:1.00] 13758 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> 
<i32 2, i32 6, i32 3, i32 7> 13759 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13760 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13761 ret <4 x float> %res 13762} 13763define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2) { 13764; GENERIC-LABEL: test_4xfloat_unpack_high_mask3: 13765; GENERIC: # %bb.0: 13766; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13767; GENERIC-NEXT: retq # sched: [1:1.00] 13768; 13769; SKX-LABEL: test_4xfloat_unpack_high_mask3: 13770; SKX: # %bb.0: 13771; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13772; SKX-NEXT: retq # sched: [7:1.00] 13773 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13774 ret <4 x float> %res 13775} 13776define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 13777; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask3: 13778; GENERIC: # %bb.0: 13779; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 13780; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13781; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 13782; GENERIC-NEXT: retq # sched: [1:1.00] 13783; 13784; SKX-LABEL: test_4xfloat_masked_unpack_high_mask3: 13785; SKX: # %bb.0: 13786; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 13787; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13788; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 13789; SKX-NEXT: retq # sched: [7:1.00] 13790 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13791 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13792 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13793 ret <4 x float> %res 13794} 13795 13796define <4 x float> 
@test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 13797; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: 13798; GENERIC: # %bb.0: 13799; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13800; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13801; GENERIC-NEXT: retq # sched: [1:1.00] 13802; 13803; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: 13804; SKX: # %bb.0: 13805; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13806; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13807; SKX-NEXT: retq # sched: [7:1.00] 13808 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13809 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13810 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13811 ret <4 x float> %res 13812} 13813define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) { 13814; GENERIC-LABEL: test_4xfloat_unpack_high_mem_mask0: 13815; GENERIC: # %bb.0: 13816; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13817; GENERIC-NEXT: retq # sched: [1:1.00] 13818; 13819; SKX-LABEL: test_4xfloat_unpack_high_mem_mask0: 13820; SKX: # %bb.0: 13821; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13822; SKX-NEXT: retq # sched: [7:1.00] 13823 %vec2 = load <4 x float>, <4 x float>* %vec2p 13824 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13825 ret <4 x float> %res 13826} 13827define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 13828; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: 13829; GENERIC: # %bb.0: 13830; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: 
[1:0.33] 13831; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13832; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 13833; GENERIC-NEXT: retq # sched: [1:1.00] 13834; 13835; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: 13836; SKX: # %bb.0: 13837; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13838; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13839; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 13840; SKX-NEXT: retq # sched: [7:1.00] 13841 %vec2 = load <4 x float>, <4 x float>* %vec2p 13842 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13843 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13844 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13845 ret <4 x float> %res 13846} 13847 13848define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 13849; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: 13850; GENERIC: # %bb.0: 13851; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 13852; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13853; GENERIC-NEXT: retq # sched: [1:1.00] 13854; 13855; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: 13856; SKX: # %bb.0: 13857; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 13858; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13859; SKX-NEXT: retq # sched: [7:1.00] 13860 %vec2 = load <4 x float>, <4 x float>* %vec2p 13861 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13862 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13863 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13864 ret <4 x float> %res 13865} 13866 13867define <4 x float> 
@test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 13868; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: 13869; GENERIC: # %bb.0: 13870; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13871; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13872; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 13873; GENERIC-NEXT: retq # sched: [1:1.00] 13874; 13875; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: 13876; SKX: # %bb.0: 13877; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13878; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13879; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 13880; SKX-NEXT: retq # sched: [7:1.00] 13881 %vec2 = load <4 x float>, <4 x float>* %vec2p 13882 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13883 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13884 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13885 ret <4 x float> %res 13886} 13887 13888define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 13889; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: 13890; GENERIC: # %bb.0: 13891; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 13892; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13893; GENERIC-NEXT: retq # sched: [1:1.00] 13894; 13895; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: 13896; SKX: # %bb.0: 13897; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 13898; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13899; SKX-NEXT: retq # sched: [7:1.00] 13900 %vec2 = load <4 x float>, <4 x float>* %vec2p 13901 %shuf = shufflevector <4 x float> %vec1, <4 x float> 
%vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13902 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13903 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13904 ret <4 x float> %res 13905} 13906 13907define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 13908; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: 13909; GENERIC: # %bb.0: 13910; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13911; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13912; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 13913; GENERIC-NEXT: retq # sched: [1:1.00] 13914; 13915; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: 13916; SKX: # %bb.0: 13917; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13918; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13919; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 13920; SKX-NEXT: retq # sched: [7:1.00] 13921 %vec2 = load <4 x float>, <4 x float>* %vec2p 13922 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13923 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13924 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13925 ret <4 x float> %res 13926} 13927 13928define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 13929; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: 13930; GENERIC: # %bb.0: 13931; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 13932; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13933; GENERIC-NEXT: retq # sched: [1:1.00] 13934; 13935; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: 13936; SKX: # %bb.0: 13937; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 13938; 
SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13939; SKX-NEXT: retq # sched: [7:1.00] 13940 %vec2 = load <4 x float>, <4 x float>* %vec2p 13941 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13942 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13943 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13944 ret <4 x float> %res 13945} 13946 13947define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) { 13948; GENERIC-LABEL: test_4xfloat_unpack_high_mem_mask3: 13949; GENERIC: # %bb.0: 13950; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13951; GENERIC-NEXT: retq # sched: [1:1.00] 13952; 13953; SKX-LABEL: test_4xfloat_unpack_high_mem_mask3: 13954; SKX: # %bb.0: 13955; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13956; SKX-NEXT: retq # sched: [7:1.00] 13957 %vec2 = load <4 x float>, <4 x float>* %vec2p 13958 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13959 ret <4 x float> %res 13960} 13961define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 13962; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: 13963; GENERIC: # %bb.0: 13964; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13965; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13966; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 13967; GENERIC-NEXT: retq # sched: [1:1.00] 13968; 13969; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: 13970; SKX: # %bb.0: 13971; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13972; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13973; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 
13974; SKX-NEXT: retq # sched: [7:1.00] 13975 %vec2 = load <4 x float>, <4 x float>* %vec2p 13976 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13977 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13978 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13979 ret <4 x float> %res 13980} 13981 13982define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 13983; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: 13984; GENERIC: # %bb.0: 13985; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 13986; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13987; GENERIC-NEXT: retq # sched: [1:1.00] 13988; 13989; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: 13990; SKX: # %bb.0: 13991; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 13992; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13993; SKX-NEXT: retq # sched: [7:1.00] 13994 %vec2 = load <4 x float>, <4 x float>* %vec2p 13995 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13996 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13997 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13998 ret <4 x float> %res 13999} 14000 14001define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2) { 14002; GENERIC-LABEL: test_8xfloat_unpack_high_mask0: 14003; GENERIC: # %bb.0: 14004; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14005; GENERIC-NEXT: retq # sched: [1:1.00] 14006; 14007; SKX-LABEL: test_8xfloat_unpack_high_mask0: 14008; SKX: # %bb.0: 14009; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14010; SKX-NEXT: 
retq # sched: [7:1.00] 14011 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14012 ret <8 x float> %res 14013} 14014define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 14015; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask0: 14016; GENERIC: # %bb.0: 14017; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 14018; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14019; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 14020; GENERIC-NEXT: retq # sched: [1:1.00] 14021; 14022; SKX-LABEL: test_8xfloat_masked_unpack_high_mask0: 14023; SKX: # %bb.0: 14024; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 14025; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14026; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 14027; SKX-NEXT: retq # sched: [7:1.00] 14028 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14029 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14030 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 14031 ret <8 x float> %res 14032} 14033 14034define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 14035; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: 14036; GENERIC: # %bb.0: 14037; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 14038; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14039; GENERIC-NEXT: retq # sched: [1:1.00] 14040; 14041; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: 14042; SKX: # %bb.0: 14043; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # 
sched: [3:1.00] 14044; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14045; SKX-NEXT: retq # sched: [7:1.00] 14046 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14047 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14048 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 14049 ret <8 x float> %res 14050} 14051define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 14052; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask1: 14053; GENERIC: # %bb.0: 14054; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 14055; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14056; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 14057; GENERIC-NEXT: retq # sched: [1:1.00] 14058; 14059; SKX-LABEL: test_8xfloat_masked_unpack_high_mask1: 14060; SKX: # %bb.0: 14061; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 14062; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14063; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 14064; SKX-NEXT: retq # sched: [7:1.00] 14065 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14066 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14067 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 14068 ret <8 x float> %res 14069} 14070 14071define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 14072; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: 14073; GENERIC: # %bb.0: 14074; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 14075; 
GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14076; GENERIC-NEXT: retq # sched: [1:1.00] 14077; 14078; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: 14079; SKX: # %bb.0: 14080; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 14081; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14082; SKX-NEXT: retq # sched: [7:1.00] 14083 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14084 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14085 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 14086 ret <8 x float> %res 14087} 14088define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 14089; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask2: 14090; GENERIC: # %bb.0: 14091; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 14092; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14093; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 14094; GENERIC-NEXT: retq # sched: [1:1.00] 14095; 14096; SKX-LABEL: test_8xfloat_masked_unpack_high_mask2: 14097; SKX: # %bb.0: 14098; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 14099; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14100; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 14101; SKX-NEXT: retq # sched: [7:1.00] 14102 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14103 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14104 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 14105 ret <8 x float> %res 
14106} 14107 14108define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 14109; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: 14110; GENERIC: # %bb.0: 14111; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 14112; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14113; GENERIC-NEXT: retq # sched: [1:1.00] 14114; 14115; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: 14116; SKX: # %bb.0: 14117; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 14118; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14119; SKX-NEXT: retq # sched: [7:1.00] 14120 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14121 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14122 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 14123 ret <8 x float> %res 14124} 14125define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2) { 14126; GENERIC-LABEL: test_8xfloat_unpack_high_mask3: 14127; GENERIC: # %bb.0: 14128; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14129; GENERIC-NEXT: retq # sched: [1:1.00] 14130; 14131; SKX-LABEL: test_8xfloat_unpack_high_mask3: 14132; SKX: # %bb.0: 14133; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14134; SKX-NEXT: retq # sched: [7:1.00] 14135 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14136 ret <8 x float> %res 14137} 14138define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> 
%mask) { 14139; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask3: 14140; GENERIC: # %bb.0: 14141; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 14142; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14143; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 14144; GENERIC-NEXT: retq # sched: [1:1.00] 14145; 14146; SKX-LABEL: test_8xfloat_masked_unpack_high_mask3: 14147; SKX: # %bb.0: 14148; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 14149; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14150; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 14151; SKX-NEXT: retq # sched: [7:1.00] 14152 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14153 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14154 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 14155 ret <8 x float> %res 14156} 14157 14158define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 14159; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: 14160; GENERIC: # %bb.0: 14161; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 14162; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14163; GENERIC-NEXT: retq # sched: [1:1.00] 14164; 14165; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: 14166; SKX: # %bb.0: 14167; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 14168; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 14169; SKX-NEXT: retq # sched: [7:1.00] 14170 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 
15> 14171 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14172 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 14173 ret <8 x float> %res 14174} 14175define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { 14176; GENERIC-LABEL: test_8xfloat_unpack_high_mem_mask0: 14177; GENERIC: # %bb.0: 14178; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14179; GENERIC-NEXT: retq # sched: [1:1.00] 14180; 14181; SKX-LABEL: test_8xfloat_unpack_high_mem_mask0: 14182; SKX: # %bb.0: 14183; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14184; SKX-NEXT: retq # sched: [7:1.00] 14185 %vec2 = load <8 x float>, <8 x float>* %vec2p 14186 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14187 ret <8 x float> %res 14188} 14189define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 14190; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: 14191; GENERIC: # %bb.0: 14192; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 14193; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14194; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 14195; GENERIC-NEXT: retq # sched: [1:1.00] 14196; 14197; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: 14198; SKX: # %bb.0: 14199; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 14200; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14201; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 14202; SKX-NEXT: retq # sched: [7:1.00] 14203 %vec2 = load <8 x float>, <8 x float>* %vec2p 14204 %shuf = shufflevector <8 x float> %vec1, <8 
x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14205 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14206 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 14207 ret <8 x float> %res 14208} 14209 14210define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 14211; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: 14212; GENERIC: # %bb.0: 14213; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 14214; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14215; GENERIC-NEXT: retq # sched: [1:1.00] 14216; 14217; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: 14218; SKX: # %bb.0: 14219; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 14220; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14221; SKX-NEXT: retq # sched: [7:1.00] 14222 %vec2 = load <8 x float>, <8 x float>* %vec2p 14223 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14224 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14225 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 14226 ret <8 x float> %res 14227} 14228 14229define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 14230; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: 14231; GENERIC: # %bb.0: 14232; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 14233; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14234; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 14235; GENERIC-NEXT: retq # sched: [1:1.00] 14236; 14237; SKX-LABEL: 
test_8xfloat_masked_unpack_high_mem_mask1: 14238; SKX: # %bb.0: 14239; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 14240; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14241; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 14242; SKX-NEXT: retq # sched: [7:1.00] 14243 %vec2 = load <8 x float>, <8 x float>* %vec2p 14244 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14245 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14246 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 14247 ret <8 x float> %res 14248} 14249 14250define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 14251; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: 14252; GENERIC: # %bb.0: 14253; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 14254; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14255; GENERIC-NEXT: retq # sched: [1:1.00] 14256; 14257; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: 14258; SKX: # %bb.0: 14259; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 14260; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14261; SKX-NEXT: retq # sched: [7:1.00] 14262 %vec2 = load <8 x float>, <8 x float>* %vec2p 14263 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14264 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14265 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 14266 ret <8 x float> %res 14267} 14268 14269define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> 
%mask) { 14270; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: 14271; GENERIC: # %bb.0: 14272; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 14273; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14274; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 14275; GENERIC-NEXT: retq # sched: [1:1.00] 14276; 14277; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: 14278; SKX: # %bb.0: 14279; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 14280; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14281; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 14282; SKX-NEXT: retq # sched: [7:1.00] 14283 %vec2 = load <8 x float>, <8 x float>* %vec2p 14284 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14285 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14286 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 14287 ret <8 x float> %res 14288} 14289 14290define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 14291; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: 14292; GENERIC: # %bb.0: 14293; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 14294; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14295; GENERIC-NEXT: retq # sched: [1:1.00] 14296; 14297; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: 14298; SKX: # %bb.0: 14299; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 14300; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14301; SKX-NEXT: retq # sched: [7:1.00] 14302 %vec2 = load <8 x float>, <8 x float>* %vec2p 14303 %shuf = 
shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14304 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14305 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 14306 ret <8 x float> %res 14307} 14308 14309define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { 14310; GENERIC-LABEL: test_8xfloat_unpack_high_mem_mask3: 14311; GENERIC: # %bb.0: 14312; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14313; GENERIC-NEXT: retq # sched: [1:1.00] 14314; 14315; SKX-LABEL: test_8xfloat_unpack_high_mem_mask3: 14316; SKX: # %bb.0: 14317; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14318; SKX-NEXT: retq # sched: [7:1.00] 14319 %vec2 = load <8 x float>, <8 x float>* %vec2p 14320 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14321 ret <8 x float> %res 14322} 14323define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 14324; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: 14325; GENERIC: # %bb.0: 14326; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 14327; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14328; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 14329; GENERIC-NEXT: retq # sched: [1:1.00] 14330; 14331; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: 14332; SKX: # %bb.0: 14333; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 14334; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14335; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 14336; SKX-NEXT: 
retq # sched: [7:1.00] 14337 %vec2 = load <8 x float>, <8 x float>* %vec2p 14338 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14339 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14340 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 14341 ret <8 x float> %res 14342} 14343 14344define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 14345; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: 14346; GENERIC: # %bb.0: 14347; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 14348; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14349; GENERIC-NEXT: retq # sched: [1:1.00] 14350; 14351; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: 14352; SKX: # %bb.0: 14353; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 14354; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14355; SKX-NEXT: retq # sched: [7:1.00] 14356 %vec2 = load <8 x float>, <8 x float>* %vec2p 14357 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14358 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14359 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 14360 ret <8 x float> %res 14361} 14362 14363define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) { 14364; GENERIC-LABEL: test_16xfloat_unpack_high_mask0: 14365; GENERIC: # %bb.0: 14366; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14367; GENERIC-NEXT: retq # sched: [1:1.00] 14368; 14369; SKX-LABEL: 
test_16xfloat_unpack_high_mask0: 14370; SKX: # %bb.0: 14371; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14372; SKX-NEXT: retq # sched: [7:1.00] 14373 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14374 ret <16 x float> %res 14375} 14376define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 14377; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask0: 14378; GENERIC: # %bb.0: 14379; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 14380; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14381; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 14382; GENERIC-NEXT: retq # sched: [1:1.00] 14383; 14384; SKX-LABEL: test_16xfloat_masked_unpack_high_mask0: 14385; SKX: # %bb.0: 14386; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 14387; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14388; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 14389; SKX-NEXT: retq # sched: [7:1.00] 14390 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14391 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14392 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14393 ret <16 x float> %res 14394} 14395 14396define <16 x float> 
@test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 14397; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: 14398; GENERIC: # %bb.0: 14399; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14400; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14401; GENERIC-NEXT: retq # sched: [1:1.00] 14402; 14403; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: 14404; SKX: # %bb.0: 14405; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14406; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14407; SKX-NEXT: retq # sched: [7:1.00] 14408 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14409 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14410 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14411 ret <16 x float> %res 14412} 14413define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 14414; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask1: 14415; GENERIC: # %bb.0: 14416; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 14417; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14418; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 14419; GENERIC-NEXT: retq # sched: [1:1.00] 14420; 14421; SKX-LABEL: test_16xfloat_masked_unpack_high_mask1: 14422; SKX: # %bb.0: 14423; 
SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 14424; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14425; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 14426; SKX-NEXT: retq # sched: [7:1.00] 14427 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14428 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14429 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14430 ret <16 x float> %res 14431} 14432 14433define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 14434; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: 14435; GENERIC: # %bb.0: 14436; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14437; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14438; GENERIC-NEXT: retq # sched: [1:1.00] 14439; 14440; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: 14441; SKX: # %bb.0: 14442; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14443; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14444; SKX-NEXT: retq # sched: [7:1.00] 14445 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14446 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14447 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> 
zeroinitializer 14448 ret <16 x float> %res 14449} 14450define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 14451; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask2: 14452; GENERIC: # %bb.0: 14453; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 14454; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14455; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 14456; GENERIC-NEXT: retq # sched: [1:1.00] 14457; 14458; SKX-LABEL: test_16xfloat_masked_unpack_high_mask2: 14459; SKX: # %bb.0: 14460; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 14461; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14462; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 14463; SKX-NEXT: retq # sched: [7:1.00] 14464 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14465 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14466 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14467 ret <16 x float> %res 14468} 14469 14470define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 14471; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: 14472; GENERIC: # %bb.0: 14473; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14474; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14475; GENERIC-NEXT: 
retq # sched: [1:1.00] 14476; 14477; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: 14478; SKX: # %bb.0: 14479; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14480; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14481; SKX-NEXT: retq # sched: [7:1.00] 14482 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14483 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14484 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14485 ret <16 x float> %res 14486} 14487define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) { 14488; GENERIC-LABEL: test_16xfloat_unpack_high_mask3: 14489; GENERIC: # %bb.0: 14490; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14491; GENERIC-NEXT: retq # sched: [1:1.00] 14492; 14493; SKX-LABEL: test_16xfloat_unpack_high_mask3: 14494; SKX: # %bb.0: 14495; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14496; SKX-NEXT: retq # sched: [7:1.00] 14497 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14498 ret <16 x float> %res 14499} 14500define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 14501; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask3: 14502; GENERIC: 
# %bb.0: 14503; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 14504; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14505; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 14506; GENERIC-NEXT: retq # sched: [1:1.00] 14507; 14508; SKX-LABEL: test_16xfloat_masked_unpack_high_mask3: 14509; SKX: # %bb.0: 14510; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 14511; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14512; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 14513; SKX-NEXT: retq # sched: [7:1.00] 14514 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14515 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14516 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14517 ret <16 x float> %res 14518} 14519 14520define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 14521; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: 14522; GENERIC: # %bb.0: 14523; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14524; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14525; GENERIC-NEXT: retq # sched: [1:1.00] 14526; 14527; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: 14528; SKX: # %bb.0: 14529; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14530; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14531; SKX-NEXT: retq # sched: [7:1.00] 14532 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14533 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14534 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14535 ret <16 x float> %res 14536} 14537define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { 14538; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0: 14539; GENERIC: # %bb.0: 14540; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14541; GENERIC-NEXT: retq # sched: [1:1.00] 14542; 14543; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0: 14544; SKX: # %bb.0: 14545; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14546; SKX-NEXT: retq # sched: [7:1.00] 14547 %vec2 = load <16 x float>, <16 x float>* %vec2p 14548 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14549 ret <16 x float> %res 14550} 14551define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 14552; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: 14553; GENERIC: # %bb.0: 14554; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14555; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = 
zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14556; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 14557; GENERIC-NEXT: retq # sched: [1:1.00] 14558; 14559; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: 14560; SKX: # %bb.0: 14561; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14562; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14563; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 14564; SKX-NEXT: retq # sched: [7:1.00] 14565 %vec2 = load <16 x float>, <16 x float>* %vec2p 14566 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14567 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14568 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14569 ret <16 x float> %res 14570} 14571 14572define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 14573; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: 14574; GENERIC: # %bb.0: 14575; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 14576; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14577; GENERIC-NEXT: retq # sched: [1:1.00] 14578; 14579; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: 14580; SKX: # %bb.0: 14581; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 14582; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14583; SKX-NEXT: retq # sched: [7:1.00] 14584 %vec2 = load <16 x float>, <16 x float>* %vec2p 14585 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14586 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14587 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14588 ret <16 x float> %res 14589} 14590 14591define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 14592; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: 14593; GENERIC: # %bb.0: 14594; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14595; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14596; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 14597; GENERIC-NEXT: retq # sched: [1:1.00] 14598; 14599; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: 14600; SKX: # %bb.0: 14601; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14602; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14603; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 14604; SKX-NEXT: retq # sched: [7:1.00] 14605 %vec2 = load <16 x float>, <16 x float>* %vec2p 14606 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14607 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14608 %res 
= select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14609 ret <16 x float> %res 14610} 14611 14612define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 14613; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: 14614; GENERIC: # %bb.0: 14615; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 14616; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14617; GENERIC-NEXT: retq # sched: [1:1.00] 14618; 14619; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: 14620; SKX: # %bb.0: 14621; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 14622; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14623; SKX-NEXT: retq # sched: [7:1.00] 14624 %vec2 = load <16 x float>, <16 x float>* %vec2p 14625 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14626 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14627 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14628 ret <16 x float> %res 14629} 14630 14631define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 14632; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: 14633; GENERIC: # %bb.0: 14634; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14635; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14636; 
GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 14637; GENERIC-NEXT: retq # sched: [1:1.00] 14638; 14639; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: 14640; SKX: # %bb.0: 14641; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14642; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14643; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 14644; SKX-NEXT: retq # sched: [7:1.00] 14645 %vec2 = load <16 x float>, <16 x float>* %vec2p 14646 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14647 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14648 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14649 ret <16 x float> %res 14650} 14651 14652define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 14653; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: 14654; GENERIC: # %bb.0: 14655; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 14656; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14657; GENERIC-NEXT: retq # sched: [1:1.00] 14658; 14659; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: 14660; SKX: # %bb.0: 14661; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 14662; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14663; SKX-NEXT: retq # sched: [7:1.00] 14664 %vec2 = load <16 x float>, <16 x float>* %vec2p 14665 %shuf = shufflevector <16 x 
float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14666 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14667 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14668 ret <16 x float> %res 14669} 14670 14671define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { 14672; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3: 14673; GENERIC: # %bb.0: 14674; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14675; GENERIC-NEXT: retq # sched: [1:1.00] 14676; 14677; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3: 14678; SKX: # %bb.0: 14679; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14680; SKX-NEXT: retq # sched: [7:1.00] 14681 %vec2 = load <16 x float>, <16 x float>* %vec2p 14682 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14683 ret <16 x float> %res 14684} 14685define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 14686; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: 14687; GENERIC: # %bb.0: 14688; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14689; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14690; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 14691; GENERIC-NEXT: retq # sched: [1:1.00] 14692; 
14693; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: 14694; SKX: # %bb.0: 14695; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14696; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14697; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 14698; SKX-NEXT: retq # sched: [7:1.00] 14699 %vec2 = load <16 x float>, <16 x float>* %vec2p 14700 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14701 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14702 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14703 ret <16 x float> %res 14704} 14705 14706define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 14707; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: 14708; GENERIC: # %bb.0: 14709; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 14710; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14711; GENERIC-NEXT: retq # sched: [1:1.00] 14712; 14713; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: 14714; SKX: # %bb.0: 14715; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 14716; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14717; SKX-NEXT: retq # sched: [7:1.00] 14718 %vec2 = load <16 x float>, <16 x float>* %vec2p 14719 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, 
i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14720 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14721 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14722 ret <16 x float> %res 14723} 14724 14725define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2) { 14726; GENERIC-LABEL: test_2xdouble_unpack_high_mask0: 14727; GENERIC: # %bb.0: 14728; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14729; GENERIC-NEXT: retq # sched: [1:1.00] 14730; 14731; SKX-LABEL: test_2xdouble_unpack_high_mask0: 14732; SKX: # %bb.0: 14733; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14734; SKX-NEXT: retq # sched: [7:1.00] 14735 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14736 ret <2 x double> %res 14737} 14738define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { 14739; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask0: 14740; GENERIC: # %bb.0: 14741; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] 14742; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] 14743; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 14744; GENERIC-NEXT: retq # sched: [1:1.00] 14745; 14746; SKX-LABEL: test_2xdouble_masked_unpack_high_mask0: 14747; SKX: # %bb.0: 14748; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] 14749; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] 14750; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 14751; SKX-NEXT: retq # sched: [7:1.00] 14752 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14753 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14754 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 14755 ret <2 x double> %res 14756} 14757 14758define <2 x double> 
@test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { 14759; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: 14760; GENERIC: # %bb.0: 14761; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 14762; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] 14763; GENERIC-NEXT: retq # sched: [1:1.00] 14764; 14765; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: 14766; SKX: # %bb.0: 14767; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 14768; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] 14769; SKX-NEXT: retq # sched: [7:1.00] 14770 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14771 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14772 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 14773 ret <2 x double> %res 14774} 14775define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { 14776; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask1: 14777; GENERIC: # %bb.0: 14778; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] 14779; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] 14780; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 14781; GENERIC-NEXT: retq # sched: [1:1.00] 14782; 14783; SKX-LABEL: test_2xdouble_masked_unpack_high_mask1: 14784; SKX: # %bb.0: 14785; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] 14786; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] 14787; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 14788; SKX-NEXT: retq # sched: [7:1.00] 14789 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14790 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14791 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 14792 ret <2 x double> 
%res 14793} 14794 14795define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { 14796; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: 14797; GENERIC: # %bb.0: 14798; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 14799; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] 14800; GENERIC-NEXT: retq # sched: [1:1.00] 14801; 14802; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: 14803; SKX: # %bb.0: 14804; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 14805; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] 14806; SKX-NEXT: retq # sched: [7:1.00] 14807 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14808 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14809 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 14810 ret <2 x double> %res 14811} 14812define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { 14813; GENERIC-LABEL: test_2xdouble_unpack_high_mem_mask0: 14814; GENERIC: # %bb.0: 14815; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [7:1.00] 14816; GENERIC-NEXT: retq # sched: [1:1.00] 14817; 14818; SKX-LABEL: test_2xdouble_unpack_high_mem_mask0: 14819; SKX: # %bb.0: 14820; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [7:1.00] 14821; SKX-NEXT: retq # sched: [7:1.00] 14822 %vec2 = load <2 x double>, <2 x double>* %vec2p 14823 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14824 ret <2 x double> %res 14825} 14826define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { 14827; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: 14828; GENERIC: # %bb.0: 14829; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 14830; 
GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] 14831; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] 14832; GENERIC-NEXT: retq # sched: [1:1.00] 14833; 14834; SKX-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: 14835; SKX: # %bb.0: 14836; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 14837; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] 14838; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] 14839; SKX-NEXT: retq # sched: [7:1.00] 14840 %vec2 = load <2 x double>, <2 x double>* %vec2p 14841 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14842 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14843 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 14844 ret <2 x double> %res 14845} 14846 14847define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { 14848; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: 14849; GENERIC: # %bb.0: 14850; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] 14851; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] 14852; GENERIC-NEXT: retq # sched: [1:1.00] 14853; 14854; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: 14855; SKX: # %bb.0: 14856; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] 14857; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] 14858; SKX-NEXT: retq # sched: [7:1.00] 14859 %vec2 = load <2 x double>, <2 x double>* %vec2p 14860 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14861 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14862 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 14863 ret <2 x double> %res 14864} 14865 14866define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x 
i64> %mask) { 14867; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: 14868; GENERIC: # %bb.0: 14869; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 14870; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] 14871; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] 14872; GENERIC-NEXT: retq # sched: [1:1.00] 14873; 14874; SKX-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: 14875; SKX: # %bb.0: 14876; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 14877; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] 14878; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] 14879; SKX-NEXT: retq # sched: [7:1.00] 14880 %vec2 = load <2 x double>, <2 x double>* %vec2p 14881 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14882 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14883 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 14884 ret <2 x double> %res 14885} 14886 14887define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { 14888; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: 14889; GENERIC: # %bb.0: 14890; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] 14891; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] 14892; GENERIC-NEXT: retq # sched: [1:1.00] 14893; 14894; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: 14895; SKX: # %bb.0: 14896; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] 14897; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] 14898; SKX-NEXT: retq # sched: [7:1.00] 14899 %vec2 = load <2 x double>, <2 x double>* %vec2p 14900 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14901 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14902 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> 
zeroinitializer 14903 ret <2 x double> %res 14904} 14905 14906define <4 x double> @test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2) { 14907; GENERIC-LABEL: test_4xdouble_unpack_high_mask0: 14908; GENERIC: # %bb.0: 14909; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14910; GENERIC-NEXT: retq # sched: [1:1.00] 14911; 14912; SKX-LABEL: test_4xdouble_unpack_high_mask0: 14913; SKX: # %bb.0: 14914; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14915; SKX-NEXT: retq # sched: [7:1.00] 14916 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14917 ret <4 x double> %res 14918} 14919define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 14920; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask0: 14921; GENERIC: # %bb.0: 14922; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 14923; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14924; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 14925; GENERIC-NEXT: retq # sched: [1:1.00] 14926; 14927; SKX-LABEL: test_4xdouble_masked_unpack_high_mask0: 14928; SKX: # %bb.0: 14929; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 14930; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14931; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 14932; SKX-NEXT: retq # sched: [7:1.00] 14933 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14934 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 14935 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 14936 ret <4 x double> %res 14937} 14938 14939define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 14940; GENERIC-LABEL: 
test_4xdouble_zero_masked_unpack_high_mask0: 14941; GENERIC: # %bb.0: 14942; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 14943; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14944; GENERIC-NEXT: retq # sched: [1:1.00] 14945; 14946; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: 14947; SKX: # %bb.0: 14948; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 14949; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14950; SKX-NEXT: retq # sched: [7:1.00] 14951 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14952 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 14953 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 14954 ret <4 x double> %res 14955} 14956define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 14957; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask1: 14958; GENERIC: # %bb.0: 14959; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 14960; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14961; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 14962; GENERIC-NEXT: retq # sched: [1:1.00] 14963; 14964; SKX-LABEL: test_4xdouble_masked_unpack_high_mask1: 14965; SKX: # %bb.0: 14966; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 14967; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14968; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 14969; SKX-NEXT: retq # sched: [7:1.00] 14970 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14971 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 14972 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 14973 ret <4 x double> %res 14974} 14975 14976define <4 x 
double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 14977; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: 14978; GENERIC: # %bb.0: 14979; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 14980; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14981; GENERIC-NEXT: retq # sched: [1:1.00] 14982; 14983; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: 14984; SKX: # %bb.0: 14985; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 14986; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14987; SKX-NEXT: retq # sched: [7:1.00] 14988 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14989 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 14990 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 14991 ret <4 x double> %res 14992} 14993define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 14994; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask2: 14995; GENERIC: # %bb.0: 14996; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 14997; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14998; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 14999; GENERIC-NEXT: retq # sched: [1:1.00] 15000; 15001; SKX-LABEL: test_4xdouble_masked_unpack_high_mask2: 15002; SKX: # %bb.0: 15003; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 15004; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15005; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 15006; SKX-NEXT: retq # sched: [7:1.00] 15007 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15008 %cmp = icmp eq <4 x i64> %mask, 
zeroinitializer 15009 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15010 ret <4 x double> %res 15011} 15012 15013define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 15014; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: 15015; GENERIC: # %bb.0: 15016; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15017; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15018; GENERIC-NEXT: retq # sched: [1:1.00] 15019; 15020; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: 15021; SKX: # %bb.0: 15022; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15023; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15024; SKX-NEXT: retq # sched: [7:1.00] 15025 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15026 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15027 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15028 ret <4 x double> %res 15029} 15030define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2) { 15031; GENERIC-LABEL: test_4xdouble_unpack_high_mask3: 15032; GENERIC: # %bb.0: 15033; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15034; GENERIC-NEXT: retq # sched: [1:1.00] 15035; 15036; SKX-LABEL: test_4xdouble_unpack_high_mask3: 15037; SKX: # %bb.0: 15038; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15039; SKX-NEXT: retq # sched: [7:1.00] 15040 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15041 ret <4 x double> %res 15042} 15043define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 15044; GENERIC-LABEL: 
test_4xdouble_masked_unpack_high_mask3: 15045; GENERIC: # %bb.0: 15046; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 15047; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15048; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 15049; GENERIC-NEXT: retq # sched: [1:1.00] 15050; 15051; SKX-LABEL: test_4xdouble_masked_unpack_high_mask3: 15052; SKX: # %bb.0: 15053; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 15054; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15055; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 15056; SKX-NEXT: retq # sched: [7:1.00] 15057 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15058 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15059 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15060 ret <4 x double> %res 15061} 15062 15063define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 15064; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: 15065; GENERIC: # %bb.0: 15066; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15067; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15068; GENERIC-NEXT: retq # sched: [1:1.00] 15069; 15070; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: 15071; SKX: # %bb.0: 15072; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15073; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15074; SKX-NEXT: retq # sched: [7:1.00] 15075 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15076 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15077 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15078 ret <4 x double> %res 15079} 15080define <4 x double> 
@test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { 15081; GENERIC-LABEL: test_4xdouble_unpack_high_mem_mask0: 15082; GENERIC: # %bb.0: 15083; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15084; GENERIC-NEXT: retq # sched: [1:1.00] 15085; 15086; SKX-LABEL: test_4xdouble_unpack_high_mem_mask0: 15087; SKX: # %bb.0: 15088; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15089; SKX-NEXT: retq # sched: [7:1.00] 15090 %vec2 = load <4 x double>, <4 x double>* %vec2p 15091 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15092 ret <4 x double> %res 15093} 15094define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 15095; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: 15096; GENERIC: # %bb.0: 15097; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15098; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15099; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 15100; GENERIC-NEXT: retq # sched: [1:1.00] 15101; 15102; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: 15103; SKX: # %bb.0: 15104; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15105; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15106; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 15107; SKX-NEXT: retq # sched: [7:1.00] 15108 %vec2 = load <4 x double>, <4 x double>* %vec2p 15109 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15110 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15111 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15112 ret <4 x double> %res 15113} 15114 15115define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x 
double>* %vec2p, <4 x i64> %mask) { 15116; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: 15117; GENERIC: # %bb.0: 15118; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 15119; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15120; GENERIC-NEXT: retq # sched: [1:1.00] 15121; 15122; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: 15123; SKX: # %bb.0: 15124; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 15125; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15126; SKX-NEXT: retq # sched: [7:1.00] 15127 %vec2 = load <4 x double>, <4 x double>* %vec2p 15128 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15129 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15130 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15131 ret <4 x double> %res 15132} 15133 15134define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 15135; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: 15136; GENERIC: # %bb.0: 15137; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15138; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15139; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 15140; GENERIC-NEXT: retq # sched: [1:1.00] 15141; 15142; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: 15143; SKX: # %bb.0: 15144; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15145; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15146; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 15147; SKX-NEXT: retq # sched: [7:1.00] 15148 %vec2 = load <4 x double>, <4 x double>* %vec2p 15149 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 
15150 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15151 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15152 ret <4 x double> %res 15153} 15154 15155define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 15156; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: 15157; GENERIC: # %bb.0: 15158; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 15159; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15160; GENERIC-NEXT: retq # sched: [1:1.00] 15161; 15162; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: 15163; SKX: # %bb.0: 15164; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 15165; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15166; SKX-NEXT: retq # sched: [7:1.00] 15167 %vec2 = load <4 x double>, <4 x double>* %vec2p 15168 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15169 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15170 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15171 ret <4 x double> %res 15172} 15173 15174define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 15175; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: 15176; GENERIC: # %bb.0: 15177; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15178; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15179; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 15180; GENERIC-NEXT: retq # sched: [1:1.00] 15181; 15182; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: 15183; SKX: # %bb.0: 15184; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15185; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] 
sched: [8:1.00] 15186; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 15187; SKX-NEXT: retq # sched: [7:1.00] 15188 %vec2 = load <4 x double>, <4 x double>* %vec2p 15189 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15190 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15191 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15192 ret <4 x double> %res 15193} 15194 15195define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 15196; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: 15197; GENERIC: # %bb.0: 15198; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 15199; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15200; GENERIC-NEXT: retq # sched: [1:1.00] 15201; 15202; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: 15203; SKX: # %bb.0: 15204; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 15205; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15206; SKX-NEXT: retq # sched: [7:1.00] 15207 %vec2 = load <4 x double>, <4 x double>* %vec2p 15208 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15209 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15210 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15211 ret <4 x double> %res 15212} 15213 15214define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { 15215; GENERIC-LABEL: test_4xdouble_unpack_high_mem_mask3: 15216; GENERIC: # %bb.0: 15217; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15218; GENERIC-NEXT: retq # sched: [1:1.00] 15219; 15220; SKX-LABEL: test_4xdouble_unpack_high_mem_mask3: 15221; SKX: # %bb.0: 15222; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = 
ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15223; SKX-NEXT: retq # sched: [7:1.00] 15224 %vec2 = load <4 x double>, <4 x double>* %vec2p 15225 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15226 ret <4 x double> %res 15227} 15228define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 15229; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: 15230; GENERIC: # %bb.0: 15231; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15232; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15233; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 15234; GENERIC-NEXT: retq # sched: [1:1.00] 15235; 15236; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: 15237; SKX: # %bb.0: 15238; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15239; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15240; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 15241; SKX-NEXT: retq # sched: [7:1.00] 15242 %vec2 = load <4 x double>, <4 x double>* %vec2p 15243 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15244 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15245 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15246 ret <4 x double> %res 15247} 15248 15249define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 15250; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: 15251; GENERIC: # %bb.0: 15252; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 15253; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15254; GENERIC-NEXT: retq # sched: [1:1.00] 15255; 15256; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: 15257; SKX: # 
%bb.0: 15258; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 15259; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15260; SKX-NEXT: retq # sched: [7:1.00] 15261 %vec2 = load <4 x double>, <4 x double>* %vec2p 15262 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15263 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15264 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15265 ret <4 x double> %res 15266} 15267 15268define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) { 15269; GENERIC-LABEL: test_8xdouble_unpack_high_mask0: 15270; GENERIC: # %bb.0: 15271; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15272; GENERIC-NEXT: retq # sched: [1:1.00] 15273; 15274; SKX-LABEL: test_8xdouble_unpack_high_mask0: 15275; SKX: # %bb.0: 15276; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15277; SKX-NEXT: retq # sched: [7:1.00] 15278 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15279 ret <8 x double> %res 15280} 15281define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 15282; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask0: 15283; GENERIC: # %bb.0: 15284; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 15285; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15286; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 15287; GENERIC-NEXT: retq # sched: [1:1.00] 15288; 15289; SKX-LABEL: test_8xdouble_masked_unpack_high_mask0: 15290; SKX: # %bb.0: 15291; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 
15292; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15293; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 15294; SKX-NEXT: retq # sched: [7:1.00] 15295 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15296 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15297 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 15298 ret <8 x double> %res 15299} 15300 15301define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 15302; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: 15303; GENERIC: # %bb.0: 15304; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 15305; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15306; GENERIC-NEXT: retq # sched: [1:1.00] 15307; 15308; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: 15309; SKX: # %bb.0: 15310; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 15311; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15312; SKX-NEXT: retq # sched: [7:1.00] 15313 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15314 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15315 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 15316 ret <8 x double> %res 15317} 15318define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 15319; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask1: 15320; GENERIC: # %bb.0: 15321; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 15322; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = 
zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15323; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 15324; GENERIC-NEXT: retq # sched: [1:1.00] 15325; 15326; SKX-LABEL: test_8xdouble_masked_unpack_high_mask1: 15327; SKX: # %bb.0: 15328; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 15329; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15330; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 15331; SKX-NEXT: retq # sched: [7:1.00] 15332 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15333 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15334 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 15335 ret <8 x double> %res 15336} 15337 15338define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 15339; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: 15340; GENERIC: # %bb.0: 15341; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 15342; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15343; GENERIC-NEXT: retq # sched: [1:1.00] 15344; 15345; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: 15346; SKX: # %bb.0: 15347; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 15348; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15349; SKX-NEXT: retq # sched: [7:1.00] 15350 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15351 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15352 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 15353 ret <8 x double> %res 15354} 15355define <8 x double> 
@test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 15356; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask2: 15357; GENERIC: # %bb.0: 15358; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 15359; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15360; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 15361; GENERIC-NEXT: retq # sched: [1:1.00] 15362; 15363; SKX-LABEL: test_8xdouble_masked_unpack_high_mask2: 15364; SKX: # %bb.0: 15365; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 15366; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15367; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 15368; SKX-NEXT: retq # sched: [7:1.00] 15369 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15370 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15371 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 15372 ret <8 x double> %res 15373} 15374 15375define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 15376; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: 15377; GENERIC: # %bb.0: 15378; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 15379; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15380; GENERIC-NEXT: retq # sched: [1:1.00] 15381; 15382; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: 15383; SKX: # %bb.0: 15384; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 15385; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15386; SKX-NEXT: retq # sched: [7:1.00] 15387 %shuf 
= shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15388 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15389 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 15390 ret <8 x double> %res 15391} 15392define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) { 15393; GENERIC-LABEL: test_8xdouble_unpack_high_mask3: 15394; GENERIC: # %bb.0: 15395; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15396; GENERIC-NEXT: retq # sched: [1:1.00] 15397; 15398; SKX-LABEL: test_8xdouble_unpack_high_mask3: 15399; SKX: # %bb.0: 15400; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15401; SKX-NEXT: retq # sched: [7:1.00] 15402 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15403 ret <8 x double> %res 15404} 15405define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 15406; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask3: 15407; GENERIC: # %bb.0: 15408; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 15409; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15410; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 15411; GENERIC-NEXT: retq # sched: [1:1.00] 15412; 15413; SKX-LABEL: test_8xdouble_masked_unpack_high_mask3: 15414; SKX: # %bb.0: 15415; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 15416; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15417; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 15418; SKX-NEXT: retq # sched: [7:1.00] 15419 %shuf = 
shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15420 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15421 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 15422 ret <8 x double> %res 15423} 15424 15425define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 15426; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: 15427; GENERIC: # %bb.0: 15428; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 15429; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15430; GENERIC-NEXT: retq # sched: [1:1.00] 15431; 15432; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: 15433; SKX: # %bb.0: 15434; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 15435; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15436; SKX-NEXT: retq # sched: [7:1.00] 15437 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15438 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15439 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 15440 ret <8 x double> %res 15441} 15442define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { 15443; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0: 15444; GENERIC: # %bb.0: 15445; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] 15446; GENERIC-NEXT: retq # sched: [1:1.00] 15447; 15448; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0: 15449; SKX: # %bb.0: 15450; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] 15451; SKX-NEXT: retq # sched: [7:1.00] 
; Load the second shuffle input from %vec2p, then pick the odd-indexed doubles
; alternately from %vec1 and the loaded vector (indices 1,9,3,11,5,13,7,15).
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}
; Merge-masked unpack-high with a memory operand: the second shuffle input is
; loaded from %vec2p, and each result lane comes from the shuffle where the
; corresponding %mask element equals zero, otherwise from %vec3.
; The assertions inside the function are autogenerated (see the NOTE at the top
; of this file); regenerate them with the update script instead of hand-editing.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked unpack-high with a memory operand: result lanes come from the
; shuffle where the corresponding %mask element equals zero; all other lanes
; are zero (the select's false operand is zeroinitializer).
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked unpack-high from memory, "_mask1" instance. The IR body (shuffle
; indices, compare against zero, select with %vec3 as fallback) is the same as
; in the "_mask0" instance; only the function name differs.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked unpack-high from memory, "_mask1" instance: lanes whose %mask
; element is zero take the shuffle result, the rest are zeroed.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Merge-masked unpack-high from memory, "_mask2" instance: lanes whose %mask
; element is zero take the shuffle result, the rest keep %vec3.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked unpack-high from memory, "_mask2" instance: lanes whose %mask
; element is zero take the shuffle result, the rest are zeroed.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Unmasked unpack-high with a memory operand: loads the second input from
; %vec2p and returns the shuffle selecting indices 1,9,3,11,5,13,7,15 directly,
; with no compare or select.
define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}
; Merge-masked unpack-high from memory, "_mask3" instance: lanes whose %mask
; element is zero take the shuffle result, the rest keep %vec3.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked unpack-high from memory, "_mask3" instance: lanes whose %mask
; element is zero take the shuffle result, the rest are zeroed.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
