; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s

; Single-source shufflevector tests with constant index vectors.
;
; Function naming used below:
;   test_<T>_perm[_mem]_maskN           - plain shuffle, result returned directly.
;   test_masked_<T>_perm[_mem]_maskN    - shuffle result is selected per lane
;                                         against %vec2; the expected assembly
;                                         carries a {%k1} write-mask.
;   test_masked_z_<T>_perm[_mem]_maskN  - shuffle result is selected per lane
;                                         against zeroinitializer; the expected
;                                         assembly carries {%k1} {z}.
; In the masked forms a lane takes the shuffled value when the matching lane
; of %mask compares equal to zero. The _mem forms load the source vector
; from %vp before shuffling.
;
; The assertion lines are generated; regenerate them with the script named in
; the first line of this file instead of editing them by hand.

;------------------------------------------------------------------------------
; <16 x i16>, source vector passed by value
;------------------------------------------------------------------------------

define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) {
; CHECK-LABEL: test_16xi16_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
; CHECK-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %out = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
; CHECK-NEXT:    vptestnmw %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> %vec2
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0]
; CHECK-NEXT:    vptestnmw %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> %vec2
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0]
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7]
; CHECK-NEXT:    vptestnmw %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> %vec2
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7]
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}

define <16 x i16> @test_16xi16_perm_mask3(<16 x i16> %vec) {
; CHECK-LABEL: test_16xi16_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
; CHECK-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %out = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
; CHECK-NEXT:    vptestnmw %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> %vec2
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}

;------------------------------------------------------------------------------
; <16 x i16>, source vector loaded from %vp
;------------------------------------------------------------------------------

define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) {
; CHECK-LABEL: test_16xi16_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
; CHECK-NEXT:    vpermw (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %out = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> %vec2
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11]
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> %vec2
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11]
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9]
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> %vec2
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9]
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}

define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) {
; CHECK-LABEL: test_16xi16_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
; CHECK-NEXT:    vpermw (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %out = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_16xi16_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> %vec2
  ret <16 x i16> %out
}

define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = load <16 x i16>, <16 x i16>* %vp
  %perm = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i16> %perm, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}

;------------------------------------------------------------------------------
; <32 x i16>, source vector passed by value
;------------------------------------------------------------------------------

define <32 x i16> @test_32xi16_perm_mask0(<32 x i16> %vec) {
; CHECK-LABEL: test_32xi16_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10]
; CHECK-NEXT:    vpermw %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %out = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10]
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> %vec2
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10]
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16]
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> %vec2
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16]
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27]
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> %vec2
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27]
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}

define <32 x i16> @test_32xi16_perm_mask3(<32 x i16> %vec) {
; CHECK-LABEL: test_32xi16_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4]
; CHECK-NEXT:    vpermw %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %out = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4]
; CHECK-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> %vec2
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4]
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}

;------------------------------------------------------------------------------
; <32 x i16>, source vector loaded from %vp
;------------------------------------------------------------------------------

define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) {
; CHECK-LABEL: test_32xi16_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12]
; CHECK-NEXT:    vpermw (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %out = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12]
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> %vec2
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12]
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6]
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> %vec2
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6]
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25]
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> %vec2
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25]
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}

define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) {
; CHECK-LABEL: test_32xi16_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27]
; CHECK-NEXT:    vpermw (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %out = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27]
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> %vec2
  ret <32 x i16> %out
}

define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27]
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %vec = load <32 x i16>, <32 x i16>* %vp
  %perm = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i16> %perm, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}

;------------------------------------------------------------------------------
; <8 x i32>, source vector passed by value
; The unmasked case below expects vmovaps/vpermps, while the masked cases
; expect vmovdqa/vpermd.
;------------------------------------------------------------------------------

define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) {
; CHECK-LABEL: test_8xi32_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6]
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %out = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
  ret <8 x i32> %out
}

define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_8xi32_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6]
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %perm = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i32> %perm, <8 x i32> %vec2
  ret <8 x i32> %out
}

define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_8xi32_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6]
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i32> %perm, <8 x i32> zeroinitializer
  ret <8 x i32> %out
}

define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_8xi32_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3]
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %perm = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i32> %perm, <8 x i32> %vec2
  ret <8 x i32> %out
}

define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_8xi32_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3]
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %perm = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i32> %perm, <8 x i32> zeroinitializer
  ret <8 x i32> %out
}

define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_8xi32_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4]
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %perm = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i32> %perm, <8 x i32> %vec2
  ret <8 x i32> %out
}

define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_8xi32_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4]
; CHECK-NEXT:
vptestnmd %ymm1, %ymm1, %k1 592; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} 593; CHECK-NEXT: retq 594 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4> 595 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 596 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 597 ret <8 x i32> %res 598} 599define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) { 600; CHECK-LABEL: test_8xi32_perm_mask3: 601; CHECK: # %bb.0: 602; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] 603; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 604; CHECK-NEXT: retq 605 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0> 606 ret <8 x i32> %res 607} 608define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 609; CHECK-LABEL: test_masked_8xi32_perm_mask3: 610; CHECK: # %bb.0: 611; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] 612; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 613; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} 614; CHECK-NEXT: vmovdqa %ymm1, %ymm0 615; CHECK-NEXT: retq 616 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0> 617 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 618 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 619 ret <8 x i32> %res 620} 621 622define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { 623; CHECK-LABEL: test_masked_z_8xi32_perm_mask3: 624; CHECK: # %bb.0: 625; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] 626; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 627; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} 628; CHECK-NEXT: retq 629 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0> 630 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 631 %res = select <8 x i1> %cmp, 
<8 x i32> %shuf, <8 x i32> zeroinitializer 632 ret <8 x i32> %res 633} 634define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { 635; CHECK-LABEL: test_8xi32_perm_mem_mask0: 636; CHECK: # %bb.0: 637; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] 638; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 639; CHECK-NEXT: retq 640 %vec = load <8 x i32>, <8 x i32>* %vp 641 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5> 642 ret <8 x i32> %res 643} 644define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 645; CHECK-LABEL: test_masked_8xi32_perm_mem_mask0: 646; CHECK: # %bb.0: 647; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] 648; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 649; CHECK-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} 650; CHECK-NEXT: retq 651 %vec = load <8 x i32>, <8 x i32>* %vp 652 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5> 653 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 654 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 655 ret <8 x i32> %res 656} 657 658define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { 659; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask0: 660; CHECK: # %bb.0: 661; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] 662; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 663; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} 664; CHECK-NEXT: retq 665 %vec = load <8 x i32>, <8 x i32>* %vp 666 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5> 667 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 668 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 669 ret <8 x i32> %res 670} 671 672define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 673; CHECK-LABEL: 
test_masked_8xi32_perm_mem_mask1: 674; CHECK: # %bb.0: 675; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] 676; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 677; CHECK-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} 678; CHECK-NEXT: retq 679 %vec = load <8 x i32>, <8 x i32>* %vp 680 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5> 681 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 682 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 683 ret <8 x i32> %res 684} 685 686define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { 687; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask1: 688; CHECK: # %bb.0: 689; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] 690; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 691; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} 692; CHECK-NEXT: retq 693 %vec = load <8 x i32>, <8 x i32>* %vp 694 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5> 695 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 696 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 697 ret <8 x i32> %res 698} 699 700define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 701; CHECK-LABEL: test_masked_8xi32_perm_mem_mask2: 702; CHECK: # %bb.0: 703; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] 704; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 705; CHECK-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} 706; CHECK-NEXT: retq 707 %vec = load <8 x i32>, <8 x i32>* %vp 708 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3> 709 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 710 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 711 ret <8 x i32> %res 712} 713 714define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { 715; 
CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask2: 716; CHECK: # %bb.0: 717; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] 718; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 719; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} 720; CHECK-NEXT: retq 721 %vec = load <8 x i32>, <8 x i32>* %vp 722 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3> 723 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 724 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 725 ret <8 x i32> %res 726} 727 728define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { 729; CHECK-LABEL: test_8xi32_perm_mem_mask3: 730; CHECK: # %bb.0: 731; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] 732; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 733; CHECK-NEXT: retq 734 %vec = load <8 x i32>, <8 x i32>* %vp 735 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5> 736 ret <8 x i32> %res 737} 738define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 739; CHECK-LABEL: test_masked_8xi32_perm_mem_mask3: 740; CHECK: # %bb.0: 741; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] 742; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 743; CHECK-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} 744; CHECK-NEXT: retq 745 %vec = load <8 x i32>, <8 x i32>* %vp 746 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5> 747 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 748 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 749 ret <8 x i32> %res 750} 751 752define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { 753; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask3: 754; CHECK: # %bb.0: 755; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] 756; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 757; CHECK-NEXT: vpermd 
(%rdi), %ymm1, %ymm0 {%k1} {z} 758; CHECK-NEXT: retq 759 %vec = load <8 x i32>, <8 x i32>* %vp 760 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5> 761 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 762 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 763 ret <8 x i32> %res 764} 765 766define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) { 767; CHECK-LABEL: test_16xi32_perm_mask0: 768; CHECK: # %bb.0: 769; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] 770; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 771; CHECK-NEXT: retq 772 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7> 773 ret <16 x i32> %res 774} 775define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 776; CHECK-LABEL: test_masked_16xi32_perm_mask0: 777; CHECK: # %bb.0: 778; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] 779; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 780; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} 781; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 782; CHECK-NEXT: retq 783 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7> 784 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 785 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 786 ret <16 x i32> %res 787} 788 789define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { 790; CHECK-LABEL: test_masked_z_16xi32_perm_mask0: 791; CHECK: # %bb.0: 792; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] 793; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 794; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} 795; 
CHECK-NEXT: retq 796 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7> 797 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 798 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 799 ret <16 x i32> %res 800} 801define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 802; CHECK-LABEL: test_masked_16xi32_perm_mask1: 803; CHECK: # %bb.0: 804; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] 805; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 806; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} 807; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 808; CHECK-NEXT: retq 809 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3> 810 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 811 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 812 ret <16 x i32> %res 813} 814 815define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { 816; CHECK-LABEL: test_masked_z_16xi32_perm_mask1: 817; CHECK: # %bb.0: 818; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] 819; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 820; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} 821; CHECK-NEXT: retq 822 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3> 823 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 824 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 825 ret <16 x i32> %res 826} 827define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 828; CHECK-LABEL: 
test_masked_16xi32_perm_mask2: 829; CHECK: # %bb.0: 830; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] 831; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 832; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} 833; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 834; CHECK-NEXT: retq 835 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5> 836 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 837 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 838 ret <16 x i32> %res 839} 840 841define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { 842; CHECK-LABEL: test_masked_z_16xi32_perm_mask2: 843; CHECK: # %bb.0: 844; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] 845; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 846; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} 847; CHECK-NEXT: retq 848 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5> 849 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 850 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 851 ret <16 x i32> %res 852} 853define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) { 854; CHECK-LABEL: test_16xi32_perm_mask3: 855; CHECK: # %bb.0: 856; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] 857; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 858; CHECK-NEXT: retq 859 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12> 860 ret <16 x i32> %res 861} 862define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 863; CHECK-LABEL: 
test_masked_16xi32_perm_mask3: 864; CHECK: # %bb.0: 865; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] 866; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 867; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} 868; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 869; CHECK-NEXT: retq 870 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12> 871 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 872 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 873 ret <16 x i32> %res 874} 875 876define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { 877; CHECK-LABEL: test_masked_z_16xi32_perm_mask3: 878; CHECK: # %bb.0: 879; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] 880; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 881; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} 882; CHECK-NEXT: retq 883 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12> 884 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 885 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 886 ret <16 x i32> %res 887} 888define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) { 889; CHECK-LABEL: test_16xi32_perm_mem_mask0: 890; CHECK: # %bb.0: 891; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] 892; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 893; CHECK-NEXT: retq 894 %vec = load <16 x i32>, <16 x i32>* %vp 895 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6> 896 ret <16 x i32> %res 897} 898define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, 
<16 x i32> %mask) { 899; CHECK-LABEL: test_masked_16xi32_perm_mem_mask0: 900; CHECK: # %bb.0: 901; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] 902; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 903; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} 904; CHECK-NEXT: retq 905 %vec = load <16 x i32>, <16 x i32>* %vp 906 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6> 907 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 908 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 909 ret <16 x i32> %res 910} 911 912define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { 913; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask0: 914; CHECK: # %bb.0: 915; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] 916; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 917; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} 918; CHECK-NEXT: retq 919 %vec = load <16 x i32>, <16 x i32>* %vp 920 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6> 921 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 922 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 923 ret <16 x i32> %res 924} 925 926define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 927; CHECK-LABEL: test_masked_16xi32_perm_mem_mask1: 928; CHECK: # %bb.0: 929; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] 930; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 931; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} 932; CHECK-NEXT: retq 933 %vec = load <16 x i32>, <16 x i32>* %vp 934 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 
12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3> 935 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 936 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 937 ret <16 x i32> %res 938} 939 940define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { 941; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask1: 942; CHECK: # %bb.0: 943; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] 944; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 945; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} 946; CHECK-NEXT: retq 947 %vec = load <16 x i32>, <16 x i32>* %vp 948 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3> 949 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 950 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 951 ret <16 x i32> %res 952} 953 954define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 955; CHECK-LABEL: test_masked_16xi32_perm_mem_mask2: 956; CHECK: # %bb.0: 957; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] 958; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 959; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} 960; CHECK-NEXT: retq 961 %vec = load <16 x i32>, <16 x i32>* %vp 962 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2> 963 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 964 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 965 ret <16 x i32> %res 966} 967 968define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { 969; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask2: 970; CHECK: # %bb.0: 971; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] 972; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 973; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} 974; CHECK-NEXT: retq 975 %vec = load <16 x i32>, <16 x i32>* %vp 976 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2> 977 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 978 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 979 ret <16 x i32> %res 980} 981 982define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) { 983; CHECK-LABEL: test_16xi32_perm_mem_mask3: 984; CHECK: # %bb.0: 985; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] 986; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 987; CHECK-NEXT: retq 988 %vec = load <16 x i32>, <16 x i32>* %vp 989 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1> 990 ret <16 x i32> %res 991} 992define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 993; CHECK-LABEL: test_masked_16xi32_perm_mem_mask3: 994; CHECK: # %bb.0: 995; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] 996; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 997; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} 998; CHECK-NEXT: retq 999 %vec = load <16 x i32>, <16 x i32>* %vp 1000 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1> 1001 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1002 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1003 ret <16 x i32> %res 1004} 1005 1006define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { 1007; 
CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask3: 1008; CHECK: # %bb.0: 1009; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] 1010; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 1011; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} 1012; CHECK-NEXT: retq 1013 %vec = load <16 x i32>, <16 x i32>* %vp 1014 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1> 1015 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1016 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1017 ret <16 x i32> %res 1018} 1019 1020define <4 x i64> @test_4xi64_perm_mask0(<4 x i64> %vec) { 1021; CHECK-LABEL: test_4xi64_perm_mask0: 1022; CHECK: # %bb.0: 1023; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] 1024; CHECK-NEXT: retq 1025 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1> 1026 ret <4 x i64> %res 1027} 1028define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1029; CHECK-LABEL: test_masked_4xi64_perm_mask0: 1030; CHECK: # %bb.0: 1031; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1032; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] 1033; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1034; CHECK-NEXT: retq 1035 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1> 1036 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1037 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1038 ret <4 x i64> %res 1039} 1040 1041define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) { 1042; CHECK-LABEL: test_masked_z_4xi64_perm_mask0: 1043; CHECK: # %bb.0: 1044; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1045; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] 1046; CHECK-NEXT: retq 1047 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, 
i32 3, i32 1> 1048 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1049 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1050 ret <4 x i64> %res 1051} 1052define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1053; CHECK-LABEL: test_masked_4xi64_perm_mask1: 1054; CHECK: # %bb.0: 1055; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1056; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] 1057; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1058; CHECK-NEXT: retq 1059 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3> 1060 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1061 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1062 ret <4 x i64> %res 1063} 1064 1065define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) { 1066; CHECK-LABEL: test_masked_z_4xi64_perm_mask1: 1067; CHECK: # %bb.0: 1068; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1069; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] 1070; CHECK-NEXT: retq 1071 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3> 1072 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1073 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1074 ret <4 x i64> %res 1075} 1076define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1077; CHECK-LABEL: test_masked_4xi64_perm_mask2: 1078; CHECK: # %bb.0: 1079; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1080; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] 1081; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1082; CHECK-NEXT: retq 1083 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1> 1084 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1085 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1086 ret <4 x i64> %res 1087} 1088 1089define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x 
i64> %mask) { 1090; CHECK-LABEL: test_masked_z_4xi64_perm_mask2: 1091; CHECK: # %bb.0: 1092; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1093; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] 1094; CHECK-NEXT: retq 1095 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1> 1096 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1097 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1098 ret <4 x i64> %res 1099} 1100define <4 x i64> @test_4xi64_perm_mask3(<4 x i64> %vec) { 1101; CHECK-LABEL: test_4xi64_perm_mask3: 1102; CHECK: # %bb.0: 1103; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] 1104; CHECK-NEXT: retq 1105 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3> 1106 ret <4 x i64> %res 1107} 1108define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1109; CHECK-LABEL: test_masked_4xi64_perm_mask3: 1110; CHECK: # %bb.0: 1111; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1112; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] 1113; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1114; CHECK-NEXT: retq 1115 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3> 1116 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1117 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1118 ret <4 x i64> %res 1119} 1120 1121define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) { 1122; CHECK-LABEL: test_masked_z_4xi64_perm_mask3: 1123; CHECK: # %bb.0: 1124; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1125; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] 1126; CHECK-NEXT: retq 1127 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3> 1128 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1129 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1130 ret <4 x i64> %res 1131} 1132define <4 x i64> 
@test_4xi64_perm_mem_mask0(<4 x i64>* %vp) { 1133; CHECK-LABEL: test_4xi64_perm_mem_mask0: 1134; CHECK: # %bb.0: 1135; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] 1136; CHECK-NEXT: retq 1137 %vec = load <4 x i64>, <4 x i64>* %vp 1138 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0> 1139 ret <4 x i64> %res 1140} 1141define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1142; CHECK-LABEL: test_masked_4xi64_perm_mem_mask0: 1143; CHECK: # %bb.0: 1144; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1145; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] 1146; CHECK-NEXT: retq 1147 %vec = load <4 x i64>, <4 x i64>* %vp 1148 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0> 1149 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1150 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1151 ret <4 x i64> %res 1152} 1153 1154define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) { 1155; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask0: 1156; CHECK: # %bb.0: 1157; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1158; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] 1159; CHECK-NEXT: retq 1160 %vec = load <4 x i64>, <4 x i64>* %vp 1161 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0> 1162 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1163 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1164 ret <4 x i64> %res 1165} 1166 1167define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1168; CHECK-LABEL: test_masked_4xi64_perm_mem_mask1: 1169; CHECK: # %bb.0: 1170; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1171; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] 1172; CHECK-NEXT: retq 1173 %vec = load <4 x i64>, <4 x i64>* %vp 1174 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x 
i32> <i32 2, i32 1, i32 1, i32 1> 1175 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1176 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1177 ret <4 x i64> %res 1178} 1179 1180define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) { 1181; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask1: 1182; CHECK: # %bb.0: 1183; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1184; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] 1185; CHECK-NEXT: retq 1186 %vec = load <4 x i64>, <4 x i64>* %vp 1187 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 1188 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1189 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1190 ret <4 x i64> %res 1191} 1192 1193define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1194; CHECK-LABEL: test_masked_4xi64_perm_mem_mask2: 1195; CHECK: # %bb.0: 1196; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1197; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] 1198; CHECK-NEXT: retq 1199 %vec = load <4 x i64>, <4 x i64>* %vp 1200 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0> 1201 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1202 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1203 ret <4 x i64> %res 1204} 1205 1206define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %mask) { 1207; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask2: 1208; CHECK: # %bb.0: 1209; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1210; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] 1211; CHECK-NEXT: retq 1212 %vec = load <4 x i64>, <4 x i64>* %vp 1213 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0> 1214 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1215 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1216 ret <4 x i64> 
%res 1217} 1218 1219define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) { 1220; CHECK-LABEL: test_4xi64_perm_mem_mask3: 1221; CHECK: # %bb.0: 1222; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] 1223; CHECK-NEXT: retq 1224 %vec = load <4 x i64>, <4 x i64>* %vp 1225 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3> 1226 ret <4 x i64> %res 1227} 1228define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1229; CHECK-LABEL: test_masked_4xi64_perm_mem_mask3: 1230; CHECK: # %bb.0: 1231; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1232; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] 1233; CHECK-NEXT: retq 1234 %vec = load <4 x i64>, <4 x i64>* %vp 1235 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3> 1236 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1237 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1238 ret <4 x i64> %res 1239} 1240 1241define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %mask) { 1242; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask3: 1243; CHECK: # %bb.0: 1244; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1245; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] 1246; CHECK-NEXT: retq 1247 %vec = load <4 x i64>, <4 x i64>* %vp 1248 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3> 1249 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1250 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1251 ret <4 x i64> %res 1252} 1253 1254define <8 x i64> @test_8xi64_perm_mask0(<8 x i64> %vec) { 1255; CHECK-LABEL: test_8xi64_perm_mask0: 1256; CHECK: # %bb.0: 1257; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] 1258; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 1259; CHECK-NEXT: retq 1260 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6> 
1261 ret <8 x i64> %res 1262} 1263define <8 x i64> @test_masked_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1264; CHECK-LABEL: test_masked_8xi64_perm_mask0: 1265; CHECK: # %bb.0: 1266; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] 1267; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1268; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} 1269; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1270; CHECK-NEXT: retq 1271 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6> 1272 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1273 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1274 ret <8 x i64> %res 1275} 1276 1277define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask) { 1278; CHECK-LABEL: test_masked_z_8xi64_perm_mask0: 1279; CHECK: # %bb.0: 1280; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] 1281; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1282; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} 1283; CHECK-NEXT: retq 1284 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6> 1285 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1286 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1287 ret <8 x i64> %res 1288} 1289define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1290; CHECK-LABEL: test_masked_8xi64_perm_imm_mask1: 1291; CHECK: # %bb.0: 1292; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1293; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] 1294; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1295; CHECK-NEXT: retq 1296 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5> 1297 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1298 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1299 ret <8 x i64> %res 1300} 1301 1302define <8 
x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) { 1303; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask1: 1304; CHECK: # %bb.0: 1305; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1306; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] 1307; CHECK-NEXT: retq 1308 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5> 1309 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1310 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1311 ret <8 x i64> %res 1312} 1313define <8 x i64> @test_masked_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1314; CHECK-LABEL: test_masked_8xi64_perm_mask2: 1315; CHECK: # %bb.0: 1316; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] 1317; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1318; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} 1319; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1320; CHECK-NEXT: retq 1321 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1> 1322 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1323 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1324 ret <8 x i64> %res 1325} 1326 1327define <8 x i64> @test_masked_z_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %mask) { 1328; CHECK-LABEL: test_masked_z_8xi64_perm_mask2: 1329; CHECK: # %bb.0: 1330; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] 1331; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1332; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} 1333; CHECK-NEXT: retq 1334 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1> 1335 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1336 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1337 ret <8 x i64> %res 1338} 1339define <8 x i64> @test_8xi64_perm_imm_mask3(<8 x i64> %vec) { 1340; CHECK-LABEL: 
test_8xi64_perm_imm_mask3: 1341; CHECK: # %bb.0: 1342; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] 1343; CHECK-NEXT: retq 1344 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5> 1345 ret <8 x i64> %res 1346} 1347define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1348; CHECK-LABEL: test_masked_8xi64_perm_imm_mask3: 1349; CHECK: # %bb.0: 1350; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1351; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] 1352; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1353; CHECK-NEXT: retq 1354 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5> 1355 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1356 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1357 ret <8 x i64> %res 1358} 1359 1360define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) { 1361; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask3: 1362; CHECK: # %bb.0: 1363; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1364; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] 1365; CHECK-NEXT: retq 1366 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5> 1367 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1368 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1369 ret <8 x i64> %res 1370} 1371define <8 x i64> @test_masked_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1372; CHECK-LABEL: test_masked_8xi64_perm_mask4: 1373; CHECK: # %bb.0: 1374; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] 1375; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1376; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} 1377; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1378; CHECK-NEXT: retq 1379 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> 
<i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3> 1380 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1381 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1382 ret <8 x i64> %res 1383} 1384 1385define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask) { 1386; CHECK-LABEL: test_masked_z_8xi64_perm_mask4: 1387; CHECK: # %bb.0: 1388; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] 1389; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1390; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} 1391; CHECK-NEXT: retq 1392 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3> 1393 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1394 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1395 ret <8 x i64> %res 1396} 1397define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1398; CHECK-LABEL: test_masked_8xi64_perm_imm_mask5: 1399; CHECK: # %bb.0: 1400; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1401; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] 1402; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1403; CHECK-NEXT: retq 1404 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 1405 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1406 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1407 ret <8 x i64> %res 1408} 1409 1410define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) { 1411; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask5: 1412; CHECK: # %bb.0: 1413; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1414; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] 1415; CHECK-NEXT: retq 1416 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 1417 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1418 %res = select 
<8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1419 ret <8 x i64> %res 1420} 1421define <8 x i64> @test_8xi64_perm_mask6(<8 x i64> %vec) { 1422; CHECK-LABEL: test_8xi64_perm_mask6: 1423; CHECK: # %bb.0: 1424; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] 1425; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 1426; CHECK-NEXT: retq 1427 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7> 1428 ret <8 x i64> %res 1429} 1430define <8 x i64> @test_masked_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1431; CHECK-LABEL: test_masked_8xi64_perm_mask6: 1432; CHECK: # %bb.0: 1433; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] 1434; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1435; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} 1436; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1437; CHECK-NEXT: retq 1438 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7> 1439 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1440 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1441 ret <8 x i64> %res 1442} 1443 1444define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask) { 1445; CHECK-LABEL: test_masked_z_8xi64_perm_mask6: 1446; CHECK: # %bb.0: 1447; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] 1448; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1449; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} 1450; CHECK-NEXT: retq 1451 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7> 1452 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1453 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1454 ret <8 x i64> %res 1455} 1456define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1457; CHECK-LABEL: test_masked_8xi64_perm_imm_mask7: 1458; CHECK: # %bb.0: 1459; 
CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1460; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] 1461; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1462; CHECK-NEXT: retq 1463 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7> 1464 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1465 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1466 ret <8 x i64> %res 1467} 1468 1469define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) { 1470; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask7: 1471; CHECK: # %bb.0: 1472; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1473; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] 1474; CHECK-NEXT: retq 1475 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7> 1476 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1477 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1478 ret <8 x i64> %res 1479} 1480define <8 x i64> @test_8xi64_perm_mem_mask0(<8 x i64>* %vp) { 1481; CHECK-LABEL: test_8xi64_perm_mem_mask0: 1482; CHECK: # %bb.0: 1483; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] 1484; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 1485; CHECK-NEXT: retq 1486 %vec = load <8 x i64>, <8 x i64>* %vp 1487 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3> 1488 ret <8 x i64> %res 1489} 1490define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 1491; CHECK-LABEL: test_masked_8xi64_perm_mem_mask0: 1492; CHECK: # %bb.0: 1493; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] 1494; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1495; CHECK-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} 1496; CHECK-NEXT: retq 1497 %vec = load <8 x i64>, <8 x i64>* %vp 1498 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 
5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3> 1499 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1500 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1501 ret <8 x i64> %res 1502} 1503 1504define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %mask) { 1505; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask0: 1506; CHECK: # %bb.0: 1507; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] 1508; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1509; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} 1510; CHECK-NEXT: retq 1511 %vec = load <8 x i64>, <8 x i64>* %vp 1512 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3> 1513 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1514 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1515 ret <8 x i64> %res 1516} 1517 1518define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 1519; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask1: 1520; CHECK: # %bb.0: 1521; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1522; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] 1523; CHECK-NEXT: retq 1524 %vec = load <8 x i64>, <8 x i64>* %vp 1525 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 0, i32 5, i32 5, i32 5, i32 4> 1526 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1527 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1528 ret <8 x i64> %res 1529} 1530 1531define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) { 1532; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: 1533; CHECK: # %bb.0: 1534; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1535; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] 1536; CHECK-NEXT: retq 1537 %vec = load <8 x i64>, <8 x i64>* %vp 1538 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 
1, i32 0, i32 5, i32 5, i32 5, i32 4> 1539 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1540 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1541 ret <8 x i64> %res 1542} 1543 1544define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 1545; CHECK-LABEL: test_masked_8xi64_perm_mem_mask2: 1546; CHECK: # %bb.0: 1547; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] 1548; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1549; CHECK-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} 1550; CHECK-NEXT: retq 1551 %vec = load <8 x i64>, <8 x i64>* %vp 1552 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5> 1553 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1554 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1555 ret <8 x i64> %res 1556} 1557 1558define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %mask) { 1559; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask2: 1560; CHECK: # %bb.0: 1561; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] 1562; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1563; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} 1564; CHECK-NEXT: retq 1565 %vec = load <8 x i64>, <8 x i64>* %vp 1566 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5> 1567 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1568 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1569 ret <8 x i64> %res 1570} 1571 1572define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) { 1573; CHECK-LABEL: test_8xi64_perm_imm_mem_mask3: 1574; CHECK: # %bb.0: 1575; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] 1576; CHECK-NEXT: retq 1577 %vec = load <8 x i64>, <8 x i64>* %vp 1578 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5> 1579 ret <8 x i64> 
%res 1580} 1581define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 1582; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask3: 1583; CHECK: # %bb.0: 1584; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1585; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] 1586; CHECK-NEXT: retq 1587 %vec = load <8 x i64>, <8 x i64>* %vp 1588 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5> 1589 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1590 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1591 ret <8 x i64> %res 1592} 1593 1594define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) { 1595; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: 1596; CHECK: # %bb.0: 1597; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1598; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] 1599; CHECK-NEXT: retq 1600 %vec = load <8 x i64>, <8 x i64>* %vp 1601 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5> 1602 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1603 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1604 ret <8 x i64> %res 1605} 1606 1607define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 1608; CHECK-LABEL: test_masked_8xi64_perm_mem_mask4: 1609; CHECK: # %bb.0: 1610; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] 1611; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1612; CHECK-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} 1613; CHECK-NEXT: retq 1614 %vec = load <8 x i64>, <8 x i64>* %vp 1615 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6> 1616 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1617 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1618 ret <8 x i64> %res 
1619} 1620 1621define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %mask) { 1622; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask4: 1623; CHECK: # %bb.0: 1624; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] 1625; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1626; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} 1627; CHECK-NEXT: retq 1628 %vec = load <8 x i64>, <8 x i64>* %vp 1629 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6> 1630 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1631 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1632 ret <8 x i64> %res 1633} 1634 1635define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 1636; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask5: 1637; CHECK: # %bb.0: 1638; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1639; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] 1640; CHECK-NEXT: retq 1641 %vec = load <8 x i64>, <8 x i64>* %vp 1642 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4> 1643 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1644 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1645 ret <8 x i64> %res 1646} 1647 1648define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) { 1649; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: 1650; CHECK: # %bb.0: 1651; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1652; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] 1653; CHECK-NEXT: retq 1654 %vec = load <8 x i64>, <8 x i64>* %vp 1655 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4> 1656 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1657 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1658 ret <8 x i64> %res 
1659} 1660 1661define <8 x i64> @test_8xi64_perm_mem_mask6(<8 x i64>* %vp) { 1662; CHECK-LABEL: test_8xi64_perm_mem_mask6: 1663; CHECK: # %bb.0: 1664; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] 1665; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 1666; CHECK-NEXT: retq 1667 %vec = load <8 x i64>, <8 x i64>* %vp 1668 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6> 1669 ret <8 x i64> %res 1670} 1671define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 1672; CHECK-LABEL: test_masked_8xi64_perm_mem_mask6: 1673; CHECK: # %bb.0: 1674; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] 1675; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1676; CHECK-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} 1677; CHECK-NEXT: retq 1678 %vec = load <8 x i64>, <8 x i64>* %vp 1679 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6> 1680 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1681 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1682 ret <8 x i64> %res 1683} 1684 1685define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %mask) { 1686; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask6: 1687; CHECK: # %bb.0: 1688; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] 1689; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1690; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} 1691; CHECK-NEXT: retq 1692 %vec = load <8 x i64>, <8 x i64>* %vp 1693 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6> 1694 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1695 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1696 ret <8 x i64> %res 1697} 1698 1699define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 1700; CHECK-LABEL: 
test_masked_8xi64_perm_imm_mem_mask7: 1701; CHECK: # %bb.0: 1702; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1703; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] 1704; CHECK-NEXT: retq 1705 %vec = load <8 x i64>, <8 x i64>* %vp 1706 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5> 1707 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1708 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1709 ret <8 x i64> %res 1710} 1711 1712define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) { 1713; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: 1714; CHECK: # %bb.0: 1715; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1716; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] 1717; CHECK-NEXT: retq 1718 %vec = load <8 x i64>, <8 x i64>* %vp 1719 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5> 1720 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1721 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1722 ret <8 x i64> %res 1723} 1724 1725define <8 x float> @test_8xfloat_perm_mask0(<8 x float> %vec) { 1726; CHECK-LABEL: test_8xfloat_perm_mask0: 1727; CHECK: # %bb.0: 1728; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] 1729; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 1730; CHECK-NEXT: retq 1731 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4> 1732 ret <8 x float> %res 1733} 1734define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { 1735; CHECK-LABEL: test_masked_8xfloat_perm_mask0: 1736; CHECK: # %bb.0: 1737; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] 1738; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1739; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1 1740; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} 1741; 
CHECK-NEXT: vmovaps %ymm1, %ymm0 1742; CHECK-NEXT: retq 1743 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4> 1744 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1745 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 1746 ret <8 x float> %res 1747} 1748 1749define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %mask) { 1750; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0: 1751; CHECK: # %bb.0: 1752; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] 1753; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1754; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 1755; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} 1756; CHECK-NEXT: retq 1757 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4> 1758 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1759 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1760 ret <8 x float> %res 1761} 1762define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { 1763; CHECK-LABEL: test_masked_8xfloat_perm_mask1: 1764; CHECK: # %bb.0: 1765; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] 1766; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 1767; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1 1768; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} 1769; CHECK-NEXT: vmovaps %ymm1, %ymm0 1770; CHECK-NEXT: retq 1771 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1> 1772 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1773 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 1774 ret <8 x float> %res 1775} 1776 1777define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> %mask) { 1778; CHECK-LABEL: test_masked_z_8xfloat_perm_mask1: 1779; CHECK: # %bb.0: 1780; CHECK-NEXT: vmovaps 
{{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] 1781; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 1782; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 1783; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} 1784; CHECK-NEXT: retq 1785 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1> 1786 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 1787 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 1788 ret <8 x float> %res 1789}

; <8 x float> single-source permutes of a register operand (%vec).
; test_masked_* select the shuffle where `fcmp oeq %mask, 0` is true and %vec2
; elsewhere; test_masked_z_* select zero elsewhere instead.
define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5]
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_perm_mask3(<8 x float> %vec) {
; CHECK-LABEL: test_8xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6>
  ret <8 x float> %res
}

define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6]
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; <8 x float> permutes whose source vector is loaded from %vp; the expected
; output uses (%rdi) directly as the vpermps source operand.
define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) {
; CHECK-LABEL: test_8xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0]
; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0>
  ret <8 x float> %res
}

define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_8xfloat_perm_mem_mask3(<8 x float>* %vp) {
; CHECK-LABEL: test_8xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0]
; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0>
  ret <8 x float> %res
}

define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %vp
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; <16 x float> single-source permutes of a register operand (%vec); same
; unmasked / test_masked_* / test_masked_z_* structure, on zmm registers.
define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) {
; CHECK-LABEL: test_16xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7]
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7>
  ret <16 x float> %res
}

define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7]
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1]
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11]
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_perm_mask3(<16 x float> %vec) {
; CHECK-LABEL: test_16xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3]
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3>
  ret <16 x float> %res
}

define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3]
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; <16 x float> permutes whose source vector is loaded from %vp.
define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) {
; CHECK-LABEL: test_16xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1]
; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1>
  ret <16 x float> %res
}

define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_16xfloat_perm_mem_mask3(<16 x float>* %vp) {
; CHECK-LABEL: test_16xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0]
; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0>
  ret <16 x float> %res
}

define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0]
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %vp
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; <4 x double> single-source permutes of a register operand (%vec). The
; expected vpermpd lines carry the shuffle pattern themselves, with no separate
; index-vector load as in the float cases.
define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) {
; CHECK-LABEL: test_4xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2]
; CHECK-NEXT: retq
  %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
  ret <4 x double> %res
}

define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2]
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0]
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1]
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) {
; CHECK-LABEL: test_4xdouble_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2]
; CHECK-NEXT: retq
  %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
  ret <4 x double> %res
}

define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2]
; CHECK-NEXT: retq
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; <4 x double> permutes whose source vector is loaded from %vp; the expected
; vpermpd lines name the source as mem[...].
define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) {
; CHECK-LABEL: test_4xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0]
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %vp
  %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %res
}

define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0]
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0]
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2]
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2]
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1]
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1]
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) {
; CHECK-LABEL: test_4xdouble_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2]
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %vp
  %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
  ret <4 x double> %res
}
2487define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { 2488; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask3: 2489; CHECK: # %bb.0: 2490; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2491; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 2492; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] 2493; CHECK-NEXT: retq 2494 %vec = load <4 x double>, <4 x double>* %vp 2495 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, 
<4 x i32> <i32 3, i32 2, i32 3, i32 2> 2496 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 2497 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 2498 ret <4 x double> %res 2499} 2500 2501define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %mask) { 2502; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3: 2503; CHECK: # %bb.0: 2504; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 2505; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 2506; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] 2507; CHECK-NEXT: retq 2508 %vec = load <4 x double>, <4 x double>* %vp 2509 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2> 2510 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer 2511 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 2512 ret <4 x double> %res 2513} 2514 2515define <8 x double> @test_8xdouble_perm_mask0(<8 x double> %vec) { 2516; CHECK-LABEL: test_8xdouble_perm_mask0: 2517; CHECK: # %bb.0: 2518; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] 2519; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 2520; CHECK-NEXT: retq 2521 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4> 2522 ret <8 x double> %res 2523} 2524define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { 2525; CHECK-LABEL: test_masked_8xdouble_perm_mask0: 2526; CHECK: # %bb.0: 2527; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] 2528; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 2529; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1 2530; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} 2531; CHECK-NEXT: vmovapd %zmm1, %zmm0 2532; CHECK-NEXT: retq 2533 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4> 2534 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2535 %res = select 
<8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2536 ret <8 x double> %res 2537} 2538 2539define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %mask) { 2540; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0: 2541; CHECK: # %bb.0: 2542; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] 2543; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2544; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 2545; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} 2546; CHECK-NEXT: retq 2547 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4> 2548 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2549 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2550 ret <8 x double> %res 2551} 2552define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { 2553; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask1: 2554; CHECK: # %bb.0: 2555; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2556; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 2557; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] 2558; CHECK-NEXT: vmovapd %zmm1, %zmm0 2559; CHECK-NEXT: retq 2560 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 7, i32 4, i32 4, i32 6> 2561 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2562 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2563 ret <8 x double> %res 2564} 2565 2566define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %mask) { 2567; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask1: 2568; CHECK: # %bb.0: 2569; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2570; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 2571; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] 2572; CHECK-NEXT: retq 2573 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 
2, i32 7, i32 4, i32 4, i32 6> 2574 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2575 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2576 ret <8 x double> %res 2577} 2578define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { 2579; CHECK-LABEL: test_masked_8xdouble_perm_mask2: 2580; CHECK: # %bb.0: 2581; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] 2582; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 2583; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1 2584; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} 2585; CHECK-NEXT: vmovapd %zmm1, %zmm0 2586; CHECK-NEXT: retq 2587 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7> 2588 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2589 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2590 ret <8 x double> %res 2591} 2592 2593define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %mask) { 2594; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2: 2595; CHECK: # %bb.0: 2596; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] 2597; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2598; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 2599; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} 2600; CHECK-NEXT: retq 2601 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7> 2602 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2603 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2604 ret <8 x double> %res 2605} 2606define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) { 2607; CHECK-LABEL: test_8xdouble_perm_imm_mask3: 2608; CHECK: # %bb.0: 2609; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] 2610; CHECK-NEXT: retq 2611 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 
3, i32 0, i32 5, i32 7, i32 7, i32 4> 2612 ret <8 x double> %res 2613} 2614define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { 2615; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask3: 2616; CHECK: # %bb.0: 2617; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2618; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 2619; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] 2620; CHECK-NEXT: vmovapd %zmm1, %zmm0 2621; CHECK-NEXT: retq 2622 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4> 2623 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2624 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2625 ret <8 x double> %res 2626} 2627 2628define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %mask) { 2629; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask3: 2630; CHECK: # %bb.0: 2631; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2632; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 2633; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] 2634; CHECK-NEXT: retq 2635 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4> 2636 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2637 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2638 ret <8 x double> %res 2639} 2640define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { 2641; CHECK-LABEL: test_masked_8xdouble_perm_mask4: 2642; CHECK: # %bb.0: 2643; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] 2644; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 2645; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1 2646; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} 2647; CHECK-NEXT: vmovapd %zmm1, %zmm0 2648; CHECK-NEXT: retq 2649 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x 
i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1> 2650 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2651 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2652 ret <8 x double> %res 2653} 2654 2655define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x double> %mask) { 2656; CHECK-LABEL: test_masked_z_8xdouble_perm_mask4: 2657; CHECK: # %bb.0: 2658; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] 2659; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2660; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 2661; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} 2662; CHECK-NEXT: retq 2663 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1> 2664 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2665 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2666 ret <8 x double> %res 2667} 2668define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { 2669; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask5: 2670; CHECK: # %bb.0: 2671; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2672; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 2673; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] 2674; CHECK-NEXT: vmovapd %zmm1, %zmm0 2675; CHECK-NEXT: retq 2676 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7> 2677 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2678 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2679 ret <8 x double> %res 2680} 2681 2682define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %mask) { 2683; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask5: 2684; CHECK: # %bb.0: 2685; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2686; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 2687; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = 
zmm0[3,3,2,3,7,7,6,7] 2688; CHECK-NEXT: retq 2689 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7> 2690 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2691 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2692 ret <8 x double> %res 2693} 2694define <8 x double> @test_8xdouble_perm_mask6(<8 x double> %vec) { 2695; CHECK-LABEL: test_8xdouble_perm_mask6: 2696; CHECK: # %bb.0: 2697; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] 2698; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 2699; CHECK-NEXT: retq 2700 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2> 2701 ret <8 x double> %res 2702} 2703define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { 2704; CHECK-LABEL: test_masked_8xdouble_perm_mask6: 2705; CHECK: # %bb.0: 2706; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] 2707; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 2708; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1 2709; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} 2710; CHECK-NEXT: vmovapd %zmm1, %zmm0 2711; CHECK-NEXT: retq 2712 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2> 2713 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2714 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2715 ret <8 x double> %res 2716} 2717 2718define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x double> %mask) { 2719; CHECK-LABEL: test_masked_z_8xdouble_perm_mask6: 2720; CHECK: # %bb.0: 2721; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] 2722; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2723; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 2724; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} 2725; CHECK-NEXT: retq 2726 %shuf = shufflevector <8 x double> %vec, <8 x 
double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2> 2727 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2728 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2729 ret <8 x double> %res 2730} 2731define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { 2732; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask7: 2733; CHECK: # %bb.0: 2734; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2735; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 2736; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] 2737; CHECK-NEXT: vmovapd %zmm1, %zmm0 2738; CHECK-NEXT: retq 2739 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6> 2740 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2741 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2742 ret <8 x double> %res 2743} 2744 2745define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %mask) { 2746; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask7: 2747; CHECK: # %bb.0: 2748; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2749; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 2750; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] 2751; CHECK-NEXT: retq 2752 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6> 2753 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2754 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2755 ret <8 x double> %res 2756} 2757define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) { 2758; CHECK-LABEL: test_8xdouble_perm_mem_mask0: 2759; CHECK: # %bb.0: 2760; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] 2761; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 2762; CHECK-NEXT: retq 2763 %vec = load <8 x double>, <8 x double>* %vp 2764 %res = 
shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1> 2765 ret <8 x double> %res 2766} 2767define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { 2768; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0: 2769; CHECK: # %bb.0: 2770; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] 2771; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2772; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 2773; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} 2774; CHECK-NEXT: retq 2775 %vec = load <8 x double>, <8 x double>* %vp 2776 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1> 2777 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2778 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2779 ret <8 x double> %res 2780} 2781 2782define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %mask) { 2783; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0: 2784; CHECK: # %bb.0: 2785; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] 2786; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2787; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1 2788; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} 2789; CHECK-NEXT: retq 2790 %vec = load <8 x double>, <8 x double>* %vp 2791 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1> 2792 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2793 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2794 ret <8 x double> %res 2795} 2796 2797define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { 2798; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: 2799; CHECK: # %bb.0: 2800; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2801; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, 
%k1 2802; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] 2803; CHECK-NEXT: retq 2804 %vec = load <8 x double>, <8 x double>* %vp 2805 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7> 2806 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2807 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2808 ret <8 x double> %res 2809} 2810 2811define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %mask) { 2812; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: 2813; CHECK: # %bb.0: 2814; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 2815; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 2816; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] 2817; CHECK-NEXT: retq 2818 %vec = load <8 x double>, <8 x double>* %vp 2819 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7> 2820 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2821 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2822 ret <8 x double> %res 2823} 2824 2825define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { 2826; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2: 2827; CHECK: # %bb.0: 2828; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] 2829; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2830; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 2831; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} 2832; CHECK-NEXT: retq 2833 %vec = load <8 x double>, <8 x double>* %vp 2834 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5> 2835 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2836 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2837 ret <8 x double> %res 2838} 2839 2840define <8 x double> 
@test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %mask) { 2841; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2: 2842; CHECK: # %bb.0: 2843; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] 2844; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2845; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1 2846; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} 2847; CHECK-NEXT: retq 2848 %vec = load <8 x double>, <8 x double>* %vp 2849 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5> 2850 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2851 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2852 ret <8 x double> %res 2853} 2854 2855define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { 2856; CHECK-LABEL: test_8xdouble_perm_imm_mem_mask3: 2857; CHECK: # %bb.0: 2858; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] 2859; CHECK-NEXT: retq 2860 %vec = load <8 x double>, <8 x double>* %vp 2861 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4> 2862 ret <8 x double> %res 2863} 2864define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { 2865; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: 2866; CHECK: # %bb.0: 2867; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2868; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 2869; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] 2870; CHECK-NEXT: retq 2871 %vec = load <8 x double>, <8 x double>* %vp 2872 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4> 2873 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2874 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2875 ret <8 x double> %res 2876} 2877 2878define <8 x double> 
@test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %mask) { 2879; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: 2880; CHECK: # %bb.0: 2881; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 2882; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 2883; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] 2884; CHECK-NEXT: retq 2885 %vec = load <8 x double>, <8 x double>* %vp 2886 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4> 2887 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2888 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2889 ret <8 x double> %res 2890} 2891 2892define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { 2893; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask4: 2894; CHECK: # %bb.0: 2895; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] 2896; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2897; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 2898; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} 2899; CHECK-NEXT: retq 2900 %vec = load <8 x double>, <8 x double>* %vp 2901 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0> 2902 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2903 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2904 ret <8 x double> %res 2905} 2906 2907define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x double> %mask) { 2908; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask4: 2909; CHECK: # %bb.0: 2910; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] 2911; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2912; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1 2913; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} 2914; CHECK-NEXT: retq 2915 %vec = load <8 x double>, <8 x double>* %vp 2916 %shuf = shufflevector <8 x double> %vec, 
<8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0> 2917 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2918 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2919 ret <8 x double> %res 2920} 2921 2922define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { 2923; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: 2924; CHECK: # %bb.0: 2925; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2926; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 2927; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] 2928; CHECK-NEXT: retq 2929 %vec = load <8 x double>, <8 x double>* %vp 2930 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7> 2931 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2932 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2933 ret <8 x double> %res 2934} 2935 2936define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %mask) { 2937; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: 2938; CHECK: # %bb.0: 2939; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 2940; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 2941; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] 2942; CHECK-NEXT: retq 2943 %vec = load <8 x double>, <8 x double>* %vp 2944 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7> 2945 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2946 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2947 ret <8 x double> %res 2948} 2949 2950define <8 x double> @test_8xdouble_perm_mem_mask6(<8 x double>* %vp) { 2951; CHECK-LABEL: test_8xdouble_perm_mem_mask6: 2952; CHECK: # %bb.0: 2953; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] 2954; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 
2955; CHECK-NEXT: retq 2956 %vec = load <8 x double>, <8 x double>* %vp 2957 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5> 2958 ret <8 x double> %res 2959} 2960define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { 2961; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask6: 2962; CHECK: # %bb.0: 2963; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] 2964; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2965; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 2966; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} 2967; CHECK-NEXT: retq 2968 %vec = load <8 x double>, <8 x double>* %vp 2969 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5> 2970 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2971 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 2972 ret <8 x double> %res 2973} 2974 2975define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x double> %mask) { 2976; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask6: 2977; CHECK: # %bb.0: 2978; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] 2979; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2980; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1 2981; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} 2982; CHECK-NEXT: retq 2983 %vec = load <8 x double>, <8 x double>* %vp 2984 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5> 2985 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 2986 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 2987 ret <8 x double> %res 2988} 2989 2990define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { 2991; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: 2992; CHECK: # %bb.0: 
2993; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2994; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 2995; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] 2996; CHECK-NEXT: retq 2997 %vec = load <8 x double>, <8 x double>* %vp 2998 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 2999 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 3000 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 3001 ret <8 x double> %res 3002} 3003 3004define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %mask) { 3005; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: 3006; CHECK: # %bb.0: 3007; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 3008; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 3009; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] 3010; CHECK-NEXT: retq 3011 %vec = load <8 x double>, <8 x double>* %vp 3012 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 3013 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer 3014 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 3015 ret <8 x double> %res 3016} 3017 3018