; Lowering tests for vector selects driven by an i8 mask argument. Each function
; below builds a shufflevector result from its vector operands, bitcasts the i8
; mask to <8 x i1> (narrowing it with a second shufflevector when fewer than 8
; lanes are selected), and then selects per lane between the shuffle result and
; either the passthru operand ("mask_" functions) or zeroinitializer ("maskz_"
; functions). The expected assembly was produced by the script named in the
; NOTE line; presumably it should be regenerated with that script rather than
; edited by hand.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=CHECK 3 4define <4 x i32> @mask_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) { 5; CHECK-LABEL: mask_shuffle_v4i32_1234: 6; CHECK: # %bb.0: 7; CHECK-NEXT: kmovd %edi, %k1 8; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[1,2,3],xmm1[0] 9; CHECK-NEXT: vmovdqa %xmm2, %xmm0 10; CHECK-NEXT: retq 11 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4> 12 %mask.cast = bitcast i8 %mask to <8 x i1> 13 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru 15 ret <4 x i32> %res 16} 17 18define <4 x i32> @maskz_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 19; CHECK-LABEL: maskz_shuffle_v4i32_1234: 20; CHECK: # %bb.0: 21; CHECK-NEXT: kmovd %edi, %k1 22; CHECK-NEXT: valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3],xmm1[0] 23; CHECK-NEXT: retq 24 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4> 25 %mask.cast = bitcast i8 %mask to <8 x i1> 26 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 27 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer 28 ret <4 x i32> %res 29} 30 31define <4 x i32> @mask_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) { 32; CHECK-LABEL: mask_shuffle_v4i32_2345: 33; CHECK: # %bb.0: 34; CHECK-NEXT: kmovd %edi, %k1 35; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[2,3],xmm1[0,1] 36; CHECK-NEXT: vmovdqa %xmm2, %xmm0 37; CHECK-NEXT: retq 38 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 39 %mask.cast = bitcast i8 %mask to <8 x i1> 40 %mask.extract = 
shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 41 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru 42 ret <4 x i32> %res 43} 44 45define <4 x i32> @maskz_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 46; CHECK-LABEL: maskz_shuffle_v4i32_2345: 47; CHECK: # %bb.0: 48; CHECK-NEXT: kmovd %edi, %k1 49; CHECK-NEXT: valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3],xmm1[0,1] 50; CHECK-NEXT: retq 51 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 52 %mask.cast = bitcast i8 %mask to <8 x i1> 53 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 54 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer 55 ret <4 x i32> %res 56} 57 58define <2 x i64> @mask_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passthru, i8 %mask) { 59; CHECK-LABEL: mask_shuffle_v2i64_12: 60; CHECK: # %bb.0: 61; CHECK-NEXT: kmovd %edi, %k1 62; CHECK-NEXT: valignq {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] 63; CHECK-NEXT: vmovdqa %xmm2, %xmm0 64; CHECK-NEXT: retq 65 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2> 66 %mask.cast = bitcast i8 %mask to <8 x i1> 67 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 68 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru 69 ret <2 x i64> %res 70} 71 72define <2 x i64> @maskz_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 73; CHECK-LABEL: maskz_shuffle_v2i64_12: 74; CHECK: # %bb.0: 75; CHECK-NEXT: kmovd %edi, %k1 76; CHECK-NEXT: valignq {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] 77; CHECK-NEXT: retq 78 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2> 79 %mask.cast = bitcast i8 %mask to <8 x i1> 80 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 81 %res = select <2 x i1> 
%mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer 82 ret <2 x i64> %res 83} 84 85define <4 x i64> @mask_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passthru, i8 %mask) { 86; CHECK-LABEL: mask_shuffle_v4i64_1234: 87; CHECK: # %bb.0: 88; CHECK-NEXT: kmovd %edi, %k1 89; CHECK-NEXT: valignq {{.*#+}} ymm2 {%k1} = ymm0[1,2,3],ymm1[0] 90; CHECK-NEXT: vmovdqa %ymm2, %ymm0 91; CHECK-NEXT: retq 92 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4> 93 %mask.cast = bitcast i8 %mask to <8 x i1> 94 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 95 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru 96 ret <4 x i64> %res 97} 98 99define <4 x i64> @maskz_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 100; CHECK-LABEL: maskz_shuffle_v4i64_1234: 101; CHECK: # %bb.0: 102; CHECK-NEXT: kmovd %edi, %k1 103; CHECK-NEXT: valignq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3],ymm1[0] 104; CHECK-NEXT: retq 105 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4> 106 %mask.cast = bitcast i8 %mask to <8 x i1> 107 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 108 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer 109 ret <4 x i64> %res 110} 111 112define <4 x i64> @mask_shuffle_v4i64_1230(<4 x i64> %a, <4 x i64> %passthru, i8 %mask) { 113; CHECK-LABEL: mask_shuffle_v4i64_1230: 114; CHECK: # %bb.0: 115; CHECK-NEXT: kmovd %edi, %k1 116; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,0] 117; CHECK-NEXT: vmovdqa %ymm1, %ymm0 118; CHECK-NEXT: retq 119 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> 120 %mask.cast = bitcast i8 %mask to <8 x i1> 121 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 122 %res = 
select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru 123 ret <4 x i64> %res 124} 125 126define <4 x i64> @maskz_shuffle_v4i64_1230(<4 x i64> %a, i8 %mask) { 127; CHECK-LABEL: maskz_shuffle_v4i64_1230: 128; CHECK: # %bb.0: 129; CHECK-NEXT: kmovd %edi, %k1 130; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,0] 131; CHECK-NEXT: retq 132 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> 133 %mask.cast = bitcast i8 %mask to <8 x i1> 134 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 135 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer 136 ret <4 x i64> %res 137} 138 139define <8 x i32> @mask_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) { 140; CHECK-LABEL: mask_shuffle_v8i32_12345678: 141; CHECK: # %bb.0: 142; CHECK-NEXT: kmovd %edi, %k1 143; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[1,2,3,4,5,6,7],ymm1[0] 144; CHECK-NEXT: vmovdqa %ymm2, %ymm0 145; CHECK-NEXT: retq 146 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 147 %mask.cast = bitcast i8 %mask to <8 x i1> 148 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru 149 ret <8 x i32> %res 150} 151 152define <8 x i32> @maskz_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 153; CHECK-LABEL: maskz_shuffle_v8i32_12345678: 154; CHECK: # %bb.0: 155; CHECK-NEXT: kmovd %edi, %k1 156; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7],ymm1[0] 157; CHECK-NEXT: retq 158 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 159 %mask.cast = bitcast i8 %mask to <8 x i1> 160 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer 161 ret <8 x i32> %res 162} 163 164define <8 x i32> @mask_shuffle_v8i32_23456789(<8 x i32> 
%a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) { 165; CHECK-LABEL: mask_shuffle_v8i32_23456789: 166; CHECK: # %bb.0: 167; CHECK-NEXT: kmovd %edi, %k1 168; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[2,3,4,5,6,7],ymm1[0,1] 169; CHECK-NEXT: vmovdqa %ymm2, %ymm0 170; CHECK-NEXT: retq 171 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> 172 %mask.cast = bitcast i8 %mask to <8 x i1> 173 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru 174 ret <8 x i32> %res 175} 176 177define <8 x i32> @maskz_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 178; CHECK-LABEL: maskz_shuffle_v8i32_23456789: 179; CHECK: # %bb.0: 180; CHECK-NEXT: kmovd %edi, %k1 181; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,4,5,6,7],ymm1[0,1] 182; CHECK-NEXT: retq 183 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> 184 %mask.cast = bitcast i8 %mask to <8 x i1> 185 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer 186 ret <8 x i32> %res 187} 188 189define <8 x i32> @mask_shuffle_v8i32_12345670(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) { 190; CHECK-LABEL: mask_shuffle_v8i32_12345670: 191; CHECK: # %bb.0: 192; CHECK-NEXT: kmovd %edi, %k1 193; CHECK-NEXT: valignd {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,4,5,6,7,0] 194; CHECK-NEXT: vmovdqa %ymm1, %ymm0 195; CHECK-NEXT: retq 196 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0> 197 %mask.cast = bitcast i8 %mask to <8 x i1> 198 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru 199 ret <8 x i32> %res 200} 201 202define <8 x i32> @maskz_shuffle_v8i32_12345670(<8 x i32> %a, i8 %mask) { 203; CHECK-LABEL: maskz_shuffle_v8i32_12345670: 204; CHECK: # %bb.0: 205; CHECK-NEXT: kmovd %edi, %k1 206; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = 
ymm0[1,2,3,4,5,6,7,0] 207; CHECK-NEXT: retq 208 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0> 209 %mask.cast = bitcast i8 %mask to <8 x i1> 210 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer 211 ret <8 x i32> %res 212} 213 214define <8 x i32> @mask_shuffle_v8i32_23456701(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) { 215; CHECK-LABEL: mask_shuffle_v8i32_23456701: 216; CHECK: # %bb.0: 217; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0] 218; CHECK-NEXT: kmovd %edi, %k1 219; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} 220; CHECK-NEXT: retq 221 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1> 222 %mask.cast = bitcast i8 %mask to <8 x i1> 223 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru 224 ret <8 x i32> %res 225} 226 227define <8 x i32> @maskz_shuffle_v8i32_23456701(<8 x i32> %a, i8 %mask) { 228; CHECK-LABEL: maskz_shuffle_v8i32_23456701: 229; CHECK: # %bb.0: 230; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0] 231; CHECK-NEXT: kmovd %edi, %k1 232; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} 233; CHECK-NEXT: retq 234 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1> 235 %mask.cast = bitcast i8 %mask to <8 x i1> 236 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer 237 ret <8 x i32> %res 238} 239 240define <4 x i32> @mask_extract_v8i32_v4i32_0(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) { 241; CHECK-LABEL: mask_extract_v8i32_v4i32_0: 242; CHECK: # %bb.0: 243; CHECK-NEXT: kmovd %edi, %k1 244; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} 245; CHECK-NEXT: vzeroupper 246; CHECK-NEXT: retq 247 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 248 %mask.cast = bitcast i8 %mask to <8 x i1> 249 %mask.extract 
= shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 250 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru 251 ret <4 x i32> %res 252} 253 254define <4 x i32> @mask_extract_v8i32_v4i32_0_z(<8 x i32> %a, i8 %mask) { 255; CHECK-LABEL: mask_extract_v8i32_v4i32_0_z: 256; CHECK: # %bb.0: 257; CHECK-NEXT: kmovd %edi, %k1 258; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 259; CHECK-NEXT: vzeroupper 260; CHECK-NEXT: retq 261 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 262 %mask.cast = bitcast i8 %mask to <8 x i1> 263 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 264 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer 265 ret <4 x i32> %res 266} 267 268define <4 x i32> @mask_extract_v8i32_v4i32_1(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) { 269; CHECK-LABEL: mask_extract_v8i32_v4i32_1: 270; CHECK: # %bb.0: 271; CHECK-NEXT: kmovd %edi, %k1 272; CHECK-NEXT: vextracti32x4 $1, %ymm0, %xmm1 {%k1} 273; CHECK-NEXT: vmovdqa %xmm1, %xmm0 274; CHECK-NEXT: vzeroupper 275; CHECK-NEXT: retq 276 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 277 %mask.cast = bitcast i8 %mask to <8 x i1> 278 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 279 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru 280 ret <4 x i32> %res 281} 282 283define <4 x i32> @mask_extract_v8i32_v4i32_1_z(<8 x i32> %a, i8 %mask) { 284; CHECK-LABEL: mask_extract_v8i32_v4i32_1_z: 285; CHECK: # %bb.0: 286; CHECK-NEXT: kmovd %edi, %k1 287; CHECK-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z} 288; CHECK-NEXT: vzeroupper 289; CHECK-NEXT: retq 290 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 291 %mask.cast = bitcast i8 %mask to <8 x i1> 292 
%mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 293 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer 294 ret <4 x i32> %res 295} 296 297define <4 x float> @mask_extract_v8f32_v4f32_0(<8 x float> %a, <4 x float> %passthru, i8 %mask) { 298; CHECK-LABEL: mask_extract_v8f32_v4f32_0: 299; CHECK: # %bb.0: 300; CHECK-NEXT: kmovd %edi, %k1 301; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} 302; CHECK-NEXT: vzeroupper 303; CHECK-NEXT: retq 304 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 305 %mask.cast = bitcast i8 %mask to <8 x i1> 306 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 307 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru 308 ret <4 x float> %res 309} 310 311define <4 x float> @mask_extract_v8f32_v4f32_0_z(<8 x float> %a, i8 %mask) { 312; CHECK-LABEL: mask_extract_v8f32_v4f32_0_z: 313; CHECK: # %bb.0: 314; CHECK-NEXT: kmovd %edi, %k1 315; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} 316; CHECK-NEXT: vzeroupper 317; CHECK-NEXT: retq 318 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 319 %mask.cast = bitcast i8 %mask to <8 x i1> 320 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 321 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer 322 ret <4 x float> %res 323} 324 325define <4 x float> @mask_extract_v8f32_v4f32_1(<8 x float> %a, <4 x float> %passthru, i8 %mask) { 326; CHECK-LABEL: mask_extract_v8f32_v4f32_1: 327; CHECK: # %bb.0: 328; CHECK-NEXT: kmovd %edi, %k1 329; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} 330; CHECK-NEXT: vmovaps %xmm1, %xmm0 331; CHECK-NEXT: vzeroupper 332; CHECK-NEXT: retq 333 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, 
i32 5, i32 6, i32 7> 334 %mask.cast = bitcast i8 %mask to <8 x i1> 335 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 336 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru 337 ret <4 x float> %res 338} 339 340define <4 x float> @mask_extract_v8f32_v4f32_1_z(<8 x float> %a, i8 %mask) { 341; CHECK-LABEL: mask_extract_v8f32_v4f32_1_z: 342; CHECK: # %bb.0: 343; CHECK-NEXT: kmovd %edi, %k1 344; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} 345; CHECK-NEXT: vzeroupper 346; CHECK-NEXT: retq 347 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 348 %mask.cast = bitcast i8 %mask to <8 x i1> 349 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 350 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer 351 ret <4 x float> %res 352} 353 354define <2 x i64> @mask_extract_v4i64_v2i64_0(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) { 355; CHECK-LABEL: mask_extract_v4i64_v2i64_0: 356; CHECK: # %bb.0: 357; CHECK-NEXT: kmovd %edi, %k1 358; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} 359; CHECK-NEXT: vzeroupper 360; CHECK-NEXT: retq 361 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1> 362 %mask.cast = bitcast i8 %mask to <8 x i1> 363 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 364 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru 365 ret <2 x i64> %res 366} 367 368define <2 x i64> @mask_extract_v4i64_v2i64_0_z(<4 x i64> %a, i8 %mask) { 369; CHECK-LABEL: mask_extract_v4i64_v2i64_0_z: 370; CHECK: # %bb.0: 371; CHECK-NEXT: kmovd %edi, %k1 372; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 373; CHECK-NEXT: vzeroupper 374; CHECK-NEXT: retq 375 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1> 376 %mask.cast = bitcast 
i8 %mask to <8 x i1> 377 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 378 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer 379 ret <2 x i64> %res 380} 381 382define <2 x i64> @mask_extract_v4i64_v2i64_1(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) { 383; CHECK-LABEL: mask_extract_v4i64_v2i64_1: 384; CHECK: # %bb.0: 385; CHECK-NEXT: kmovd %edi, %k1 386; CHECK-NEXT: vextracti64x2 $1, %ymm0, %xmm1 {%k1} 387; CHECK-NEXT: vmovdqa %xmm1, %xmm0 388; CHECK-NEXT: vzeroupper 389; CHECK-NEXT: retq 390 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3> 391 %mask.cast = bitcast i8 %mask to <8 x i1> 392 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 393 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru 394 ret <2 x i64> %res 395} 396 397define <2 x i64> @mask_extract_v4i64_v2i64_1_z(<4 x i64> %a, i8 %mask) { 398; CHECK-LABEL: mask_extract_v4i64_v2i64_1_z: 399; CHECK: # %bb.0: 400; CHECK-NEXT: kmovd %edi, %k1 401; CHECK-NEXT: vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z} 402; CHECK-NEXT: vzeroupper 403; CHECK-NEXT: retq 404 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3> 405 %mask.cast = bitcast i8 %mask to <8 x i1> 406 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 407 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer 408 ret <2 x i64> %res 409} 410 411define <2 x double> @mask_extract_v4f64_v2f64_0(<4 x double> %a, <2 x double> %passthru, i8 %mask) { 412; CHECK-LABEL: mask_extract_v4f64_v2f64_0: 413; CHECK: # %bb.0: 414; CHECK-NEXT: kmovd %edi, %k1 415; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} 416; CHECK-NEXT: vzeroupper 417; CHECK-NEXT: retq 418 %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1> 419 %mask.cast = bitcast i8 %mask to <8 x i1> 420 
%mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 421 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru 422 ret <2 x double> %res 423} 424 425define <2 x double> @mask_extract_v4f64_v2f64_0_z(<4 x double> %a, i8 %mask) { 426; CHECK-LABEL: mask_extract_v4f64_v2f64_0_z: 427; CHECK: # %bb.0: 428; CHECK-NEXT: kmovd %edi, %k1 429; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} 430; CHECK-NEXT: vzeroupper 431; CHECK-NEXT: retq 432 %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1> 433 %mask.cast = bitcast i8 %mask to <8 x i1> 434 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 435 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer 436 ret <2 x double> %res 437} 438 439define <2 x double> @mask_extract_v4f64_v2f64_1(<4 x double> %a, <2 x double> %passthru, i8 %mask) { 440; CHECK-LABEL: mask_extract_v4f64_v2f64_1: 441; CHECK: # %bb.0: 442; CHECK-NEXT: kmovd %edi, %k1 443; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} 444; CHECK-NEXT: vmovapd %xmm1, %xmm0 445; CHECK-NEXT: vzeroupper 446; CHECK-NEXT: retq 447 %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3> 448 %mask.cast = bitcast i8 %mask to <8 x i1> 449 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 450 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru 451 ret <2 x double> %res 452} 453 454define <2 x double> @mask_extract_v4f64_v2f64_1_z(<4 x double> %a, i8 %mask) { 455; CHECK-LABEL: mask_extract_v4f64_v2f64_1_z: 456; CHECK: # %bb.0: 457; CHECK-NEXT: kmovd %edi, %k1 458; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} 459; CHECK-NEXT: vzeroupper 460; CHECK-NEXT: retq 461 %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3> 462 %mask.cast = bitcast i8 %mask to <8 x i1> 463 
%mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 464 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer 465 ret <2 x double> %res 466} 467 468define <4 x i32> @mask_extract_v16i32_v4i32_0(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { 469; CHECK-LABEL: mask_extract_v16i32_v4i32_0: 470; CHECK: # %bb.0: 471; CHECK-NEXT: kmovd %edi, %k1 472; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} 473; CHECK-NEXT: vzeroupper 474; CHECK-NEXT: retq 475 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 476 %mask.cast = bitcast i8 %mask to <8 x i1> 477 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 478 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru 479 ret <4 x i32> %res 480} 481 482define <4 x i32> @mask_extract_v16i32_v4i32_0_z(<16 x i32> %a, i8 %mask) { 483; CHECK-LABEL: mask_extract_v16i32_v4i32_0_z: 484; CHECK: # %bb.0: 485; CHECK-NEXT: kmovd %edi, %k1 486; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 487; CHECK-NEXT: vzeroupper 488; CHECK-NEXT: retq 489 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 490 %mask.cast = bitcast i8 %mask to <8 x i1> 491 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 492 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer 493 ret <4 x i32> %res 494} 495 496define <4 x i32> @mask_extract_v16i32_v4i32_1(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { 497; CHECK-LABEL: mask_extract_v16i32_v4i32_1: 498; CHECK: # %bb.0: 499; CHECK-NEXT: kmovd %edi, %k1 500; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm1 {%k1} 501; CHECK-NEXT: vmovdqa %xmm1, %xmm0 502; CHECK-NEXT: vzeroupper 503; CHECK-NEXT: retq 504 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 505 
%mask.cast = bitcast i8 %mask to <8 x i1> 506 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 507 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru 508 ret <4 x i32> %res 509} 510 511define <4 x i32> @mask_extract_v16i32_v4i32_1_z(<16 x i32> %a, i8 %mask) { 512; CHECK-LABEL: mask_extract_v16i32_v4i32_1_z: 513; CHECK: # %bb.0: 514; CHECK-NEXT: kmovd %edi, %k1 515; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z} 516; CHECK-NEXT: vzeroupper 517; CHECK-NEXT: retq 518 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 519 %mask.cast = bitcast i8 %mask to <8 x i1> 520 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 521 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer 522 ret <4 x i32> %res 523} 524 525define <4 x i32> @mask_extract_v16i32_v4i32_2(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { 526; CHECK-LABEL: mask_extract_v16i32_v4i32_2: 527; CHECK: # %bb.0: 528; CHECK-NEXT: kmovd %edi, %k1 529; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1 {%k1} 530; CHECK-NEXT: vmovdqa %xmm1, %xmm0 531; CHECK-NEXT: vzeroupper 532; CHECK-NEXT: retq 533 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11> 534 %mask.cast = bitcast i8 %mask to <8 x i1> 535 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 536 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru 537 ret <4 x i32> %res 538} 539 540define <4 x i32> @mask_extract_v16i32_v4i32_3(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { 541; CHECK-LABEL: mask_extract_v16i32_v4i32_3: 542; CHECK: # %bb.0: 543; CHECK-NEXT: kmovd %edi, %k1 544; CHECK-NEXT: vextracti32x4 $3, %zmm0, %xmm1 {%k1} 545; CHECK-NEXT: vmovdqa %xmm1, %xmm0 546; CHECK-NEXT: vzeroupper 547; CHECK-NEXT: retq 548 %shuffle = 
shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15> 549 %mask.cast = bitcast i8 %mask to <8 x i1> 550 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 551 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru 552 ret <4 x i32> %res 553} 554 555define <4 x float> @mask_extract_v16f32_v4f32_0(<16 x float> %a, <4 x float> %passthru, i8 %mask) { 556; CHECK-LABEL: mask_extract_v16f32_v4f32_0: 557; CHECK: # %bb.0: 558; CHECK-NEXT: kmovd %edi, %k1 559; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} 560; CHECK-NEXT: vzeroupper 561; CHECK-NEXT: retq 562 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 563 %mask.cast = bitcast i8 %mask to <8 x i1> 564 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 565 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru 566 ret <4 x float> %res 567} 568 569define <4 x float> @mask_extract_v16f32_v4f32_0_z(<16 x float> %a, i8 %mask) { 570; CHECK-LABEL: mask_extract_v16f32_v4f32_0_z: 571; CHECK: # %bb.0: 572; CHECK-NEXT: kmovd %edi, %k1 573; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} 574; CHECK-NEXT: vzeroupper 575; CHECK-NEXT: retq 576 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 577 %mask.cast = bitcast i8 %mask to <8 x i1> 578 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 579 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer 580 ret <4 x float> %res 581} 582 583define <4 x float> @mask_extract_v16f32_v4f32_1(<16 x float> %a, <4 x float> %passthru, i8 %mask) { 584; CHECK-LABEL: mask_extract_v16f32_v4f32_1: 585; CHECK: # %bb.0: 586; CHECK-NEXT: kmovd %edi, %k1 587; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm1 {%k1} 588; CHECK-NEXT: 
vmovaps %xmm1, %xmm0 589; CHECK-NEXT: vzeroupper 590; CHECK-NEXT: retq 591 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 592 %mask.cast = bitcast i8 %mask to <8 x i1> 593 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 594 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru 595 ret <4 x float> %res 596} 597 598define <4 x float> @mask_extract_v16f32_v4f32_1_z(<16 x float> %a, i8 %mask) { 599; CHECK-LABEL: mask_extract_v16f32_v4f32_1_z: 600; CHECK: # %bb.0: 601; CHECK-NEXT: kmovd %edi, %k1 602; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z} 603; CHECK-NEXT: vzeroupper 604; CHECK-NEXT: retq 605 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 606 %mask.cast = bitcast i8 %mask to <8 x i1> 607 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 608 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer 609 ret <4 x float> %res 610} 611 612define <4 x float> @mask_extract_v16f32_v4f32_2(<16 x float> %a, <4 x float> %passthru, i8 %mask) { 613; CHECK-LABEL: mask_extract_v16f32_v4f32_2: 614; CHECK: # %bb.0: 615; CHECK-NEXT: kmovd %edi, %k1 616; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1 {%k1} 617; CHECK-NEXT: vmovaps %xmm1, %xmm0 618; CHECK-NEXT: vzeroupper 619; CHECK-NEXT: retq 620 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11> 621 %mask.cast = bitcast i8 %mask to <8 x i1> 622 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 623 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru 624 ret <4 x float> %res 625} 626 627define <4 x float> @mask_extract_v16f32_v4f32_3(<16 x float> %a, <4 x float> %passthru, i8 %mask) { 628; CHECK-LABEL: 
mask_extract_v16f32_v4f32_3: 629; CHECK: # %bb.0: 630; CHECK-NEXT: kmovd %edi, %k1 631; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm1 {%k1} 632; CHECK-NEXT: vmovaps %xmm1, %xmm0 633; CHECK-NEXT: vzeroupper 634; CHECK-NEXT: retq 635 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15> 636 %mask.cast = bitcast i8 %mask to <8 x i1> 637 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 638 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru 639 ret <4 x float> %res 640} 641 642define <8 x i32> @mask_extract_v16i32_v8i32_0(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) { 643; CHECK-LABEL: mask_extract_v16i32_v8i32_0: 644; CHECK: # %bb.0: 645; CHECK-NEXT: kmovd %edi, %k1 646; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} 647; CHECK-NEXT: retq 648 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 649 %mask.cast = bitcast i8 %mask to <8 x i1> 650 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru 651 ret <8 x i32> %res 652} 653 654define <8 x i32> @mask_extract_v16i32_v8i32_0_z(<16 x i32> %a, i8 %mask) { 655; CHECK-LABEL: mask_extract_v16i32_v8i32_0_z: 656; CHECK: # %bb.0: 657; CHECK-NEXT: kmovd %edi, %k1 658; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} 659; CHECK-NEXT: retq 660 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 661 %mask.cast = bitcast i8 %mask to <8 x i1> 662 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer 663 ret <8 x i32> %res 664} 665 666define <8 x i32> @mask_extract_v16i32_v8i32_1(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) { 667; CHECK-LABEL: mask_extract_v16i32_v8i32_1: 668; CHECK: # %bb.0: 669; CHECK-NEXT: kmovd %edi, %k1 670; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm1 {%k1} 671; CHECK-NEXT: vmovdqa %ymm1, 
%ymm0 672; CHECK-NEXT: retq 673 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 674 %mask.cast = bitcast i8 %mask to <8 x i1> 675 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru 676 ret <8 x i32> %res 677} 678 679define <8 x i32> @mask_extract_v16i32_v8i32_1_z(<16 x i32> %a, i8 %mask) { 680; CHECK-LABEL: mask_extract_v16i32_v8i32_1_z: 681; CHECK: # %bb.0: 682; CHECK-NEXT: kmovd %edi, %k1 683; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z} 684; CHECK-NEXT: retq 685 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 686 %mask.cast = bitcast i8 %mask to <8 x i1> 687 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer 688 ret <8 x i32> %res 689} 690 691define <8 x float> @mask_extract_v16f32_v8f32_0(<16 x float> %a, <8 x float> %passthru, i8 %mask) { 692; CHECK-LABEL: mask_extract_v16f32_v8f32_0: 693; CHECK: # %bb.0: 694; CHECK-NEXT: kmovd %edi, %k1 695; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} 696; CHECK-NEXT: retq 697 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 698 %mask.cast = bitcast i8 %mask to <8 x i1> 699 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> %passthru 700 ret <8 x float> %res 701} 702 703define <8 x float> @mask_extract_v16f32_v8f32_0_z(<16 x float> %a, i8 %mask) { 704; CHECK-LABEL: mask_extract_v16f32_v8f32_0_z: 705; CHECK: # %bb.0: 706; CHECK-NEXT: kmovd %edi, %k1 707; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} 708; CHECK-NEXT: retq 709 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 710 %mask.cast = bitcast i8 %mask to <8 x i1> 711 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> zeroinitializer 712 ret <8 x float> 
%res 713} 714 715define <8 x float> @mask_extract_v16f32_v8f32_1(<16 x float> %a, <8 x float> %passthru, i8 %mask) { 716; CHECK-LABEL: mask_extract_v16f32_v8f32_1: 717; CHECK: # %bb.0: 718; CHECK-NEXT: kmovd %edi, %k1 719; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1} 720; CHECK-NEXT: vmovaps %ymm1, %ymm0 721; CHECK-NEXT: retq 722 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 723 %mask.cast = bitcast i8 %mask to <8 x i1> 724 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> %passthru 725 ret <8 x float> %res 726} 727 728define <8 x float> @mask_extract_v16f32_v8f32_1_z(<16 x float> %a, i8 %mask) { 729; CHECK-LABEL: mask_extract_v16f32_v8f32_1_z: 730; CHECK: # %bb.0: 731; CHECK-NEXT: kmovd %edi, %k1 732; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} 733; CHECK-NEXT: retq 734 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 735 %mask.cast = bitcast i8 %mask to <8 x i1> 736 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> zeroinitializer 737 ret <8 x float> %res 738} 739 740define <2 x i64> @mask_extract_v8i64_v2i64_0(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { 741; CHECK-LABEL: mask_extract_v8i64_v2i64_0: 742; CHECK: # %bb.0: 743; CHECK-NEXT: kmovd %edi, %k1 744; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} 745; CHECK-NEXT: vzeroupper 746; CHECK-NEXT: retq 747 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> 748 %mask.cast = bitcast i8 %mask to <8 x i1> 749 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 750 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru 751 ret <2 x i64> %res 752} 753 754define <2 x i64> @mask_extract_v8i64_v2i64_0_z(<8 x i64> %a, i8 %mask) { 755; CHECK-LABEL: mask_extract_v8i64_v2i64_0_z: 756; CHECK: # %bb.0: 757; 
CHECK-NEXT: kmovd %edi, %k1 758; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 759; CHECK-NEXT: vzeroupper 760; CHECK-NEXT: retq 761 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> 762 %mask.cast = bitcast i8 %mask to <8 x i1> 763 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 764 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer 765 ret <2 x i64> %res 766} 767 768define <2 x i64> @mask_extract_v8i64_v2i64_1(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { 769; CHECK-LABEL: mask_extract_v8i64_v2i64_1: 770; CHECK: # %bb.0: 771; CHECK-NEXT: kmovd %edi, %k1 772; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm1 {%k1} 773; CHECK-NEXT: vmovdqa %xmm1, %xmm0 774; CHECK-NEXT: vzeroupper 775; CHECK-NEXT: retq 776 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3> 777 %mask.cast = bitcast i8 %mask to <8 x i1> 778 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 779 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru 780 ret <2 x i64> %res 781} 782 783define <2 x i64> @mask_extract_v8i64_v2i64_1_z(<8 x i64> %a, i8 %mask) { 784; CHECK-LABEL: mask_extract_v8i64_v2i64_1_z: 785; CHECK: # %bb.0: 786; CHECK-NEXT: kmovd %edi, %k1 787; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z} 788; CHECK-NEXT: vzeroupper 789; CHECK-NEXT: retq 790 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3> 791 %mask.cast = bitcast i8 %mask to <8 x i1> 792 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 793 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer 794 ret <2 x i64> %res 795} 796 797define <2 x i64> @mask_extract_v8i64_v2i64_2(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { 798; CHECK-LABEL: mask_extract_v8i64_v2i64_2: 799; CHECK: # %bb.0: 800; CHECK-NEXT: kmovd %edi, %k1 801; CHECK-NEXT: 
vextracti64x2 $2, %zmm0, %xmm1 {%k1} 802; CHECK-NEXT: vmovdqa %xmm1, %xmm0 803; CHECK-NEXT: vzeroupper 804; CHECK-NEXT: retq 805 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 4, i32 5> 806 %mask.cast = bitcast i8 %mask to <8 x i1> 807 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 808 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru 809 ret <2 x i64> %res 810} 811 812define <2 x i64> @mask_extract_v8i64_v2i64_3(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { 813; CHECK-LABEL: mask_extract_v8i64_v2i64_3: 814; CHECK: # %bb.0: 815; CHECK-NEXT: kmovd %edi, %k1 816; CHECK-NEXT: vextracti64x2 $3, %zmm0, %xmm1 {%k1} 817; CHECK-NEXT: vmovdqa %xmm1, %xmm0 818; CHECK-NEXT: vzeroupper 819; CHECK-NEXT: retq 820 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 6, i32 7> 821 %mask.cast = bitcast i8 %mask to <8 x i1> 822 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 823 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru 824 ret <2 x i64> %res 825} 826 827define <2 x double> @mask_extract_v8f64_v2f64_0(<8 x double> %a, <2 x double> %passthru, i8 %mask) { 828; CHECK-LABEL: mask_extract_v8f64_v2f64_0: 829; CHECK: # %bb.0: 830; CHECK-NEXT: kmovd %edi, %k1 831; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} 832; CHECK-NEXT: vzeroupper 833; CHECK-NEXT: retq 834 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1> 835 %mask.cast = bitcast i8 %mask to <8 x i1> 836 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 837 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru 838 ret <2 x double> %res 839} 840 841define <2 x double> @mask_extract_v8f64_v2f64_0_z(<8 x double> %a, i8 %mask) { 842; CHECK-LABEL: mask_extract_v8f64_v2f64_0_z: 843; CHECK: # %bb.0: 844; CHECK-NEXT: kmovd %edi, %k1 845; 
CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} 846; CHECK-NEXT: vzeroupper 847; CHECK-NEXT: retq 848 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1> 849 %mask.cast = bitcast i8 %mask to <8 x i1> 850 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 851 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer 852 ret <2 x double> %res 853} 854 855define <2 x double> @mask_extract_v8f64_v2f64_1(<8 x double> %a, <2 x double> %passthru, i8 %mask) { 856; CHECK-LABEL: mask_extract_v8f64_v2f64_1: 857; CHECK: # %bb.0: 858; CHECK-NEXT: kmovd %edi, %k1 859; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1} 860; CHECK-NEXT: vmovapd %xmm1, %xmm0 861; CHECK-NEXT: vzeroupper 862; CHECK-NEXT: retq 863 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3> 864 %mask.cast = bitcast i8 %mask to <8 x i1> 865 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 866 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru 867 ret <2 x double> %res 868} 869 870define <2 x double> @mask_extract_v8f64_v2f64_1_z(<8 x double> %a, i8 %mask) { 871; CHECK-LABEL: mask_extract_v8f64_v2f64_1_z: 872; CHECK: # %bb.0: 873; CHECK-NEXT: kmovd %edi, %k1 874; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} 875; CHECK-NEXT: vzeroupper 876; CHECK-NEXT: retq 877 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3> 878 %mask.cast = bitcast i8 %mask to <8 x i1> 879 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 880 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer 881 ret <2 x double> %res 882} 883 884define <2 x double> @mask_extract_v8f64_v2f64_2(<8 x double> %a, <2 x double> %passthru, i8 %mask) { 885; CHECK-LABEL: mask_extract_v8f64_v2f64_2: 886; CHECK: # %bb.0: 887; CHECK-NEXT: 
kmovd %edi, %k1 888; CHECK-NEXT: vextractf64x2 $2, %zmm0, %xmm1 {%k1} 889; CHECK-NEXT: vmovapd %xmm1, %xmm0 890; CHECK-NEXT: vzeroupper 891; CHECK-NEXT: retq 892 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 4, i32 5> 893 %mask.cast = bitcast i8 %mask to <8 x i1> 894 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 895 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru 896 ret <2 x double> %res 897} 898 899define <2 x double> @mask_extract_v8f64_v2f64_3(<8 x double> %a, <2 x double> %passthru, i8 %mask) { 900; CHECK-LABEL: mask_extract_v8f64_v2f64_3: 901; CHECK: # %bb.0: 902; CHECK-NEXT: kmovd %edi, %k1 903; CHECK-NEXT: vextractf64x2 $3, %zmm0, %xmm1 {%k1} 904; CHECK-NEXT: vmovapd %xmm1, %xmm0 905; CHECK-NEXT: vzeroupper 906; CHECK-NEXT: retq 907 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 6, i32 7> 908 %mask.cast = bitcast i8 %mask to <8 x i1> 909 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 910 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru 911 ret <2 x double> %res 912} 913 914define <4 x i64> @mask_extract_v8i64_v4i64_0(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) { 915; CHECK-LABEL: mask_extract_v8i64_v4i64_0: 916; CHECK: # %bb.0: 917; CHECK-NEXT: kmovd %edi, %k1 918; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} 919; CHECK-NEXT: retq 920 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 921 %mask.cast = bitcast i8 %mask to <8 x i1> 922 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 923 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru 924 ret <4 x i64> %res 925} 926 927define <4 x i64> @mask_extract_v8i64_v4i64_0_z(<8 x i64> %a, i8 %mask) { 928; CHECK-LABEL: mask_extract_v8i64_v4i64_0_z: 929; CHECK: # %bb.0: 
930; CHECK-NEXT: kmovd %edi, %k1 931; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} 932; CHECK-NEXT: retq 933 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 934 %mask.cast = bitcast i8 %mask to <8 x i1> 935 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 936 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer 937 ret <4 x i64> %res 938} 939 940define <4 x i64> @mask_extract_v8i64_v4i64_1(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) { 941; CHECK-LABEL: mask_extract_v8i64_v4i64_1: 942; CHECK: # %bb.0: 943; CHECK-NEXT: kmovd %edi, %k1 944; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 {%k1} 945; CHECK-NEXT: vmovdqa %ymm1, %ymm0 946; CHECK-NEXT: retq 947 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 948 %mask.cast = bitcast i8 %mask to <8 x i1> 949 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 950 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru 951 ret <4 x i64> %res 952} 953 954define <4 x i64> @mask_extract_v8i64_v4i64_1_z(<8 x i64> %a, i8 %mask) { 955; CHECK-LABEL: mask_extract_v8i64_v4i64_1_z: 956; CHECK: # %bb.0: 957; CHECK-NEXT: kmovd %edi, %k1 958; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z} 959; CHECK-NEXT: retq 960 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 961 %mask.cast = bitcast i8 %mask to <8 x i1> 962 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 963 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer 964 ret <4 x i64> %res 965} 966 967define <4 x double> @mask_extract_v8f64_v4f64_0(<8 x double> %a, <4 x double> %passthru, i8 %mask) { 968; CHECK-LABEL: mask_extract_v8f64_v4f64_0: 969; CHECK: # %bb.0: 970; CHECK-NEXT: kmovd %edi, %k1 971; 
CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} 972; CHECK-NEXT: retq 973 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 974 %mask.cast = bitcast i8 %mask to <8 x i1> 975 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 976 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> %passthru 977 ret <4 x double> %res 978} 979 980define <4 x double> @mask_extract_v8f64_v4f64_0_z(<8 x double> %a, i8 %mask) { 981; CHECK-LABEL: mask_extract_v8f64_v4f64_0_z: 982; CHECK: # %bb.0: 983; CHECK-NEXT: kmovd %edi, %k1 984; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} 985; CHECK-NEXT: retq 986 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 987 %mask.cast = bitcast i8 %mask to <8 x i1> 988 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 989 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> zeroinitializer 990 ret <4 x double> %res 991} 992 993define <4 x double> @mask_extract_v8f64_v4f64_1(<8 x double> %a, <4 x double> %passthru, i8 %mask) { 994; CHECK-LABEL: mask_extract_v8f64_v4f64_1: 995; CHECK: # %bb.0: 996; CHECK-NEXT: kmovd %edi, %k1 997; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 {%k1} 998; CHECK-NEXT: vmovapd %ymm1, %ymm0 999; CHECK-NEXT: retq 1000 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1001 %mask.cast = bitcast i8 %mask to <8 x i1> 1002 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1003 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> %passthru 1004 ret <4 x double> %res 1005} 1006 1007define <4 x double> @mask_extract_v8f64_v4f64_1_z(<8 x double> %a, i8 %mask) { 1008; CHECK-LABEL: mask_extract_v8f64_v4f64_1_z: 1009; CHECK: # %bb.0: 1010; CHECK-NEXT: kmovd %edi, %k1 
1011; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z} 1012; CHECK-NEXT: retq 1013 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1014 %mask.cast = bitcast i8 %mask to <8 x i1> 1015 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1016 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> zeroinitializer 1017 ret <4 x double> %res 1018} 1019 1020define <8 x i32> @mask_cast_extract_v8i64_v8i32_0(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) { 1021; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0: 1022; CHECK: # %bb.0: 1023; CHECK-NEXT: kmovd %edi, %k1 1024; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} 1025; CHECK-NEXT: retq 1026 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1027 %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32> 1028 %mask.cast = bitcast i8 %mask to <8 x i1> 1029 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> %passthru 1030 ret <8 x i32> %res 1031} 1032 1033define <8 x i32> @mask_cast_extract_v8i64_v8i32_0_z(<8 x i64> %a, i8 %mask) { 1034; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0_z: 1035; CHECK: # %bb.0: 1036; CHECK-NEXT: kmovd %edi, %k1 1037; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} 1038; CHECK-NEXT: retq 1039 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1040 %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32> 1041 %mask.cast = bitcast i8 %mask to <8 x i1> 1042 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> zeroinitializer 1043 ret <8 x i32> %res 1044} 1045 1046define <8 x i32> @mask_cast_extract_v8i64_v8i32_1(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) { 1047; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1: 1048; CHECK: # %bb.0: 1049; CHECK-NEXT: kmovd %edi, %k1 1050; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm1 {%k1} 1051; CHECK-NEXT: vmovdqa %ymm1, %ymm0 
1052; CHECK-NEXT: retq 1053 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1054 %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32> 1055 %mask.cast = bitcast i8 %mask to <8 x i1> 1056 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> %passthru 1057 ret <8 x i32> %res 1058} 1059 1060define <8 x i32> @mask_cast_extract_v8i64_v8i32_1_z(<8 x i64> %a, i8 %mask) { 1061; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1_z: 1062; CHECK: # %bb.0: 1063; CHECK-NEXT: kmovd %edi, %k1 1064; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z} 1065; CHECK-NEXT: retq 1066 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1067 %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32> 1068 %mask.cast = bitcast i8 %mask to <8 x i1> 1069 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> zeroinitializer 1070 ret <8 x i32> %res 1071} 1072 1073define <8 x float> @mask_cast_extract_v8f64_v8f32_0(<8 x double> %a, <8 x float> %passthru, i8 %mask) { 1074; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0: 1075; CHECK: # %bb.0: 1076; CHECK-NEXT: kmovd %edi, %k1 1077; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} 1078; CHECK-NEXT: retq 1079 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1080 %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float> 1081 %mask.cast = bitcast i8 %mask to <8 x i1> 1082 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> %passthru 1083 ret <8 x float> %res 1084} 1085 1086define <8 x float> @mask_cast_extract_v8f64_v8f32_0_z(<8 x double> %a, i8 %mask) { 1087; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0_z: 1088; CHECK: # %bb.0: 1089; CHECK-NEXT: kmovd %edi, %k1 1090; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} 1091; CHECK-NEXT: retq 1092 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1093 %shuffle.cast = 
bitcast <4 x double> %shuffle to <8 x float> 1094 %mask.cast = bitcast i8 %mask to <8 x i1> 1095 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> zeroinitializer 1096 ret <8 x float> %res 1097} 1098 1099define <8 x float> @mask_cast_extract_v8f64_v8f32_1(<8 x double> %a, <8 x float> %passthru, i8 %mask) { 1100; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1: 1101; CHECK: # %bb.0: 1102; CHECK-NEXT: kmovd %edi, %k1 1103; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1} 1104; CHECK-NEXT: vmovaps %ymm1, %ymm0 1105; CHECK-NEXT: retq 1106 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1107 %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float> 1108 %mask.cast = bitcast i8 %mask to <8 x i1> 1109 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> %passthru 1110 ret <8 x float> %res 1111} 1112 1113define <8 x float> @mask_cast_extract_v8f64_v8f32_1_z(<8 x double> %a, i8 %mask) { 1114; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1_z: 1115; CHECK: # %bb.0: 1116; CHECK-NEXT: kmovd %edi, %k1 1117; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} 1118; CHECK-NEXT: retq 1119 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1120 %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float> 1121 %mask.cast = bitcast i8 %mask to <8 x i1> 1122 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> zeroinitializer 1123 ret <8 x float> %res 1124} 1125 1126define <4 x i32> @mask_cast_extract_v8i64_v4i32_0(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) { 1127; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0: 1128; CHECK: # %bb.0: 1129; CHECK-NEXT: kmovd %edi, %k1 1130; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} 1131; CHECK-NEXT: vzeroupper 1132; CHECK-NEXT: retq 1133 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> 1134 %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32> 1135 
%mask.cast = bitcast i8 %mask to <8 x i1> 1136 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1137 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> %passthru 1138 ret <4 x i32> %res 1139} 1140 1141define <4 x i32> @mask_cast_extract_v8i64_v4i32_0_z(<8 x i64> %a, i8 %mask) { 1142; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0_z: 1143; CHECK: # %bb.0: 1144; CHECK-NEXT: kmovd %edi, %k1 1145; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 1146; CHECK-NEXT: vzeroupper 1147; CHECK-NEXT: retq 1148 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> 1149 %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32> 1150 %mask.cast = bitcast i8 %mask to <8 x i1> 1151 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1152 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> zeroinitializer 1153 ret <4 x i32> %res 1154} 1155 1156define <4 x i32> @mask_cast_extract_v8i64_v4i32_1(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) { 1157; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1: 1158; CHECK: # %bb.0: 1159; CHECK-NEXT: kmovd %edi, %k1 1160; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm1 {%k1} 1161; CHECK-NEXT: vmovdqa %xmm1, %xmm0 1162; CHECK-NEXT: vzeroupper 1163; CHECK-NEXT: retq 1164 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3> 1165 %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32> 1166 %mask.cast = bitcast i8 %mask to <8 x i1> 1167 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1168 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> %passthru 1169 ret <4 x i32> %res 1170} 1171 1172define <4 x i32> @mask_cast_extract_v8i64_v4i32_1_z(<8 x i64> %a, i8 %mask) { 1173; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1_z: 1174; CHECK: # %bb.0: 1175; CHECK-NEXT: kmovd %edi, %k1 1176; CHECK-NEXT: 
vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z} 1177; CHECK-NEXT: vzeroupper 1178; CHECK-NEXT: retq 1179 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3> 1180 %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32> 1181 %mask.cast = bitcast i8 %mask to <8 x i1> 1182 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1183 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> zeroinitializer 1184 ret <4 x i32> %res 1185} 1186 1187define <4 x float> @mask_cast_extract_v8f64_v4f32_0(<8 x double> %a, <4 x float> %passthru, i8 %mask) { 1188; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0: 1189; CHECK: # %bb.0: 1190; CHECK-NEXT: kmovd %edi, %k1 1191; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} 1192; CHECK-NEXT: vzeroupper 1193; CHECK-NEXT: retq 1194 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1195 %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float> 1196 %mask.cast = bitcast i8 %mask to <8 x i1> 1197 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1198 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> %passthru 1199 ret <4 x float> %res 1200} 1201 1202define <4 x float> @mask_cast_extract_v8f64_v4f32_0_z(<8 x double> %a, i8 %mask) { 1203; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0_z: 1204; CHECK: # %bb.0: 1205; CHECK-NEXT: kmovd %edi, %k1 1206; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} 1207; CHECK-NEXT: vzeroupper 1208; CHECK-NEXT: retq 1209 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1210 %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float> 1211 %mask.cast = bitcast i8 %mask to <8 x i1> 1212 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1213 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> 
zeroinitializer 1214 ret <4 x float> %res 1215} 1216 1217define <4 x float> @mask_cast_extract_v8f64_v4f32_1(<8 x double> %a, <4 x float> %passthru, i8 %mask) { 1218; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1: 1219; CHECK: # %bb.0: 1220; CHECK-NEXT: kmovd %edi, %k1 1221; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm1 {%k1} 1222; CHECK-NEXT: vmovaps %xmm1, %xmm0 1223; CHECK-NEXT: vzeroupper 1224; CHECK-NEXT: retq 1225 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1226 %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float> 1227 %mask.cast = bitcast i8 %mask to <8 x i1> 1228 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1229 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> %passthru 1230 ret <4 x float> %res 1231} 1232 1233define <4 x float> @mask_cast_extract_v8f64_v4f32_1_z(<8 x double> %a, i8 %mask) { 1234; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1_z: 1235; CHECK: # %bb.0: 1236; CHECK-NEXT: kmovd %edi, %k1 1237; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z} 1238; CHECK-NEXT: vzeroupper 1239; CHECK-NEXT: retq 1240 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1241 %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float> 1242 %mask.cast = bitcast i8 %mask to <8 x i1> 1243 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1244 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> zeroinitializer 1245 ret <4 x float> %res 1246} 1247 1248define <4 x i64> @mask_cast_extract_v16i32_v4i64_0(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) { 1249; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_0: 1250; CHECK: # %bb.0: 1251; CHECK-NEXT: kmovd %edi, %k1 1252; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} 1253; CHECK-NEXT: retq 1254 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 
2, i32 3, i32 4, i32 5, i32 6, i32 7> 1255 %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64> 1256 %mask.cast = bitcast i8 %mask to <8 x i1> 1257 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1258 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> %passthru 1259 ret <4 x i64> %res 1260} 1261 1262define <4 x i64> @mask_cast_extract_v16i32_v4i64_0_z(<16 x i32> %a, i8 %mask) { 1263; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_0_z: 1264; CHECK: # %bb.0: 1265; CHECK-NEXT: kmovd %edi, %k1 1266; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} 1267; CHECK-NEXT: retq 1268 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1269 %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64> 1270 %mask.cast = bitcast i8 %mask to <8 x i1> 1271 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1272 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> zeroinitializer 1273 ret <4 x i64> %res 1274} 1275 1276define <4 x i64> @mask_cast_extract_v16i32_v4i64_1(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) { 1277; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1: 1278; CHECK: # %bb.0: 1279; CHECK-NEXT: kmovd %edi, %k1 1280; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 {%k1} 1281; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1282; CHECK-NEXT: retq 1283 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1284 %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64> 1285 %mask.cast = bitcast i8 %mask to <8 x i1> 1286 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1287 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> %passthru 1288 ret <4 x i64> %res 1289} 1290 1291define <4 x i64> @mask_cast_extract_v16i32_v4i64_1_z(<16 x 
i32> %a, i8 %mask) { 1292; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1_z: 1293; CHECK: # %bb.0: 1294; CHECK-NEXT: kmovd %edi, %k1 1295; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z} 1296; CHECK-NEXT: retq 1297 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1298 %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64> 1299 %mask.cast = bitcast i8 %mask to <8 x i1> 1300 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1301 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> zeroinitializer 1302 ret <4 x i64> %res 1303} 1304 1305define <4 x double> @mask_cast_extract_v16f32_v4f64_0(<16 x float> %a, <4 x double> %passthru, i8 %mask) { 1306; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0: 1307; CHECK: # %bb.0: 1308; CHECK-NEXT: kmovd %edi, %k1 1309; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} 1310; CHECK-NEXT: retq 1311 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1312 %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double> 1313 %mask.cast = bitcast i8 %mask to <8 x i1> 1314 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1315 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> %passthru 1316 ret <4 x double> %res 1317} 1318 1319define <4 x double> @mask_cast_extract_v16f32_v4f64_0_z(<16 x float> %a, i8 %mask) { 1320; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0_z: 1321; CHECK: # %bb.0: 1322; CHECK-NEXT: kmovd %edi, %k1 1323; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} 1324; CHECK-NEXT: retq 1325 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1326 %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double> 1327 %mask.cast = bitcast i8 %mask to <8 x 
i1> 1328 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1329 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> zeroinitializer 1330 ret <4 x double> %res 1331} 1332 1333define <4 x double> @mask_cast_extract_v16f32_v4f64_1(<16 x float> %a, <4 x double> %passthru, i8 %mask) { 1334; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1: 1335; CHECK: # %bb.0: 1336; CHECK-NEXT: kmovd %edi, %k1 1337; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 {%k1} 1338; CHECK-NEXT: vmovapd %ymm1, %ymm0 1339; CHECK-NEXT: retq 1340 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1341 %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double> 1342 %mask.cast = bitcast i8 %mask to <8 x i1> 1343 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1344 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> %passthru 1345 ret <4 x double> %res 1346} 1347 1348define <4 x double> @mask_cast_extract_v16f32_v4f64_1_z(<16 x float> %a, i8 %mask) { 1349; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1_z: 1350; CHECK: # %bb.0: 1351; CHECK-NEXT: kmovd %edi, %k1 1352; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z} 1353; CHECK-NEXT: retq 1354 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1355 %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double> 1356 %mask.cast = bitcast i8 %mask to <8 x i1> 1357 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1358 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> zeroinitializer 1359 ret <4 x double> %res 1360} 1361 1362define <2 x i64> @mask_cast_extract_v16i32_v2i64_0(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) { 1363; CHECK-LABEL: 
mask_cast_extract_v16i32_v2i64_0: 1364; CHECK: # %bb.0: 1365; CHECK-NEXT: kmovd %edi, %k1 1366; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} 1367; CHECK-NEXT: vzeroupper 1368; CHECK-NEXT: retq 1369 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1370 %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64> 1371 %mask.cast = bitcast i8 %mask to <8 x i1> 1372 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1373 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> %passthru 1374 ret <2 x i64> %res 1375} 1376 1377define <2 x i64> @mask_cast_extract_v16i32_v2i64_0_z(<16 x i32> %a, i8 %mask) { 1378; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_0_z: 1379; CHECK: # %bb.0: 1380; CHECK-NEXT: kmovd %edi, %k1 1381; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 1382; CHECK-NEXT: vzeroupper 1383; CHECK-NEXT: retq 1384 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1385 %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64> 1386 %mask.cast = bitcast i8 %mask to <8 x i1> 1387 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1388 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> zeroinitializer 1389 ret <2 x i64> %res 1390} 1391 1392define <2 x i64> @mask_cast_extract_v16i32_v2i64_1(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) { 1393; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1: 1394; CHECK: # %bb.0: 1395; CHECK-NEXT: kmovd %edi, %k1 1396; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm1 {%k1} 1397; CHECK-NEXT: vmovdqa %xmm1, %xmm0 1398; CHECK-NEXT: vzeroupper 1399; CHECK-NEXT: retq 1400 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1401 %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64> 1402 %mask.cast = bitcast i8 %mask to <8 x i1> 1403 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, 
<2 x i32> <i32 0, i32 1> 1404 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> %passthru 1405 ret <2 x i64> %res 1406} 1407 1408define <2 x i64> @mask_cast_extract_v16i32_v2i64_1_z(<16 x i32> %a, i8 %mask) { 1409; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1_z: 1410; CHECK: # %bb.0: 1411; CHECK-NEXT: kmovd %edi, %k1 1412; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z} 1413; CHECK-NEXT: vzeroupper 1414; CHECK-NEXT: retq 1415 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1416 %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64> 1417 %mask.cast = bitcast i8 %mask to <8 x i1> 1418 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1419 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> zeroinitializer 1420 ret <2 x i64> %res 1421} 1422 1423define <2 x double> @mask_cast_extract_v16f32_v2f64_0(<16 x float> %a, <2 x double> %passthru, i8 %mask) { 1424; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0: 1425; CHECK: # %bb.0: 1426; CHECK-NEXT: kmovd %edi, %k1 1427; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} 1428; CHECK-NEXT: vzeroupper 1429; CHECK-NEXT: retq 1430 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1431 %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double> 1432 %mask.cast = bitcast i8 %mask to <8 x i1> 1433 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1434 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru 1435 ret <2 x double> %res 1436} 1437 1438define <2 x double> @mask_cast_extract_v16f32_v2f64_0_z(<16 x float> %a, i8 %mask) { 1439; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0_z: 1440; CHECK: # %bb.0: 1441; CHECK-NEXT: kmovd %edi, %k1 1442; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} 1443; CHECK-NEXT: vzeroupper 1444; CHECK-NEXT: retq 1445 %shuffle = shufflevector <16 
x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1446 %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double> 1447 %mask.cast = bitcast i8 %mask to <8 x i1> 1448 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1449 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> zeroinitializer 1450 ret <2 x double> %res 1451} 1452 1453define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x double> %passthru, i8 %mask) { 1454; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1: 1455; CHECK: # %bb.0: 1456; CHECK-NEXT: kmovd %edi, %k1 1457; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1} 1458; CHECK-NEXT: vmovapd %xmm1, %xmm0 1459; CHECK-NEXT: vzeroupper 1460; CHECK-NEXT: retq 1461 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1462 %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double> 1463 %mask.cast = bitcast i8 %mask to <8 x i1> 1464 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1465 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru 1466 ret <2 x double> %res 1467} 1468 1469define <2 x double> @mask_cast_extract_v16f32_v2f64_1_z(<16 x float> %a, i8 %mask) { 1470; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1_z: 1471; CHECK: # %bb.0: 1472; CHECK-NEXT: kmovd %edi, %k1 1473; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} 1474; CHECK-NEXT: vzeroupper 1475; CHECK-NEXT: retq 1476 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1477 %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double> 1478 %mask.cast = bitcast i8 %mask to <8 x i1> 1479 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1480 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> zeroinitializer 1481 ret <2 x double> %res 1482} 1483 
1484define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) { 1485; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask: 1486; CHECK: # %bb.0: 1487; CHECK-NEXT: kmovd %esi, %k1 1488; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] 1489; CHECK-NEXT: retq 1490 %q = load double, double* %x, align 1 1491 %vecinit.i = insertelement <2 x double> undef, double %q, i32 0 1492 %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1 1493 %mask.cast = bitcast i8 %mask to <8 x i1> 1494 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1495 %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> %passthru 1496 ret <2 x double> %res 1497} 1498 1499define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) { 1500; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz: 1501; CHECK: # %bb.0: 1502; CHECK-NEXT: kmovd %esi, %k1 1503; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0] 1504; CHECK-NEXT: retq 1505 %q = load double, double* %x, align 1 1506 %vecinit.i = insertelement <2 x double> undef, double %q, i32 0 1507 %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1 1508 %mask.cast = bitcast i8 %mask to <8 x i1> 1509 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1510 %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer 1511 ret <2 x double> %res 1512} 1513 1514define <8 x float> @test_broadcast_2f64_8f32_mask(<2 x double> *%p, i8 %mask, <8 x float> %passthru) nounwind { 1515; CHECK-LABEL: test_broadcast_2f64_8f32_mask: 1516; CHECK: # %bb.0: 1517; CHECK-NEXT: kmovd %esi, %k1 1518; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 1519; CHECK-NEXT: retq 1520 %1 = load <2 x double>, <2 x double> *%p 1521 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1522 %3 = 
bitcast <4 x double> %2 to <8 x float> 1523 %mask.cast = bitcast i8 %mask to <8 x i1> 1524 %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> %passthru 1525 ret <8 x float> %res 1526} 1527 1528define <8 x float> @test_broadcast_2f64_8f32_maskz(<2 x double> *%p, i8 %mask) nounwind { 1529; CHECK-LABEL: test_broadcast_2f64_8f32_maskz: 1530; CHECK: # %bb.0: 1531; CHECK-NEXT: kmovd %esi, %k1 1532; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 1533; CHECK-NEXT: retq 1534 %1 = load <2 x double>, <2 x double> *%p 1535 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1536 %3 = bitcast <4 x double> %2 to <8 x float> 1537 %mask.cast = bitcast i8 %mask to <8 x i1> 1538 %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> zeroinitializer 1539 ret <8 x float> %res 1540} 1541 1542define <8 x i32> @test_broadcast_2i64_8i32_mask(<2 x i64> *%p, i8 %mask, <8 x i32> %passthru) nounwind { 1543; CHECK-LABEL: test_broadcast_2i64_8i32_mask: 1544; CHECK: # %bb.0: 1545; CHECK-NEXT: kmovd %esi, %k1 1546; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 1547; CHECK-NEXT: retq 1548 %1 = load <2 x i64>, <2 x i64> *%p 1549 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1550 %3 = bitcast <4 x i64> %2 to <8 x i32> 1551 %mask.cast = bitcast i8 %mask to <8 x i1> 1552 %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> %passthru 1553 ret <8 x i32> %res 1554} 1555 1556define <8 x i32> @test_broadcast_2i64_8i32_maskz(<2 x i64> *%p, i8 %mask) nounwind { 1557; CHECK-LABEL: test_broadcast_2i64_8i32_maskz: 1558; CHECK: # %bb.0: 1559; CHECK-NEXT: kmovd %esi, %k1 1560; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 1561; CHECK-NEXT: retq 1562 %1 = load <2 x i64>, <2 x i64> *%p 1563 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1564 %3 = bitcast <4 x i64> %2 to <8 x i32> 
1565 %mask.cast = bitcast i8 %mask to <8 x i1> 1566 %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> zeroinitializer 1567 ret <8 x i32> %res 1568} 1569 1570define <16 x float> @test_broadcast_2f64_16f32_mask(<2 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind { 1571; CHECK-LABEL: test_broadcast_2f64_16f32_mask: 1572; CHECK: # %bb.0: 1573; CHECK-NEXT: kmovd %esi, %k1 1574; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1575; CHECK-NEXT: retq 1576 %1 = load <2 x double>, <2 x double> *%p 1577 %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1578 %3 = bitcast <8 x double> %2 to <16 x float> 1579 %mask.cast = bitcast i16 %mask to <16 x i1> 1580 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru 1581 ret <16 x float> %res 1582} 1583 1584define <16 x float> @test_broadcast_2f64_16f32_maskz(<2 x double> *%p, i16 %mask) nounwind { 1585; CHECK-LABEL: test_broadcast_2f64_16f32_maskz: 1586; CHECK: # %bb.0: 1587; CHECK-NEXT: kmovd %esi, %k1 1588; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1589; CHECK-NEXT: retq 1590 %1 = load <2 x double>, <2 x double> *%p 1591 %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1592 %3 = bitcast <8 x double> %2 to <16 x float> 1593 %mask.cast = bitcast i16 %mask to <16 x i1> 1594 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer 1595 ret <16 x float> %res 1596} 1597 1598define <16 x i32> @test_broadcast_2i64_16i32_mask(<2 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind { 1599; CHECK-LABEL: test_broadcast_2i64_16i32_mask: 1600; CHECK: # %bb.0: 1601; CHECK-NEXT: kmovd %esi, %k1 1602; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1603; CHECK-NEXT: retq 1604 %1 = load <2 x i64>, <2 x i64> *%p 
1605 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1606 %3 = bitcast <8 x i64> %2 to <16 x i32> 1607 %mask.cast = bitcast i16 %mask to <16 x i1> 1608 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru 1609 ret <16 x i32> %res 1610} 1611 1612define <16 x i32> @test_broadcast_2i64_16i32_maskz(<2 x i64> *%p, i16 %mask) nounwind { 1613; CHECK-LABEL: test_broadcast_2i64_16i32_maskz: 1614; CHECK: # %bb.0: 1615; CHECK-NEXT: kmovd %esi, %k1 1616; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1617; CHECK-NEXT: retq 1618 %1 = load <2 x i64>, <2 x i64> *%p 1619 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1620 %3 = bitcast <8 x i64> %2 to <16 x i32> 1621 %mask.cast = bitcast i16 %mask to <16 x i1> 1622 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer 1623 ret <16 x i32> %res 1624} 1625 1626define <16 x float> @test_broadcast_4f64_16f32_mask(<4 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind { 1627; CHECK-LABEL: test_broadcast_4f64_16f32_mask: 1628; CHECK: # %bb.0: 1629; CHECK-NEXT: kmovd %esi, %k1 1630; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1631; CHECK-NEXT: retq 1632 %1 = load <4 x double>, <4 x double> *%p 1633 %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1634 %3 = bitcast <8 x double> %2 to <16 x float> 1635 %mask.cast = bitcast i16 %mask to <16 x i1> 1636 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru 1637 ret <16 x float> %res 1638} 1639 1640define <16 x float> @test_broadcast_4f64_16f32_maskz(<4 x double> *%p, i16 %mask) nounwind { 1641; CHECK-LABEL: test_broadcast_4f64_16f32_maskz: 1642; CHECK: # %bb.0: 1643; CHECK-NEXT: kmovd %esi, %k1 1644; CHECK-NEXT: vbroadcastf32x8 
{{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1645; CHECK-NEXT: retq 1646 %1 = load <4 x double>, <4 x double> *%p 1647 %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1648 %3 = bitcast <8 x double> %2 to <16 x float> 1649 %mask.cast = bitcast i16 %mask to <16 x i1> 1650 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer 1651 ret <16 x float> %res 1652} 1653 1654define <16 x i32> @test_broadcast_4i64_16i32_mask(<4 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind { 1655; CHECK-LABEL: test_broadcast_4i64_16i32_mask: 1656; CHECK: # %bb.0: 1657; CHECK-NEXT: kmovd %esi, %k1 1658; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1659; CHECK-NEXT: retq 1660 %1 = load <4 x i64>, <4 x i64> *%p 1661 %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1662 %3 = bitcast <8 x i64> %2 to <16 x i32> 1663 %mask.cast = bitcast i16 %mask to <16 x i1> 1664 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru 1665 ret <16 x i32> %res 1666} 1667 1668define <16 x i32> @test_broadcast_4i64_16i32_maskz(<4 x i64> *%p, i16 %mask) nounwind { 1669; CHECK-LABEL: test_broadcast_4i64_16i32_maskz: 1670; CHECK: # %bb.0: 1671; CHECK-NEXT: kmovd %esi, %k1 1672; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1673; CHECK-NEXT: retq 1674 %1 = load <4 x i64>, <4 x i64> *%p 1675 %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1676 %3 = bitcast <8 x i64> %2 to <16 x i32> 1677 %mask.cast = bitcast i16 %mask to <16 x i1> 1678 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer 1679 ret <16 x i32> %res 1680} 1681 1682define <4 x double> @test_broadcast_4f32_4f64_mask(<4 x float> *%p, i8 %mask, <4 x double> %passthru) nounwind 
{ 1683; CHECK-LABEL: test_broadcast_4f32_4f64_mask: 1684; CHECK: # %bb.0: 1685; CHECK-NEXT: kmovd %esi, %k1 1686; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] 1687; CHECK-NEXT: retq 1688 %1 = load <4 x float>, <4 x float> *%p 1689 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1690 %3 = bitcast <8 x float> %2 to <4 x double> 1691 %mask.cast = bitcast i8 %mask to <8 x i1> 1692 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1693 %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> %passthru 1694 ret <4 x double> %res 1695} 1696 1697define <4 x double> @test_broadcast_4f32_4f64_maskz(<4 x float> *%p, i8 %mask) nounwind { 1698; CHECK-LABEL: test_broadcast_4f32_4f64_maskz: 1699; CHECK: # %bb.0: 1700; CHECK-NEXT: kmovd %esi, %k1 1701; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] 1702; CHECK-NEXT: retq 1703 %1 = load <4 x float>, <4 x float> *%p 1704 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1705 %3 = bitcast <8 x float> %2 to <4 x double> 1706 %mask.cast = bitcast i8 %mask to <8 x i1> 1707 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1708 %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> zeroinitializer 1709 ret <4 x double> %res 1710} 1711 1712define <4 x i64> @test_broadcast_4i32_4i64_mask(<4 x i32> *%p, i8 %mask, <4 x i64> %passthru) nounwind { 1713; CHECK-LABEL: test_broadcast_4i32_4i64_mask: 1714; CHECK: # %bb.0: 1715; CHECK-NEXT: kmovd %esi, %k1 1716; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] 1717; CHECK-NEXT: retq 1718 %1 = load <4 x i32>, <4 x i32> *%p 1719 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1720 %3 = bitcast <8 x i32> %2 
to <4 x i64> 1721 %mask.cast = bitcast i8 %mask to <8 x i1> 1722 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1723 %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> %passthru 1724 ret <4 x i64> %res 1725} 1726 1727define <4 x i64> @test_broadcast_4i32_4i64_maskz(<4 x i32> *%p, i8 %mask) nounwind { 1728; CHECK-LABEL: test_broadcast_4i32_4i64_maskz: 1729; CHECK: # %bb.0: 1730; CHECK-NEXT: kmovd %esi, %k1 1731; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] 1732; CHECK-NEXT: retq 1733 %1 = load <4 x i32>, <4 x i32> *%p 1734 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1735 %3 = bitcast <8 x i32> %2 to <4 x i64> 1736 %mask.cast = bitcast i8 %mask to <8 x i1> 1737 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1738 %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> zeroinitializer 1739 ret <4 x i64> %res 1740} 1741 1742define <8 x double> @test_broadcast_4f32_8f64_mask(<4 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind { 1743; CHECK-LABEL: test_broadcast_4f32_8f64_mask: 1744; CHECK: # %bb.0: 1745; CHECK-NEXT: kmovd %esi, %k1 1746; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] 1747; CHECK-NEXT: retq 1748 %1 = load <4 x float>, <4 x float> *%p 1749 %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1750 %3 = bitcast <16 x float> %2 to <8 x double> 1751 %mask.cast = bitcast i8 %mask to <8 x i1> 1752 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru 1753 ret <8 x double> %res 1754} 1755 1756define <8 x double> @test_broadcast_4f32_8f64_maskz(<4 x float> *%p, i8 %mask) nounwind { 1757; CHECK-LABEL: test_broadcast_4f32_8f64_maskz: 1758; CHECK: # %bb.0: 1759; 
CHECK-NEXT: kmovd %esi, %k1 1760; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] 1761; CHECK-NEXT: retq 1762 %1 = load <4 x float>, <4 x float> *%p 1763 %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1764 %3 = bitcast <16 x float> %2 to <8 x double> 1765 %mask.cast = bitcast i8 %mask to <8 x i1> 1766 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer 1767 ret <8 x double> %res 1768} 1769 1770define <8 x i64> @test_broadcast_4i32_8i64_mask(<4 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind { 1771; CHECK-LABEL: test_broadcast_4i32_8i64_mask: 1772; CHECK: # %bb.0: 1773; CHECK-NEXT: kmovd %esi, %k1 1774; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] 1775; CHECK-NEXT: retq 1776 %1 = load <4 x i32>, <4 x i32> *%p 1777 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1778 %3 = bitcast <16 x i32> %2 to <8 x i64> 1779 %mask.cast = bitcast i8 %mask to <8 x i1> 1780 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru 1781 ret <8 x i64> %res 1782} 1783 1784define <8 x i64> @test_broadcast_4i32_8i64_maskz(<4 x i32> *%p, i8 %mask) nounwind { 1785; CHECK-LABEL: test_broadcast_4i32_8i64_maskz: 1786; CHECK: # %bb.0: 1787; CHECK-NEXT: kmovd %esi, %k1 1788; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] 1789; CHECK-NEXT: retq 1790 %1 = load <4 x i32>, <4 x i32> *%p 1791 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1792 %3 = bitcast <16 x i32> %2 to <8 x i64> 1793 %mask.cast = bitcast i8 %mask to <8 x i1> 1794 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> 
zeroinitializer 1795 ret <8 x i64> %res 1796} 1797 1798define <8 x double> @test_broadcast_8f32_8f64_mask(<8 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind { 1799; CHECK-LABEL: test_broadcast_8f32_8f64_mask: 1800; CHECK: # %bb.0: 1801; CHECK-NEXT: kmovd %esi, %k1 1802; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] 1803; CHECK-NEXT: retq 1804 %1 = load <8 x float>, <8 x float> *%p 1805 %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1806 %3 = bitcast <16 x float> %2 to <8 x double> 1807 %mask.cast = bitcast i8 %mask to <8 x i1> 1808 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru 1809 ret <8 x double> %res 1810} 1811 1812define <8 x double> @test_broadcast_8f32_8f64_maskz(<8 x float> *%p, i8 %mask) nounwind { 1813; CHECK-LABEL: test_broadcast_8f32_8f64_maskz: 1814; CHECK: # %bb.0: 1815; CHECK-NEXT: kmovd %esi, %k1 1816; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 1817; CHECK-NEXT: retq 1818 %1 = load <8 x float>, <8 x float> *%p 1819 %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1820 %3 = bitcast <16 x float> %2 to <8 x double> 1821 %mask.cast = bitcast i8 %mask to <8 x i1> 1822 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer 1823 ret <8 x double> %res 1824} 1825 1826define <8 x i64> @test_broadcast_8i32_8i64_mask(<8 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind { 1827; CHECK-LABEL: test_broadcast_8i32_8i64_mask: 1828; CHECK: # %bb.0: 1829; CHECK-NEXT: kmovd %esi, %k1 1830; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] 1831; CHECK-NEXT: retq 1832 %1 = load <8 x i32>, <8 x i32> *%p 1833 %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, 
i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1834 %3 = bitcast <16 x i32> %2 to <8 x i64> 1835 %mask.cast = bitcast i8 %mask to <8 x i1> 1836 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru 1837 ret <8 x i64> %res 1838} 1839 1840define <8 x i64> @test_broadcast_8i32_8i64_maskz(<8 x i32> *%p, i8 %mask) nounwind { 1841; CHECK-LABEL: test_broadcast_8i32_8i64_maskz: 1842; CHECK: # %bb.0: 1843; CHECK-NEXT: kmovd %esi, %k1 1844; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 1845; CHECK-NEXT: retq 1846 %1 = load <8 x i32>, <8 x i32> *%p 1847 %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1848 %3 = bitcast <16 x i32> %2 to <8 x i64> 1849 %mask.cast = bitcast i8 %mask to <8 x i1> 1850 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer 1851 ret <8 x i64> %res 1852} 1853 1854define <4 x float> @test_broadcastf32x2_v4f32(<4 x float> %vec, <4 x float> %passthru, i8 %mask) { 1855; CHECK-LABEL: test_broadcastf32x2_v4f32: 1856; CHECK: # %bb.0: 1857; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1858; CHECK-NEXT: kmovd %edi, %k1 1859; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} 1860; CHECK-NEXT: retq 1861 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1862 %mask.cast = bitcast i8 %mask to <8 x i1> 1863 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1864 %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> %passthru 1865 ret <4 x float> %res 1866} 1867 1868define <4 x float> @test_broadcastf32x2_v4f32_z(<4 x float> %vec, i8 %mask) { 1869; CHECK-LABEL: test_broadcastf32x2_v4f32_z: 1870; CHECK: # %bb.0: 1871; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1872; CHECK-NEXT: kmovd %edi, %k1 1873; 
CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} 1874; CHECK-NEXT: retq 1875 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1876 %mask.cast = bitcast i8 %mask to <8 x i1> 1877 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1878 %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> zeroinitializer 1879 ret <4 x float> %res 1880} 1881 1882define <4 x i32> @test_broadcasti32x2_v4i32(<4 x i32> %vec, <4 x i32> %passthru, i8 %mask) { 1883; CHECK-LABEL: test_broadcasti32x2_v4i32: 1884; CHECK: # %bb.0: 1885; CHECK-NEXT: kmovd %edi, %k1 1886; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1] 1887; CHECK-NEXT: vmovdqa %xmm1, %xmm0 1888; CHECK-NEXT: retq 1889 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1890 %mask.cast = bitcast i8 %mask to <8 x i1> 1891 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1892 %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> %passthru 1893 ret <4 x i32> %res 1894} 1895 1896define <4 x i32> @test_broadcasti32x2_v4i32_z(<4 x i32> %vec, i8 %mask) { 1897; CHECK-LABEL: test_broadcasti32x2_v4i32_z: 1898; CHECK: # %bb.0: 1899; CHECK-NEXT: kmovd %edi, %k1 1900; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1] 1901; CHECK-NEXT: retq 1902 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1903 %mask.cast = bitcast i8 %mask to <8 x i1> 1904 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1905 %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> zeroinitializer 1906 ret <4 x i32> %res 1907} 1908 1909define <8 x float> @test_broadcastf32x2_v8f32(<8 x float> %vec, <8 x float> %passthru, i8 %mask) { 1910; CHECK-LABEL: test_broadcastf32x2_v8f32: 1911; CHECK: # %bb.0: 1912; 
CHECK-NEXT: kmovd %edi, %k1 1913; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] 1914; CHECK-NEXT: vmovapd %ymm1, %ymm0 1915; CHECK-NEXT: retq 1916 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1917 %mask.cast = bitcast i8 %mask to <8 x i1> 1918 %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> %passthru 1919 ret <8 x float> %res 1920} 1921 1922define <8 x float> @test_broadcastf32x2_v8f32_z(<8 x float> %vec, i8 %mask) { 1923; CHECK-LABEL: test_broadcastf32x2_v8f32_z: 1924; CHECK: # %bb.0: 1925; CHECK-NEXT: kmovd %edi, %k1 1926; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] 1927; CHECK-NEXT: retq 1928 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1929 %mask.cast = bitcast i8 %mask to <8 x i1> 1930 %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> zeroinitializer 1931 ret <8 x float> %res 1932} 1933 1934define <8 x i32> @test_broadcasti32x2_v8i32(<8 x i32> %vec, <8 x i32> %passthru, i8 %mask) { 1935; CHECK-LABEL: test_broadcasti32x2_v8i32: 1936; CHECK: # %bb.0: 1937; CHECK-NEXT: kmovd %edi, %k1 1938; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] 1939; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1940; CHECK-NEXT: retq 1941 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1942 %mask.cast = bitcast i8 %mask to <8 x i1> 1943 %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> %passthru 1944 ret <8 x i32> %res 1945} 1946 1947define <8 x i32> @test_broadcasti32x2_v8i32_z(<8 x i32> %vec, i8 %mask) { 1948; CHECK-LABEL: test_broadcasti32x2_v8i32_z: 1949; CHECK: # %bb.0: 1950; CHECK-NEXT: kmovd %edi, %k1 1951; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] 1952; CHECK-NEXT: retq 1953 %shuf = shufflevector <8 
x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1954 %mask.cast = bitcast i8 %mask to <8 x i1> 1955 %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> zeroinitializer 1956 ret <8 x i32> %res 1957} 1958 1959define <16 x float> @test_broadcastf32x2_v16f32_z(<16 x float> %vec, i16 %mask) { 1960; CHECK-LABEL: test_broadcastf32x2_v16f32_z: 1961; CHECK: # %bb.0: 1962; CHECK-NEXT: kmovd %edi, %k1 1963; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1964; CHECK-NEXT: retq 1965 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1966 %mask.cast = bitcast i16 %mask to <16 x i1> 1967 %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> zeroinitializer 1968 ret <16 x float> %res 1969} 1970 1971define <16 x i32> @test_broadcasti32x2_v16i32(<16 x i32> %vec, <16 x i32> %passthru, i16 %mask) { 1972; CHECK-LABEL: test_broadcasti32x2_v16i32: 1973; CHECK: # %bb.0: 1974; CHECK-NEXT: kmovd %edi, %k1 1975; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1976; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1977; CHECK-NEXT: retq 1978 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1979 %mask.cast = bitcast i16 %mask to <16 x i1> 1980 %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> %passthru 1981 ret <16 x i32> %res 1982} 1983 1984define <16 x float> @test_broadcastf32x2_v16f32(<16 x float> %vec, <16 x float> %passthru, i16 %mask) { 1985; CHECK-LABEL: test_broadcastf32x2_v16f32: 1986; CHECK: # %bb.0: 1987; CHECK-NEXT: kmovd %edi, %k1 1988; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1989; CHECK-NEXT: vmovapd %zmm1, %zmm0 
1990; CHECK-NEXT: retq 1991 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1992 %mask.cast = bitcast i16 %mask to <16 x i1> 1993 %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> %passthru 1994 ret <16 x float> %res 1995} 1996 1997define <16 x i32> @test_broadcasti32x2_v16i32_z(<16 x i32> %vec, i16 %mask) { 1998; CHECK-LABEL: test_broadcasti32x2_v16i32_z: 1999; CHECK: # %bb.0: 2000; CHECK-NEXT: kmovd %edi, %k1 2001; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2002; CHECK-NEXT: retq 2003 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 2004 %mask.cast = bitcast i16 %mask to <16 x i1> 2005 %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> zeroinitializer 2006 ret <16 x i32> %res 2007} 2008 2009define <16 x i8> @mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) { 2010; CHECK-LABEL: mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16: 2011; CHECK: # %bb.0: 2012; CHECK-NEXT: kmovd %edi, %k1 2013; CHECK-NEXT: vpalignr {{.*#+}} xmm2 {%k1} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 2014; CHECK-NEXT: vmovdqa %xmm2, %xmm0 2015; CHECK-NEXT: retq 2016 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> 2017 %mask.cast = bitcast i16 %mask to <16 x i1> 2018 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru 2019 ret <16 x i8> %res 2020} 2021 2022define <16 x i8> @maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 2023; CHECK-LABEL: 
maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16: 2024; CHECK: # %bb.0: 2025; CHECK-NEXT: kmovd %edi, %k1 2026; CHECK-NEXT: vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 2027; CHECK-NEXT: retq 2028 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> 2029 %mask.cast = bitcast i16 %mask to <16 x i1> 2030 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer 2031 ret <16 x i8> %res 2032} 2033 2034define <16 x i8> @mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) { 2035; CHECK-LABEL: mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19: 2036; CHECK: # %bb.0: 2037; CHECK-NEXT: kmovd %edi, %k1 2038; CHECK-NEXT: vpalignr {{.*#+}} xmm2 {%k1} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] 2039; CHECK-NEXT: vmovdqa %xmm2, %xmm0 2040; CHECK-NEXT: retq 2041 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19> 2042 %mask.cast = bitcast i16 %mask to <16 x i1> 2043 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru 2044 ret <16 x i8> %res 2045} 2046 2047define <16 x i8> @maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 2048; CHECK-LABEL: maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19: 2049; CHECK: # %bb.0: 2050; CHECK-NEXT: kmovd %edi, %k1 2051; CHECK-NEXT: vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] 2052; CHECK-NEXT: retq 2053 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19> 2054 %mask.cast 
= bitcast i16 %mask to <16 x i1> 2055 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer 2056 ret <16 x i8> %res 2057} 2058 2059define <16 x i8> @mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) { 2060; CHECK-LABEL: mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23: 2061; CHECK: # %bb.0: 2062; CHECK-NEXT: kmovd %edi, %k1 2063; CHECK-NEXT: vpalignr {{.*#+}} xmm2 {%k1} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 2064; CHECK-NEXT: vmovdqa %xmm2, %xmm0 2065; CHECK-NEXT: retq 2066 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 2067 %mask.cast = bitcast i16 %mask to <16 x i1> 2068 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru 2069 ret <16 x i8> %res 2070} 2071 2072define <16 x i8> @maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 2073; CHECK-LABEL: maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23: 2074; CHECK: # %bb.0: 2075; CHECK-NEXT: kmovd %edi, %k1 2076; CHECK-NEXT: vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 2077; CHECK-NEXT: retq 2078 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 2079 %mask.cast = bitcast i16 %mask to <16 x i1> 2080 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer 2081 ret <16 x i8> %res 2082} 2083