; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c

define <8 x i64> @test_mm512_broadcastd_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm512_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastd %xmm0, %zmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastd %xmm0, %zmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i32> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_broadcastd_epi32(<8 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm512_mask_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastd %xmm1, %zmm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastd %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res0 = shufflevector <4 x i32> %arg2, <4 x i32> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res0 = shufflevector <4 x i32> %arg1, <4 x i32> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_broadcastq_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm512_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastq %xmm0, %zmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastq %xmm0, %zmm0
; X64-NEXT:    retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %res
}

define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm512_mask_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastq %xmm1, %zmm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastq %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <2 x i64> %a1, <2 x i64> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
  ret <8 x i64> %res1
}

define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a0) {
; X32-LABEL: test_mm512_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastsd %xmm0, %zmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %zmm0
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm512_mask_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastsd %xmm1, %zmm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastsd %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <2 x double> %a1, <2 x double> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <16 x float> @test_mm512_broadcastss_ps(<4 x float> %a0) {
; X32-LABEL: test_mm512_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %zmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %zmm0
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_broadcastss_ps(<16 x float> %a0, i16 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm512_mask_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastss %xmm1, %zmm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastss %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_broadcastss_ps(i16 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

define <8 x double> @test_mm512_movddup_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_movddup_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_movddup_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <16 x float> @test_mm512_movehdup_ps(<16 x float> %a0) {
; X32-LABEL: test_mm512_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    retq
  %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_mask_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_maskz_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_moveldup_ps(<16 x float> %a0) {
; X32-LABEL: test_mm512_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    retq
  %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_mask_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_maskz_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

define <8 x double> @test_mm512_permute_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_permute_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permute_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_permute_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permute_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_permute_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_permute_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permute_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <16 x float> @test_mm512_permute_ps(<16 x float> %a0) {
; X32-LABEL: test_mm512_permute_ps:
; X32:       # BB#0:
; X32-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permute_ps:
; X64:       # BB#0:
; X64-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X64-NEXT:    retq
  %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_permute_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_mask_permute_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permute_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_permute_ps(i16 %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_maskz_permute_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permute_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

define <8 x i64> @test_mm512_permutex_epi64(<8 x i64> %a0) {
; X32-LABEL: test_mm512_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %res = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i64> %res
}

define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_mask_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a2, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_maskz_permutex_epi64(i8 %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a1, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
  ret <8 x i64> %res1
}

define <8 x double> @test_mm512_permutex_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <8 x i64> @test_mm512_shuffle_epi32(<8 x i64> %a0) {
; X32-LABEL: test_mm512_shuffle_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_shuffle_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
  %res1 = bitcast <16 x i32> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_shuffle_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_mask_shuffle_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shuffle_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg2, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
  %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_shuffle_epi32(i16 %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_shuffle_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shuffle_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg1, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
  %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x double> @test_mm512_shuffle_pd(<8 x double> %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_shuffle_pd:
; X32:       # BB#0:
; X32-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_shuffle_pd:
; X64:       # BB#0:
; X64-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X32-LABEL: test_mm512_mask_shuffle_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shuffle_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_maskz_shuffle_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shuffle_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <8 x i64> @test_mm512_unpackhi_epi32(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpackhi_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpackhi_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  %res1 = bitcast <16 x i32> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_unpackhi_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
  %arg3 = bitcast <8 x i64> %a3 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
  %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpackhi_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpackhi_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT:    retq
  %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i64> %res
}

; NOTE(review): the original CHECK lines for the two masked unpackhi_epi64 tests
; below omitted the {%k1} / {%k1} {z} write-mask decorations even though the IR
; performs a masked select, unlike every sibling masked test in this file. The
; decorations have been restored; reconfirm by rerunning update_llc_test_checks.py.
define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckhqdq {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_maskz_unpackhi_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckhqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
  ret <8 x i64> %res1
}

define <8 x double> @test_mm512_unpackhi_pd(<8 x double> %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_unpackhi_pd:
; X32:       # BB#0:
; X32-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpackhi_pd:
; X64:       # BB#0:
; X64-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpackhi_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
837; X64-NEXT: retq 838 %arg1 = bitcast i8 %a1 to <8 x i1> 839 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 840 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0 841 ret <8 x double> %res1 842} 843 844define <8 x double> @test_mm512_maskz_unpackhi_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) { 845; X32-LABEL: test_mm512_maskz_unpackhi_pd: 846; X32: # BB#0: 847; X32-NEXT: movb {{[0-9]+}}(%esp), %al 848; X32-NEXT: kmovw %eax, %k1 849; X32-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 850; X32-NEXT: retl 851; 852; X64-LABEL: test_mm512_maskz_unpackhi_pd: 853; X64: # BB#0: 854; X64-NEXT: kmovw %edi, %k1 855; X64-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 856; X64-NEXT: retq 857 %arg0 = bitcast i8 %a0 to <8 x i1> 858 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 859 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer 860 ret <8 x double> %res1 861} 862 863define <16 x float> @test_mm512_unpackhi_ps(<16 x float> %a0, <16 x float> %a1) { 864; X32-LABEL: test_mm512_unpackhi_ps: 865; X32: # BB#0: 866; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 867; X32-NEXT: retl 868; 869; X64-LABEL: test_mm512_unpackhi_ps: 870; X64: # BB#0: 871; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 872; X64-NEXT: retq 873 %res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 874 
ret <16 x float> %res 875} 876 877define <16 x float> @test_mm512_mask_unpackhi_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) { 878; X32-LABEL: test_mm512_mask_unpackhi_ps: 879; X32: # BB#0: 880; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 881; X32-NEXT: kmovw %eax, %k1 882; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15] 883; X32-NEXT: retl 884; 885; X64-LABEL: test_mm512_mask_unpackhi_ps: 886; X64: # BB#0: 887; X64-NEXT: kmovw %edi, %k1 888; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15] 889; X64-NEXT: retq 890 %arg1 = bitcast i16 %a1 to <16 x i1> 891 %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 892 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0 893 ret <16 x float> %res1 894} 895 896define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) { 897; X32-LABEL: test_mm512_maskz_unpackhi_ps: 898; X32: # BB#0: 899; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 900; X32-NEXT: kmovw %eax, %k1 901; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 902; X32-NEXT: retl 903; 904; X64-LABEL: test_mm512_maskz_unpackhi_ps: 905; X64: # BB#0: 906; X64-NEXT: kmovw %edi, %k1 907; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 908; X64-NEXT: retq 909 %arg0 = bitcast i16 %a0 to <16 x i1> 910 %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x 
i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 911 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer 912 ret <16 x float> %res1 913} 914 915define <8 x i64> @test_mm512_unpacklo_epi32(<8 x i64> %a0, <8 x i64> %a1) { 916; X32-LABEL: test_mm512_unpacklo_epi32: 917; X32: # BB#0: 918; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 919; X32-NEXT: retl 920; 921; X64-LABEL: test_mm512_unpacklo_epi32: 922; X64: # BB#0: 923; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 924; X64-NEXT: retq 925 %arg0 = bitcast <8 x i64> %a0 to <16 x i32> 926 %arg1 = bitcast <8 x i64> %a1 to <16 x i32> 927 %res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 928 %res1 = bitcast <16 x i32> %res0 to <8 x i64> 929 ret <8 x i64> %res1 930} 931 932define <8 x i64> @test_mm512_mask_unpacklo_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) { 933; X32-LABEL: test_mm512_mask_unpacklo_epi32: 934; X32: # BB#0: 935; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 936; X32-NEXT: kmovw %eax, %k1 937; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13] 938; X32-NEXT: retl 939; 940; X64-LABEL: test_mm512_mask_unpacklo_epi32: 941; X64: # BB#0: 942; X64-NEXT: kmovw %edi, %k1 943; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13] 944; X64-NEXT: retq 945 %arg0 = bitcast <8 x i64> 
%a0 to <16 x i32> 946 %arg1 = bitcast i16 %a1 to <16 x i1> 947 %arg2 = bitcast <8 x i64> %a2 to <16 x i32> 948 %arg3 = bitcast <8 x i64> %a3 to <16 x i32> 949 %res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 950 %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0 951 %res2 = bitcast <16 x i32> %res1 to <8 x i64> 952 ret <8 x i64> %res2 953} 954 955define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) { 956; X32-LABEL: test_mm512_maskz_unpacklo_epi32: 957; X32: # BB#0: 958; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 959; X32-NEXT: kmovw %eax, %k1 960; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 961; X32-NEXT: retl 962; 963; X64-LABEL: test_mm512_maskz_unpacklo_epi32: 964; X64: # BB#0: 965; X64-NEXT: kmovw %edi, %k1 966; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 967; X64-NEXT: retq 968 %arg0 = bitcast i16 %a0 to <16 x i1> 969 %arg1 = bitcast <8 x i64> %a1 to <16 x i32> 970 %arg2 = bitcast <8 x i64> %a2 to <16 x i32> 971 %res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 972 %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer 973 %res2 = bitcast <16 x i32> %res1 to <8 x i64> 974 ret <8 x i64> %res2 975} 976 977define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) { 978; X32-LABEL: test_mm512_unpacklo_epi64: 979; X32: # BB#0: 980; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 981; 
X32-NEXT: retl 982; 983; X64-LABEL: test_mm512_unpacklo_epi64: 984; X64: # BB#0: 985; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 986; X64-NEXT: retq 987 %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 988 ret <8 x i64> %res 989} 990 991define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) { 992; X32-LABEL: test_mm512_mask_unpacklo_epi64: 993; X32: # BB#0: 994; X32-NEXT: movb {{[0-9]+}}(%esp), %al 995; X32-NEXT: kmovw %eax, %k1 996; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] 997; X32-NEXT: retl 998; 999; X64-LABEL: test_mm512_mask_unpacklo_epi64: 1000; X64: # BB#0: 1001; X64-NEXT: kmovw %edi, %k1 1002; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] 1003; X64-NEXT: retq 1004 %arg1 = bitcast i8 %a1 to <8 x i1> 1005 %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1006 %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0 1007 ret <8 x i64> %res1 1008} 1009 1010define <8 x i64> @test_mm512_maskz_unpacklo_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) { 1011; X32-LABEL: test_mm512_maskz_unpacklo_epi64: 1012; X32: # BB#0: 1013; X32-NEXT: movb {{[0-9]+}}(%esp), %al 1014; X32-NEXT: kmovw %eax, %k1 1015; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1016; X32-NEXT: retl 1017; 1018; X64-LABEL: test_mm512_maskz_unpacklo_epi64: 1019; X64: # BB#0: 1020; X64-NEXT: kmovw %edi, %k1 1021; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1022; X64-NEXT: retq 1023 %arg0 = bitcast i8 %a0 to <8 x i1> 1024 %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 
6, i32 14> 1025 %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer 1026 ret <8 x i64> %res1 1027} 1028 1029define <8 x double> @test_mm512_unpacklo_pd(<8 x double> %a0, <8 x double> %a1) { 1030; X32-LABEL: test_mm512_unpacklo_pd: 1031; X32: # BB#0: 1032; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1033; X32-NEXT: retl 1034; 1035; X64-LABEL: test_mm512_unpacklo_pd: 1036; X64: # BB#0: 1037; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1038; X64-NEXT: retq 1039 %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1040 ret <8 x double> %res 1041} 1042 1043define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) { 1044; X32-LABEL: test_mm512_mask_unpacklo_pd: 1045; X32: # BB#0: 1046; X32-NEXT: movb {{[0-9]+}}(%esp), %al 1047; X32-NEXT: kmovw %eax, %k1 1048; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] 1049; X32-NEXT: retl 1050; 1051; X64-LABEL: test_mm512_mask_unpacklo_pd: 1052; X64: # BB#0: 1053; X64-NEXT: kmovw %edi, %k1 1054; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] 1055; X64-NEXT: retq 1056 %arg1 = bitcast i8 %a1 to <8 x i1> 1057 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1058 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0 1059 ret <8 x double> %res1 1060} 1061 1062define <8 x double> @test_mm512_maskz_unpacklo_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) { 1063; X32-LABEL: test_mm512_maskz_unpacklo_pd: 1064; X32: # BB#0: 1065; X32-NEXT: movb {{[0-9]+}}(%esp), %al 1066; X32-NEXT: kmovw %eax, %k1 1067; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1068; X32-NEXT: retl 1069; 1070; X64-LABEL: test_mm512_maskz_unpacklo_pd: 1071; X64: # BB#0: 1072; X64-NEXT: kmovw %edi, %k1 1073; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1074; X64-NEXT: retq 1075 %arg0 = bitcast i8 %a0 to <8 x i1> 1076 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1077 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer 1078 ret <8 x double> %res1 1079} 1080 1081define <16 x float> @test_mm512_unpacklo_ps(<16 x float> %a0, <16 x float> %a1) { 1082; X32-LABEL: test_mm512_unpacklo_ps: 1083; X32: # BB#0: 1084; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1085; X32-NEXT: retl 1086; 1087; X64-LABEL: test_mm512_unpacklo_ps: 1088; X64: # BB#0: 1089; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1090; X64-NEXT: retq 1091 %res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 1092 ret <16 x float> %res 1093} 1094 1095define <16 x float> @test_mm512_mask_unpacklo_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) { 1096; X32-LABEL: test_mm512_mask_unpacklo_ps: 1097; X32: # BB#0: 1098; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 1099; X32-NEXT: kmovw %eax, %k1 1100; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13] 1101; X32-NEXT: retl 1102; 1103; X64-LABEL: test_mm512_mask_unpacklo_ps: 1104; X64: # BB#0: 1105; 
X64-NEXT: kmovw %edi, %k1 1106; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13] 1107; X64-NEXT: retq 1108 %arg1 = bitcast i16 %a1 to <16 x i1> 1109 %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 1110 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0 1111 ret <16 x float> %res1 1112} 1113 1114define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) { 1115; X32-LABEL: test_mm512_maskz_unpacklo_ps: 1116; X32: # BB#0: 1117; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 1118; X32-NEXT: kmovw %eax, %k1 1119; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1120; X32-NEXT: retl 1121; 1122; X64-LABEL: test_mm512_maskz_unpacklo_ps: 1123; X64: # BB#0: 1124; X64-NEXT: kmovw %edi, %k1 1125; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1126; X64-NEXT: retq 1127 %arg0 = bitcast i16 %a0 to <16 x i1> 1128 %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 1129 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer 1130 ret <16 x float> %res1 1131} 1132 1133!0 = !{i32 1} 1134 1135