; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; Tests for the 512-bit AVX512IFMA intrinsics (vpmadd52huq / vpmadd52luq):
; unmasked, merge-masked ({%k1}), and zero-masked ({%k1} {z}) forms, plus
; folded-load and embedded-broadcast ({1to8}) operand variants.
; Encodings are checked for both the i686 (X86) and x86_64 (X64) targets.

declare <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

; Merge-masking: lanes where the mask bit is clear keep the passthru value %x0.
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

; Zero-masking: lanes where the mask bit is clear are zeroed ({z} form).
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

declare <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52l_uq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

; Load folding: the third operand comes from memory and should be folded
; directly into the instruction's memory operand.
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

; Embedded broadcast: a scalar i64 splatted to all 8 lanes should use the
; {1to8} broadcast memory form.
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

; Commuted operands: the loaded value is the second multiplicand; since the
; two multiplicands commute, the load should still fold.
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

; Merge-masked variants of the load/broadcast/commute tests.
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

; Zero-masked variants of the load/broadcast/commute tests.
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}