; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; Codegen tests for the 512-bit AVX512-IFMA intrinsics
; llvm.x86.avx512.{mask,maskz}.vpmadd52{h,l}.uq.512.
;
; Every function is compiled twice (i686 and x86_64, see the two llc
; invocations above). Expectations common to both targets use the shared
; CHECK prefix; target-specific expectations use the X86 or X64 prefix.
; llc is run with --show-mc-encoding, so each expected instruction also pins
; its machine encoding bytes.
;
; The check lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

declare <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; vpmadd52h, all-ones mask (i8 -1): expects the unmasked register form.
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

; vpmadd52h, variable mask %x3: expects the mask moved into %k1 and a
; {%k1}-masked instruction.
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; vpmadd52h, maskz intrinsic with variable mask %x3: expects the
; {%k1} {z} form of the instruction.
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; vpmadd52l, all-ones mask (i8 -1): expects the unmasked register form.
define <8 x i64>@test_int_x86_avx512_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52l_uq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

; vpmadd52l, variable mask %x3: expects a {%k1}-masked instruction.
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; vpmadd52l, maskz intrinsic with variable mask %x3: expects the
; {%k1} {z} form of the instruction.
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

; ---------------------------------------------------------------------------
; Memory-operand tests (vpmadd52h only). One intrinsic operand comes from a
; load; the expected assembly uses a memory operand on the instruction itself
; instead of a separate vector load.
;   *_load           - full <8 x i64> load passed as the third operand (%x2)
;   *_load_bcast     - scalar i64 load splatted to all lanes, expected as an
;                      embedded {1to8} broadcast operand
;   *_load_commute   - the loaded value is passed as the second operand (%x1);
;                      the expected assembly still uses the memory-operand form
;   *_commute_bcast  - both of the above combined
; ---------------------------------------------------------------------------

; Unmasked, full-width load as third operand.
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

; Unmasked, splatted scalar load as third operand.
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

; Unmasked, full-width load as second operand.
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

; Unmasked, splatted scalar load as second operand.
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

; Masked (%x3), full-width load as third operand.
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

; Masked (%x3), splatted scalar load as third operand.
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

; Masked (%x3), full-width load as second operand.
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

; Masked (%x3), splatted scalar load as second operand.
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

; Zero-masked (%x3), full-width load as third operand.
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

; Zero-masked (%x3), splatted scalar load as third operand.
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

; Zero-masked (%x3), full-width load as second operand.
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

; Zero-masked (%x3), splatted scalar load as second operand.
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}