; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)

define <2 x i64>@test_int_x86_avx512_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  ret <2 x i64> %1
}

define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> %x0
  ret <2 x i64> %3
}

declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)

define <4 x i64>@test_int_x86_avx512_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  ret <4 x i64> %1
}

define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> %x0
  ret <4 x i64> %3
}

define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> zeroinitializer
  ret <2 x i64> %3
}

define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> zeroinitializer
  ret <4 x i64> %3
}

declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)

define <2 x i64>@test_int_x86_avx512_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52l_uq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  ret <2 x i64> %1
}

define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> %x0
  ret <2 x i64> %3
}

declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)

define <4 x i64>@test_int_x86_avx512_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52l_uq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  ret <4 x i64> %1
}

define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> %x0
  ret <4 x i64> %3
}

define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> zeroinitializer
  ret <2 x i64> %3
}

define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> zeroinitializer
  ret <4 x i64> %3
}