; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512ifmavl-builtins.c

define <2 x i64> @test_mm_madd52hi_epu64(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; CHECK-LABEL: test_mm_madd52hi_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_mask_madd52hi_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__W, <2 x i64> %__X, <2 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_maskz_madd52hi_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; X86-LABEL: test_mm_maskz_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_madd52hi_epu64(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; CHECK-LABEL: test_mm256_madd52hi_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_mask_madd52hi_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__W, <4 x i64> %__X, <4 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_maskz_madd52hi_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; X86-LABEL: test_mm256_maskz_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

define <2 x i64> @test_mm_madd52lo_epu64(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; CHECK-LABEL: test_mm_madd52lo_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_mask_madd52lo_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__W, <2 x i64> %__X, <2 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_maskz_madd52lo_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; X86-LABEL: test_mm_maskz_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_madd52lo_epu64(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; CHECK-LABEL: test_mm256_madd52lo_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_mask_madd52lo_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__W, <4 x i64> %__X, <4 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_maskz_madd52lo_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; X86-LABEL: test_mm256_maskz_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)