; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512ifmavl-builtins.c

define <2 x i64> @test_mm_madd52hi_epu64(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; CHECK-LABEL: test_mm_madd52hi_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_mask_madd52hi_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__W, <2 x i64> %__X, <2 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_maskz_madd52hi_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; X86-LABEL: test_mm_maskz_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_madd52hi_epu64(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; CHECK-LABEL: test_mm256_madd52hi_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_mask_madd52hi_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__W, <4 x i64> %__X, <4 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_maskz_madd52hi_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; X86-LABEL: test_mm256_maskz_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

define <2 x i64> @test_mm_madd52lo_epu64(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; CHECK-LABEL: test_mm_madd52lo_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_mask_madd52lo_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__W, <2 x i64> %__X, <2 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_maskz_madd52lo_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; X86-LABEL: test_mm_maskz_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_madd52lo_epu64(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; CHECK-LABEL: test_mm256_madd52lo_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_mask_madd52lo_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__W, <4 x i64> %__X, <4 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_maskz_madd52lo_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; X86-LABEL: test_mm256_maskz_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
