; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vldq-builtins.c

define <2 x double> @test_mm_cvtepi64_pd(<2 x i64> %__A) {
; CHECK-LABEL: test_mm_cvtepi64_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %conv.i = sitofp <2 x i64> %__A to <2 x double>
  ret <2 x double> %conv.i
}

define <2 x double> @test_mm_mask_cvtepi64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_cvtepi64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtqq2pd %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_cvtepi64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtqq2pd %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %conv.i.i = sitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W
  ret <2 x double> %1
}

define <2 x double> @test_mm_maskz_cvtepi64_pd(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_cvtepi64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtqq2pd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_cvtepi64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtqq2pd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %conv.i.i = sitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer
  ret <2 x double> %1
}

define <4 x double> @test_mm256_cvtepi64_pd(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi64_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %conv.i = sitofp <4 x i64> %__A to <4 x double>
  ret <4 x double> %conv.i
}

define <4 x double> @test_mm256_mask_cvtepi64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtqq2pd %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_cvtepi64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtqq2pd %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %conv.i.i = sitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W
  ret <4 x double> %1
}

define <4 x double> @test_mm256_maskz_cvtepi64_pd(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtqq2pd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_cvtepi64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtqq2pd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %conv.i.i = sitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer
  ret <4 x double> %1
}

define <2 x double> @test_mm_cvtepu64_pd(<2 x i64> %__A) {
; CHECK-LABEL: test_mm_cvtepu64_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %conv.i = uitofp <2 x i64> %__A to <2 x double>
  ret <2 x double> %conv.i
}

define <2 x double> @test_mm_mask_cvtepu64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_cvtepu64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtuqq2pd %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_cvtepu64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtuqq2pd %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %conv.i.i = uitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W
  ret <2 x double> %1
}

define <2 x double> @test_mm_maskz_cvtepu64_pd(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_cvtepu64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtuqq2pd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_cvtepu64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtuqq2pd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %conv.i.i = uitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer
  ret <2 x double> %1
}

define <4 x double> @test_mm256_cvtepu64_pd(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepu64_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %conv.i = uitofp <4 x i64> %__A to <4 x double>
  ret <4 x double> %conv.i
}

define <4 x double> @test_mm256_mask_cvtepu64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepu64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtuqq2pd %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_cvtepu64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtuqq2pd %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %conv.i.i = uitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W
  ret <4 x double> %1
}

define <4 x double> @test_mm256_maskz_cvtepu64_pd(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepu64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtuqq2pd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_cvtepu64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtuqq2pd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %conv.i.i = uitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer
  ret <4 x double> %1
}

define zeroext i8 @test_mm_mask_fpclass_pd_mask(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_fpclass_pd_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vfpclasspd $2, %xmm0, %k0 {%k1}
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_fpclass_pd_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vfpclasspd $2, %xmm0, %k0 {%k1}
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = and <2 x i1> %0, %extract
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
; CHECK-LABEL: test_mm_fpclass_pd_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
  %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

define zeroext i8 @test_mm256_mask_fpclass_pd_mask(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_fpclass_pd_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1}
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_fpclass_pd_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1}
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = and <4 x i1> %0, %extract
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
; CHECK-LABEL: test_mm256_fpclass_pd_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

define zeroext i8 @test_mm_mask_fpclass_ps_mask(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_fpclass_ps_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1}
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_fpclass_ps_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1}
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = and <4 x i1> %0, %extract
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
; CHECK-LABEL: test_mm_fpclass_ps_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_fpclass_ps_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vfpclassps $2, %ymm0, %k0
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_fpclass_ps_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vfpclassps $2, %ymm0, %k0
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    andb %dil, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = and <8 x i1> %0, %1
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}

declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
; CHECK-LABEL: test_mm256_fpclass_ps_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
  %1 = bitcast <8 x i1> %0 to i8
  ret i8 %1
}