; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL

; Unmasked (i8 -1) rsqrt14.ss should select a plain vrsqrt14ss with no mask.
define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rsqrt14_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
    %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
    ret <4 x float> %res
}

; The loaded second operand should fold into the vrsqrt14ss memory form.
define <4 x float> @test_rsqrt14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; CHECK-LABEL: test_rsqrt14_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14ss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; Unmasked (i8 -1) rcp14.ss should select a plain vrcp14ss with no mask.
define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rcp14_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
    %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
    ret <4 x float> %res
}

; The loaded second operand should fold into the vrcp14ss memory form.
define <4 x float> @test_rcp14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; CHECK-LABEL: test_rcp14_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14ss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; Unmasked (i8 -1) rsqrt14.sd should select a plain vrsqrt14sd with no mask.
define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
; CHECK-LABEL: test_rsqrt14_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
    %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
    ret <2 x double> %res
}

; The loaded second operand should fold into the vrsqrt14sd memory form.
define <2 x double> @test_rsqrt14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
; CHECK-LABEL: test_rsqrt14_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14sd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

; Unmasked (i8 -1) rcp14.sd should select a plain vrcp14sd with no mask.
define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
; CHECK-LABEL: test_rcp14_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
    %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
    ret <2 x double> %res
}

; The loaded second operand should fold into the vrcp14sd memory form.
define <2 x double> @test_rcp14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
; CHECK-LABEL: test_rcp14_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14sd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
; Masked scalef.ss (rounding mode 4 = current) plus an unmasked call with
; rounding mode 8 ({rn-sae}). SKX and KNL differ only in the mask-register
; move instruction (kmovd vs kmovw).
define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; SKX-LABEL: test_int_x86_avx512_mask_scalef_ss:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; SKX-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test_int_x86_avx512_mask_scalef_ss:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; KNL-NEXT:    retq
    %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
    %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
    %res2 = fadd <4 x float> %res, %res1
    ret <4 x float> %res2
}

; Unmasked scalef.ss with a loaded operand folds into the vscalefss memory form.
define <4 x float>@test_int_x86_avx512_mask_scalef_ss_load(<4 x float> %x0, <4 x float>* %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vscalefss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <4 x float>, <4 x float>* %x1ptr
  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
; Masked scalef.sd (rounding mode 4 = current) plus an unmasked call with
; rounding mode 8 ({rn-sae}). SKX and KNL differ only in the mask-register
; move instruction (kmovd vs kmovw).
define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; SKX-LABEL: test_int_x86_avx512_mask_scalef_sd:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; SKX-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test_int_x86_avx512_mask_scalef_sd:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; KNL-NEXT:    retq
    %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
    %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
    %res2 = fadd <2 x double> %res, %res1
    ret <2 x double> %res2
}

; Unmasked scalef.sd with a loaded operand folds into the vscalefsd memory form.
define <2 x double>@test_int_x86_avx512_mask_scalef_sd_load(<2 x double> %x0, <2 x double>* %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vscalefsd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <2 x double>, <2 x double>* %x1ptr
  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> undef, i8 -1, i32 4)
  ret <2 x double> %res
}