; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL

define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rsqrt14_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}

define <4 x float> @test_rsqrt14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; CHECK-LABEL: test_rsqrt14_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14ss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rcp14_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}

define <4 x float> @test_rcp14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; CHECK-LABEL: test_rcp14_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14ss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
; CHECK-LABEL: test_rsqrt14_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}

define <2 x double> @test_rsqrt14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
; CHECK-LABEL: test_rsqrt14_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14sd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
; CHECK-LABEL: test_rcp14_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}

define <2 x double> @test_rcp14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
; CHECK-LABEL: test_rcp14_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14sd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; SKX-LABEL: test_int_x86_avx512_mask_scalef_ss:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; SKX-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test_int_x86_avx512_mask_scalef_ss:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; KNL-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

define <4 x float>@test_int_x86_avx512_mask_scalef_ss_load(<4 x float> %x0, <4 x float>* %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vscalefss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <4 x float>, <4 x float>* %x1ptr
  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; SKX-LABEL: test_int_x86_avx512_mask_scalef_sd:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; SKX-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test_int_x86_avx512_mask_scalef_sd:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; KNL-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

define <2 x double>@test_int_x86_avx512_mask_scalef_sd_load(<2 x double> %x0, <2 x double>* %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vscalefsd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <2 x double>, <2 x double>* %x1ptr
  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> undef, i8 -1, i32 4)
  ret <2 x double> %res
}