; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Instruction-selection test for reciprocal on amdgcn: covers the
; llvm.amdgcn.rcp intrinsic, the "fdiv 1.0, x" pattern, and rcp(sqrt(x)),
; each under the attribute groups defined at the bottom of this file.

declare float @llvm.amdgcn.rcp.f32(float) #0
declare double @llvm.amdgcn.rcp.f64(double) #0

declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0

; rcp intrinsic of undef: no v_rcp_f32 is expected in the output.
; FUNC-LABEL: {{^}}rcp_undef_f32:
; SI-NOT: v_rcp_f32
define void @rcp_undef_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; f32 "fdiv 1.0, x" with attribute group #1: expects a single v_rcp_f32 whose
; result register is stored directly.
; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; f32 "fdiv 1.0, x" with attribute group #4: expects a single v_rcp_f32.
; NOTE(review): the function name says "safe", but group #4 sets
; "unsafe-fp-math"="true" -- confirm the name/attribute pairing is intended.
; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; f32 "fdiv 1.0, x" with attribute group #3: expects the v_div_scale_f32
; expansion rather than a bare rcp.
; NOTE(review): the function name says "unsafe", but group #3 sets
; "unsafe-fp-math"="false" -- confirm the name/attribute pairing is intended.
; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
; SI: v_div_scale_f32
define void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; f32 rcp(sqrt(x)) with attribute group #1: expects separate sqrt and rcp
; instructions.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; f32 rcp(sqrt(x)) with attribute group #2: expects a v_rsq_f32.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
; SI: v_rsq_f32_e32
define void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; f64 rcp intrinsic with attribute group #1: expects a single v_rcp_f64 whose
; result register pair is stored directly.
; FUNC-LABEL: {{^}}rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 rcp intrinsic with attribute group #2: same expectation as rcp_f64.
; FUNC-LABEL: {{^}}unsafe_rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 "fdiv 1.0, x" with attribute group #1: expects the v_div_scale_f64
; expansion.
; FUNC-LABEL: {{^}}rcp_pat_f64:
; SI: v_div_scale_f64
define void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 "fdiv 1.0, x" with attribute group #2: expects a single v_rcp_f64.
; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 rcp(sqrt(x)) with attribute group #1: expects separate sqrt and rcp
; instructions and no v_rsq_f64 before them.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
; SI-NOT: v_rsq_f64_e32
; SI: v_sqrt_f64
; SI: v_rcp_f64
define void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 rcp(sqrt(x)) with attribute group #2: expects a single v_rsq_f64 whose
; result register pair is stored directly.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; Attribute groups:
;   #0 - intrinsic declarations.
;   #1 - unsafe-fp-math off, fp32 denormals off.
;   #2 - unsafe-fp-math on,  fp32 denormals off.
;   #3 - unsafe-fp-math off, fp32 denormals on.
;   #4 - unsafe-fp-math on,  fp32 denormals on.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }