1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2007 Julien Pommier 5 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> 6 // Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org> 7 // 8 // This Source Code Form is subject to the terms of the Mozilla 9 // Public License v. 2.0. If a copy of the MPL was not distributed 10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 11 12 /* The sin, cos, exp, and log functions of this file come from 13 * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ 14 */ 15 16 #ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H 17 #define EIGEN_MATH_FUNCTIONS_ALTIVEC_H 18 19 namespace Eigen { 20 21 namespace internal { 22 23 static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0); 24 static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0); 25 static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5); 26 27 static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437); 28 static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303); 29 30 static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599); 31 32 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4); 33 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2); 34 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1); 35 36 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6); 37 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3); 38 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1); 39 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); 40 41 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); 42 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); 43 44 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED 45 Packet2d pexp<Packet2d>(const Packet2d& _x) 46 { 47 Packet2d x = _x; 48 49 Packet2d tmp, fx; 50 Packet2l emm0; 51 52 // clamp x 53 x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo); 54 /* express exp(x) as exp(g + n*log(2)) */ 55 fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half); 56 57 fx = vec_floor(fx); 58 59 tmp = pmul(fx, p2d_cephes_exp_C1); 60 Packet2d z = pmul(fx, p2d_cephes_exp_C2); 61 x = psub(x, tmp); 62 x = psub(x, z); 63 64 Packet2d x2 = pmul(x,x); 65 66 Packet2d px = p2d_cephes_exp_p0; 67 px = pmadd(px, x2, p2d_cephes_exp_p1); 68 px = pmadd(px, x2, p2d_cephes_exp_p2); 69 px = pmul (px, x); 70 71 Packet2d qx = p2d_cephes_exp_q0; 72 qx = pmadd(qx, x2, p2d_cephes_exp_q1); 73 qx = pmadd(qx, x2, p2d_cephes_exp_q2); 74 qx = pmadd(qx, x2, p2d_cephes_exp_q3); 75 76 x = pdiv(px,psub(qx,px)); 77 x = pmadd(p2d_2,x,p2d_1); 78 79 // build 2^n 80 emm0 = vec_ctsl(fx, 0); 81 82 static const Packet2l p2l_1023 = { 1023, 1023 }; 83 static const Packet2ul p2ul_52 = { 52, 52 }; 84 85 emm0 = emm0 + p2l_1023; 86 emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52); 87 88 // Altivec's max & min operators just drop silent NaNs. Check NaNs in 89 // inputs and return them unmodified. 90 Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x)); 91 return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x), 92 isnumber_mask); 93 } 94 95 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED 96 Packet4f pexp<Packet4f>(const Packet4f& x) 97 { 98 Packet4f res; 99 res.v4f[0] = pexp<Packet2d>(x.v4f[0]); 100 res.v4f[1] = pexp<Packet2d>(x.v4f[1]); 101 return res; 102 } 103 104 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED 105 Packet2d psqrt<Packet2d>(const Packet2d& x) 106 { 107 return __builtin_s390_vfsqdb(x); 108 } 109 110 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED 111 Packet4f psqrt<Packet4f>(const Packet4f& x) 112 { 113 Packet4f res; 114 res.v4f[0] = psqrt<Packet2d>(x.v4f[0]); 115 res.v4f[1] = psqrt<Packet2d>(x.v4f[1]); 116 return res; 117 } 118 119 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED 120 Packet2d prsqrt<Packet2d>(const Packet2d& x) { 121 // Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation. 122 return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x); 123 } 124 125 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED 126 Packet4f prsqrt<Packet4f>(const Packet4f& x) { 127 Packet4f res; 128 res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]); 129 res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]); 130 return res; 131 } 132 133 } // end namespace internal 134 135 } // end namespace Eigen 136 137 #endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H 138