1 // Auto-generated file. Do not edit! 2 // Template: src/f32-velu/scalar-rr2-p6.c.in 3 // Generator: tools/xngen 4 // 5 // Copyright 2020 Google LLC 6 // 7 // This source code is licensed under the BSD-style license found in the 8 // LICENSE file in the root directory of this source tree. 9 10 #include <assert.h> 11 #include <math.h> 12 13 #include <xnnpack/common.h> 14 #include <xnnpack/vunary.h> 15 16 #include <fp16/bitcasts.h> 17 18 xnn_f32_velu_ukernel__scalar_rr2_p6_x2(size_t n,const float * x,float * y,const union xnn_f32_elu_params params[restrict XNN_MIN_ELEMENTS (1)])19void xnn_f32_velu_ukernel__scalar_rr2_p6_x2( 20 size_t n, 21 const float* x, 22 float* y, 23 const union xnn_f32_elu_params params[restrict XNN_MIN_ELEMENTS(1)]) 24 { 25 assert(n % sizeof(float) == 0); 26 27 const float vprescale = params->scalar.prescale; 28 const float valpha = params->scalar.alpha; 29 const float vbeta = params->scalar.beta; 30 31 const float vmagic_bias = 0x1.8000FEp23f; 32 const float vlog2e = 0x1.715476p+0f; 33 const float vsat_cutoff = -0x1.154246p+4f; 34 const float vminus_ln2_hi = -0x1.62E440p-1f; 35 const float vminus_ln2_lo = 0x1.0105C6p-21f; 36 const float vc6 = 0x1.6b7338p-10f; 37 const float vc5 = 0x1.12278Ep-7f; 38 const float vc4 = 0x1.555716p-5f; 39 const float vc3 = 0x1.5554B0p-3f; 40 const float vc2 = 0x1.FFFFFEp-2f; 41 const float vone = 1.0f; 42 43 for (; n >= 2 * sizeof(float); n -= 2 * sizeof(float)) { 44 float vx0 = x[0]; 45 float vx1 = x[1]; 46 x += 2; 47 48 const float vz0 = vx0 * vprescale; 49 const float vz1 = vx1 * vprescale; 50 51 float vn0 = vz0 * vlog2e + vmagic_bias; 52 float vn1 = vz1 * vlog2e + vmagic_bias; 53 54 float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); 55 vn0 -= vmagic_bias; 56 float vs1 = fp32_from_bits(fp32_to_bits(vn1) << 23); 57 vn1 -= vmagic_bias; 58 59 float vt0 = vn0 * vminus_ln2_hi + vz0; 60 float vt1 = vn1 * vminus_ln2_hi + vz1; 61 62 vt0 = vn0 * vminus_ln2_lo + vt0; 63 vt1 = vn1 * vminus_ln2_lo + vt1; 64 65 if XNN_UNPREDICTABLE(vz0 <= vsat_cutoff) { 66 vs0 = 0.0f; 67 vt0 = 0.0f; 68 } 69 if XNN_UNPREDICTABLE(vz1 <= vsat_cutoff) { 70 vs1 = 0.0f; 71 vt1 = 0.0f; 72 } 73 74 float vp0 = vc6 * vt0 + vc5; 75 float vp1 = vc6 * vt1 + vc5; 76 77 vp0 = vp0 * vt0 + vc4; 78 vp1 = vp1 * vt1 + vc4; 79 80 vp0 = vp0 * vt0 + vc3; 81 vp1 = vp1 * vt1 + vc3; 82 83 vp0 = vp0 * vt0 + vc2; 84 vp1 = vp1 * vt1 + vc2; 85 86 vp0 *= vt0; 87 vp1 *= vt1; 88 89 vt0 *= vs0; 90 vs0 -= vone; 91 vt1 *= vs1; 92 vs1 -= vone; 93 94 vp0 = vp0 * vt0 + vt0; 95 vp1 = vp1 * vt1 + vt1; 96 97 const float ve0 = (vp0 + vs0) * valpha; 98 float vy0 = vx0 * vbeta; 99 const float ve1 = (vp1 + vs1) * valpha; 100 float vy1 = vx1 * vbeta; 101 102 if XNN_UNPREDICTABLE(vx0 < 0.0f) { 103 vy0 = ve0; 104 } 105 if XNN_UNPREDICTABLE(vx1 < 0.0f) { 106 vy1 = ve1; 107 } 108 109 y[0] = vy0; 110 y[1] = vy1; 111 y += 2; 112 } 113 if XNN_UNLIKELY(n != 0) { 114 float vx = *x; 115 116 const float vz = vx * vprescale; 117 118 float vn = vz * vlog2e + vmagic_bias; 119 float vs = fp32_from_bits(fp32_to_bits(vn) << 23); 120 vn -= vmagic_bias; 121 122 float vt = vn * vminus_ln2_hi + vz; 123 vt = vn * vminus_ln2_lo + vt; 124 125 if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { 126 vs = 0.0f; 127 vt = 0.0f; 128 } 129 130 float vp = vc6 * vt + vc5; 131 vp = vp * vt + vc4; 132 vp = vp * vt + vc3; 133 vp = vp * vt + vc2; 134 vp *= vt; 135 136 vt *= vs; 137 vs -= vone; 138 vp = vp * vt + vt; 139 const float ve = (vp + vs) * valpha; 140 141 float vy = vx * vbeta; 142 if XNN_UNPREDICTABLE(vx < 0.0f) { 143 vy = ve; 144 } 145 146 *y = vy; 147 } 148 } 149