/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-p5-div-x12.c | 46 const __m128 vz89AB = _mm_or_ps(vx89AB, vsign_mask); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local 50 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 62 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 102 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
D | wasmsimd-p5-div-x12.c | 46 const v128_t vz89AB = wasm_f32x4_abs(vx89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() local 50 v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() 62 v128_t vt89AB = wasm_f32x4_add(vz89AB, wasm_f32x4_mul(vn89AB, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() 102 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12()
|
D | wasmsimd-p5-div-x16.c | 47 const v128_t vz89AB = wasm_f32x4_abs(vx89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() local 52 v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 67 v128_t vt89AB = wasm_f32x4_add(vz89AB, wasm_f32x4_mul(vn89AB, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 117 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
|
D | sse2-p5-div-x12.c | 46 const __m128 vz89AB = _mm_or_ps(vx89AB, vsign_mask); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() local 50 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 62 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 102 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
|
D | wasmsimd-lut64-p2-div-x12.c | 45 const v128_t vz89AB = wasm_f32x4_abs(vx89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() local 49 v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 91 v128_t vt89AB = wasm_f32x4_add(vz89AB, wasm_f32x4_mul(vn89AB, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 119 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12()
|
D | sse41-p5-div-x16.c | 47 const __m128 vz89AB = _mm_or_ps(vx89AB, vsign_mask); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() local 52 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 67 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 117 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
|
D | wasmsimd-p5-div-x20.c | 48 const v128_t vz89AB = wasm_f32x4_abs(vx89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() local 54 v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 72 v128_t vt89AB = wasm_f32x4_add(vz89AB, wasm_f32x4_mul(vn89AB, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 132 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
|
D | sse2-p5-div-x16.c | 47 const __m128 vz89AB = _mm_or_ps(vx89AB, vsign_mask); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() local 52 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 67 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 117 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
|
D | sse41-p5-div-x20.c | 48 const __m128 vz89AB = _mm_or_ps(vx89AB, vsign_mask); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() local 54 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 72 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 132 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
|
D | neonfma-rr1-p5-div-x12.c | 44 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 48 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 60 float32x4_t vt89AB = vfmaq_f32(vz89AB, vn89AB, vln2); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | wasmsimd-lut64-p2-div-x16.c | 46 const v128_t vz89AB = wasm_f32x4_abs(vx89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() local 51 v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 105 v128_t vt89AB = wasm_f32x4_add(vz89AB, wasm_f32x4_mul(vn89AB, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 140 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16()
|
D | sse2-p5-div-x20.c | 48 const __m128 vz89AB = _mm_or_ps(vx89AB, vsign_mask); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() local 54 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 72 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 132 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
|
D | wasmsimd-p5-div-x24.c | 49 const v128_t vz89AB = wasm_f32x4_abs(vx89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() local 56 v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 77 v128_t vt89AB = wasm_f32x4_add(vz89AB, wasm_f32x4_mul(vn89AB, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 147 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24()
|
D | sse41-p5-div-x24.c | 49 const __m128 vz89AB = _mm_or_ps(vx89AB, vsign_mask); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() local 56 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 77 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 147 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
|
D | neon-rr2-p5-nr2recps-x12.c | 45 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 49 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 61 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 44 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 48 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 60 float32x4_t vt89AB = vfmaq_f32(vz89AB, vn89AB, vln2); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neonfma-rr1-p5-div-x16.c | 45 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local 50 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 65 float32x4_t vt89AB = vfmaq_f32(vz89AB, vn89AB, vln2); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 44 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 48 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 60 float32x4_t vt89AB = vfmaq_f32(vz89AB, vn89AB, vln2); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-x86-rr2-p6-x12.c | 53 const v128_t vz89AB = wasm_f32x4_mul(vx89AB, vprescale); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() local 57 v128_t vn89AB = wasm_f32x4_add(wasm_f32x4_mul(vz89AB, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 71 v128_t vt89AB = wasm_f32x4_add(wasm_f32x4_mul(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 72 const v128_t vsatm89AB = wasm_f32x4_le(vz89AB, vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12()
|
D | velu-neonfma-rr1-p6-x12.c | 51 const float32x4_t vz89AB = vmaxq_f32(vmulq_f32(vx89AB, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() local 55 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vlog2e); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 66 float32x4_t vt89AB = vfmaq_f32(vz89AB, vn89AB, vminus_ln2); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x12.c | 53 const v128_t vz89AB = wasm_f32x4_mul(vx89AB, vprescale); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() local 57 v128_t vn89AB = wasm_f32x4_add(wasm_f32x4_mul(vz89AB, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 99 v128_t vt89AB = wasm_f32x4_add(wasm_f32x4_mul(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 100 const v128_t vsatm89AB = wasm_f32x4_le(vz89AB, vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12()
|
D | velu-wasmsimd-x86-rr2-p6-x16.c | 54 const v128_t vz89AB = wasm_f32x4_mul(vx89AB, vprescale); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() local 59 v128_t vn89AB = wasm_f32x4_add(wasm_f32x4_mul(vz89AB, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 76 v128_t vt89AB = wasm_f32x4_add(wasm_f32x4_mul(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 77 const v128_t vsatm89AB = wasm_f32x4_le(vz89AB, vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
|
D | velu-wasmsimd-x86-rr2-p6-x20.c | 55 const v128_t vz89AB = wasm_f32x4_mul(vx89AB, vprescale); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() local 61 v128_t vn89AB = wasm_f32x4_add(wasm_f32x4_mul(vz89AB, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 81 v128_t vt89AB = wasm_f32x4_add(wasm_f32x4_mul(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 82 const v128_t vsatm89AB = wasm_f32x4_le(vz89AB, vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
|
D | velu-neon-rr2-p6-x12.c | 52 const float32x4_t vz89AB = vmaxq_f32(vmulq_f32(vx89AB, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() local 56 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 67 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x12()
|
D | velu-wasmsimd-arm-rr2-p6-x12.c | 53 const v128_t vz89AB = wasm_f32x4_max(wasm_f32x4_mul(vx89AB, vprescale), vsat_cutoff); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() local 57 v128_t vn89AB = wasm_f32x4_add(wasm_f32x4_mul(vz89AB, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 69 v128_t vt89AB = wasm_f32x4_add(wasm_f32x4_mul(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12()
|