/external/XNNPACK/src/f32-sigmoid/gen/
D | wasmsimd-p5-div-x16.c
     48  const v128_t vzCDEF = wasm_f32x4_abs(vxCDEF);  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() local
     53  v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e));  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
     68  v128_t vtCDEF = wasm_f32x4_add(vzCDEF, wasm_f32x4_mul(vnCDEF, vln2_hi));  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
    118  vfCDEF = wasm_v128_andnot(vfCDEF, wasm_f32x4_gt(vzCDEF, vdenorm_cutoff));  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()

D | sse41-p5-div-x16.c
     48  const __m128 vzCDEF = _mm_or_ps(vxCDEF, vsign_mask);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() local
     53  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
     68  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
    118  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vzCDEF, vdenorm_cutoff), vfCDEF);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()

D | wasmsimd-p5-div-x20.c
     49  const v128_t vzCDEF = wasm_f32x4_abs(vxCDEF);  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() local
     55  v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e));  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
     73  v128_t vtCDEF = wasm_f32x4_add(vzCDEF, wasm_f32x4_mul(vnCDEF, vln2_hi));  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
    133  vfCDEF = wasm_v128_andnot(vfCDEF, wasm_f32x4_gt(vzCDEF, vdenorm_cutoff));  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()

D | sse2-p5-div-x16.c
     48  const __m128 vzCDEF = _mm_or_ps(vxCDEF, vsign_mask);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() local
     53  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
     68  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
    118  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vzCDEF, vdenorm_cutoff), vfCDEF);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()

D | sse41-p5-div-x20.c
     49  const __m128 vzCDEF = _mm_or_ps(vxCDEF, vsign_mask);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() local
     55  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
     73  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
    133  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vzCDEF, vdenorm_cutoff), vfCDEF);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()

D | wasmsimd-lut64-p2-div-x16.c
     47  const v128_t vzCDEF = wasm_f32x4_abs(vxCDEF);  in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() local
     52  v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e));  in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16()
    106  v128_t vtCDEF = wasm_f32x4_add(vzCDEF, wasm_f32x4_mul(vnCDEF, vln2_hi));  in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16()
    141  vfCDEF = wasm_v128_andnot(vfCDEF, wasm_f32x4_gt(vzCDEF, vdenorm_cutoff));  in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16()

D | sse2-p5-div-x20.c
     49  const __m128 vzCDEF = _mm_or_ps(vxCDEF, vsign_mask);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() local
     55  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
     73  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
    133  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vzCDEF, vdenorm_cutoff), vfCDEF);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()

D | wasmsimd-p5-div-x24.c
     50  const v128_t vzCDEF = wasm_f32x4_abs(vxCDEF);  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() local
     57  v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e));  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24()
     78  v128_t vtCDEF = wasm_f32x4_add(vzCDEF, wasm_f32x4_mul(vnCDEF, vln2_hi));  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24()
    148  vfCDEF = wasm_v128_andnot(vfCDEF, wasm_f32x4_gt(vzCDEF, vdenorm_cutoff));  in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24()

D | sse41-p5-div-x24.c
     50  const __m128 vzCDEF = _mm_or_ps(vxCDEF, vsign_mask);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() local
     57  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
     78  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
    148  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vzCDEF, vdenorm_cutoff), vfCDEF);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()

D | neonfma-rr1-p5-div-x16.c
     46  const float32x4_t vzCDEF = vabsq_f32(vxCDEF);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local
     51  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
     66  float32x4_t vtCDEF = vfmaq_f32(vzCDEF, vnCDEF, vln2);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()

D | wasmsimd-lut64-p2-div-x20.c
     48  const v128_t vzCDEF = wasm_f32x4_abs(vxCDEF);  in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() local
     54  v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e));  in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20()
    120  v128_t vtCDEF = wasm_f32x4_add(vzCDEF, wasm_f32x4_mul(vnCDEF, vln2_hi));  in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20()
    162  vfCDEF = wasm_v128_andnot(vfCDEF, wasm_f32x4_gt(vzCDEF, vdenorm_cutoff));  in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20()

D | sse2-p5-div-x24.c
     50  const __m128 vzCDEF = _mm_or_ps(vxCDEF, vsign_mask);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() local
     57  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
     78  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
    148  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vzCDEF, vdenorm_cutoff), vfCDEF);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()

D | neon-rr2-p5-nr2recps-x16.c
     47  const float32x4_t vzCDEF = vabsq_f32(vxCDEF);  in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() local
     52  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e);  in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
     67  float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi);  in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()

D | neonfma-rr1-p5-div-x20.c
     47  const float32x4_t vzCDEF = vabsq_f32(vxCDEF);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() local
     53  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
     71  float32x4_t vtCDEF = vfmaq_f32(vzCDEF, vnCDEF, vln2);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()

D | neonfma-rr1-p5-nr2fma-x16.c
     46  const float32x4_t vzCDEF = vabsq_f32(vxCDEF);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local
     51  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
     66  float32x4_t vtCDEF = vfmaq_f32(vzCDEF, vnCDEF, vln2);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()

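All of these sigmoid kernels share one scheme, visible in the hits above: form z = -|x| (the SSE variants OR in the sign bit, the WAsm/NEON variants take |x| and fold the sign into vminus_log2e), reduce with the magic-bias trick (vmagic_bias, vln2_hi), evaluate either a degree-5 polynomial ("p5") or a 64-entry table plus degree-2 polynomial ("lut64-p2") for exp, flush to zero past vdenorm_cutoff, and finish with a division ("div") or Newton-Raphson reciprocal ("nr2recps"/"nr2fma"). Below is a minimal scalar sketch of the p5-div path for orientation only; the sigmoid_p5_div_ref name, the plain Taylor coefficients, the rintf/exp2f range reduction, and the -87.33f cutoff are assumptions standing in for the kernels' tuned constants and bit-level tricks.

#include <math.h>

/* Hypothetical scalar reference for the p5-div sigmoid kernels listed above. */
static float sigmoid_p5_div_ref(float x) {
  /* Work with z = -|x| so exp(z) stays in (0, 1] and cannot overflow. */
  const float z = -fabsf(x);

  /* Range reduction: exp(z) = 2**n * exp(t), n = round(z * log2(e)),
     t = z - n * ln(2).  The kernels obtain the rounding from the
     magic-bias addition and split ln(2) into hi/lo parts; rintf and a
     single ln(2) are used here for clarity. */
  const float n = rintf(z * 1.442695f);   /* log2(e) */
  const float t = z - n * 0.6931472f;     /* ln(2)   */

  /* Degree-5 polynomial for exp(t) on the reduced range (plain Taylor
     coefficients; the kernels use tuned minimax coefficients). */
  float p = 1.0f / 120.0f;
  p = p * t + 1.0f / 24.0f;
  p = p * t + 1.0f / 6.0f;
  p = p * t + 0.5f;
  float e = (p * t * t + t + 1.0f) * exp2f(n);   /* e ~= exp(-|x|) */

  /* sigmoid(-|x|) = e / (e + 1); flush to zero once exp(-|x|) would be
     denormal, mirroring the vdenorm_cutoff test in the kernels. */
  float f = e / (e + 1.0f);
  if (z < -87.33f) {   /* approximate denormal cutoff */
    f = 0.0f;
  }

  /* For positive inputs use sigmoid(x) = 1 - sigmoid(-x); the kernels do
     this with a sign-based blend instead of a branch. */
  return (x > 0.0f) ? 1.0f - f : f;
}
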
/external/XNNPACK/src/f32-velu/gen/
D | velu-wasmsimd-x86-rr2-p6-x16.c
     55  const v128_t vzCDEF = wasm_f32x4_mul(vxCDEF, vprescale);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() local
     60  v128_t vnCDEF = wasm_f32x4_add(wasm_f32x4_mul(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
     78  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
     79  const v128_t vsatmCDEF = wasm_f32x4_le(vzCDEF, vsat_cutoff);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()

D | velu-wasmsimd-x86-rr2-p6-x20.c
     56  const v128_t vzCDEF = wasm_f32x4_mul(vxCDEF, vprescale);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() local
     62  v128_t vnCDEF = wasm_f32x4_add(wasm_f32x4_mul(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
     83  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
     84  const v128_t vsatmCDEF = wasm_f32x4_le(vzCDEF, vsat_cutoff);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()

D | velu-wasmsimd-x86-rr2-lut16-p3-x16.c
     55  const v128_t vzCDEF = wasm_f32x4_mul(vxCDEF, vprescale);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() local
     60  v128_t vnCDEF = wasm_f32x4_add(wasm_f32x4_mul(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16()
    115  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16()
    116  const v128_t vsatmCDEF = wasm_f32x4_le(vzCDEF, vsat_cutoff);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16()

D | velu-neonfma-rr1-p6-x16.c
     53  const float32x4_t vzCDEF = vmaxq_f32(vmulq_f32(vxCDEF, vprescale), vsat_cutoff);  in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() local
     58  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vlog2e);  in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
     72  float32x4_t vtCDEF = vfmaq_f32(vzCDEF, vnCDEF, vminus_ln2);  in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()

D | velu-wasmsimd-arm-rr2-p6-x16.c
     55  const v128_t vzCDEF = wasm_f32x4_max(wasm_f32x4_mul(vxCDEF, vprescale), vsat_cutoff);  in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() local
     60  v128_t vnCDEF = wasm_f32x4_add(wasm_f32x4_mul(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
     75  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()

D | velu-sse41-rr2-p6-x16.c
     55  const __m128 vzCDEF = _mm_max_ps(vsat_cutoff, _mm_mul_ps(vxCDEF, vprescale));  in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() local
     60  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
     75  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()

D | velu-neon-rr2-p6-x16.c
     54  const float32x4_t vzCDEF = vmaxq_f32(vmulq_f32(vxCDEF, vprescale), vsat_cutoff);  in xnn_f32_velu_ukernel__neon_rr2_p6_x16() local
     59  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vlog2e);  in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
     73  float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vminus_ln2_hi);  in xnn_f32_velu_ukernel__neon_rr2_p6_x16()

D | velu-wasmsimd-x86-rr2-p6-x24.c
     57  const v128_t vzCDEF = wasm_f32x4_mul(vxCDEF, vprescale);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() local
     64  v128_t vnCDEF = wasm_f32x4_add(wasm_f32x4_mul(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
     88  v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
     89  const v128_t vsatmCDEF = wasm_f32x4_le(vzCDEF, vsat_cutoff);  in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()

D | velu-neonfma-rr1-p6-x20.c
     54  const float32x4_t vzCDEF = vmaxq_f32(vmulq_f32(vxCDEF, vprescale), vsat_cutoff);  in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() local
     60  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vlog2e);  in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
     77  float32x4_t vtCDEF = vfmaq_f32(vzCDEF, vnCDEF, vminus_ln2);  in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()

D | velu-sse2-rr2-p6-x16.c
     55  const __m128 vzCDEF = _mm_max_ps(vsat_cutoff, _mm_mul_ps(vxCDEF, vprescale));  in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() local
     60  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias);  in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()
     75  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF);  in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()

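The ELU kernels reuse the same exp machinery on z = prescale * x: z is clamped at vsat_cutoff (or, in the x86 WAsm variants, a vsatm mask is computed and applied afterwards), reduced with vlog2e, vmagic_bias, and vminus_ln2_hi, and expanded with a degree-6 polynomial ("p6") or a 16-entry table plus degree-3 polynomial ("lut16-p3"). The following is a hedged scalar sketch of the p6 path; the elu_p6_ref name, the plain Taylor coefficients, the rintf/exp2f reduction, the -17.33f saturation value, and the beta scaling of the positive branch are illustrative assumptions rather than code taken from the repository.

#include <math.h>

/* Hypothetical scalar reference for the rr2-p6 ELU kernels listed above. */
static float elu_p6_ref(float x, float prescale, float alpha, float beta) {
  /* Positive inputs take the linear path (beta scaling assumed). */
  if (x >= 0.0f) {
    return beta * x;
  }

  /* z = prescale * x, saturated so that exp(z) - 1 is already -1 to within
     float precision; the kernels clamp at vsat_cutoff for the same reason. */
  float z = prescale * x;
  if (z < -17.33f) {           /* approximate saturation cutoff */
    z = -17.33f;
  }

  /* Same range reduction as the sigmoid kernels: exp(z) = 2**n * exp(t). */
  const float n = rintf(z * 1.442695f);   /* log2(e) */
  const float t = z - n * 0.6931472f;     /* ln(2)   */

  /* Degree-6 polynomial for exp(t) (plain Taylor coefficients; the kernels
     use tuned minimax coefficients). */
  float p = 1.0f / 720.0f;
  p = p * t + 1.0f / 120.0f;
  p = p * t + 1.0f / 24.0f;
  p = p * t + 1.0f / 6.0f;
  p = p * t + 0.5f;
  const float et = (p * t + 1.0f) * t + 1.0f;   /* et ~= exp(t) */

  /* ELU negative branch: alpha * (exp(prescale * x) - 1). */
  return alpha * (exp2f(n) * et - 1.0f);
}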