/external/XNNPACK/src/f32-vsigmoid/gen/
D | vsigmoid-wasmsimd-rr2-p5-div-x8.c (in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8)
    43: const v128_t vz0123 = wasm_f32x4_abs(vx0123);  [local]
    46: v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
    55: v128_t vt0123 = wasm_f32x4_add(vz0123, wasm_f32x4_mul(vn0123, vln2_hi));
    85: vf0123 = wasm_v128_andnot(vf0123, wasm_f32x4_gt(vz0123, vdenorm_cutoff));

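Every file in this listing shares the same exp core, visible in the second and third match of each entry: vn = vmagic_bias + vz * v(minus_)log2e rounds z/ln(2) to an integer via the magic-bias trick, and vt = vz + vn * vln2_hi forms the reduced argument for the polynomial or table step (the companion vln2_lo line does not mention vz0123, so it does not appear in these matches). Below is a minimal scalar sketch of that range reduction, written in the SSE convention where vz holds -|x|. The 0x1.8p23f bias here is the textbook rounding constant and 2^n is built with ldexpf(); the actual kernels use their own bias constants and reconstruct 2^n by shifting the biased bits into the exponent field, so treat the specifics as illustrative.

#include <math.h>
#include <stdio.h>

int main(void) {
  const float magic_bias = 0x1.8p23f;          /* 12582912.0f; adding it rounds the sum to an integer */
  const float log2e = 0x1.715476p+0f;          /* log2(e) */
  const float minus_ln2_hi = -0x1.62E400p-1f;  /* high part of -ln(2) */
  const float minus_ln2_lo = -0x1.7F7D1Cp-20f; /* low part: the second "rr2" reduction constant */

  const float z = -3.7f;                       /* stands in for vz = -|x| */

  float n = z * log2e + magic_bias;            /* float addition snaps z*log2(e) to an integer */
  n -= magic_bias;                             /* n == round(z / ln(2)) as a float */

  float t = n * minus_ln2_hi + z;              /* t = z - n*ln(2), split in two steps for accuracy */
  t = n * minus_ln2_lo + t;

  const float s = ldexpf(1.0f, (int) n);       /* s = 2^n; the SIMD kernels form this with a bit shift */
  /* exp(z) ~= s * p(t), where p is a degree-5 polynomial ("p5") or a
     64-entry table with a degree-2 correction ("lut64-p2"). */
  printf("n = %.0f, t = %f, s * exp(t) = %g, exp(z) = %g\n", n, t, s * expf(t), expf(z));
  return 0;
}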
D | vsigmoid-sse41-rr2-p5-div-x8.c (in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8)
    43: const __m128 vz0123 = _mm_or_ps(vx0123, vsign_mask);  [local]
    46: __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);
    55: __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123);
    85: vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123);

D | vsigmoid-sse2-rr2-p5-div-x8.c (in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8)
    43: const __m128 vz0123 = _mm_or_ps(vx0123, vsign_mask);  [local]
    46: __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);
    55: __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123);
    85: vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123);

D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x8.c (in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8)
    42: const v128_t vz0123 = wasm_f32x4_abs(vx0123);  [local]
    45: v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
    75: v128_t vt0123 = wasm_f32x4_add(vz0123, wasm_f32x4_mul(vn0123, vln2_hi));
    96: vf0123 = wasm_v128_andnot(vf0123, wasm_f32x4_gt(vz0123, vdenorm_cutoff));

D | vsigmoid-wasmsimd-rr2-p5-div-x12.c (in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12)
    44: const v128_t vz0123 = wasm_f32x4_abs(vx0123);  [local]
    48: v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
    60: v128_t vt0123 = wasm_f32x4_add(vz0123, wasm_f32x4_mul(vn0123, vln2_hi));
   100: vf0123 = wasm_v128_andnot(vf0123, wasm_f32x4_gt(vz0123, vdenorm_cutoff));

D | vsigmoid-sse2-rr2-p5-div-x12.c (in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12)
    44: const __m128 vz0123 = _mm_or_ps(vx0123, vsign_mask);  [local]
    48: __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);
    60: __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123);
   100: vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123);

D | vsigmoid-sse41-rr2-p5-div-x12.c (in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12)
    44: const __m128 vz0123 = _mm_or_ps(vx0123, vsign_mask);  [local]
    48: __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);
    60: __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123);
   100: vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123);

D | vsigmoid-sse41-rr2-p5-div-x16.c (in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16)
    45: const __m128 vz0123 = _mm_or_ps(vx0123, vsign_mask);  [local]
    50: __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);
    65: __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123);
   115: vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123);

D | vsigmoid-wasmsimd-rr2-p5-div-x16.c (in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16)
    45: const v128_t vz0123 = wasm_f32x4_abs(vx0123);  [local]
    50: v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
    65: v128_t vt0123 = wasm_f32x4_add(vz0123, wasm_f32x4_mul(vn0123, vln2_hi));
   115: vf0123 = wasm_v128_andnot(vf0123, wasm_f32x4_gt(vz0123, vdenorm_cutoff));

D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x12.c (in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12)
    43: const v128_t vz0123 = wasm_f32x4_abs(vx0123);  [local]
    47: v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
    89: v128_t vt0123 = wasm_f32x4_add(vz0123, wasm_f32x4_mul(vn0123, vln2_hi));
   117: vf0123 = wasm_v128_andnot(vf0123, wasm_f32x4_gt(vz0123, vdenorm_cutoff));

D | vsigmoid-neonfma-rr1-p5-div-x8.c (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8)
    41: const float32x4_t vz0123 = vabsq_f32(vx0123);  [local]
    44: float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
    53: float32x4_t vt0123 = vfmaq_f32(vz0123, vn0123, vln2);

D | vsigmoid-sse2-rr2-p5-div-x16.c (in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x16)
    45: const __m128 vz0123 = _mm_or_ps(vx0123, vsign_mask);  [local]
    50: __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);
    65: __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123);
   115: vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123);

D | vsigmoid-wasmsimd-rr2-p5-div-x20.c (in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20)
    46: const v128_t vz0123 = wasm_f32x4_abs(vx0123);  [local]
    52: v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
    70: v128_t vt0123 = wasm_f32x4_add(vz0123, wasm_f32x4_mul(vn0123, vln2_hi));
   130: vf0123 = wasm_v128_andnot(vf0123, wasm_f32x4_gt(vz0123, vdenorm_cutoff));

D | vsigmoid-neonfma-rr1-p5-nr2recps-x8.c (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8)
    41: const float32x4_t vz0123 = vabsq_f32(vx0123);  [local]
    44: float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
    53: float32x4_t vt0123 = vfmaq_f32(vz0123, vn0123, vln2);

D | vsigmoid-sse41-rr2-p5-div-x20.c (in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20)
    46: const __m128 vz0123 = _mm_or_ps(vx0123, vsign_mask);  [local]
    52: __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);
    70: __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123);
   130: vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123);

D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x16.c (in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x16)
    44: const v128_t vz0123 = wasm_f32x4_abs(vx0123);  [local]
    49: v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
   103: v128_t vt0123 = wasm_f32x4_add(vz0123, wasm_f32x4_mul(vn0123, vln2_hi));
   138: vf0123 = wasm_v128_andnot(vf0123, wasm_f32x4_gt(vz0123, vdenorm_cutoff));

D | vsigmoid-sse41-rr2-lut64-p2-div-x8.c (in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8)
    43: const __m128 vz0123 = _mm_or_ps(vx0123, vsign_mask);  [local]
    46: __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);
    97: __m128 vt0123 = _mm_add_ps(vz0123, _mm_mul_ps(vn0123, vminus_ln2_hi));
   115: vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123);

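Around that exp core, the vsigmoid kernels above differ only in how they reconstruct the sigmoid and guard the tails: the "div" variants perform one true division, the neonfma nr2recps variant replaces it with two Newton-Raphson reciprocal-refinement steps, and the final andnot against vdenorm_cutoff flushes lanes whose exp() would be denormal. Here is a scalar sketch of that outer structure, with expf() standing in for the rr2-p5 / rr2-lut64-p2 approximation; the 87.33f cutoff is illustrative (roughly where expf underflows to a denormal), not necessarily the kernels' exact constant.

#include <math.h>

static float sigmoid_ref(float x) {
  const float z = fabsf(x);          /* vz = |x| (the SSE kernels keep -|x| instead) */
  const float e = expf(-z);          /* e = 2^n * p(t) in the vector code */
  float f = e / (e + 1.0f);          /* the "div" reconstruction: sigmoid(-|x|) */
  if (z > 87.33f) {                  /* past the denormal cutoff exp underflows: force 0 */
    f = 0.0f;                        /* matches the andnot(..., vdenorm_cutoff) lines above */
  }
  return signbit(x) ? f : 1.0f - f;  /* positive inputs: sigmoid(x) = 1 - sigmoid(-x) */
}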
/external/XNNPACK/src/f32-velu/gen/
D | velu-wasmsimd-x86-rr2-p6-x8.c (in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8)
    49: const v128_t vz0123 = wasm_f32x4_mul(vx0123, vprescale);  [local]
    52: v128_t vn0123 = wasm_f32x4_add(wasm_f32x4_mul(vz0123, vlog2e), vmagic_bias);
    61: v128_t vt0123 = wasm_f32x4_add(wasm_f32x4_mul(vn0123, vminus_ln2_hi), vz0123);
    62: const v128_t vsatm0123 = wasm_f32x4_le(vz0123, vsat_cutoff);

D | velu-wasmsimd-x86-rr2-lut16-p3-x8.c (in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8)
    49: const v128_t vz0123 = wasm_f32x4_mul(vx0123, vprescale);  [local]
    52: v128_t vn0123 = wasm_f32x4_add(wasm_f32x4_mul(vz0123, vlog2e), vmagic_bias);
    80: v128_t vt0123 = wasm_f32x4_add(wasm_f32x4_mul(vn0123, vminus_ln2_hi), vz0123);
    81: const v128_t vsatm0123 = wasm_f32x4_le(vz0123, vsat_cutoff);

D | velu-wasmsimd-x86-rr2-p6-x12.c (in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12)
    50: const v128_t vz0123 = wasm_f32x4_mul(vx0123, vprescale);  [local]
    54: v128_t vn0123 = wasm_f32x4_add(wasm_f32x4_mul(vz0123, vlog2e), vmagic_bias);
    66: v128_t vt0123 = wasm_f32x4_add(wasm_f32x4_mul(vn0123, vminus_ln2_hi), vz0123);
    67: const v128_t vsatm0123 = wasm_f32x4_le(vz0123, vsat_cutoff);

D | velu-neonfma-rr1-p6-x8.c (in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8)
    47: const float32x4_t vz0123 = vmaxq_f32(vmulq_f32(vx0123, vprescale), vsat_cutoff);  [local]
    50: float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vlog2e);
    58: float32x4_t vt0123 = vfmaq_f32(vz0123, vn0123, vminus_ln2);

D | velu-neon-rr2-p6-x8.c (in xnn_f32_velu_ukernel__neon_rr2_p6_x8)
    48: const float32x4_t vz0123 = vmaxq_f32(vmulq_f32(vx0123, vprescale), vsat_cutoff);  [local]
    51: float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vlog2e);
    59: float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vminus_ln2_hi);

D | velu-wasmsimd-x86-rr2-p6-x16.c (in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16)
    51: const v128_t vz0123 = wasm_f32x4_mul(vx0123, vprescale);  [local]
    56: v128_t vn0123 = wasm_f32x4_add(wasm_f32x4_mul(vz0123, vlog2e), vmagic_bias);
    71: v128_t vt0123 = wasm_f32x4_add(wasm_f32x4_mul(vn0123, vminus_ln2_hi), vz0123);
    72: const v128_t vsatm0123 = wasm_f32x4_le(vz0123, vsat_cutoff);

D | velu-wasmsimd-x86-rr2-lut16-p3-x12.c (in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12)
    50: const v128_t vz0123 = wasm_f32x4_mul(vx0123, vprescale);  [local]
    54: v128_t vn0123 = wasm_f32x4_add(wasm_f32x4_mul(vz0123, vlog2e), vmagic_bias);
    94: v128_t vt0123 = wasm_f32x4_add(wasm_f32x4_mul(vn0123, vminus_ln2_hi), vz0123);
    95: const v128_t vsatm0123 = wasm_f32x4_le(vz0123, vsat_cutoff);

D | velu-sse41-rr2-p6-x8.c (in xnn_f32_velu_ukernel__sse41_rr2_p6_x8)
    49: const __m128 vz0123 = _mm_max_ps(vsat_cutoff, _mm_mul_ps(vx0123, vprescale));  [local]
    52: __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);
    61: __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123);

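The velu kernels wrap the same exp core in the ELU formula: z = x * vprescale is saturated (the NEON and SSE variants clamp it with max against vsat_cutoff, while the wasmsimd x86 variants keep a separate vsatm mask, as the first and last matches of each entry show), exp(z) - 1 is approximated with a degree-6 polynomial or a 16-entry table plus a degree-3 correction, and the sign of x selects between the alpha * (exp(z) - 1) branch and plain x * beta. A scalar sketch with expm1f() in place of that approximation; the -17.33f saturation value is illustrative (around the point where exp(z) - 1 equals -1 to float precision), and prescale/alpha/beta stand for the kernels' runtime parameters.

#include <math.h>

static float elu_ref(float x, float prescale, float alpha, float beta) {
  float z = x * prescale;
  if (z < -17.33f) {
    z = -17.33f;                 /* vz = max(x * vprescale, vsat_cutoff) in the NEON/SSE kernels */
  }
  const float em1 = expm1f(z);   /* exp(z) - 1: 2^n * p(t) - 1 in the vector code */
  return x < 0.0f ? alpha * em1 : x * beta;  /* non-negative inputs are only rescaled by beta */
}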