/external/XNNPACK/src/f32-sigmoid/gen/
D | wasmsimd-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24():
    59  v128_t vnKLMN = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzKLMN, vminus_log2e));  (local)
    66  const v128_t vsKLMN = wasm_i32x4_shl(vnKLMN, 23);
    73  vnKLMN = wasm_f32x4_sub(vnKLMN, vmagic_bias);
    80  v128_t vtKLMN = wasm_f32x4_add(vzKLMN, wasm_f32x4_mul(vnKLMN, vln2_hi));
    87  vtKLMN = wasm_f32x4_add(vtKLMN, wasm_f32x4_mul(vnKLMN, vln2_lo));
|
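These hits (and the sse/neon rows below) are the exp range-reduction step shared by the P5 sigmoid kernels: vn rounds the scaled input with a magic bias, a left shift by 23 turns the integer bits into s = 2^n, and the hi/lo ln(2) pair (the "rr2" scheme) reconstructs the reduced argument t. The KLMN suffix is simply the sixth group of four lanes in these x24 (24 elements per iteration) kernels. Below is a scalar sketch of the same steps following the SSE rows' sign convention; the magic-bias value is the one commonly used for this trick, the other constants are standard single-precision splits of log2(e) and ln(2), and expf() stands in for the kernels' degree-5 polynomial.

#include <math.h>
#include <stdint.h>
#include <string.h>

/* Scalar sketch of the P5/rr2 range reduction shown in the hits above. */
static float sigmoid_p5_rr2_sketch(float x) {
  const float vmagic_bias   = 0x1.8000FEp23f;   /* 2^23 scale with the exponent bias in its low bits */
  const float vlog2e        = 0x1.715476p+0f;   /* log2(e) */
  const float vminus_ln2_hi = -0x1.62E400p-1f;  /* high part of -ln(2) */
  const float vminus_ln2_lo = -0x1.7F7D1Cp-20f; /* low part of -ln(2) */

  const float vz = -fabsf(x);                   /* reduce to the non-positive half */

  /* n = round(vz * log2(e)): adding the magic bias rounds to the nearest
   * integer and leaves that integer in the low mantissa bits (line 59). */
  float vn = vz * vlog2e + vmagic_bias;

  /* s = 2^n: shift the integer bits straight into the exponent field (line 66). */
  uint32_t vn_bits;
  memcpy(&vn_bits, &vn, sizeof(vn_bits));
  const uint32_t vs_bits = vn_bits << 23;
  float vs;
  memcpy(&vs, &vs_bits, sizeof(vs));

  vn -= vmagic_bias;                            /* recover n as an ordinary float (line 73) */

  /* Cody-Waite: t = vz - n*ln(2), accumulated in two steps (lines 80, 87). */
  float vt = vn * vminus_ln2_hi + vz;
  vt = vn * vminus_ln2_lo + vt;

  /* The kernels evaluate a degree-5 polynomial in t; expf is a stand-in. */
  const float ve = vs * expf(vt);               /* ~= exp(-|x|) */
  const float vf = ve / (ve + 1.0f);            /* sigmoid(-|x|) */
  return (x > 0.0f) ? 1.0f - vf : vf;
}

The WAsm SIMD and NEON rr2 rows perform the same reduction with the signs folded into their constants (vminus_log2e and a positive vln2_hi/vln2_lo).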
D | sse41-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24():
    59  __m128 vnKLMN = _mm_add_ps(_mm_mul_ps(vzKLMN, vlog2e), vmagic_bias);  (local)
    66  const __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23));
    73  vnKLMN = _mm_sub_ps(vnKLMN, vmagic_bias);
    80  __m128 vtKLMN = _mm_add_ps(_mm_mul_ps(vnKLMN, vminus_ln2_hi), vzKLMN);
    87  vtKLMN = _mm_add_ps(_mm_mul_ps(vnKLMN, vminus_ln2_lo), vtKLMN);
|
D | wasmsimd-lut64-p2-div-x24.c | in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24():
    58  v128_t vnKLMN = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzKLMN, vminus_log2e));  (local)
    65  const v128_t veKLMN = wasm_i32x4_shl(vnKLMN, 17);
    72  const v128_t vidxKLMN = wasm_i32x4_shl(wasm_v128_and(vnKLMN, vindex_mask), 2);
   129  vnKLMN = wasm_f32x4_sub(vnKLMN, vmagic_bias);
   136  v128_t vtKLMN = wasm_f32x4_add(vzKLMN, wasm_f32x4_mul(vnKLMN, vln2_hi));
   143  vtKLMN = wasm_f32x4_add(vtKLMN, wasm_f32x4_mul(vnKLMN, vln2_lo));
|
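The lut64 rows replace the polynomial-only reduction with a table split: the magic bias rounds the scaled input to a multiple of 1/64, the low 6 bits of vn index a 2^(j/64) table (the WAsm variant shifts them by 2 more to get byte offsets, line 72), and the remaining bits are shifted by 17 = 23 - 6 so the integer part lines up with the float exponent field (line 65). A hedged scalar sketch of just this split follows; the bias value and the table handling are illustrative, since the real kernels merge the shifted bits into a shared lookup-table entry.

#include <math.h>
#include <stdint.h>
#include <string.h>

/* Sketch of the lut64 fixed-point split; exp2f() reconstructs the table
 * entry instead of indexing the kernels' shared table. */
static float exp2_lut64_split_sketch(float p /* the scaled input from line 58; p <= 0 here */) {
  const float vmagic_bias = 0x1.800000p17f;   /* float spacing is 2^-6 at this magnitude */

  float vn = p + vmagic_bias;                 /* round p to a multiple of 1/64 (line 58) */
  uint32_t vn_bits;
  memcpy(&vn_bits, &vn, sizeof(vn_bits));

  /* Low 6 bits = table index j (line 72); the rest, shifted by 17, aligns
   * the integer part with the float exponent field (line 65). */
  const uint32_t vidx = vn_bits & 0x3F;
  const uint32_t ve = vn_bits << 17;
  (void) ve;                                  /* the kernels add this to the table entry's bits */

  vn -= vmagic_bias;                          /* n as an ordinary float (line 129) */

  /* 2^n = 2^(n - j/64) * 2^(j/64); the second factor is the table entry. */
  const float vl = exp2f((float) vidx * 0x1.0p-6f);
  return exp2f(vn - (float) vidx * 0x1.0p-6f) * vl;
}

The lut2048 and lut16 rows are the same split with 11 and 4 index bits, which is why their shifts are 12 and 19.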
D | sse2-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24():
    59  __m128 vnKLMN = _mm_add_ps(_mm_mul_ps(vzKLMN, vlog2e), vmagic_bias);  (local)
    66  const __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23));
    73  vnKLMN = _mm_sub_ps(vnKLMN, vmagic_bias);
    80  __m128 vtKLMN = _mm_add_ps(_mm_mul_ps(vnKLMN, vminus_ln2_hi), vzKLMN);
    87  vtKLMN = _mm_add_ps(_mm_mul_ps(vnKLMN, vminus_ln2_lo), vtKLMN);
|
D | neon-rr2-lut2048-p1-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24():
    57  float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    64  const int32x4_t veKLMN = vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 12);
    71  …const uint64x2_t vidxKLMN = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnKLMN), vindex_…
   129  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
   136  float32x4_t vtKLMN = vmlaq_f32(vzKLMN, vnKLMN, vln2_hi);
   143  vtKLMN = vmlaq_f32(vtKLMN, vnKLMN, vln2_lo);
|
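The truncated vidx lines in the NEON lut rows mask out the index bits and reinterpret the vector as two 64-bit lanes, so each lane extraction yields a pair of 32-bit table offsets. A sketch of that gather pattern, under the assumption of a plain float table passed in by the caller (the kernels index their shared exp2 tables and then merge veKLMN into the loaded bits):

#include <arm_neon.h>

/* Hedged sketch of the two-indices-per-lane gather behind the vidx lines. */
static inline float32x4_t lut_gather_sketch(uint64x2_t vidx, const float* table) {
  const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0);   /* indices for lanes 0 and 1 */
  const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1);   /* indices for lanes 2 and 3 */

  float32x2_t vl01 = vld1_dup_f32(&table[(uint32_t) vidx_lo]);
  vl01 = vld1_lane_f32(&table[(uint32_t) (vidx_lo >> 32)], vl01, 1);

  float32x2_t vl23 = vld1_dup_f32(&table[(uint32_t) vidx_hi]);
  vl23 = vld1_lane_f32(&table[(uint32_t) (vidx_hi >> 32)], vl23, 1);

  return vcombine_f32(vl01, vl23);
}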
D | neon-rr2-lut64-p2-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24():
    57  float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    64  const int32x4_t veKLMN = vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 17);
    72  …const uint64x2_t vidxKLMN = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnKLMN), vindex_…
   130  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
   137  float32x4_t vtKLMN = vmlaq_f32(vzKLMN, vnKLMN, vln2_hi);
   144  vtKLMN = vmlaq_f32(vtKLMN, vnKLMN, vln2_lo);
|
D | neon-rr2-p5-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24():
    58  float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    65  … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));
    72  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
    79  float32x4_t vtKLMN = vmlaq_f32(vzKLMN, vnKLMN, vln2_hi);
    86  vtKLMN = vmlaq_f32(vtKLMN, vnKLMN, vln2_lo);
|
D | neonfma-rr1-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24():
    57  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    64  … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));
    71  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
    78  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vln2);
|
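The neonfma rr1 rows differ from the rr2 rows above only in the last step: because the fused multiply-add does not round the n*ln(2) product before adding, a single vln2 constant replaces the vln2_hi/vln2_lo pair. A scalar sketch of that one-step reduction; constants are the usual single-precision values, the 2^n reconstruction repeats the earlier sketch, and expf() again stands in for the kernels' polynomial.

#include <math.h>
#include <stdint.h>
#include <string.h>

/* Scalar sketch of the rr1 (single-constant, FMA-based) reduction. */
static float exp_minus_rr1_sketch(float vz /* vz = |x| */) {
  const float vminus_log2e = -0x1.715476p+0f;
  const float vln2         =  0x1.62E430p-1f;     /* ln(2) as one float constant */
  const float vmagic_bias  =  0x1.8000FEp23f;

  float vn = fmaf(vz, vminus_log2e, vmagic_bias);  /* biased n = round(-vz*log2(e)) (line 57) */

  uint32_t vn_bits;                                /* s = 2^n, as in the p5 sketch above */
  memcpy(&vn_bits, &vn, sizeof(vn_bits));
  const uint32_t vs_bits = vn_bits << 23;
  float vs;
  memcpy(&vs, &vs_bits, sizeof(vs));

  vn -= vmagic_bias;                               /* line 71 */
  const float vt = fmaf(vn, vln2, vz);             /* one fused step replaces hi/lo (line 78) */

  return vs * expf(-vt);                           /* ~= exp(-vz) */
}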
D | neonfma-rr1-lut64-p2-div-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24():
    56  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    63  const int32x4_t veKLMN = vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 17);
    71  …const uint64x2_t vidxKLMN = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnKLMN), vindex_…
   129  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
   136  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vln2);
|
D | neonfma-rr1-lut2048-p1-div-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24():
    56  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    63  const int32x4_t veKLMN = vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 12);
    70  …const uint64x2_t vidxKLMN = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnKLMN), vindex_…
   128  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
   135  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vln2);
|
D | neonfma-rr1-p5-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24():
    57  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    64  … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));
    71  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
    78  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vln2);
|
D | neonfma-rr1-p5-nr1recps1fma-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24():
    57  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    64  … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));
    71  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
    78  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vln2);
|
D | neonfma-rr1-lut64-p2-nr2fma-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24():
    56  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    63  const int32x4_t veKLMN = vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 17);
    71  …const uint64x2_t vidxKLMN = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnKLMN), vindex_…
   129  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
   136  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vln2);
|
D | neonfma-rr1-lut64-p2-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24():
    56  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    63  const int32x4_t veKLMN = vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 17);
    71  …const uint64x2_t vidxKLMN = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnKLMN), vindex_…
   129  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
   136  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vln2);
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24():
    56  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);  (local)
    63  const int32x4_t veKLMN = vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 12);
    70  …const uint64x2_t vidxKLMN = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnKLMN), vindex_…
   128  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
   135  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vln2);
|
/external/XNNPACK/src/f32-velu/gen/
D | velu-neon-rr2-p6-x24.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x24():
    65  float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vlog2e);  (local)
    77  float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));
    78  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
    85  float32x4_t vtKLMN = vmlaq_f32(vzKLMN, vnKLMN, vminus_ln2_hi);
    92  vtKLMN = vmlaq_f32(vtKLMN, vnKLMN, vminus_ln2_lo);
|
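The velu rows run the same magic-bias reduction (here with a positive vlog2e and vminus_ln2 constants) to evaluate exp on the negative side of ELU; the p6 and lut16-p3 tails then turn s and t into expm1 before scaling. A rough sketch of the surrounding computation, with expm1f() standing in for that tail; the prescale/alpha/beta parameter names are assumptions patterned on the usual ELU formulation, not read from these files.

#include <math.h>

/* Hedged sketch of the ELU shape around the reduction shown above. */
static float elu_sketch(float x, float prescale, float alpha, float beta) {
  const float vz = fminf(x * prescale, 0.0f);   /* argument for the negative branch */
  const float ve = alpha * expm1f(vz);          /* the kernels build this from s and the polynomial */
  const float vy = x * beta;                    /* positive branch is a plain scale */
  return (x < 0.0f) ? ve : vy;
}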
D | velu-wasmsimd-arm-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24():
    66  v128_t vnKLMN = wasm_f32x4_add(wasm_f32x4_mul(vzKLMN, vlog2e), vmagic_bias);  (local)
    78  const v128_t vidxKLMN = wasm_i32x4_shl(wasm_v128_and(vnKLMN, vindex_mask), 2);
    79  const v128_t venKLMN = wasm_i32x4_shl(vnKLMN, 19);
   134  vnKLMN = wasm_f32x4_sub(vnKLMN, vmagic_bias);
   142  v128_t vtKLMN = wasm_f32x4_add(wasm_f32x4_mul(vnKLMN, vminus_ln2_hi), vzKLMN);
   149  vtKLMN = wasm_f32x4_add(wasm_f32x4_mul(vnKLMN, vminus_ln2_lo), vtKLMN);
|
D | velu-sse41-rr2-p6-x24.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x24():
    66  __m128 vnKLMN = _mm_add_ps(_mm_mul_ps(vzKLMN, vlog2e), vmagic_bias);  (local)
    73  __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23));
    80  vnKLMN = _mm_sub_ps(vnKLMN, vmagic_bias);
    87  __m128 vtKLMN = _mm_add_ps(_mm_mul_ps(vnKLMN, vminus_ln2_hi), vzKLMN);
    94  vtKLMN = _mm_add_ps(_mm_mul_ps(vnKLMN, vminus_ln2_lo), vtKLMN);
|
D | velu-wasmsimd-arm-rr2-p6-x24.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24():
    66  v128_t vnKLMN = wasm_f32x4_add(wasm_f32x4_mul(vzKLMN, vlog2e), vmagic_bias);  (local)
    73  v128_t vsKLMN = wasm_i32x4_shl(vnKLMN, 23);
    80  vnKLMN = wasm_f32x4_sub(vnKLMN, vmagic_bias);
    87  v128_t vtKLMN = wasm_f32x4_add(wasm_f32x4_mul(vnKLMN, vminus_ln2_hi), vzKLMN);
    94  vtKLMN = wasm_f32x4_add(wasm_f32x4_mul(vnKLMN, vminus_ln2_lo), vtKLMN);
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24():
    66  v128_t vnKLMN = wasm_f32x4_add(wasm_f32x4_mul(vzKLMN, vlog2e), vmagic_bias);  (local)
    78  const v128_t vidxKLMN = wasm_i32x4_shl(wasm_v128_and(vnKLMN, vindex_mask), 2);
    79  const v128_t venKLMN = wasm_i32x4_shl(vnKLMN, 19);
   134  vnKLMN = wasm_f32x4_sub(vnKLMN, vmagic_bias);
   147  v128_t vtKLMN = wasm_f32x4_add(wasm_f32x4_mul(vnKLMN, vminus_ln2_hi), vzKLMN);
   160  vtKLMN = wasm_f32x4_add(wasm_f32x4_mul(vnKLMN, vminus_ln2_lo), vtKLMN);
|
D | velu-neon-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24():
    65  float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vlog2e);  (local)
    77  …KLMN = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnKLMN), vindex_mask), 2)…
    78  const int32x4_t venKLMN = vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 19);
   133  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
   141  float32x4_t vtKLMN = vmlaq_f32(vzKLMN, vnKLMN, vminus_ln2_hi);
   148  vtKLMN = vmlaq_f32(vtKLMN, vnKLMN, vminus_ln2_lo);
|
D | velu-wasmsimd-x86-rr2-p6-x24.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24():
    66  v128_t vnKLMN = wasm_f32x4_add(wasm_f32x4_mul(vzKLMN, vlog2e), vmagic_bias);  (local)
    73  v128_t vsKLMN = wasm_i32x4_shl(vnKLMN, 23);
    80  vnKLMN = wasm_f32x4_sub(vnKLMN, vmagic_bias);
    92  v128_t vtKLMN = wasm_f32x4_add(wasm_f32x4_mul(vnKLMN, vminus_ln2_hi), vzKLMN);
   105  vtKLMN = wasm_f32x4_add(wasm_f32x4_mul(vnKLMN, vminus_ln2_lo), vtKLMN);
|
D | velu-sse2-rr2-p6-x24.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x24():
    66  __m128 vnKLMN = _mm_add_ps(_mm_mul_ps(vzKLMN, vlog2e), vmagic_bias);  (local)
    73  __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23));
    80  vnKLMN = _mm_sub_ps(vnKLMN, vmagic_bias);
    87  __m128 vtKLMN = _mm_add_ps(_mm_mul_ps(vnKLMN, vminus_ln2_hi), vzKLMN);
    94  vtKLMN = _mm_add_ps(_mm_mul_ps(vnKLMN, vminus_ln2_lo), vtKLMN);
|
D | velu-neonfma-rr1-p6-x24.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24():
    64  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vlog2e);  (local)
    76  float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));
    77  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
    84  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vminus_ln2);
|
D | velu-neonfma-rr1-lut16-p3-x24.c | in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24():
    64  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vlog2e);  (local)
    76  …KLMN = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnKLMN), vindex_mask), 2)…
    77  const int32x4_t venKLMN = vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 19);
   132  vnKLMN = vsubq_f32(vnKLMN, vmagic_bias);
   140  float32x4_t vtKLMN = vfmaq_f32(vzKLMN, vnKLMN, vminus_ln2);
|