/external/XNNPACK/src/f32-vsigmoid/gen/ |
D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x8.c | 46 v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz4567, vminus_log2e)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() local 49 const v128_t ve4567 = wasm_i32x4_shl(vn4567, 17); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() 52 const v128_t vidx4567 = wasm_i32x4_shl(wasm_v128_and(vn4567, vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() 73 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() 76 v128_t vt4567 = wasm_f32x4_add(vz4567, wasm_f32x4_mul(vn4567, vln2_hi)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() 79 vt4567 = wasm_f32x4_add(vt4567, wasm_f32x4_mul(vn4567, vln2_lo)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8()
|
D | vsigmoid-wasmsimd-rr2-p5-div-x8.c | 47 v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz4567, vminus_log2e)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8() local 50 const v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8() 53 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8() 56 v128_t vt4567 = wasm_f32x4_add(vz4567, wasm_f32x4_mul(vn4567, vln2_hi)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8() 59 vt4567 = wasm_f32x4_add(vt4567, wasm_f32x4_mul(vn4567, vln2_lo)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8()
|
D | vsigmoid-neon-rr2-lut2048-p1-nr2recps-x8.c | 45 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local 48 const int32x4_t ve4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 12); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 51 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn4567), vindex_… in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 73 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 76 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 79 vt4567 = vmlaq_f32(vt4567, vn4567, vln2_lo); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
|
D | vsigmoid-sse41-rr2-p5-div-x8.c | 47 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8() local 50 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8() 53 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8() 56 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8() 59 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8()
|
D | vsigmoid-neon-rr2-lut64-p2-nr2recps-x8.c | 45 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() local 48 const int32x4_t ve4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 17); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 52 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn4567), vindex_… in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 74 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 77 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 80 vt4567 = vmlaq_f32(vt4567, vn4567, vln2_lo); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8()
|
D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x12.c | 48 v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz4567, vminus_log2e)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12() local 52 const v128_t ve4567 = wasm_i32x4_shl(vn4567, 17); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12() 56 const v128_t vidx4567 = wasm_i32x4_shl(wasm_v128_and(vn4567, vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12() 86 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12() 90 v128_t vt4567 = wasm_f32x4_add(vz4567, wasm_f32x4_mul(vn4567, vln2_hi)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12() 94 vt4567 = wasm_f32x4_add(vt4567, wasm_f32x4_mul(vn4567, vln2_lo)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12()
|
D | vsigmoid-neonfma-rr1-lut64-p2-div-x8.c | 44 float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local 47 const int32x4_t ve4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 17); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 51 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn4567), vindex_… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 73 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 76 float32x4_t vt4567 = vfmaq_f32(vz4567, vn4567, vln2); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neon-rr2-p5-x8-acc2.c | 50 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8_acc2() local 53 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8_acc2() 56 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8_acc2() 59 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8_acc2() 62 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8_acc2()
|
D | sse2-rr2-p5-x8-acc2.c | 54 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8_acc2() local 59 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8_acc2() 63 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8_acc2() 68 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8_acc2() 71 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8_acc2()
|
D | wasmsimd-rr2-p5-x8-acc2.c | 54 v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vx4567, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8_acc2() local 59 const v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8_acc2() 63 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8_acc2() 68 v128_t vt4567 = wasm_f32x4_add(vx4567, wasm_f32x4_mul(vn4567, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8_acc2() 71 vt4567 = wasm_f32x4_add(vt4567, wasm_f32x4_mul(vn4567, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8_acc2()
|
D | wasmsimd-rr2-p5-x8.c | 53 v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vx4567, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8() local 58 const v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8() 62 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8() 67 v128_t vt4567 = wasm_f32x4_add(vx4567, wasm_f32x4_mul(vn4567, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8() 70 vt4567 = wasm_f32x4_add(vt4567, wasm_f32x4_mul(vn4567, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x8()
|
D | neon-rr2-p5-x8.c | 49 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8() local 52 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8() 55 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8() 58 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8() 61 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x8()
|
D | sse2-rr2-p5-x8.c | 53 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8() local 58 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8() 62 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8() 67 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8() 70 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x8()
|
D | neon-rr2-lut64-p2-x8-acc2.c | 49 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8_acc2() local 52 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8_acc2() 57 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn4567), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8_acc2() 77 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8_acc2() 80 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8_acc2() 83 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8_acc2()
|
D | neon-rr2-lut64-p2-x8.c | 48 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8() local 51 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8() 56 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn4567), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8() 76 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8() 79 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8() 82 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8()
|
D | neon-rr2-p5-x12-acc3.c | 53 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x12_acc3() local 57 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x12_acc3() 61 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x12_acc3() 65 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x12_acc3() 69 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x12_acc3()
|
D | wasmsimd-rr2-p5-x12.c | 55 v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vx4567, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x12() local 61 const v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x12() 66 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x12() 72 v128_t vt4567 = wasm_f32x4_add(vx4567, wasm_f32x4_mul(vn4567, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x12() 76 vt4567 = wasm_f32x4_add(vt4567, wasm_f32x4_mul(vn4567, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x12()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-arm-rr2-lut16-p3-x8.c | 53 v128_t vn4567 = wasm_f32x4_add(wasm_f32x4_mul(vz4567, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() local 57 const v128_t vidx4567 = wasm_i32x4_shl(wasm_v128_and(vn4567, vindex_mask), 2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() 58 const v128_t ven4567 = wasm_i32x4_shl(vn4567, 19); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() 77 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() 81 v128_t vt4567 = wasm_f32x4_add(wasm_f32x4_mul(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() 84 vt4567 = wasm_f32x4_add(wasm_f32x4_mul(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8()
|
D | velu-neon-rr2-p6-x8.c | 52 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() local 56 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 57 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 60 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 63 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_velu_ukernel__neon_rr2_p6_x8()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x8.c | 53 v128_t vn4567 = wasm_f32x4_add(wasm_f32x4_mul(vz4567, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() local 57 const v128_t vidx4567 = wasm_i32x4_shl(wasm_v128_and(vn4567, vindex_mask), 2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 58 const v128_t ven4567 = wasm_i32x4_shl(vn4567, 19); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 77 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 82 v128_t vt4567 = wasm_f32x4_add(wasm_f32x4_mul(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 87 vt4567 = wasm_f32x4_add(wasm_f32x4_mul(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8()
|
D | velu-neon-rr2-lut16-p3-x8.c | 52 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() local 56 …4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn4567), vindex_mask), 2)… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 57 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 76 vn4567 = vsubq_f32(vn4567, vmagic_bias); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 80 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 83 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8()
|
D | velu-sse41-rr2-p6-x8.c | 53 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() local 56 __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 59 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 62 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 65 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8()
|
D | velu-wasmsimd-x86-rr2-p6-x8.c | 53 v128_t vn4567 = wasm_f32x4_add(wasm_f32x4_mul(vz4567, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() local 56 v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 59 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 63 v128_t vt4567 = wasm_f32x4_add(wasm_f32x4_mul(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 68 vt4567 = wasm_f32x4_add(wasm_f32x4_mul(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8()
|
D | velu-wasmsimd-arm-rr2-p6-x8.c | 53 v128_t vn4567 = wasm_f32x4_add(wasm_f32x4_mul(vz4567, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() local 56 v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 59 vn4567 = wasm_f32x4_sub(vn4567, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 62 v128_t vt4567 = wasm_f32x4_add(wasm_f32x4_mul(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 65 vt4567 = wasm_f32x4_add(wasm_f32x4_mul(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8()
|
D | velu-sse2-rr2-p6-x8.c | 53 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() local 56 __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 59 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 62 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 65 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8()
|