/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neon-frac-p9-p10-nr1recps-x16.c | 50 float32x4_t vn89AB = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() local 58 vn89AB = vminq_f32(vn89AB, vsigmoid_maxinput); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 59 vn89AB = vmaxq_f32(vn89AB, vsigmoid_mininput); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 66 const float32x4_t vn89AB_sq = vmulq_f32(vn89AB, vn89AB); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 92 vnum89AB = vmulq_f32(vn89AB, vnum89AB); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
|
D | psimd-p5-div-x12.c | 67 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local 73 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 78 vn89AB = psimd_sub_f32(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 84 psimd_f32 vt89AB = psimd_qfma_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 88 vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vln2_lo); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
|
D | psimd-p5-div-x16.c | 69 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() local 76 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 82 vn89AB = psimd_sub_f32(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 89 psimd_f32 vt89AB = psimd_qfma_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 94 vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vln2_lo); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
|
D | sse41-p5-div-x12.c | 67 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local 73 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 78 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 84 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 88 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x12.c | 59 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local 65 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 70 vn89AB = psimd_sub_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 76 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 80 vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
|
D | psimd-p5-x12-acc2.c | 60 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local 66 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 71 vn89AB = psimd_sub_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 77 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 81 vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
|
D | psimd-p5-x12-acc3.c | 61 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() local 67 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 72 vn89AB = psimd_sub_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 78 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 82 vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
|
D | sse2-p5-x12.c | 59 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() local 65 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 70 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 76 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 80 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
|
D | neon-p5-x12.c | 63 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() local 69 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 74 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 80 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 84 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
|
D | neonfma-p5-x12.c | 62 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() local 68 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 73 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 79 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 83 vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12()
|
D | neonfma-p5-x12-acc2.c | 63 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() local 69 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 74 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 80 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 84 vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2()
|
D | neon-p5-x12-acc2.c | 64 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() local 70 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 75 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 81 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 85 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
|
D | neonfma-lut64-p2-x12-acc3.c | 65 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3() local 79 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3() 88 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3() 117 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3() 123 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3() 127 vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
|
D | neon-lut64-p2-x12-acc3.c | 66 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3() local 80 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3() 89 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3() 118 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3() 124 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3() 128 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
|
D | neon-lut64-p2-x12-acc2.c | 65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2() local 79 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2() 88 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2() 117 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2() 123 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2() 127 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
|
D | neon-lut64-p2-x12.c | 64 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12() local 78 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12() 87 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12() 116 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12() 122 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12() 126 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
|
D | neonfma-lut64-p2-x12-acc2.c | 64 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() local 78 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() 87 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() 116 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() 122 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() 126 vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
|
D | neonfma-lut64-p2-x12.c | 63 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12() local 77 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12() 86 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12() 115 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12() 121 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12() 125 vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
|
D | psimd-p5-x16-acc2.c | 62 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() local 69 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 75 vn89AB = psimd_sub_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 82 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 87 vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2()
|
D | sse2-p5-x12-acc3.c | 61 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() local 67 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 72 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 78 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 82 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
|
D | psimd-p5-x16.c | 61 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() local 68 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() 74 vn89AB = psimd_sub_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() 81 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() 86 vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16()
|
D | psimd-p5-x16-acc4.c | 64 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() local 71 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() 77 vn89AB = psimd_sub_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() 84 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() 89 vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4()
|
D | neonfma-p5-x12-acc3.c | 64 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() local 70 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() 75 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() 81 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() 85 vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3()
|
D | neon-p5-x12-acc3.c | 65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() local 71 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 76 vn89AB = vsubq_f32(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 82 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 86 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
|
D | sse2-p5-x12-acc2.c | 60 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() local 66 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 71 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 77 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 81 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
|