/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neon-frac-p9-p10-nr1recps-x16.c | in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16():
     48  float32x4_t vn0123 = vld1q_f32(x); x += 4;  (local)
     54  vn0123 = vminq_f32(vn0123, vsigmoid_maxinput);
     55  vn0123 = vmaxq_f32(vn0123, vsigmoid_mininput);
     64  const float32x4_t vn0123_sq = vmulq_f32(vn0123, vn0123);
     90  vnum0123 = vmulq_f32(vn0123, vnum0123);
    147  float32x4_t vn0123 = vld1q_f32(x); x += 4;  (local)
    149  vn0123 = vminq_f32(vn0123, vsigmoid_maxinput);
    150  vn0123 = vmaxq_f32(vn0123, vsigmoid_mininput);
    152  const float32x4_t vn0123_sq = vmulq_f32(vn0123, vn0123);
    160  vnum0123 = vmulq_f32(vn0123, vnum0123);
    [all …]
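Note: the "nr1recps" suffix marks how these sigmoid kernels divide the rational approximation's numerator by its denominator: NEON's reciprocal estimate refined by one Newton-Raphson step. A minimal standalone sketch of just that step (assumed shape, not the kernel's exact code):

    #include <arm_neon.h>

    /* One Newton-Raphson step on top of vrecpeq_f32: vrecpsq_f32(d, r)
     * computes 2 - d*r, so r * (2 - d*r) roughly doubles the estimate's
     * accurate bits. */
    static inline float32x4_t recip_nr1(float32x4_t vden) {
      float32x4_t vr = vrecpeq_f32(vden);         /* ~8-bit reciprocal estimate */
      vr = vmulq_f32(vr, vrecpsq_f32(vden, vr));  /* one refinement: "nr1recps" */
      return vr;                                  /* vnum * vr ~= vnum / vden   */
    }

A second refinement, i.e. repeating the vrecpsq_f32 line once more, is what the "nr2recps" kernels below do.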
|
D | psimd-p5-div-x8.c | in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8():
     63  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vz0123, vminus_log2e);  (local)
     68  const psimd_f32 vs0123 = (psimd_f32) ((psimd_u32) vn0123 << 23);
     72  vn0123 = psimd_sub_f32(vn0123, vmagic_bias);
     77  psimd_f32 vt0123 = psimd_qfma_f32(vz0123, vn0123, vln2_hi);
     80  vt0123 = psimd_qfma_f32(vt0123, vn0123, vln2_lo);
|
D | sse41-p5-div-x8.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8():
     63  __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias);  (local)
     68  const __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));
     72  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);
     77  __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123);
     80  vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123);
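The p5-div matches above (and the p5 raddstoreexpminusmax kernels below) share one opening move: round z*log2(e) to an integer n by adding a magic bias, then rebuild 2**n by shifting the biased integer into the exponent field (the "<< 23" lines). A scalar sketch of the trick, using the usual constants rather than values copied from XNNPACK:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Adding 0x1.8000FEp23f forces round-to-nearest of x*log2(e) into the
     * low mantissa bits, pre-biased by 127; shifting the whole bit pattern
     * left by 23 therefore lands n+127 in the exponent field, i.e. yields
     * the float 2**n directly. */
    int main(void) {
      const float vmagic_bias = 0x1.8000FEp23f;
      const float vlog2e = 0x1.715476p+0f;
      const float x = 3.5f;

      float vn = x * vlog2e + vmagic_bias;  /* low bits: round(x*log2e) + 127 */

      uint32_t bits;
      memcpy(&bits, &vn, sizeof bits);      /* reinterpret, like _mm_castps_si128 */
      bits <<= 23;                          /* like _mm_slli_epi32(..., 23) */

      float vs;                             /* vs == 2**round(x*log2e) */
      memcpy(&vs, &bits, sizeof vs);

      vn -= vmagic_bias;                    /* recover n as a float */
      printf("n=%g  2^n=%g\n", vn, vs);     /* prints n=5  2^n=32 */
      return 0;
    }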
|
D | psimd-p5-div-x12.c | in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12():
     65  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vz0123, vminus_log2e);  (local)
     71  const psimd_f32 vs0123 = (psimd_f32) ((psimd_u32) vn0123 << 23);
     76  vn0123 = psimd_sub_f32(vn0123, vmagic_bias);
     82  psimd_f32 vt0123 = psimd_qfma_f32(vz0123, vn0123, vln2_hi);
     86  vt0123 = psimd_qfma_f32(vt0123, vn0123, vln2_lo);
|
D | neon-rr2-lut2048-p1-nr2recps-x8.c | in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8():
     63  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e_x2048);  (local)
     76  …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(…
     80  …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_…
    104  vn0123 = vsubq_f32(vn0123, vmagic_bias);
    109  float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_o2048_hi);
    112  vt0123 = vmlaq_f32(vt0123, vn0123, vln2_o2048_lo);
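The lut2048 variant trades polynomial degree for a table: the low 11 bits of the rounded fixed-point exponent select a table slot (the vandq_s32 line), while the remaining bits are shifted into the exponent field and added to the entry (the vbicq_s32/vshlq_n_s32 line). The lut64 kernels below do the same with 6 index bits. A scalar sketch under stated assumptions; exp2_tab here is a hypothetical stand-in for the kernel's precomputed table, and lrintf replaces the magic-bias rounding:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* Hypothetical stand-in for the 2048-entry table: the float bit pattern
     * of 2**(i/2048). The real kernels use precomputed constants. */
    static uint32_t exp2_tab(uint32_t i) {
      float f = exp2f((float) i * 0x1.0p-11f);
      uint32_t b;
      memcpy(&b, &f, sizeof b);
      return b;
    }

    /* Compute 2**z: low 11 bits of round(z*2048) index the table, the rest
     * is shifted into the exponent field (12 = 23 - 11) and added on. */
    static float exp2_via_lut(float z) {
      int32_t n = (int32_t) lrintf(z * 2048.0f);
      uint32_t idx = (uint32_t) n & 0x7FF;                   /* like vandq_s32(vn, vindex_mask)       */
      uint32_t e = ((uint32_t) n & ~UINT32_C(0x7FF)) << 12;  /* like vshlq_n_s32(vbicq_s32(...), 12)  */
      uint32_t bits = exp2_tab(idx) + e;                     /* bit pattern of 2**(n/2048)            */
      float r;
      memcpy(&r, &bits, sizeof r);
      return r;
    }

The exponent-field addition wraps correctly for negative n in two's complement, which is why the kernels can use a plain integer add here.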
|
/external/XNNPACK/src/f32-sigmoid/ |
D | neon-frac-p9-p10-nr1recps.c.in |
    109  float32x4_t vn0123 = vld1q_f32(x); x += 4;  (variable)
    111  vn0123 = vminq_f32(vn0123, vsigmoid_maxinput);
    112  vn0123 = vmaxq_f32(vn0123, vsigmoid_mininput);
    114  const float32x4_t vn0123_sq = vmulq_f32(vn0123, vn0123);
    122  vnum0123 = vmulq_f32(vn0123, vnum0123);
    142  float32x4_t vn0123 = vld1q_f32(x);  (variable)
    144  vn0123 = vminq_f32(vn0123, vsigmoid_maxinput);
    145  vn0123 = vmaxq_f32(vn0123, vsigmoid_mininput);
    147  const float32x4_t vn0123_sq = vmulq_f32(vn0123, vn0123);
    155  vnum0123 = vmulq_f32(vn0123, vnum0123);
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4():
     53  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e);  (local)
     57  const psimd_f32 vs0123 = (psimd_f32) ((psimd_u32) vn0123 << 23);
     60  vn0123 = psimd_sub_f32(vn0123, vmagic_bias);
     64  psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi);
     66  vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo);
|
D | sse2-p5-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4():
     53  __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias);  (local)
     57  const __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));
     60  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);
     64  __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123);
     66  vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123);
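The vminus_ln2_hi/vminus_ln2_lo pairs in these kernels are a Cody-Waite split of ln(2): subtracting n*ln2 in two steps keeps the reduced argument t accurate where a single multiply-subtract would lose the low bits. A sketch using the fdlibm-style float split (constants illustrative, not copied from XNNPACK):

    /* Reduced argument for exp: t = x - n*ln2. The high part has trailing
     * zero bits in its mantissa, so n*ln2_hi is exact for small integer n
     * and cancels against x without rounding; the low part then folds in
     * the remainder of ln(2). */
    static inline float reduce_ln2(float vx, float vn) {
      const float vminus_ln2_hi = -0x1.62E400p-1f;   /* -0.693145751953125      */
      const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;  /* remaining bits of -ln 2 */
      float vt = vn * vminus_ln2_hi + vx;
      vt = vn * vminus_ln2_lo + vt;
      return vt;   /* |vt| <= ~0.5*ln2, ready for the polynomial */
    }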
|
D | psimd-p5-x8.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8():
     55  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e);  (local)
     60  const psimd_f32 vs0123 = (psimd_f32) ((psimd_u32) vn0123 << 23);
     64  vn0123 = psimd_sub_f32(vn0123, vmagic_bias);
     69  psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi);
     72  vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo);
|
D | psimd-p5-x8-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2():
     56  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e);  (local)
     61  const psimd_f32 vs0123 = (psimd_f32) ((psimd_u32) vn0123 << 23);
     65  vn0123 = psimd_sub_f32(vn0123, vmagic_bias);
     70  psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi);
     73  vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo);
|
D | neon-lut64-p2-x8-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2():
     61  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e_x64);  (local)
     74  …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(…
     78  …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_…
    102  vn0123 = vsubq_f32(vn0123, vmagic_bias);
    107  float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_o64_hi);
    110  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x8-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2():
     60  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vx0123, vlog2e_x64);  (local)
     73  …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(…
     77  …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_…
    101  vn0123 = vsubq_f32(vn0123, vmagic_bias);
    106  float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_o64_hi);
    109  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x8.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8():
     60  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e_x64);  (local)
     73  …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(…
     77  …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_…
    101  vn0123 = vsubq_f32(vn0123, vmagic_bias);
    106  float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_o64_hi);
    109  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x8.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8():
     59  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vx0123, vlog2e_x64);  (local)
     72  …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(…
     76  …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_…
    100  vn0123 = vsubq_f32(vn0123, vmagic_bias);
    105  float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_o64_hi);
    108  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
|
D | neon-p5-x8-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2():
     60  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e);  (local)
     65  …const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));
     69  vn0123 = vsubq_f32(vn0123, vmagic_bias);
     74  float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi);
     77  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo);
|
D | sse2-p5-x8.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8():
     55  __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias);  (local)
     60  const __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));
     64  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);
     69  __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123);
     72  vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123);
|
D | neonfma-p5-x8-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2():
     59  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vx0123, vlog2e);  (local)
     64  …const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));
     68  vn0123 = vsubq_f32(vn0123, vmagic_bias);
     73  float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi);
     76  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_lo);
|
D | neon-p5-x8.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8():
     59  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e);  (local)
     64  …const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));
     68  vn0123 = vsubq_f32(vn0123, vmagic_bias);
     73  float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi);
     76  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo);
|
D | sse2-p5-x8-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2():
     56  __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias);  (local)
     61  const __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));
     65  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);
     70  __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123);
     73  vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123);
|
D | neonfma-p5-x8.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8():
     58  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vx0123, vlog2e);  (local)
     63  …const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));
     67  vn0123 = vsubq_f32(vn0123, vmagic_bias);
     72  float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi);
     75  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_lo);
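After the reduction, the p5 kernels evaluate a degree-5 polynomial in t and reconstruct exp(x) as s * p(t), where s = 2**n from the magic-bias step. A scalar sketch of that tail; the coefficients below are plain Taylor values for illustration, where the real kernels use minimax-fitted ones:

    /* exp(x) ~= s * (1 + t + c2*t^2 + c3*t^3 + c4*t^4 + c5*t^5).
     * Horner-fold the polynomial to vp, then reconstruct as s + (t*s)*vp,
     * mirroring the shape of the kernels' final lines. */
    static inline float exp_p5_tail(float vs, float vt) {
      const float c2 = 0x1.000000p-1f;   /* 1/2   (illustrative Taylor values; */
      const float c3 = 0x1.555556p-3f;   /* 1/6    XNNPACK uses minimax        */
      const float c4 = 0x1.555556p-5f;   /* 1/24   coefficients)               */
      const float c5 = 0x1.111112p-7f;   /* 1/120                              */
      float vp = c5 * vt + c4;
      vp = vp * vt + c3;
      vp = vp * vt + c2;
      vp = vp * vt + 1.0f;
      vt = vt * vs;            /* t*s */
      return vp * vt + vs;     /* s * (1 + t*p(t)) */
    }

Chaining the magic-bias, Cody-Waite, and polynomial sketches above gives a complete scalar skeleton of what each SIMD lane in these kernels computes.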
|
D | psimd-p5-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12():
     57  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e);  (local)
     63  const psimd_f32 vs0123 = (psimd_f32) ((psimd_u32) vn0123 << 23);
     68  vn0123 = psimd_sub_f32(vn0123, vmagic_bias);
     74  psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi);
     78  vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo);
|
D | psimd-p5-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2():
     58  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e);  (local)
     64  const psimd_f32 vs0123 = (psimd_f32) ((psimd_u32) vn0123 << 23);
     69  vn0123 = psimd_sub_f32(vn0123, vmagic_bias);
     75  psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi);
     79  vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo);
|
D | psimd-p5-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3():
     59  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e);  (local)
     65  const psimd_f32 vs0123 = (psimd_f32) ((psimd_u32) vn0123 << 23);
     70  vn0123 = psimd_sub_f32(vn0123, vmagic_bias);
     76  psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi);
     80  vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo);
|
D | sse2-p5-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12():
     57  __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias);  (local)
     63  const __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));
     68  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);
     74  __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123);
     78  vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123);
|
D | neon-p5-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12():
     61  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e);  (local)
     67  …const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));
     72  vn0123 = vsubq_f32(vn0123, vmagic_bias);
     78  float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi);
     82  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo);
|