/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x12.c | 54 const psimd_f32 vx89AB = psimd_sub_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local 59 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 76 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 115 vf89AB = psimd_andnotmask_f32(vx89AB < vdenorm_cutoff, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
|
D | psimd-p5-x12-acc2.c | 55 const psimd_f32 vx89AB = psimd_sub_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local 60 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 77 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 116 vf89AB = psimd_andnotmask_f32(vx89AB < vdenorm_cutoff, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
|
D | psimd-p5-x12-acc3.c | 56 const psimd_f32 vx89AB = psimd_sub_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() local 61 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 78 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 117 vf89AB = psimd_andnotmask_f32(vx89AB < vdenorm_cutoff, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
|
D | sse2-p5-x12.c | 54 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() local 59 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 76 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 115 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vx89AB, vdenorm_cutoff), vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
|
D | neon-p5-x12.c | 53 const float32x4_t vx89AB = vsubq_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() local 63 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 80 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 119 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcltq_f32(vx89AB, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
|
D | neonfma-p5-x12.c | 52 const float32x4_t vx89AB = vsubq_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() local 62 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 79 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 118 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcltq_f32(vx89AB, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12()
|
D | neonfma-p5-x12-acc2.c | 53 const float32x4_t vx89AB = vsubq_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() local 63 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 80 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 119 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcltq_f32(vx89AB, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2()
|
D | neon-p5-x12-acc2.c | 54 const float32x4_t vx89AB = vsubq_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() local 64 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 81 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 120 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcltq_f32(vx89AB, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
|
D | psimd-p5-x16-acc2.c | 56 const psimd_f32 vx89AB = psimd_sub_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() local 62 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 82 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 129 vf89AB = psimd_andnotmask_f32(vx89AB < vdenorm_cutoff, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2()
|
D | sse2-p5-x12-acc3.c | 56 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() local 61 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 78 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 117 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vx89AB, vdenorm_cutoff), vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
|
D | psimd-p5-x16.c | 55 const psimd_f32 vx89AB = psimd_sub_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() local 61 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() 81 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() 128 vf89AB = psimd_andnotmask_f32(vx89AB < vdenorm_cutoff, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16()
|
D | psimd-p5-x16-acc4.c | 58 const psimd_f32 vx89AB = psimd_sub_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() local 64 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() 84 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() 131 vf89AB = psimd_andnotmask_f32(vx89AB < vdenorm_cutoff, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4()
|
D | neonfma-p5-x12-acc3.c | 54 const float32x4_t vx89AB = vsubq_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() local 64 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() 81 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() 120 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcltq_f32(vx89AB, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3()
|
D | neon-p5-x12-acc3.c | 55 const float32x4_t vx89AB = vsubq_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() local 65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 82 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 121 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcltq_f32(vx89AB, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
|
D | sse2-p5-x12-acc2.c | 55 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() local 60 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 77 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 116 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vx89AB, vdenorm_cutoff), vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
|
D | neonfma-p5-x16-acc4.c | 56 const float32x4_t vx89AB = vsubq_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() local 67 float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() 87 float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() 134 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcltq_f32(vx89AB, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()
|
D | psimd-p5-x20-acc2.c | 57 const psimd_f32 vx89AB = psimd_sub_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2() local 64 psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2() 87 psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2() 142 vf89AB = psimd_andnotmask_f32(vx89AB < vdenorm_cutoff, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2()
|
D | neon-p5-x16-acc2.c | 55 const float32x4_t vx89AB = vsubq_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() local 66 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 86 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 133 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcltq_f32(vx89AB, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
|
D | sse2-p5-x16.c | 55 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() local 61 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 81 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 128 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vx89AB, vdenorm_cutoff), vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
|
D | neon-p5-x16.c | 54 const float32x4_t vx89AB = vsubq_f32(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() local 65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 85 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 132 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcltq_f32(vx89AB, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-div-x12.c | 43 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 54 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 125 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 130 const uint32x4_t vm89AB = vcltq_f32(vx89AB, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | neonfma-rr1-p5-div-x16.c | 43 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local 55 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 139 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 145 const uint32x4_t vm89AB = vcltq_f32(vx89AB, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 43 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 54 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 140 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 145 const uint32x4_t vm89AB = vcltq_f32(vx89AB, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 43 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 54 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 140 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 145 const uint32x4_t vm89AB = vcltq_f32(vx89AB, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x12.c | 45 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 56 const float32x4_t vz89AB = vabsq_f32(vx89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 147 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 152 const uint32x4_t vm89AB = vcltq_f32(vx89AB, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|