/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | psimd-p5-x16-acc2.c |
     57  const psimd_f32 vxCDEF = psimd_sub_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() local
     63  psimd_f32 vnCDEF = psimd_qfma_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2()
     83  psimd_f32 vtCDEF = psimd_qfma_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2()
    130  vfCDEF = psimd_andnotmask_f32(vxCDEF < vdenorm_cutoff, vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2()

D | psimd-p5-x16.c |
     56  const psimd_f32 vxCDEF = psimd_sub_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() local
     62  psimd_f32 vnCDEF = psimd_qfma_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16()
     82  psimd_f32 vtCDEF = psimd_qfma_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16()
    129  vfCDEF = psimd_andnotmask_f32(vxCDEF < vdenorm_cutoff, vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16()

D | psimd-p5-x16-acc4.c |
     59  const psimd_f32 vxCDEF = psimd_sub_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() local
     65  psimd_f32 vnCDEF = psimd_qfma_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4()
     85  psimd_f32 vtCDEF = psimd_qfma_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4()
    132  vfCDEF = psimd_andnotmask_f32(vxCDEF < vdenorm_cutoff, vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4()

D | neonfma-p5-x16-acc4.c |
     57  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() local
     68  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()
     88  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()
    135  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()

D | psimd-p5-x20-acc2.c |
     58  const psimd_f32 vxCDEF = psimd_sub_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2() local
     65  psimd_f32 vnCDEF = psimd_qfma_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2()
     88  psimd_f32 vtCDEF = psimd_qfma_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2()
    143  vfCDEF = psimd_andnotmask_f32(vxCDEF < vdenorm_cutoff, vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2()

D | neon-p5-x16-acc2.c |
     56  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() local
     67  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
     87  float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
    134  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()

D | sse2-p5-x16.c |
     56  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() local
     62  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
     82  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    129  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vxCDEF, vdenorm_cutoff), vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()

D | neon-p5-x16.c |
     55  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() local
     66  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
     86  float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
    133  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()

D | neonfma-p5-x16.c |
     54  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() local
     65  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16()
     85  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16()
    132  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16()

D | sse2-p5-x16-acc4.c |
     59  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() local
     65  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
     85  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    132  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vxCDEF, vdenorm_cutoff), vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()

D | neonfma-p5-x16-acc2.c |
     55  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() local
     66  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2()
     86  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2()
    133  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2()

D | sse2-p5-x16-acc2.c |
     57  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() local
     63  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
     83  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    130  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vxCDEF, vdenorm_cutoff), vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()

D | neon-p5-x16-acc4.c |
     58  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() local
     69  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
     89  float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
    136  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()

D | psimd-p5-x20.c |
     57  const psimd_f32 vxCDEF = psimd_sub_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20() local
     64  psimd_f32 vnCDEF = psimd_qfma_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20()
     87  psimd_f32 vtCDEF = psimd_qfma_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20()
    142  vfCDEF = psimd_andnotmask_f32(vxCDEF < vdenorm_cutoff, vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20()

D | neonfma-p5-x20-acc2.c |
     56  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() local
     68  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2()
     91  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2()
    146  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2()

D | sse2-p5-x20.c |
     57  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() local
     64  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
     87  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    142  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vxCDEF, vdenorm_cutoff), vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()

D | neonfma-p5-x20-acc5.c |
     59  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() local
     71  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
     94  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
    149  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()

D | neon-p5-x20-acc5.c |
     60  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() local
     72  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
     95  float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
    150  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()

D | neonfma-p5-x20.c |
     55  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() local
     67  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
     90  float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
    145  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()

D | psimd-p5-x20-acc5.c |
     61  const psimd_f32 vxCDEF = psimd_sub_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc5() local
     68  psimd_f32 vnCDEF = psimd_qfma_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc5()
     91  psimd_f32 vtCDEF = psimd_qfma_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc5()
    146  vfCDEF = psimd_andnotmask_f32(vxCDEF < vdenorm_cutoff, vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc5()

D | sse2-p5-x20-acc2.c |
     58  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() local
     65  __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
     88  __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    143  vfCDEF = _mm_andnot_ps(_mm_cmplt_ps(vxCDEF, vdenorm_cutoff), vfCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()

D | neon-p5-x20.c |
     56  const float32x4_t vxCDEF = vsubq_f32(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() local
     68  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
     91  float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
    146  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcltq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()

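The four hits on vxCDEF repeat across every variant above because they are the four stages of the shared "p5" (degree-5 polynomial) scheme; only the intrinsic spelling differs (psimd_qfma_f32 vs. paired _mm_mul_ps/_mm_add_ps vs. vmlaq_f32 vs. vfmaq_f32). The scalar C sketch below shows what one lane of those vectorized lines computes. The hex constants are assumptions reconstructed from the p5 scheme for illustration, not values copied from any one generated file.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* One lane of the p5 expminusmax kernels above, in scalar C. */
static float exp_minus_max_p5(float vi, float vi_max) {
  const float vx = vi - vi_max;  /* hit 1: vx = vi - vi_max, so vx <= 0 */

  /* hit 2: n := round(vx * log2(e)) via the magic-bias trick; adding
   * roughly 1.5 * 2^23 forces the integer part into the low mantissa bits. */
  const float vmagic_bias = 0x1.8000FEp23f;  /* assumed value */
  const float vlog2e = 0x1.715476p+0f;
  float vn = vx * vlog2e + vmagic_bias;

  /* Build vs = 2^n by shifting n into the float exponent field. */
  uint32_t vn_bits;
  memcpy(&vn_bits, &vn, sizeof vn_bits);
  const uint32_t vs_bits = vn_bits << 23;
  float vs;
  memcpy(&vs, &vs_bits, sizeof vs);
  vn -= vmagic_bias;

  /* hit 3: Cody-Waite reduction vt = vx - vn*ln2, with ln2 split into
   * hi and lo parts so the subtraction stays exact. */
  const float vminus_ln2_hi = -0x1.62E43p-1f;   /* assumed split */
  const float vminus_ln2_lo = 0x1.05C61p-29f;
  float vt = vn * vminus_ln2_hi + vx;
  vt = vn * vminus_ln2_lo + vt;

  /* Degree-5 polynomial approximation of exp(vt) on the reduced range
   * (coefficients assumed for illustration). */
  float vp = 0x1.0F9F9Cp-7f;
  vp = vp * vt + 0x1.573A1Ap-5f;
  vp = vp * vt + 0x1.555A80p-3f;
  vp = vp * vt + 0x1.FFFDC6p-2f;
  vp = vp * vt + 0x1.FFFFF6p-1f;
  float vf = (vt * vp) * vs + vs;

  /* hit 4: flush would-be subnormal results to zero, as the
   * andnot/bic masking does in the vector kernels. */
  const float vdenorm_cutoff = -0x1.5D589Ep6f;  /* about -87.34, assumed */
  if (vx < vdenorm_cutoff) vf = 0.0f;
  return vf;
}

int main(void) {
  /* exp(2 - 3) = exp(-1), should print a value near 0.367879. */
  printf("%f\n", exp_minus_max_p5(2.0f, 3.0f));
  return 0;
}

The accN suffixes in the file names change only how many running sums the reduction keeps; the per-lane math sketched here is identical across them.
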
/external/XNNPACK/src/f32-sigmoid/gen/

D | neonfma-rr1-p5-div-x16.c |
     44  const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local
     56  const float32x4_t vzCDEF = vabsq_f32(vxCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
    140  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcagtq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
    146  const uint32x4_t vmCDEF = vcltq_f32(vxCDEF, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()

D | neonfma-rr1-p5-div-x20.c |
     44  const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() local
     57  const float32x4_t vzCDEF = vabsq_f32(vxCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
    154  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcagtq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
    161  const uint32x4_t vmCDEF = vcltq_f32(vxCDEF, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()

D | neonfma-rr1-p5-nr2recps-x16.c |
     44  const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local
     56  const float32x4_t vzCDEF = vabsq_f32(vxCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
    158  vfCDEF = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfCDEF), vcagtq_f32(vxCDEF, vdenorm_cutoff))); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
    164  const uint32x4_t vmCDEF = vcltq_f32(vxCDEF, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()

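The sigmoid hits follow a related pattern: vz = |vx| feeds an exp(-z) evaluation, the vbicq/vcagtq line flushes the result to zero once exp(-|x|) would be subnormal (vcagtq_f32 compares absolute values), and the final vcltq mask selects between f and 1 - f by the sign of x, using sigmoid(x) = 1 - sigmoid(-x). The div variants divide directly; the nr2recps variant instead refines a reciprocal estimate with two vrecpsq_f32 Newton-Raphson steps. In the scalar sketch below, expf stands in for the kernels' magic-bias + degree-5 polynomial evaluation, and the cutoff magnitude is an assumption.

#include <math.h>
#include <stdio.h>

/* Scalar outline of the rr1-p5 sigmoid kernels above. */
static float sigmoid_rr1_p5(float vx) {
  const float vz = fabsf(vx);  /* hit: vz = vabsq_f32(vx) */

  /* f = sigmoid(-|x|) = exp(-z) / (exp(-z) + 1), always in (0, 0.5]. */
  const float ve = expf(-vz);
  float vf = ve / (ve + 1.0f);

  /* hit: vbicq_u32(..., vcagtq_f32(vx, vdenorm_cutoff)) - once |x| passes
   * the cutoff, exp(-|x|) would be subnormal, so force f to exactly 0. */
  const float vdenorm_cutoff = 0x1.5D589Ep+6f;  /* about 87.34, assumed */
  if (vz > vdenorm_cutoff) vf = 0.0f;

  /* hit: vm = vcltq_f32(vx, vmovq_n_f32(0.0f)) keeps f for negative x and
   * selects 1 - f otherwise, since sigmoid(x) = 1 - sigmoid(-x). */
  return (vx < 0.0f) ? vf : 1.0f - vf;
}

int main(void) {
  /* Should print values near 0.119203 and 0.880797. */
  printf("%f %f\n", sigmoid_rr1_p5(-2.0f), sigmoid_rr1_p5(2.0f));
  return 0;
}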