/external/XNNPACK/src/f32-hswish/gen/ |
D | neonfma-x8.c | 34 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neonfma_x8() local 37 float32x4_t vacc4567 = vfmaq_f32(vhalf, vx4567, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8() 46 vacc4567 = vmulq_f32(vacc4567, vx4567); in xnn_f32_hswish_ukernel__neonfma_x8()
|
D | psimd-x8.c | 34 const psimd_f32 vx4567 = psimd_load_f32(x + 4); in xnn_f32_hswish_ukernel__psimd_x8() local 38 psimd_f32 vacc4567 = psimd_qfma_f32(vhalf, vx4567, vsixth); in xnn_f32_hswish_ukernel__psimd_x8() 47 vacc4567 = psimd_mul_f32(vacc4567, vx4567); in xnn_f32_hswish_ukernel__psimd_x8()
|
D | neon-x8.c | 34 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x8() local 37 float32x4_t vacc4567 = vmlaq_f32(vhalf, vx4567, vsixth); in xnn_f32_hswish_ukernel__neon_x8() 46 vacc4567 = vmulq_f32(vacc4567, vx4567); in xnn_f32_hswish_ukernel__neon_x8()
|
D | sse-x8.c | 34 const __m128 vx4567 = _mm_loadu_ps(x + 4); in xnn_f32_hswish_ukernel__sse_x8() local 38 __m128 vacc4567 = _mm_mul_ps(vx4567, vsixth); in xnn_f32_hswish_ukernel__sse_x8() 50 vacc4567 = _mm_mul_ps(vacc4567, vx4567); in xnn_f32_hswish_ukernel__sse_x8()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x8.c | 52 const psimd_f32 vx4567 = psimd_sub_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() local 56 psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 70 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 101 vf4567 = psimd_andnotmask_f32(vx4567 < vdenorm_cutoff, vf4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
|
D | psimd-p5-x8-acc2.c | 53 const psimd_f32 vx4567 = psimd_sub_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() local 57 psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 71 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 102 vf4567 = psimd_andnotmask_f32(vx4567 < vdenorm_cutoff, vf4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
|
D | neon-p5-x8-acc2.c | 52 const float32x4_t vx4567 = vsubq_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() local 61 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 75 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 106 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcltq_f32(vx4567, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
|
D | sse2-p5-x8.c | 52 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() local 56 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 70 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 101 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vx4567, vdenorm_cutoff), vf4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
|
D | neonfma-p5-x8-acc2.c | 51 const float32x4_t vx4567 = vsubq_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() local 60 float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 74 float32x4_t vt4567 = vfmaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 105 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcltq_f32(vx4567, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
|
D | neon-p5-x8.c | 51 const float32x4_t vx4567 = vsubq_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() local 60 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 74 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 105 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcltq_f32(vx4567, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
|
D | sse2-p5-x8-acc2.c | 53 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() local 57 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 71 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 102 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vx4567, vdenorm_cutoff), vf4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
|
D | neonfma-p5-x8.c | 50 const float32x4_t vx4567 = vsubq_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() local 59 float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 73 float32x4_t vt4567 = vfmaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 104 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcltq_f32(vx4567, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
|
D | psimd-p5-x12.c | 53 const psimd_f32 vx4567 = psimd_sub_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local 58 psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 75 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 114 vf4567 = psimd_andnotmask_f32(vx4567 < vdenorm_cutoff, vf4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
|
D | psimd-p5-x12-acc2.c | 54 const psimd_f32 vx4567 = psimd_sub_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local 59 psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 76 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 115 vf4567 = psimd_andnotmask_f32(vx4567 < vdenorm_cutoff, vf4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
|
D | psimd-p5-x12-acc3.c | 55 const psimd_f32 vx4567 = psimd_sub_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() local 60 psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 77 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 116 vf4567 = psimd_andnotmask_f32(vx4567 < vdenorm_cutoff, vf4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
|
D | sse2-p5-x12.c | 53 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() local 58 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 75 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 114 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vx4567, vdenorm_cutoff), vf4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
|
D | neon-p5-x12.c | 52 const float32x4_t vx4567 = vsubq_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() local 62 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 79 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 118 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcltq_f32(vx4567, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
|
D | neonfma-p5-x12.c | 51 const float32x4_t vx4567 = vsubq_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() local 61 float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 78 float32x4_t vt4567 = vfmaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 117 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcltq_f32(vx4567, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12()
|
D | neonfma-p5-x12-acc2.c | 52 const float32x4_t vx4567 = vsubq_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() local 62 float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 79 float32x4_t vt4567 = vfmaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 118 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcltq_f32(vx4567, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2()
|
D | neon-p5-x12-acc2.c | 53 const float32x4_t vx4567 = vsubq_f32(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() local 63 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 80 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 119 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcltq_f32(vx4567, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-div-x8.c | 42 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() local 52 const float32x4_t vz4567 = vabsq_f32(vx4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 110 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 114 const uint32x4_t vm4567 = vcltq_f32(vx4567, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
|
D | neonfma-rr1-p5-nr2recps-x8.c | 42 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local 52 const float32x4_t vz4567 = vabsq_f32(vx4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 122 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 126 const uint32x4_t vm4567 = vcltq_f32(vx4567, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
D | neonfma-rr1-p5-nr1recps1fma-x8.c | 42 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() local 52 const float32x4_t vz4567 = vabsq_f32(vx4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 122 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 126 const uint32x4_t vm4567 = vcltq_f32(vx4567, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
|
D | neonfma-rr1-p5-div-x12.c | 42 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 53 const float32x4_t vz4567 = vabsq_f32(vx4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 124 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 129 const uint32x4_t vm4567 = vcltq_f32(vx4567, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | neonfma-rr1-p5-nr2fma-x8.c | 42 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() local 52 const float32x4_t vz4567 = vabsq_f32(vx4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 122 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 126 const uint32x4_t vm4567 = vcltq_f32(vx4567, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
|