/external/XNNPACK/src/f32-hswish/gen/ |
D | psimd-x4.c |
  33 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x4() local
  36 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x4()
  42 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x4()
  48 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x4() local
  50 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x4()
  53 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x4()
  58 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x4() local
  59 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x4()
  62 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x4()
|
D | neon-x4.c |
  33 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x4() local
  35 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x4()
  41 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x4()
  46 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x4() local
  47 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x4()
  50 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x4()
  54 const float32x4_t vx0123 = vld1q_f32(x); in xnn_f32_hswish_ukernel__neon_x4() local
  55 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x4()
  58 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x4()
|
D | neonfma-x4.c |
  33 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neonfma_x4() local
  35 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x4()
  41 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x4()
  46 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neonfma_x4() local
  47 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x4()
  50 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x4()
  54 const float32x4_t vx0123 = vld1q_f32(x); in xnn_f32_hswish_ukernel__neonfma_x4() local
  55 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x4()
  58 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x4()
|
D | sse-x4.c |
  33 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x4() local
  36 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x4()
  44 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x4()
  50 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x4() local
  52 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x4()
  56 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x4()
  61 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x4() local
  62 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x4()
  66 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x4()
|
D | neonfma-x8.c |
  33 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neonfma_x8() local
  36 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
  45 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x8()
  52 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neonfma_x8() local
  53 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
  56 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x8()
  60 const float32x4_t vx0123 = vld1q_f32(x); in xnn_f32_hswish_ukernel__neonfma_x8() local
  61 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
  64 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x8()
|
D | psimd-x8.c |
  33 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x8() local
  37 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
  46 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x8()
  54 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x8() local
  56 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
  59 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x8()
  64 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x8() local
  65 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
  68 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x8()
|
D | neon-x8.c |
  33 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x8() local
  36 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
  45 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x8()
  52 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x8() local
  53 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
  56 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x8()
  60 const float32x4_t vx0123 = vld1q_f32(x); in xnn_f32_hswish_ukernel__neon_x8() local
  61 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
  64 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x8()
|
D | sse-x8.c |
  33 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x8() local
  37 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
  49 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x8()
  57 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x8() local
  59 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
  63 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x8()
  68 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x8() local
  69 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
  73 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x8()
|
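All nine generated kernels above compute the same h-swish formula, x * clamp(x/6 + 1/2, 0, 1) (equivalently x * relu6(x + 3) / 6): the gate x/6 + 1/2 is formed with a fused multiply-add on psimd/NEON (psimd_qfma_f32, vmlaq_f32, vfmaq_f32) and with a separate multiply on SSE, the clamp to [0, 1] sits on source lines this query did not match, and the final multiply scales the gate by x. A scalar sketch of the computation (the literal constants below are written out here and do not appear in the matches):

  #include <math.h>

  /* Scalar reference for the vectorized h-swish kernels above; the clamp
   * happens on source lines the cross-reference did not match. */
  static float hswish_scalar(float x) {
    float vacc = fmaf(x, 0x1.555556p-3f /* 1/6 */, 0.5f);  /* gate: x/6 + 1/2 */
    vacc = fminf(fmaxf(vacc, 0.0f), 1.0f);                 /* saturate to [0, 1] */
    return vacc * x;                                       /* h-swish: x * gate */
  }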
/external/XNNPACK/src/f32-hswish/ |
D | neon.c.in |
  55 const float32x4_t vx0123 = vld1q_f32(x); x += 4;
  57 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
  59 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
  62 vacc0123 = vmulq_f32(vacc0123, vx0123);
  66 const float32x4_t vx0123 = vld1q_f32(x);
  68 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
  70 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
  73 vacc0123 = vmulq_f32(vacc0123, vx0123);
|
D | sse.c.in |
  59 const __m128 vx0123 = _mm_loadu_ps(x); variable
  61 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
  65 vacc0123 = _mm_mul_ps(vacc0123, vx0123);
  70 const __m128 vx0123 = _mm_loadu_ps(x); variable
  71 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
  75 vacc0123 = _mm_mul_ps(vacc0123, vx0123);
|
D | psimd.c.in |
  56 const psimd_f32 vx0123 = psimd_load_f32(x); variable
  58 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
  61 vacc0123 = psimd_mul_f32(vacc0123, vx0123);
  66 const psimd_f32 vx0123 = psimd_load_f32(x); variable
  67 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
  70 vacc0123 = psimd_mul_f32(vacc0123, vx0123);
|
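The neon.c.in template above carries both multiply-add spellings back to back (lines 57/59 and 68/70); the generator keeps one per target, which is how neon-x4.c/neon-x8.c (vmlaq_f32) and neonfma-x4.c/neonfma-x8.c (vfmaq_f32) diverge from a single source. Rendered as plain C with a compile-time guard (the ACLE macro below is illustrative; the template keys off a generator flag, not a preprocessor test):

  #include <arm_neon.h>

  /* The h-swish gate step as the FMA/non-FMA fork of neon.c.in resolves it. */
  static float32x4_t hswish_gate(float32x4_t vx, float32x4_t vsixth, float32x4_t vhalf) {
  #if defined(__ARM_FEATURE_FMA)
    return vfmaq_f32(vhalf, vx, vsixth);  /* fused multiply-add, single rounding */
  #else
    return vmlaq_f32(vhalf, vx, vsixth);  /* multiply then add, two roundings */
  #endif
  }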
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x4.c |
  50 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() local
  53 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
  64 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
  87 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
|
D | sse2-p5-x4.c |
  50 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() local
  53 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
  64 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
  87 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vx0123, vdenorm_cutoff), vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
|
D | psimd-p5-x8.c |
  51 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() local
  55 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
  69 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
  100 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
|
D | psimd-p5-x8-acc2.c |
  52 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() local
  56 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
  70 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
  101 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
|
D | neon-p5-x8-acc2.c |
  51 const float32x4_t vx0123 = vsubq_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() local
  60 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
  74 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
  105 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcltq_f32(vx0123, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
|
D | sse2-p5-x8.c |
  51 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() local
  55 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
  69 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
  100 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vx0123, vdenorm_cutoff), vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
|
D | neonfma-p5-x8-acc2.c |
  50 const float32x4_t vx0123 = vsubq_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() local
  59 float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
  73 float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
  104 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcltq_f32(vx0123, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
|
D | neon-p5-x8.c |
  50 const float32x4_t vx0123 = vsubq_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() local
  59 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
  73 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
  104 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcltq_f32(vx0123, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
|
D | sse2-p5-x8-acc2.c |
  52 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() local
  56 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
  70 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
  101 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vx0123, vdenorm_cutoff), vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
|
D | neonfma-p5-x8.c |
  49 const float32x4_t vx0123 = vsubq_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() local
  58 float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
  72 float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
  103 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcltq_f32(vx0123, vdenorm_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
|
D | psimd-p5-x12.c |
  52 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local
  57 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
  74 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
  113 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
|
D | psimd-p5-x12-acc2.c |
  53 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local
  58 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
  75 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
  114 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
|
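Every raddstoreexpminusmax kernel above follows the same "p5" recipe for exp(x - max), x <= 0: round n = x/ln 2 to an integer with a magic-bias trick (vmagic_bias, vlog2e), reconstruct the remainder t from a two-term split of ln 2 (vminus_ln2_hi, vminus_ln2_lo), evaluate a degree-5 polynomial in t, scale by 2^n, and flush to zero below vdenorm_cutoff, where exp would only yield denormals. A scalar sketch, with lrintf/ldexpf standing in for the kernels' bit-level tricks and plain Taylor coefficients standing in for their fitted constants (the constant values below are assumptions; only the identifiers appear in the matches):

  #include <math.h>

  /* Scalar sketch of the p5 exp(x - max) scheme, for x <= 0. */
  static float expminus_p5_sketch(float x) {
    const float log2e         =  0x1.715476p+0f;  /* 1/ln(2) */
    const float minus_ln2_hi  = -0x1.62E43p-1f;   /* two-term split: hi + lo */
    const float minus_ln2_lo  =  0x1.05C61p-29f;  /*   together ~= -ln(2)    */
    const float denorm_cutoff = -87.336f;         /* approx; exp(x) denormal below */

    const float vn = (float) lrintf(x * log2e);   /* n = round(x / ln 2) */
    float vt = fmaf(vn, minus_ln2_hi, x);         /* t = x - n*ln(2), in two steps */
    vt = fmaf(vn, minus_ln2_lo, vt);

    float vp = 1.0f / 120.0f;                     /* Horner on the Taylor tail */
    vp = fmaf(vp, vt, 1.0f / 24.0f);
    vp = fmaf(vp, vt, 1.0f / 6.0f);
    vp = fmaf(vp, vt, 0.5f);
    float vf = fmaf(vt * vt, vp, vt) + 1.0f;      /* e^t */
    vf = ldexpf(vf, (int) vn);                    /* e^x = 2^n * e^t */
    return x < denorm_cutoff ? 0.0f : vf;         /* the vdenorm_cutoff flush */
  }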
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-div-x8.c |
  41 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() local
  51 const float32x4_t vz0123 = vabsq_f32(vx0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
  109 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
  113 const uint32x4_t vm0123 = vcltq_f32(vx0123, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
|
D | neonfma-rr1-p5-nr2recps-x8.c |
  41 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local
  51 const float32x4_t vz0123 = vabsq_f32(vx0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
  121 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
  125 const uint32x4_t vm0123 = vcltq_f32(vx0123, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
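Both sigmoid kernels above share one scheme: take z = |x| (vabsq_f32), evaluate e^-z with the same p5 machinery (rr1: a one-constant ln 2 reduction rather than the hi/lo split), form sigmoid(-z) = e^-z / (1 + e^-z), flush past the |x| cutoff (vcagtq_f32 is an absolute-value compare), and mirror for positive inputs with the x < 0 mask vm0123. The -div kernel divides outright; -nr2recps instead refines a vrecpeq_f32 reciprocal estimate with two vrecpsq_f32 Newton-Raphson steps. A scalar sketch, with libm's expf standing in for the kernels' own exp approximation:

  #include <math.h>

  /* Scalar sketch of the rr1-p5-div sigmoid above. */
  static float sigmoid_sketch(float x) {
    const float vz = fabsf(x);         /* reduce to the non-positive half-line */
    const float ve = expf(-vz);        /* e^-|x|; underflows to 0 for huge |x|,
                                          matching the vcagtq-driven flush */
    const float vf = ve / (ve + 1.0f); /* sigmoid(-|x|); -nr2recps replaces this
                                          division with a refined reciprocal */
    return x < 0.0f ? vf : 1.0f - vf;  /* the vm0123 = (x < 0) select */
  }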