Home
last modified time | relevance | path

Searched refs:vx0123 (Results 1 – 25 of 169) sorted by relevance

1234567

/external/XNNPACK/src/f32-hswish/gen/
Dpsimd-x4.c33 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x4() local
36 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x4()
42 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x4()
48 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x4() local
50 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x4()
53 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x4()
58 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x4() local
59 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x4()
62 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x4()
Dneon-x4.c33 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x4() local
35 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x4()
41 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x4()
46 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x4() local
47 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x4()
50 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x4()
54 const float32x4_t vx0123 = vld1q_f32(x); in xnn_f32_hswish_ukernel__neon_x4() local
55 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x4()
58 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x4()
Dneonfma-x4.c33 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neonfma_x4() local
35 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x4()
41 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x4()
46 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neonfma_x4() local
47 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x4()
50 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x4()
54 const float32x4_t vx0123 = vld1q_f32(x); in xnn_f32_hswish_ukernel__neonfma_x4() local
55 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x4()
58 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x4()
Dsse-x4.c33 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x4() local
36 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x4()
44 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x4()
50 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x4() local
52 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x4()
56 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x4()
61 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x4() local
62 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x4()
66 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x4()
Dneonfma-x8.c33 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neonfma_x8() local
36 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
45 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x8()
52 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neonfma_x8() local
53 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
56 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x8()
60 const float32x4_t vx0123 = vld1q_f32(x); in xnn_f32_hswish_ukernel__neonfma_x8() local
61 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
64 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neonfma_x8()
Dpsimd-x8.c33 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x8() local
37 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
46 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x8()
54 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x8() local
56 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
59 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x8()
64 const psimd_f32 vx0123 = psimd_load_f32(x); in xnn_f32_hswish_ukernel__psimd_x8() local
65 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
68 vacc0123 = psimd_mul_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__psimd_x8()
Dneon-x8.c33 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x8() local
36 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
45 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x8()
52 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_hswish_ukernel__neon_x8() local
53 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
56 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x8()
60 const float32x4_t vx0123 = vld1q_f32(x); in xnn_f32_hswish_ukernel__neon_x8() local
61 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
64 vacc0123 = vmulq_f32(vacc0123, vx0123); in xnn_f32_hswish_ukernel__neon_x8()
Dsse-x8.c33 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x8() local
37 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
49 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x8()
57 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x8() local
59 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
63 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x8()
68 const __m128 vx0123 = _mm_loadu_ps(x); in xnn_f32_hswish_ukernel__sse_x8() local
69 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
73 vacc0123 = _mm_mul_ps(vacc0123, vx0123); in xnn_f32_hswish_ukernel__sse_x8()
/external/XNNPACK/src/f32-hswish/
Dneon.c.in55 const float32x4_t vx0123 = vld1q_f32(x); x += 4;
57 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
59 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
62 vacc0123 = vmulq_f32(vacc0123, vx0123);
66 const float32x4_t vx0123 = vld1q_f32(x);
68 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
70 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
73 vacc0123 = vmulq_f32(vacc0123, vx0123);
Dsse.c.in59 const __m128 vx0123 = _mm_loadu_ps(x); variable
61 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
65 vacc0123 = _mm_mul_ps(vacc0123, vx0123);
70 const __m128 vx0123 = _mm_loadu_ps(x); variable
71 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
75 vacc0123 = _mm_mul_ps(vacc0123, vx0123);
Dpsimd.c.in56 const psimd_f32 vx0123 = psimd_load_f32(x); variable
58 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
61 vacc0123 = psimd_mul_f32(vacc0123, vx0123);
66 const psimd_f32 vx0123 = psimd_load_f32(x); variable
67 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
70 vacc0123 = psimd_mul_f32(vacc0123, vx0123);
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dpsimd-p5-x4.c50 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() local
53 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
64 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
87 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
Dsse2-p5-x4.c50 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() local
53 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
64 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
87 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vx0123, vdenorm_cutoff), vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
Dpsimd-p5-x8.c51 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() local
55 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
69 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
100 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
Dpsimd-p5-x8-acc2.c52 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() local
56 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
70 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
101 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
Dneon-p5-x8-acc2.c51 const float32x4_t vx0123 = vsubq_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() local
60 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
74 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
105 vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcltq_f32(vx0123, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
Dsse2-p5-x8.c51 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() local
55 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
69 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
100 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vx0123, vdenorm_cutoff), vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
Dneonfma-p5-x8-acc2.c50 const float32x4_t vx0123 = vsubq_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() local
59 float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
73 float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
104 vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcltq_f32(vx0123, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
Dneon-p5-x8.c50 const float32x4_t vx0123 = vsubq_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() local
59 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
73 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
104 vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcltq_f32(vx0123, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
Dsse2-p5-x8-acc2.c52 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() local
56 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
70 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
101 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vx0123, vdenorm_cutoff), vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
Dneonfma-p5-x8.c49 const float32x4_t vx0123 = vsubq_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() local
58 float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
72 float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
103 vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcltq_f32(vx0123, vdenorm_cutoff))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
Dpsimd-p5-x12.c52 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local
57 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
74 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
113 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
Dpsimd-p5-x12-acc2.c53 const psimd_f32 vx0123 = psimd_sub_f32(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local
58 psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
75 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
114 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
/external/XNNPACK/src/f32-sigmoid/gen/
Dneonfma-rr1-p5-div-x8.c41 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() local
51 const float32x4_t vz0123 = vabsq_f32(vx0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
109 vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm_cutoff))); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
113 const uint32x4_t vm0123 = vcltq_f32(vx0123, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
Dneonfma-rr1-p5-nr2recps-x8.c41 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local
51 const float32x4_t vz0123 = vabsq_f32(vx0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
121 vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm_cutoff))); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
125 const uint32x4_t vm0123 = vcltq_f32(vx0123, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()

1234567