Home
last modified time | relevance | path

Searched refs:vt0123 (Results 1 – 25 of 169) sorted by relevance

1234567

/external/XNNPACK/src/f32-bilinear/gen/
Dneonfma-c8.c61 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__neonfma_c8() local
66 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8() local
72 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neonfma_c8()
76 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__neonfma_c8()
79 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c8()
96 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__neonfma_c8() local
99 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8() local
103 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neonfma_c8()
106 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__neonfma_c8()
108 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c8()
[all …]
Dneonfma-c4.c55 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__neonfma_c4() local
58 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c4() local
62 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neonfma_c4()
65 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__neonfma_c4()
67 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c4()
82 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__neonfma_c4() local
85 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c4() local
89 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neonfma_c4()
92 float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__neonfma_c4()
94 float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c4()
Dpsimd-c8.c61 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c8() local
66 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c8()
69 const psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c8()
89 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c8() local
92 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c8()
94 const psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c8()
108 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c8() local
111 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c8()
113 psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c8()
Dneon-c8.c56 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8() local
61 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c8()
64 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c8()
79 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8() local
82 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c8()
84 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c8()
98 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8() local
101 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c8()
103 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c8()
Dsse-c8.c63 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c8() local
68 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c8()
71 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c8()
91 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c8() local
94 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c8()
96 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c8()
110 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c8() local
113 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c8()
115 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c8()
Dneon-c4.c50 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c4() local
53 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c4()
55 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c4()
68 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c4() local
71 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c4()
73 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c4()
Dpsimd-c4.c55 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c4() local
58 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c4()
60 const psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c4()
74 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c4() local
77 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c4()
79 psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c4()
Dsse-c4.c57 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c4() local
60 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c4()
62 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c4()
76 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c4() local
79 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c4()
81 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c4()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dpsimd-p5-x4.c64 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() local
66 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
69 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
71 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
73 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
75 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
81 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
83 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
Dsse2-p5-x4.c64 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() local
66 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
69 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
71 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
73 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
75 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
81 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
83 __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
Dpsimd-p5-x8.c69 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() local
72 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
76 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
79 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
82 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
85 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
92 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
95 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
Dpsimd-p5-x8-acc2.c70 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() local
73 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
77 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
80 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
83 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
86 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
93 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
96 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
Dneon-p5-x8-acc2.c74 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() local
77 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
81 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
84 vp0123 = vmlaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
87 vp0123 = vmlaq_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
90 vp0123 = vmlaq_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
97 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
100 float32x4_t vf0123 = vmlaq_f32(vs0123, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
Dsse2-p5-x8.c69 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() local
72 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
76 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
79 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
82 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
85 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
92 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
95 __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
Dneonfma-p5-x8-acc2.c73 float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() local
76 vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
80 float32x4_t vp0123 = vfmaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
83 vp0123 = vfmaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
86 vp0123 = vfmaq_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
89 vp0123 = vfmaq_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
96 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
99 float32x4_t vf0123 = vfmaq_f32(vs0123, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
Dneon-p5-x8.c73 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() local
76 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
80 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
83 vp0123 = vmlaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
86 vp0123 = vmlaq_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
89 vp0123 = vmlaq_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
96 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
99 float32x4_t vf0123 = vmlaq_f32(vs0123, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
Dsse2-p5-x8-acc2.c70 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() local
73 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
77 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
80 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
83 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
86 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
93 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
96 __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
Dneonfma-p5-x8.c72 float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() local
75 vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
79 float32x4_t vp0123 = vfmaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
82 vp0123 = vfmaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
85 vp0123 = vfmaq_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
88 vp0123 = vfmaq_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
95 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
98 float32x4_t vf0123 = vfmaq_f32(vs0123, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
Dpsimd-p5-x12.c74 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local
78 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
83 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
87 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
91 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
95 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
103 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
107 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
Dpsimd-p5-x12-acc2.c75 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local
79 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
84 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
88 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
92 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
96 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
104 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
108 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
Dpsimd-p5-x12-acc3.c76 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() local
80 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
85 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
89 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
93 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
97 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
105 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
109 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
/external/XNNPACK/src/f32-sigmoid/gen/
Dpsimd-p5-div-x8.c77 psimd_f32 vt0123 = psimd_qfma_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() local
80 vt0123 = psimd_qfma_f32(vt0123, vn0123, vln2_lo); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
85 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vt0123, vc5); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
88 vp0123 = psimd_qfma_f32(vc3, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
91 vp0123 = psimd_qfma_f32(vc2, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
94 vp0123 = psimd_qfma_f32(vc1, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
101 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
104 const psimd_f32 ve0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
Dsse41-p5-div-x8.c77 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() local
80 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
84 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
87 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
90 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
93 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
100 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
103 __m128 ve0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
Dpsimd-p5-div-x12.c82 psimd_f32 vt0123 = psimd_qfma_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local
86 vt0123 = psimd_qfma_f32(vt0123, vn0123, vln2_lo); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
92 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vt0123, vc5); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
96 vp0123 = psimd_qfma_f32(vc3, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
100 vp0123 = psimd_qfma_f32(vc2, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
104 vp0123 = psimd_qfma_f32(vc1, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
112 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
116 const psimd_f32 ve0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
/external/XNNPACK/src/f32-bilinear/
Dneon.c.in103 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah);
106 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0);
110 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0);
113 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123);
117 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav);
119 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1);
122 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1);
138 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah);
141 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0);
145 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0);
[all …]

1234567