/external/XNNPACK/src/f32-bilinear/gen/ |
D | neonfma-c8.c | 61 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__neonfma_c8() local 66 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8() local 72 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neonfma_c8() 76 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__neonfma_c8() 79 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c8() 96 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__neonfma_c8() local 99 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8() local 103 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neonfma_c8() 106 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__neonfma_c8() 108 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c8() [all …]
|
D | neonfma-c4.c | 55 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__neonfma_c4() local 58 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c4() local 62 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neonfma_c4() 65 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__neonfma_c4() 67 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c4() 82 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__neonfma_c4() local 85 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c4() local 89 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neonfma_c4() 92 float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__neonfma_c4() 94 float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c4()
|
D | psimd-c8.c | 61 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c8() local 66 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c8() 69 const psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c8() 89 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c8() local 92 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c8() 94 const psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c8() 108 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c8() local 111 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c8() 113 psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c8()
|
D | neon-c8.c | 56 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8() local 61 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c8() 64 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c8() 79 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8() local 82 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c8() 84 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c8() 98 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8() local 101 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c8() 103 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c8()
|
D | sse-c8.c | 63 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c8() local 68 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c8() 71 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c8() 91 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c8() local 94 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c8() 96 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c8() 110 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c8() local 113 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c8() 115 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c8()
|
D | neon-c4.c | 50 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c4() local 53 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c4() 55 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c4() 68 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c4() local 71 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__neon_c4() 73 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c4()
|
D | psimd-c4.c | 55 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c4() local 58 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c4() 60 const psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c4() 74 const psimd_f32 vt0123 = psimd_qfma_f32(vtl0123, vtd0123, valphah); in xnn_f32_bilinear_ukernel__psimd_c4() local 77 const psimd_f32 vd0123 = psimd_sub_f32(vb0123, vt0123); in xnn_f32_bilinear_ukernel__psimd_c4() 79 psimd_f32 vo0123 = psimd_qfma_f32(vt0123, vd0123, valphav); in xnn_f32_bilinear_ukernel__psimd_c4()
|
D | sse-c4.c | 57 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c4() local 60 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c4() 62 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c4() 76 const __m128 vt0123 = _mm_add_ps(vtl0123, _mm_mul_ps(vtd0123, valphah)); in xnn_f32_bilinear_ukernel__sse_c4() local 79 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c4() 81 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_bilinear_ukernel__sse_c4()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x4.c | 64 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() local 66 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() 69 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() 71 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() 73 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() 75 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() 81 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() 83 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
|
D | sse2-p5-x4.c | 64 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() local 66 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 69 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 71 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 73 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 75 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 81 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 83 __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
|
D | psimd-p5-x8.c | 69 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() local 72 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 76 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 79 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 82 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 85 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 92 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 95 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
|
D | psimd-p5-x8-acc2.c | 70 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() local 73 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 77 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 80 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 83 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 86 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 93 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 96 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
|
D | neon-p5-x8-acc2.c | 74 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() local 77 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 81 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 84 vp0123 = vmlaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 87 vp0123 = vmlaq_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 90 vp0123 = vmlaq_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 97 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 100 float32x4_t vf0123 = vmlaq_f32(vs0123, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
|
D | sse2-p5-x8.c | 69 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() local 72 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 76 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 79 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 82 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 85 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 92 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 95 __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
|
D | neonfma-p5-x8-acc2.c | 73 float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() local 76 vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 80 float32x4_t vp0123 = vfmaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 83 vp0123 = vfmaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 86 vp0123 = vfmaq_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 89 vp0123 = vfmaq_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 96 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 99 float32x4_t vf0123 = vfmaq_f32(vs0123, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
|
D | neon-p5-x8.c | 73 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() local 76 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 80 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 83 vp0123 = vmlaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 86 vp0123 = vmlaq_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 89 vp0123 = vmlaq_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 96 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 99 float32x4_t vf0123 = vmlaq_f32(vs0123, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
|
D | sse2-p5-x8-acc2.c | 70 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() local 73 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 77 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 80 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 83 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 86 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 93 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 96 __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
|
D | neonfma-p5-x8.c | 72 float32x4_t vt0123 = vfmaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() local 75 vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 79 float32x4_t vp0123 = vfmaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 82 vp0123 = vfmaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 85 vp0123 = vfmaq_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 88 vp0123 = vfmaq_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 95 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 98 float32x4_t vf0123 = vfmaq_f32(vs0123, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
|
D | psimd-p5-x12.c | 74 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local 78 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 83 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 87 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 91 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 95 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 103 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 107 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
|
D | psimd-p5-x12-acc2.c | 75 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local 79 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 84 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 88 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 92 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 96 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 104 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 108 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
|
D | psimd-p5-x12-acc3.c | 76 psimd_f32 vt0123 = psimd_qfma_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() local 80 vt0123 = psimd_qfma_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 85 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 89 vp0123 = psimd_qfma_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 93 vp0123 = psimd_qfma_f32(vc2, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 97 vp0123 = psimd_qfma_f32(vc1, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 105 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 109 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | psimd-p5-div-x8.c | 77 psimd_f32 vt0123 = psimd_qfma_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() local 80 vt0123 = psimd_qfma_f32(vt0123, vn0123, vln2_lo); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 85 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vt0123, vc5); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 88 vp0123 = psimd_qfma_f32(vc3, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 91 vp0123 = psimd_qfma_f32(vc2, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 94 vp0123 = psimd_qfma_f32(vc1, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 101 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 104 const psimd_f32 ve0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
|
D | sse41-p5-div-x8.c | 77 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() local 80 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 84 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 87 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 90 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 93 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 100 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 103 __m128 ve0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
|
D | psimd-p5-div-x12.c | 82 psimd_f32 vt0123 = psimd_qfma_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local 86 vt0123 = psimd_qfma_f32(vt0123, vn0123, vln2_lo); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 92 psimd_f32 vp0123 = psimd_qfma_f32(vc4, vt0123, vc5); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 96 vp0123 = psimd_qfma_f32(vc3, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 100 vp0123 = psimd_qfma_f32(vc2, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 104 vp0123 = psimd_qfma_f32(vc1, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 112 vt0123 = psimd_mul_f32(vt0123, vs0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 116 const psimd_f32 ve0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
|
/external/XNNPACK/src/f32-bilinear/ |
D | neon.c.in | 103 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); 106 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); 110 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); 113 const float32x4_t vd0123 = vsubq_f32(vb0123, vt0123); 117 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); 119 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); 122 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); 138 const float32x4_t vt0123 = vfmaq_f32(vtl0123, vtd0123, valphah); 141 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); 145 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); [all …]
|