/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x8.c | 70 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() local 73 vt4567 = psimd_qfma_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 77 psimd_f32 vp4567 = psimd_qfma_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 80 vp4567 = psimd_qfma_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 83 vp4567 = psimd_qfma_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 86 vp4567 = psimd_qfma_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 93 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 96 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
|
D | psimd-p5-x8-acc2.c | 71 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() local 74 vt4567 = psimd_qfma_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 78 psimd_f32 vp4567 = psimd_qfma_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 81 vp4567 = psimd_qfma_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 84 vp4567 = psimd_qfma_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 87 vp4567 = psimd_qfma_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 94 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 97 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
|
D | neon-p5-x8-acc2.c | 75 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() local 78 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 82 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 85 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 88 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 91 vp4567 = vmlaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 98 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 101 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
|
D | sse2-p5-x8.c | 70 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() local 73 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 77 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 80 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 83 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 86 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 93 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 96 __m128 vf4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
|
D | neonfma-p5-x8-acc2.c | 74 float32x4_t vt4567 = vfmaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() local 77 vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 81 float32x4_t vp4567 = vfmaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 84 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 87 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 90 vp4567 = vfmaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 97 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 100 float32x4_t vf4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
|
D | neon-p5-x8.c | 74 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() local 77 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 81 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 84 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 87 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 90 vp4567 = vmlaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 97 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 100 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
|
D | sse2-p5-x8-acc2.c | 71 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() local 74 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 78 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 81 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 84 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 87 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 94 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 97 __m128 vf4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
|
D | neonfma-p5-x8.c | 73 float32x4_t vt4567 = vfmaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() local 76 vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 80 float32x4_t vp4567 = vfmaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 83 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 86 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 89 vp4567 = vfmaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 96 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 99 float32x4_t vf4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
|
D | psimd-p5-x12.c | 75 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local 79 vt4567 = psimd_qfma_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 84 psimd_f32 vp4567 = psimd_qfma_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 88 vp4567 = psimd_qfma_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 92 vp4567 = psimd_qfma_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 96 vp4567 = psimd_qfma_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 104 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 108 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
|
D | psimd-p5-x12-acc2.c | 76 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local 80 vt4567 = psimd_qfma_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 85 psimd_f32 vp4567 = psimd_qfma_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 89 vp4567 = psimd_qfma_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 93 vp4567 = psimd_qfma_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 97 vp4567 = psimd_qfma_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 105 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 109 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
|
D | psimd-p5-x12-acc3.c | 77 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() local 81 vt4567 = psimd_qfma_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 86 psimd_f32 vp4567 = psimd_qfma_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 90 vp4567 = psimd_qfma_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 94 vp4567 = psimd_qfma_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 98 vp4567 = psimd_qfma_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 106 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 110 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
|
D | sse2-p5-x12.c | 75 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() local 79 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 84 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 92 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 96 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 104 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 108 __m128 vf4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
|
D | neon-p5-x12.c | 79 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() local 83 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 88 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 92 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 96 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 100 vp4567 = vmlaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 108 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 112 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
|
D | neonfma-p5-x12.c | 78 float32x4_t vt4567 = vfmaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() local 82 vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 87 float32x4_t vp4567 = vfmaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 91 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 95 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 99 vp4567 = vfmaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 107 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 111 float32x4_t vf4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12()
|
D | neonfma-p5-x12-acc2.c | 79 float32x4_t vt4567 = vfmaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() local 83 vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 88 float32x4_t vp4567 = vfmaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 92 vp4567 = vfmaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 96 vp4567 = vfmaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 100 vp4567 = vfmaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 108 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 112 float32x4_t vf4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2()
|
D | neon-p5-x12-acc2.c | 80 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() local 84 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 89 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 93 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 97 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 101 vp4567 = vmlaq_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 109 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 113 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
|
D | psimd-p5-x16-acc2.c | 81 psimd_f32 vt4567 = psimd_qfma_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() local 86 vt4567 = psimd_qfma_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 92 psimd_f32 vp4567 = psimd_qfma_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 97 vp4567 = psimd_qfma_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 102 vp4567 = psimd_qfma_f32(vc2, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 107 vp4567 = psimd_qfma_f32(vc1, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 116 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 121 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2()
|
D | sse2-p5-x12-acc3.c | 77 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() local 81 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 86 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 90 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 94 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 98 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 106 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 110 __m128 vf4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | psimd-p5-div-x8.c | 78 psimd_f32 vt4567 = psimd_qfma_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() local 81 vt4567 = psimd_qfma_f32(vt4567, vn4567, vln2_lo); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 86 psimd_f32 vp4567 = psimd_qfma_f32(vc4, vt4567, vc5); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 89 vp4567 = psimd_qfma_f32(vc3, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 92 vp4567 = psimd_qfma_f32(vc2, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 95 vp4567 = psimd_qfma_f32(vc1, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 102 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 105 const psimd_f32 ve4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
|
D | sse41-p5-div-x8.c | 78 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() local 81 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 85 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 91 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 94 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc1); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 101 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 104 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
|
D | psimd-p5-div-x12.c | 83 psimd_f32 vt4567 = psimd_qfma_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local 87 vt4567 = psimd_qfma_f32(vt4567, vn4567, vln2_lo); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 93 psimd_f32 vp4567 = psimd_qfma_f32(vc4, vt4567, vc5); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 97 vp4567 = psimd_qfma_f32(vc3, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 101 vp4567 = psimd_qfma_f32(vc2, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 105 vp4567 = psimd_qfma_f32(vc1, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 113 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 117 const psimd_f32 ve4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
|
D | sse2-p5-div-x8.c | 78 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() local 81 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 85 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 91 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 94 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc1); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 101 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 104 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
|
D | neon-rr2-p5-nr2recps-x8.c | 77 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() local 80 vt4567 = vmlaq_f32(vt4567, vn4567, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 84 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 87 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 90 vp4567 = vmlaq_f32(vc2, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 93 vp4567 = vmlaq_f32(vc1, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 100 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 103 float32x4_t ve4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
|
D | psimd-p5-div-x16.c | 88 psimd_f32 vt4567 = psimd_qfma_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() local 93 vt4567 = psimd_qfma_f32(vt4567, vn4567, vln2_lo); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 100 psimd_f32 vp4567 = psimd_qfma_f32(vc4, vt4567, vc5); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 105 vp4567 = psimd_qfma_f32(vc3, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 110 vp4567 = psimd_qfma_f32(vc2, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 115 vp4567 = psimd_qfma_f32(vc1, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 124 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 129 const psimd_f32 ve4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
|
D | sse41-p5-div-x12.c | 83 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local 87 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 92 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 96 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 100 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc2); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 104 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc1); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 112 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 116 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|