/external/XNNPACK/src/f32-sigmoid/gen/ |
D | psimd-p5-div-x12.c | in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12():
       84  psimd_f32 vt89AB = psimd_qfma_f32(vz89AB, vn89AB, vln2_hi);
       88  vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vln2_lo);
       94  psimd_f32 vp89AB = psimd_qfma_f32(vc4, vt89AB, vc5);
       98  vp89AB = psimd_qfma_f32(vc3, vt89AB, vp89AB);
      102  vp89AB = psimd_qfma_f32(vc2, vt89AB, vp89AB);
      106  vp89AB = psimd_qfma_f32(vc1, vt89AB, vp89AB);
      114  vt89AB = psimd_mul_f32(vt89AB, vs89AB);
      118  const psimd_f32 ve89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB);
|
D | psimd-p5-div-x16.c | in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16():
       89  psimd_f32 vt89AB = psimd_qfma_f32(vz89AB, vn89AB, vln2_hi);
       94  vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vln2_lo);
      101  psimd_f32 vp89AB = psimd_qfma_f32(vc4, vt89AB, vc5);
      106  vp89AB = psimd_qfma_f32(vc3, vt89AB, vp89AB);
      111  vp89AB = psimd_qfma_f32(vc2, vt89AB, vp89AB);
      116  vp89AB = psimd_qfma_f32(vc1, vt89AB, vp89AB);
      125  vt89AB = psimd_mul_f32(vt89AB, vs89AB);
      130  const psimd_f32 ve89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB);
|
D | sse41-p5-div-x12.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12():
       84  __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB);
       88  vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB);
       93  __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc5, vt89AB), vc4);
       97  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3);
      101  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2);
      105  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc1);
      113  vt89AB = _mm_mul_ps(vt89AB, vs89AB);
      117  __m128 ve89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);
|
D | neon-rr2-p5-nr2recps-x12.c | in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12():
       83  float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi);
       87  vt89AB = vmlaq_f32(vt89AB, vn89AB, vln2_lo);
       92  float32x4_t vp89AB = vmlaq_f32(vc4, vc5, vt89AB);
       96  vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB);
      100  vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB);
      104  vp89AB = vmlaq_f32(vc1, vp89AB, vt89AB);
      112  vt89AB = vmulq_f32(vt89AB, vs89AB);
      116  float32x4_t ve89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB);
|
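All four f32-sigmoid kernels above follow one recipe, unrolled across register groups (0123, 4567, 89AB, and CDEF in the x16 variants): the argument is range-reduced against ln 2 in two steps (a _hi constant plus a _lo correction), a degree-5 polynomial in the reduced argument t is evaluated by Horner's rule with coefficients vc5 down to vc1, exp is reconstructed as s + (t*s)*p, and the result feeds the sigmoid quotient e / (e + 1) (a true division in the p5-div kernels, a Newton-Raphson reciprocal in the nr2recps kernel). The scalar C sketch below only illustrates that pattern; it is not the XNNPACK code. The helper name scalar_sigmoid_p5 is made up, the coefficients are plain Taylor values rather than XNNPACK's tuned minimax constants, and ldexpf stands in for the kernels' exponent-bit construction of s = 2**n.

#include <math.h>

/* Scalar sketch of the shared p5 sigmoid pattern (illustrative only). */
static float scalar_sigmoid_p5(float x) {
  const float log2e  = 0x1.715476p+0f;   /* log2(e) */
  const float ln2_hi = 0x1.62E400p-1f;   /* high part of ln(2) */
  const float ln2_lo = 0x1.7F7D1Cp-20f;  /* low part of ln(2) */
  /* Taylor coefficients of exp; the real kernels use tuned minimax constants. */
  const float c5 = 1.0f / 120.0f, c4 = 1.0f / 24.0f, c3 = 1.0f / 6.0f;
  const float c2 = 0.5f, c1 = 1.0f;

  const float z = -fabsf(x);              /* exp(z) cannot overflow for z <= 0 */
  const float n = rintf(z * log2e);       /* split z ~= n*ln2 + t */
  const float s = ldexpf(1.0f, (int) n);  /* s = 2**n */

  /* Two-step range reduction: the vt = fma(..., ln2_hi / ln2_lo) lines above. */
  float t = n * -ln2_hi + z;
  t = n * -ln2_lo + t;

  /* Degree-5 polynomial by Horner's rule: the vc5..vc1 lines above. */
  float p = c5 * t + c4;
  p = p * t + c3;
  p = p * t + c2;
  p = p * t + c1;

  /* Reconstruction: e ~= exp(z) = s + (t*s)*p, i.e. vt*vs and the final fma. */
  t *= s;
  const float e = t * p + s;

  /* Sigmoid: e/(e+1) for x <= 0, 1 - e/(e+1) for x > 0. The nr2recps kernel
     replaces this division with two Newton-Raphson reciprocal refinements. */
  const float f = e / (e + 1.0f);
  return signbit(x) ? f : 1.0f - f;
}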
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12():
       76  psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi);
       80  vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo);
       85  psimd_f32 vp89AB = psimd_qfma_f32(vc4, vc5, vt89AB);
       89  vp89AB = psimd_qfma_f32(vc3, vp89AB, vt89AB);
       93  vp89AB = psimd_qfma_f32(vc2, vp89AB, vt89AB);
       97  vp89AB = psimd_qfma_f32(vc1, vp89AB, vt89AB);
      105  vt89AB = psimd_mul_f32(vt89AB, vs89AB);
      109  psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB);
|
D | psimd-p5-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2():
       77  psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi);
       81  vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo);
       86  psimd_f32 vp89AB = psimd_qfma_f32(vc4, vc5, vt89AB);
       90  vp89AB = psimd_qfma_f32(vc3, vp89AB, vt89AB);
       94  vp89AB = psimd_qfma_f32(vc2, vp89AB, vt89AB);
       98  vp89AB = psimd_qfma_f32(vc1, vp89AB, vt89AB);
      106  vt89AB = psimd_mul_f32(vt89AB, vs89AB);
      110  psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB);
|
D | psimd-p5-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3():
       78  psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi);
       82  vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo);
       87  psimd_f32 vp89AB = psimd_qfma_f32(vc4, vc5, vt89AB);
       91  vp89AB = psimd_qfma_f32(vc3, vp89AB, vt89AB);
       95  vp89AB = psimd_qfma_f32(vc2, vp89AB, vt89AB);
       99  vp89AB = psimd_qfma_f32(vc1, vp89AB, vt89AB);
      107  vt89AB = psimd_mul_f32(vt89AB, vs89AB);
      111  psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB);
|
D | sse2-p5-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12():
       76  __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB);
       80  vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB);
       85  __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc5, vt89AB), vc4);
       89  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3);
       93  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2);
       97  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc1);
      105  vt89AB = _mm_mul_ps(vt89AB, vs89AB);
      109  __m128 vf89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);
|
D | neon-p5-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12():
       80  float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       84  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       89  float32x4_t vp89AB = vmlaq_f32(vc4, vc5, vt89AB);
       93  vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB);
       97  vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB);
      101  vp89AB = vmlaq_f32(vc1, vp89AB, vt89AB);
      109  vt89AB = vmulq_f32(vt89AB, vs89AB);
      113  float32x4_t vf89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB);
|
D | neonfma-p5-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12():
       79  float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       83  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       88  float32x4_t vp89AB = vfmaq_f32(vc4, vc5, vt89AB);
       92  vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB);
       96  vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB);
      100  vp89AB = vfmaq_f32(vc1, vp89AB, vt89AB);
      108  vt89AB = vmulq_f32(vt89AB, vs89AB);
      112  float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);
|
D | neonfma-p5-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2():
       80  float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       84  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       89  float32x4_t vp89AB = vfmaq_f32(vc4, vc5, vt89AB);
       93  vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB);
       97  vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB);
      101  vp89AB = vfmaq_f32(vc1, vp89AB, vt89AB);
      109  vt89AB = vmulq_f32(vt89AB, vs89AB);
      113  float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);
|
D | neon-p5-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2():
       81  float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       85  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       90  float32x4_t vp89AB = vmlaq_f32(vc4, vc5, vt89AB);
       94  vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB);
       98  vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB);
      102  vp89AB = vmlaq_f32(vc1, vp89AB, vt89AB);
      110  vt89AB = vmulq_f32(vt89AB, vs89AB);
      114  float32x4_t vf89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB);
|
D | psimd-p5-x16-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2():
       82  psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi);
       87  vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo);
       93  psimd_f32 vp89AB = psimd_qfma_f32(vc4, vc5, vt89AB);
       98  vp89AB = psimd_qfma_f32(vc3, vp89AB, vt89AB);
      103  vp89AB = psimd_qfma_f32(vc2, vp89AB, vt89AB);
      108  vp89AB = psimd_qfma_f32(vc1, vp89AB, vt89AB);
      117  vt89AB = psimd_mul_f32(vt89AB, vs89AB);
      122  psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB);
|
D | sse2-p5-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3():
       78  __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB);
       82  vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB);
       87  __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc5, vt89AB), vc4);
       91  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3);
       95  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2);
       99  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc1);
      107  vt89AB = _mm_mul_ps(vt89AB, vs89AB);
      111  __m128 vf89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);
|
D | psimd-p5-x16.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16():
       81  psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi);
       86  vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo);
       92  psimd_f32 vp89AB = psimd_qfma_f32(vc4, vc5, vt89AB);
       97  vp89AB = psimd_qfma_f32(vc3, vp89AB, vt89AB);
      102  vp89AB = psimd_qfma_f32(vc2, vp89AB, vt89AB);
      107  vp89AB = psimd_qfma_f32(vc1, vp89AB, vt89AB);
      116  vt89AB = psimd_mul_f32(vt89AB, vs89AB);
      121  psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB);
|
D | psimd-p5-x16-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4():
       84  psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi);
       89  vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo);
       95  psimd_f32 vp89AB = psimd_qfma_f32(vc4, vc5, vt89AB);
      100  vp89AB = psimd_qfma_f32(vc3, vp89AB, vt89AB);
      105  vp89AB = psimd_qfma_f32(vc2, vp89AB, vt89AB);
      110  vp89AB = psimd_qfma_f32(vc1, vp89AB, vt89AB);
      119  vt89AB = psimd_mul_f32(vt89AB, vs89AB);
      124  psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB);
|
D | neonfma-p5-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3():
       81  float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       85  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       90  float32x4_t vp89AB = vfmaq_f32(vc4, vc5, vt89AB);
       94  vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB);
       98  vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB);
      102  vp89AB = vfmaq_f32(vc1, vp89AB, vt89AB);
      110  vt89AB = vmulq_f32(vt89AB, vs89AB);
      114  float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);
|
D | neon-p5-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3():
       82  float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       86  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       91  float32x4_t vp89AB = vmlaq_f32(vc4, vc5, vt89AB);
       95  vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB);
       99  vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB);
      103  vp89AB = vmlaq_f32(vc1, vp89AB, vt89AB);
      111  vt89AB = vmulq_f32(vt89AB, vs89AB);
      115  float32x4_t vf89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB);
|
D | sse2-p5-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2():
       77  __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB);
       81  vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB);
       86  __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc5, vt89AB), vc4);
       90  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3);
       94  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2);
       98  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc1);
      106  vt89AB = _mm_mul_ps(vt89AB, vs89AB);
      110  __m128 vf89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);
|
D | neonfma-p5-x16-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4():
       87  float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       92  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       98  float32x4_t vp89AB = vfmaq_f32(vc4, vc5, vt89AB);
      103  vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB);
      108  vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB);
      113  vp89AB = vfmaq_f32(vc1, vp89AB, vt89AB);
      122  vt89AB = vmulq_f32(vt89AB, vs89AB);
      127  float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);
|
D | psimd-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2():
       87  psimd_f32 vt89AB = psimd_qfma_f32(vx89AB, vn89AB, vminus_ln2_hi);
       93  vt89AB = psimd_qfma_f32(vt89AB, vn89AB, vminus_ln2_lo);
      100  psimd_f32 vp89AB = psimd_qfma_f32(vc4, vc5, vt89AB);
      106  vp89AB = psimd_qfma_f32(vc3, vp89AB, vt89AB);
      112  vp89AB = psimd_qfma_f32(vc2, vp89AB, vt89AB);
      118  vp89AB = psimd_qfma_f32(vc1, vp89AB, vt89AB);
      128  vt89AB = psimd_mul_f32(vt89AB, vs89AB);
      134  psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB);
|
D | neon-p5-x16-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2():
       86  float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       91  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       97  float32x4_t vp89AB = vmlaq_f32(vc4, vc5, vt89AB);
      102  vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB);
      107  vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB);
      112  vp89AB = vmlaq_f32(vc1, vp89AB, vt89AB);
      121  vt89AB = vmulq_f32(vt89AB, vs89AB);
      126  float32x4_t vf89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB);
|
D | sse2-p5-x16.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16():
       81  __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB);
       86  vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB);
       92  __m128 vp89AB = _mm_add_ps(_mm_mul_ps(vc5, vt89AB), vc4);
       97  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc3);
      102  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc2);
      107  vp89AB = _mm_add_ps(_mm_mul_ps(vp89AB, vt89AB), vc1);
      116  vt89AB = _mm_mul_ps(vt89AB, vs89AB);
      121  __m128 vf89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);
|
D | neon-p5-x16.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16():
       85  float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       90  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       96  float32x4_t vp89AB = vmlaq_f32(vc4, vc5, vt89AB);
      101  vp89AB = vmlaq_f32(vc3, vp89AB, vt89AB);
      106  vp89AB = vmlaq_f32(vc2, vp89AB, vt89AB);
      111  vp89AB = vmlaq_f32(vc1, vp89AB, vt89AB);
      120  vt89AB = vmulq_f32(vt89AB, vs89AB);
      125  float32x4_t vf89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB);
|
D | neonfma-p5-x16.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16():
       84  float32x4_t vt89AB = vfmaq_f32(vx89AB, vn89AB, vminus_ln2_hi);
       89  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_lo);
       95  float32x4_t vp89AB = vfmaq_f32(vc4, vc5, vt89AB);
      100  vp89AB = vfmaq_f32(vc3, vp89AB, vt89AB);
      105  vp89AB = vfmaq_f32(vc2, vp89AB, vt89AB);
      110  vp89AB = vfmaq_f32(vc1, vp89AB, vt89AB);
      119  vt89AB = vmulq_f32(vt89AB, vs89AB);
      124  float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);
|
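The f32-raddstoreexpminusmax kernels listed above all compute the same thing: for every element they evaluate exp(x[i] - max) with the p5 approximation excerpted above, store the result, and accumulate the sum of all outputs (the building block of a softmax denominator). The x12/x16/x20 suffix is the number of elements processed per main-loop iteration, and accN means N independent partial sums are kept and combined at the end. The scalar reference sketch below is only an illustration of those semantics, not the XNNPACK code; the function name raddstoreexpminusmax_ref is made up, and libm expf stands in for the vectorized p5 polynomial.

#include <math.h>
#include <stddef.h>

/* Scalar reference: out[i] = exp(in[i] - max), plus the sum of all outputs.
   Two partial sums mirror the _acc2 variants. */
static void raddstoreexpminusmax_ref(size_t n, const float* in, float max_val,
                                     float* out, float* sum) {
  float acc0 = 0.0f, acc1 = 0.0f;
  size_t i = 0;
  for (; i + 2 <= n; i += 2) {      /* main loop, 2 elements per iteration */
    const float f0 = expf(in[i + 0] - max_val);
    const float f1 = expf(in[i + 1] - max_val);
    out[i + 0] = f0;
    out[i + 1] = f1;
    acc0 += f0;                     /* each element group feeds its own accumulator */
    acc1 += f1;
  }
  for (; i < n; i++) {              /* remainder loop */
    const float f = expf(in[i] - max_val);
    out[i] = f;
    acc0 += f;
  }
  *sum = acc0 + acc1;               /* partial sums combined once at the end */
}

Multiple accumulators exist to break the dependency chain on the running sum so that additions from different loop iterations can overlap; the vector kernels do the same with N register accumulators before a final reduction.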