/external/XNNPACK/src/f32-sigmoid/gen/ |
D | psimd-p5-div-x12.c | 73 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local 114 vt89AB = psimd_mul_f32(vt89AB, vs89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 118 const psimd_f32 ve89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
|
D | neonfma-rr1-p5-div-x12.c | 70 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 105 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 109 float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | psimd-p5-div-x16.c | 76 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() local 125 vt89AB = psimd_mul_f32(vt89AB, vs89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 130 const psimd_f32 ve89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
|
D | sse41-p5-div-x12.c | 73 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local 113 vt89AB = _mm_mul_ps(vt89AB, vs89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 117 __m128 ve89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
D | neonfma-rr1-p5-div-x16.c | 73 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local 115 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 120 float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 70 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 105 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 109 float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 70 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 105 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 109 float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x12.c | 72 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 112 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 116 float32x4_t ve89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x12.c | 65 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local 105 vt89AB = psimd_mul_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 109 psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
|
D | psimd-p5-x12-acc2.c | 66 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local 106 vt89AB = psimd_mul_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 110 psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
|
D | psimd-p5-x12-acc3.c | 67 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() local 107 vt89AB = psimd_mul_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 111 psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
|
D | sse2-p5-x12.c | 65 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() local 105 vt89AB = _mm_mul_ps(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 109 __m128 vf89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
|
D | neon-p5-x12.c | 69 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() local 109 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 113 float32x4_t vf89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
|
D | neonfma-p5-x12.c | 68 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() local 108 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 112 float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12()
|
D | neonfma-p5-x12-acc2.c | 69 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() local 109 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 113 float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2()
|
D | neon-p5-x12-acc2.c | 70 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() local 110 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 114 float32x4_t vf89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
|
D | psimd-p5-x16-acc2.c | 69 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() local 117 vt89AB = psimd_mul_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2() 122 psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2()
|
D | sse2-p5-x12-acc3.c | 67 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() local 107 vt89AB = _mm_mul_ps(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 111 __m128 vf89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
|
D | psimd-p5-x16.c | 68 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() local 116 vt89AB = psimd_mul_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16() 121 psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16()
|
D | psimd-p5-x16-acc4.c | 71 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() local 119 vt89AB = psimd_mul_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4() 124 psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc4()
|
D | neonfma-p5-x12-acc3.c | 70 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() local 110 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() 114 float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3()
|
D | neon-p5-x12-acc3.c | 71 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() local 111 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 115 float32x4_t vf89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
|
D | sse2-p5-x12-acc2.c | 66 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() local 106 vt89AB = _mm_mul_ps(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 110 __m128 vf89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
|
D | neonfma-p5-x16-acc4.c | 74 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() local 122 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() 127 float32x4_t vf89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()
|
D | psimd-p5-x20-acc2.c | 72 const psimd_f32 vs89AB = (psimd_f32) ((psimd_u32) vn89AB << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2() local 128 vt89AB = psimd_mul_f32(vt89AB, vs89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2() 134 psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2()
|