/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x8.c | 61 const psimd_f32 vs4567 = (psimd_f32) ((psimd_u32) vn4567 << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() local 93 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8() 96 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8()
|
D | psimd-p5-x8-acc2.c | 62 const psimd_f32 vs4567 = (psimd_f32) ((psimd_u32) vn4567 << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() local 94 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2() 97 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x8_acc2()
|
D | neon-p5-x8-acc2.c | 66 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() local 98 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 101 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
|
D | sse2-p5-x8.c | 61 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() local 93 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 96 __m128 vf4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
|
D | neonfma-p5-x8-acc2.c | 65 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() local 97 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 100 float32x4_t vf4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
|
D | neon-p5-x8.c | 65 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() local 97 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 100 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
|
D | sse2-p5-x8-acc2.c | 62 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() local 94 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 97 __m128 vf4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
|
D | neonfma-p5-x8.c | 64 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() local 96 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 99 float32x4_t vf4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
|
D | psimd-p5-x12.c | 64 const psimd_f32 vs4567 = (psimd_f32) ((psimd_u32) vn4567 << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local 104 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 108 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
|
D | psimd-p5-x12-acc2.c | 65 const psimd_f32 vs4567 = (psimd_f32) ((psimd_u32) vn4567 << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() local 105 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2() 109 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc2()
|
D | psimd-p5-x12-acc3.c | 66 const psimd_f32 vs4567 = (psimd_f32) ((psimd_u32) vn4567 << 23); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() local 106 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3() 110 psimd_f32 vf4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12_acc3()
|
D | sse2-p5-x12.c | 64 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() local 104 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 108 __m128 vf4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
|
D | neon-p5-x12.c | 68 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() local 108 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 112 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
|
D | neonfma-p5-x12.c | 67 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() local 107 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 111 float32x4_t vf4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12()
|
D | neonfma-p5-x12-acc2.c | 68 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() local 108 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 112 float32x4_t vf4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2()
|
D | neon-p5-x12-acc2.c | 69 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() local 109 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 113 float32x4_t vf4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | psimd-p5-div-x8.c | 69 const psimd_f32 vs4567 = (psimd_f32) ((psimd_u32) vn4567 << 23); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() local 102 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 105 const psimd_f32 ve4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
|
D | neonfma-rr1-p5-div-x8.c | 66 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() local 94 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 97 float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
|
D | sse41-p5-div-x8.c | 69 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() local 101 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 104 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
|
D | psimd-p5-div-x12.c | 72 const psimd_f32 vs4567 = (psimd_f32) ((psimd_u32) vn4567 << 23); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local 113 vt4567 = psimd_mul_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 117 const psimd_f32 ve4567 = psimd_qfma_f32(vs4567, vt4567, vp4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
|
D | neonfma-rr1-p5-nr2recps-x8.c | 66 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local 94 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 97 float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
D | sse2-p5-div-x8.c | 69 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() local 101 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 104 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
|
D | neonfma-rr1-p5-nr1recps1fma-x8.c | 66 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() local 94 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 97 float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
|
D | neonfma-rr1-p5-div-x12.c | 69 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 104 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 108 float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | neonfma-rr1-p5-nr2fma-x8.c | 66 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() local 94 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 97 float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
|