/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc2():
     66  psimd_f32 vnGHIJ = psimd_qfma_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
     74  const psimd_f32 vsGHIJ = (psimd_f32) ((psimd_u32) vnGHIJ << 23);
     81  vnGHIJ = psimd_sub_f32(vnGHIJ, vmagic_bias);
     89  psimd_f32 vtGHIJ = psimd_qfma_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     95  vtGHIJ = psimd_qfma_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
|
D | psimd-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20():
     65  psimd_f32 vnGHIJ = psimd_qfma_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
     73  const psimd_f32 vsGHIJ = (psimd_f32) ((psimd_u32) vnGHIJ << 23);
     80  vnGHIJ = psimd_sub_f32(vnGHIJ, vmagic_bias);
     88  psimd_f32 vtGHIJ = psimd_qfma_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     94  vtGHIJ = psimd_qfma_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
|
D | neonfma-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2():
     69  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
     77  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
     84  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     92  float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     98  vtGHIJ = vfmaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
|
D | sse2-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20():
     65  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias);  (local)
     73  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
     80  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
     88  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ);
     94  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
|
D | neonfma-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5():
     72  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
     80  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
     87  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     95  float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
    101  vtGHIJ = vfmaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
|
D | neon-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5():
     73  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
     81  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
     88  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     96  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
    102  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
|
D | neonfma-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20():
     68  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
     76  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
     83  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     91  float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     97  vtGHIJ = vfmaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
|
D | psimd-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x20_acc5():
     69  psimd_f32 vnGHIJ = psimd_qfma_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
     77  const psimd_f32 vsGHIJ = (psimd_f32) ((psimd_u32) vnGHIJ << 23);
     84  vnGHIJ = psimd_sub_f32(vnGHIJ, vmagic_bias);
     92  psimd_f32 vtGHIJ = psimd_qfma_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     98  vtGHIJ = psimd_qfma_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
|
D | sse2-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2():
     66  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias);  (local)
     74  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
     81  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
     89  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ);
     95  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
|
D | neon-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20():
     69  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
     77  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
     84  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     92  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     98  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
|
D | neon-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2():
     70  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
     78  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
     85  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     93  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     99  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
|
D | sse2-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5():
     69  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias);  (local)
     77  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
     84  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
     92  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ);
     98  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
|
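All of the p5 hits above follow the same expf range reduction, with one lane group (GHIJ) of five visible per file: add a large "magic bias" constant so that round(x*log2(e)) lands in the low mantissa bits of vn, left-shift those bits into the exponent field to rebuild vs = 2^n, subtract the bias to recover n as a float, then peel n*ln(2) off x in two steps (a hi/lo Cody-Waite split) so the reduced argument vt keeps more precision than a single float multiply allows. A minimal scalar sketch of that pattern follows; the constant values and the expf() stand-in for the kernels' degree-5 polynomial are illustrative assumptions, not code copied from these files:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* Scalar sketch of the p5 reduction; constants are illustrative assumptions. */
    static float exp_p5_sketch(float vx) {
      const float vmagic_bias   = 0x1.8000FEp23f;   /* 1.5*2^23 plus 127 in the low bits */
      const float vlog2e        = 0x1.715476p+0f;   /* log2(e) */
      const float vminus_ln2_hi = -0x1.62E430p-1f;  /* -ln(2), high part */
      const float vminus_ln2_lo =  0x1.05C610p-29f; /* -ln(2), low-order correction */

      /* n := round(x * log2(e)); the bias forces the integer into the mantissa. */
      float vn = vx * vlog2e + vmagic_bias;

      /* s := 2^n: shift the rounded integer (plus the 127 hidden in the bias)
         from the mantissa into the IEEE exponent field. */
      uint32_t vn_bits;
      memcpy(&vn_bits, &vn, sizeof vn_bits);
      const uint32_t vs_bits = vn_bits << 23;
      float vs;
      memcpy(&vs, &vs_bits, sizeof vs);

      vn -= vmagic_bias;  /* recover n as a float */

      /* t := x - n*ln(2), subtracted in two steps for extra precision. */
      float vt = vn * vminus_ln2_hi + vx;
      vt = vn * vminus_ln2_lo + vt;

      /* The kernels finish with a degree-5 polynomial p(t) ~= e^t; expf stands
         in for it here, and the kernels' input clamping is omitted. */
      return vs * expf(vt);
    }

The bias used in the sketch, 0x1.8000FEp23f, hides the IEEE exponent bias 127 in its low bits, which is what lets the bare left shift by 23 land directly on the bit pattern of 2^n.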
D | neonfma-lut64-p2-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20():
     69  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e_x64);  (local)
     85  …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…
    100  …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_…
    143  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
    151  float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_o64_hi);
    157  vtGHIJ = vfmaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5():
     73  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e_x64);  (local)
     89  …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…
    104  …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_…
    147  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
    155  float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_o64_hi);
    161  vtGHIJ = vfmaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2():
     70  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e_x64);  (local)
     86  …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…
    101  …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_…
    144  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
    152  float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_o64_hi);
    158  vtGHIJ = vfmaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2():
     71  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e_x64);  (local)
     87  …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…
    102  …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_…
    145  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
    153  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_o64_hi);
    159  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20():
     70  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e_x64);  (local)
     86  …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…
    101  …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_…
    144  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
    152  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_o64_hi);
    158  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5():
     74  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e_x64);  (local)
     90  …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…
    105  …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_…
    148  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
    156  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_o64_hi);
    162  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
|
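The lut64-p2 hits bracket the same reduction around a table lookup: n := round(x*64*log2(e)) is split so its low 6 bits (the vidxGHIJ lines, masked with vindex_mask) index a 64-entry table of 2^(i/64), while the remaining bits, cleared with vbicq and shifted up toward the exponent field, become the scale adjustment (the veGHIJ lines); the reduction constants shrink to ln(2)/64 accordingly. A scalar sketch under stated assumptions: the 0x3F mask, the shift by 17, and the constant values are derived here from the 64-entry layout rather than read from the truncated lines above, and exp2f() stands in for the kernels' precomputed table:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* Scalar sketch of the lut64-p2 reduction; mask, shift, and constants are
       assumptions derived from a 64-entry table layout. */
    static float exp_lut64_sketch(float vx) {
      const float vmagic_bias       = 0x1.800000p23f;  /* ulp = 1: addition rounds to an integer */
      const float vlog2e_x64        = 0x1.715476p+6f;  /* 64 * log2(e) */
      const float vminus_ln2_o64_hi = -0x1.62E430p-7f; /* -ln(2)/64, high part */
      const float vminus_ln2_o64_lo =  0x1.05C610p-35f;/* -ln(2)/64, low-order correction */

      /* n := round(x * 64 * log2(e)), landing in the low mantissa bits. */
      float vn = vx * vlog2e_x64 + vmagic_bias;
      uint32_t vn_bits;
      memcpy(&vn_bits, &vn, sizeof vn_bits);

      /* Split n: the low 6 bits index the 2^(i/64) table; the bits above,
         shifted so bit 6 lands on bit 23, adjust the exponent field. */
      const uint32_t vidx = vn_bits & UINT32_C(0x3F);
      const uint32_t ve = (vn_bits & ~UINT32_C(0x3F)) << 17;

      /* exp2f stands in for the kernels' precomputed 64-entry table. */
      const float vl = exp2f((float) vidx * 0x1.0p-6f);
      uint32_t vl_bits;
      memcpy(&vl_bits, &vl, sizeof vl_bits);
      const uint32_t vs_bits = vl_bits + ve;  /* s := 2^(i/64) * 2^e = 2^(n/64) */
      float vs;
      memcpy(&vs, &vs_bits, sizeof vs);

      vn -= vmagic_bias;  /* recover n as a float */

      /* t := x - n * ln(2)/64, again in two steps. */
      float vt = vn * vminus_ln2_o64_hi + vx;
      vt = vn * vminus_ln2_o64_lo + vt;

      /* |t| <= ln(2)/128, so a degree-2 polynomial suffices in the kernels;
         expf stands in for it here. */
      return vs * expf(vt);
    }

The design trade is visible in the hit counts: the table shrinks the polynomial from degree 5 to degree 2 at the cost of the per-lane gather that the vidxGHIJ lines set up.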
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | psimd-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20():
     73  psimd_f32 vnGHIJ = psimd_qfma_f32(vmagic_bias, vzGHIJ, vminus_log2e);  (local)
     81  const psimd_f32 vsGHIJ = (psimd_f32) ((psimd_u32) vnGHIJ << 23);
     88  vnGHIJ = psimd_sub_f32(vnGHIJ, vmagic_bias);
     96  psimd_f32 vtGHIJ = psimd_qfma_f32(vzGHIJ, vnGHIJ, vln2_hi);
    102  vtGHIJ = psimd_qfma_f32(vtGHIJ, vnGHIJ, vln2_lo);
|
D | psimd-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24():
     75  psimd_f32 vnGHIJ = psimd_qfma_f32(vmagic_bias, vzGHIJ, vminus_log2e);  (local)
     84  const psimd_f32 vsGHIJ = (psimd_f32) ((psimd_u32) vnGHIJ << 23);
     92  vnGHIJ = psimd_sub_f32(vnGHIJ, vmagic_bias);
    101  psimd_f32 vtGHIJ = psimd_qfma_f32(vzGHIJ, vnGHIJ, vln2_hi);
    108  vtGHIJ = psimd_qfma_f32(vtGHIJ, vnGHIJ, vln2_lo);
|
D | sse41-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20():
     73  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias);  (local)
     81  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
     88  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
     96  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);
    102  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
|
D | sse2-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20():
     73  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias);  (local)
     81  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
     88  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
     96  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);
    102  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
|
D | neonfma-rr1-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20():
     70  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);  (local)
     78  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
     85  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     92  float32x4_t vtGHIJ = vfmaq_f32(vzGHIJ, vnGHIJ, vln2);
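Note the rr1 suffix above: this variant reconstructs n*ln(2) with a single vln2 constant (one FMA, hence one hit fewer), where the rr2 kernels in this list split ln(2) into hi and lo parts and spend two.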
|
D | sse41-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24():
     75  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias);  (local)
     84  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
     92  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
    101  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);
    108  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
|
D | neon-rr2-p5-nr2recps-x20.c | in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20():
     72  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);  (local)
     80  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
     87  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     95  float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vln2_hi);
    101  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vln2_lo);
|
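The sigmoid kernels reuse the same reduction on vz instead of vx: the SSE entries multiply vz by vlog2e and then subtract n*ln(2) (consistent with vz carrying the negated magnitude of the input), while the NEON and psimd entries fold the negation into vminus_log2e and add n*ln(2) back with vln2_hi/vln2_lo. Either way the result feeds the division that names these p5-div kernels: with e := exp(-|x|), sigmoid is e/(1 + e), mirrored for positive inputs. A scalar sketch following the SSE convention (vz = -|x| is an assumption here, and the cutoff handling for very negative vz is omitted):

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* Scalar sketch of the p5-div sigmoid; same illustrative constants as the
       p5 exp sketch above, not copied from these files. */
    static float sigmoid_p5_div_sketch(float vx) {
      const float vmagic_bias   = 0x1.8000FEp23f;
      const float vlog2e        = 0x1.715476p+0f;
      const float vminus_ln2_hi = -0x1.62E430p-1f;
      const float vminus_ln2_lo =  0x1.05C610p-29f;

      const float vz = -fabsf(vx);  /* assumed: reduce on the negative half-line */

      /* Identical magic-bias reduction, applied to z. */
      float vn = vz * vlog2e + vmagic_bias;
      uint32_t vn_bits;
      memcpy(&vn_bits, &vn, sizeof vn_bits);
      const uint32_t vs_bits = vn_bits << 23;
      float vs;
      memcpy(&vs, &vs_bits, sizeof vs);
      vn -= vmagic_bias;

      float vt = vn * vminus_ln2_hi + vz;
      vt = vn * vminus_ln2_lo + vt;

      /* e := exp(z) = s * p(t); expf stands in for the degree-5 polynomial. */
      const float ve = vs * expf(vt);

      /* sigmoid(z) = e / (e + 1) on z <= 0; mirror the result for x > 0. */
      const float vf = ve / (ve + 1.0f);
      return vx > 0.0f ? 1.0f - vf : vf;
    }

Working on the negative half-line keeps e <= 1, so the e/(e + 1) division never overflows and the positive half is recovered exactly as 1 - f.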