/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
D | neon-rr2-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20():
      58  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
      64  const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
      70  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
      76  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
      82  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
D | wasmsimd-rr2-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2():
      63  v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxGHIJ, vlog2e));  (local)
      71  const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);
      78  vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias);
      86  v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi));
      92  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo));
D | sse2-rr2-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2():
      63  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias);  (local)
      71  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
      78  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
      86  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ);
      92  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
D | wasmsimd-rr2-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5():
      66  v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxGHIJ, vlog2e));  (local)
      74  const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);
      81  vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias);
      89  v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi));
      95  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo));
D | sse2-rr2-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5():
      66  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias);  (local)
      74  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
      81  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
      89  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ);
      95  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
D | neon-rr2-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5():
      62  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
      68  const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
      74  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
      80  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
      86  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
D | sse2-rr2-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20():
      62  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias);  (local)
      70  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
      77  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
      85  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ);
      91  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
D | neon-rr2-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2():
      59  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
      65  const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
      71  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
      77  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
      83  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
D | wasmsimd-rr2-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20():
      62  v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxGHIJ, vlog2e));  (local)
      70  const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);
      77  vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias);
      85  v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi));
      91  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo));
D | neon-rr2-lut64-p2-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20():
      57  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
      63  const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…
      77  const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_…
     118  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     124  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     130  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
D | neon-rr2-lut64-p2-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc2():
      58  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
      64  const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…
      78  const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_…
     119  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     125  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     131  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
D | neon-rr2-lut64-p2-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc5():
      61  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
      67  const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…
      81  const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_…
     122  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
     128  float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi);
     134  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
D | neonfma-rr1-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc5():
      61  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
      67  const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
      73  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
      79  float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2);
D | neonfma-rr1-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20():
      57  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
      63  const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
      69  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
      75  float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2);
D | neonfma-rr1-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc2():
      58  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e);  (local)
      64  const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
      70  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
      76  float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2);
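Note: every occurrence above follows the same rr2-p5 scheme. n = round(x * log2(e)) is obtained by adding a large magic bias, the scale s = 2^n is rebuilt by shifting the low mantissa bits of vn into the float exponent field, and the residual t = x - n*ln(2) is computed in two steps (the ln2_hi/ln2_lo pair, hence "rr2") before a degree-5 polynomial approximates exp(t). The scalar sketch below mirrors that flow; the function name is made up for illustration, the magic bias and ln(2) split follow the values commonly used by these generated kernels, and the polynomial uses plain Taylor coefficients rather than XNNPACK's fitted set.

    #include <stdint.h>
    #include <string.h>

    /* Scalar sketch of the rr2-p5 exp(x) evaluation mirrored by the vector code
     * above.  Assumes x is roughly in (-87, 88); the real kernels add a cutoff
     * test that flushes results for very negative inputs to zero. */
    static float exp_rr2_p5_sketch(float vx) {
      const float vmagic_bias   = 0x1.8000FEp23f;  /* 2^23 + 2^22 + 127: rounds to int and bakes in the exponent bias */
      const float vlog2e        = 0x1.715476p+0f;  /* log2(e) */
      const float vminus_ln2_hi = -0x1.62E400p-1f; /* high part of -ln(2) */
      const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;/* low correction part of -ln(2) */

      /* n = round(x * log2(e)), captured in the low mantissa bits of vn. */
      float vn = vx * vlog2e + vmagic_bias;

      /* s = 2^n: shift the integer bits into the float exponent field. */
      uint32_t vn_bits;
      memcpy(&vn_bits, &vn, sizeof vn_bits);
      const uint32_t vs_bits = vn_bits << 23;
      float vs;
      memcpy(&vs, &vs_bits, sizeof vs);

      vn -= vmagic_bias;  /* recover n as a float */

      /* Two-step (rr2) reduction: t = x - n*ln(2), hi part then lo correction. */
      float vt = vn * vminus_ln2_hi + vx;
      vt = vn * vminus_ln2_lo + vt;

      /* Degree-5 polynomial, evaluated as in the kernels: exp(x) ~= s + (s*t)*p(t).
       * Taylor coefficients stand in for the generated minimax set. */
      float vp = 1.0f / 120.0f;
      vp = vp * vt + 1.0f / 24.0f;
      vp = vp * vt + 1.0f / 6.0f;
      vp = vp * vt + 0.5f;
      vp = vp * vt + 1.0f;
      vt *= vs;
      return vp * vt + vs;
    }

The neonfma rr1 variants listed above do the same reduction in a single step with one vminus_ln2 constant, since the fused multiply-add keeps the n*ln(2) product accurate enough to skip the hi/lo split; the lut64-p2 variants trade polynomial degree for a 64-entry table indexed by the low bits of vn (the vidxGHIJ lines).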
/external/XNNPACK/src/f32-vsigmoid/gen/
D | vsigmoid-wasmsimd-rr2-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20():
      56  v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzGHIJ, vminus_log2e));  (local)
      62  const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);
      68  vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias);
      74  v128_t vtGHIJ = wasm_f32x4_add(vzGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_hi));
      80  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_lo));
D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x20.c | in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20():
      55  v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzGHIJ, vminus_log2e));  (local)
      61  const v128_t veGHIJ = wasm_i32x4_shl(vnGHIJ, 17);
      67  const v128_t vidxGHIJ = wasm_i32x4_shl(wasm_v128_and(vnGHIJ, vindex_mask), 2);
     115  vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias);
     121  v128_t vtGHIJ = wasm_f32x4_add(vzGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_hi));
     127  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_lo));
D | vsigmoid-sse41-rr2-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20():
      56  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias);  (local)
      62  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
      68  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
      74  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);
      80  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
D | vsigmoid-sse2-rr2-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20():
      56  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias);  (local)
      62  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
      68  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
      74  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);
      80  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
D | vsigmoid-sse41-rr2-p5-div-x24.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24():
      58  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias);  (local)
      65  const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
      72  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
      79  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);
      86  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
D | vsigmoid-neon-rr2-p5-nr2recps-x20.c | in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20():
      55  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);  (local)
      61  const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
      67  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
      73  float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vln2_hi);
      79  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vln2_lo);
D | vsigmoid-wasmsimd-rr2-p5-div-x24.c | in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24():
      58  v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzGHIJ, vminus_log2e));  (local)
      65  const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);
      72  vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias);
      79  v128_t vtGHIJ = wasm_f32x4_add(vzGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_hi));
      86  vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_lo));
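Note: the f32-vsigmoid kernels run the same reduction on vz, which carries the magnitude of the input (hence the vminus_log2e / positive vln2_hi constants), and then turn e = exp(-|x|) into a sigmoid either with a division (the -div- variants) or, on plain NEON, with vrecpeq_f32 followed by two vrecpsq_f32 Newton-Raphson steps (the -nr2recps- variant). A minimal scalar sketch of that reconstruction, with expf() standing in for the inlined rr2 reduction and the sign handling written as a branch instead of the kernels' blend:

    #include <math.h>

    /* Scalar sketch of the sigmoid reconstruction used by the -div- variants.
     * Assumes the kernels' convention of evaluating exp on the non-positive
     * argument -|x|, so the exponential never overflows. */
    static float sigmoid_sketch(float vx) {
      const float vz = -fabsf(vx);   /* z <= 0 */
      const float ve = expf(vz);     /* kernels: rr2 reduction + p5 polynomial */
      const float vd = ve + 1.0f;
      float vf = ve / vd;            /* sigmoid(-|x|) */
      if (vx > 0.0f) {
        vf = 1.0f - vf;              /* mirror: sigmoid(x) = 1 - sigmoid(-x) */
      }
      return vf;
    }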
/external/XNNPACK/src/f32-velu/gen/
D | velu-sse41-rr2-p6-x20.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x20():
      62  __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias);  (local)
      68  __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));
      74  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);
      80  __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ);
      86  vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ);
D | velu-neon-rr2-p6-x20.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x20():
      61  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vlog2e);  (local)
      71  float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));
      72  vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias);
      78  float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vminus_ln2_hi);
      84  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo);
D | velu-wasmsimd-arm-rr2-p6-x20.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20():
      62  v128_t vnGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vzGHIJ, vlog2e), vmagic_bias);  (local)
      68  v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);
      74  vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias);
      80  v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ);
      86  vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ);
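Note: the f32-velu kernels use the same reduction, here feeding a degree-6 polynomial (rr2-p6), to evaluate the negative branch of the ELU; the positive branch passes the input through. Unlike the entries above, vsGHIJ is declared non-const here, presumably because the kernels later adjust the scale when folding the "- 1" of exp(z) - 1 into the reconstruction. A scalar sketch of the operation, with expm1f() standing in for the inlined reduction and polynomial, and alpha as the usual ELU parameter (the generated kernels fold additional scaling factors into the same flow):

    #include <math.h>

    /* Scalar sketch of the ELU evaluated by the rr2-p6 kernels. */
    static float elu_sketch(float vx, float alpha) {
      if (vx > 0.0f) {
        return vx;                  /* positive inputs pass through */
      }
      /* Negative branch: alpha * (exp(x) - 1); the kernels reassemble this
       * from the scale s and the reduced argument t to avoid cancellation. */
      return alpha * expm1f(vx);
    }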