Home
last modified time | relevance | path

Searched refs:vnGHIJ (Results 1 – 25 of 85) sorted by relevance

1234

/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dneon-rr2-p5-x20.c58 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20() local
64 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
70 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
76 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
82 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20()
Dwasmsimd-rr2-p5-x20-acc2.c63 v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxGHIJ, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2() local
71 const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
78 vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
86 v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
92 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2()
Dsse2-rr2-p5-x20-acc2.c63 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2() local
71 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
78 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
86 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
92 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2()
Dwasmsimd-rr2-p5-x20-acc5.c66 v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxGHIJ, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5() local
74 const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
81 vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
89 v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
95 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc5()
Dsse2-rr2-p5-x20-acc5.c66 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5() local
74 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
81 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
89 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
95 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc5()
Dneon-rr2-p5-x20-acc5.c62 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5() local
68 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
74 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
80 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
86 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc5()
Dsse2-rr2-p5-x20.c62 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20() local
70 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
77 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
85 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
91 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20()
Dneon-rr2-p5-x20-acc2.c59 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2() local
65 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
71 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
77 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
83 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20_acc2()
Dwasmsimd-rr2-p5-x20.c62 v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxGHIJ, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20() local
70 const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
77 vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
85 v128_t vtGHIJ = wasm_f32x4_add(vxGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
91 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20()
Dneon-rr2-lut64-p2-x20.c57 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20() local
63 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20()
77 …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20()
118 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20()
124 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20()
130 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20()
Dneon-rr2-lut64-p2-x20-acc2.c58 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc2() local
64 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc2()
78 …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc2()
119 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc2()
125 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc2()
131 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc2()
Dneon-rr2-lut64-p2-x20-acc5.c61 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc5() local
67 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc5()
81 …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnGHIJ), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc5()
122 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc5()
128 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc5()
134 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20_acc5()
Dneonfma-rr1-p5-x20-acc5.c61 float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc5() local
67 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc5()
73 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc5()
79 float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc5()
Dneonfma-rr1-p5-x20.c57 float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20() local
63 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20()
69 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20()
75 float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20()
Dneonfma-rr1-p5-x20-acc2.c58 float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc2() local
64 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc2()
70 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc2()
76 float32x4_t vtGHIJ = vfmaq_f32(vxGHIJ, vnGHIJ, vminus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc2()
/external/XNNPACK/src/f32-vsigmoid/gen/
Dvsigmoid-wasmsimd-rr2-p5-div-x20.c56 v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzGHIJ, vminus_log2e)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20() local
62 const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
68 vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
74 v128_t vtGHIJ = wasm_f32x4_add(vzGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_hi)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
80 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_lo)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20()
Dvsigmoid-wasmsimd-rr2-lut64-p2-div-x20.c55 v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzGHIJ, vminus_log2e)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20() local
61 const v128_t veGHIJ = wasm_i32x4_shl(vnGHIJ, 17); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20()
67 const v128_t vidxGHIJ = wasm_i32x4_shl(wasm_v128_and(vnGHIJ, vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20()
115 vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20()
121 v128_t vtGHIJ = wasm_f32x4_add(vzGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_hi)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20()
127 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_lo)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20()
Dvsigmoid-sse41-rr2-p5-div-x20.c56 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20() local
62 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
68 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
74 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
80 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20()
Dvsigmoid-sse2-rr2-p5-div-x20.c56 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20() local
62 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
68 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
74 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
80 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20()
Dvsigmoid-sse41-rr2-p5-div-x24.c58 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24() local
65 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
72 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
79 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
86 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24()
Dvsigmoid-neon-rr2-p5-nr2recps-x20.c55 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20() local
61 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
67 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
73 float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vln2_hi); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
79 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vln2_lo); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
Dvsigmoid-wasmsimd-rr2-p5-div-x24.c58 v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzGHIJ, vminus_log2e)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24() local
65 const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24()
72 vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24()
79 v128_t vtGHIJ = wasm_f32x4_add(vzGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_hi)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24()
86 vtGHIJ = wasm_f32x4_add(vtGHIJ, wasm_f32x4_mul(vnGHIJ, vln2_lo)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-sse41-rr2-p6-x20.c62 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() local
68 __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
74 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
80 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
86 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
Dvelu-neon-rr2-p6-x20.c61 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() local
71 float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
72 vnGHIJ = vsubq_f32(vnGHIJ, vmagic_bias); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
78 float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
84 vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_lo); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
Dvelu-wasmsimd-arm-rr2-p6-x20.c62 v128_t vnGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() local
68 v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
74 vnGHIJ = wasm_f32x4_sub(vnGHIJ, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
80 v128_t vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
86 vtGHIJ = wasm_f32x4_add(wasm_f32x4_mul(vnGHIJ, vminus_ln2_lo), vtGHIJ); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()

1234