/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neon-p5-x16.c |
    66 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() local
    73 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
    79 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
    86 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
    91 vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
|
D | wasmsimd-p5-x16.c |
    62 v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxCDEF, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16() local
    69 const v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16()
    75 vnCDEF = wasm_f32x4_sub(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16()
    82 v128_t vtCDEF = wasm_f32x4_add(vxCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16()
    87 vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16()
|
D | neonfma-p5-x16.c |
    65 float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() local
    72 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16()
    78 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16()
    85 float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16()
    90 vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16()
|
D | sse2-p5-x16-acc4.c |
    65 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() local
    72 const __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    78 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    85 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    90 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_lo), vtCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
|
D | neonfma-p5-x16-acc2.c |
    66 float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() local
    73 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2()
    79 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2()
    86 float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2()
    91 vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2()
|
D | neon-p5-x16-acc2.c |
    67 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() local
    74 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
    80 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
    87 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
    92 vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
|
D | wasmsimd-p5-x16-acc2.c |
    63 v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxCDEF, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc2() local
    70 const v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc2()
    76 vnCDEF = wasm_f32x4_sub(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc2()
    83 v128_t vtCDEF = wasm_f32x4_add(vxCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc2()
    88 vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc2()
|
D | sse2-p5-x16-acc2.c |
    63 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() local
    70 const __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    76 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    83 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    88 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_lo), vtCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
|
D | wasmsimd-p5-x16-acc4.c |
    65 v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxCDEF, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc4() local
    72 const v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc4()
    78 vnCDEF = wasm_f32x4_sub(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc4()
    85 v128_t vtCDEF = wasm_f32x4_add(vxCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc4()
    90 vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc4()
|
D | neonfma-p5-x16-acc4.c |
    68 float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() local
    75 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()
    81 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()
    88 float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()
    93 vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()
|
D | neon-p5-x16-acc4.c |
    69 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() local
    76 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
    82 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
    89 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
    94 vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
|
D | sse2-p5-x16.c |
    62 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() local
    69 const __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    75 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    82 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    87 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_lo), vtCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
|
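The five vnCDEF matches in each p5 file above are the same five steps of one exp(x) evaluation, spelled in different intrinsics (vmlaq_f32 on NEON, vfmaq_f32 on NEON+FMA, _mm_add_ps/_mm_mul_ps on SSE2, wasm_f32x4_* on WAsm SIMD): round n = x/ln2 to an integer with the magic-bias trick, build 2^n with a plain shift into the exponent field, recover n as a float, then reduce t = x - n*ln2 in two steps. Below is a scalar sketch of one lane; the constants are the usual ones for this trick, and the magic-bias value (with the IEEE exponent bias 127 folded into its low bits so that a bare <<23 works) is my inference from the matches above, not copied from the kernels. The p5-x20 entries further down repeat the same steps.

#include <math.h>
#include <stdint.h>
#include <string.h>

/* Scalar sketch of one lane of the p5 kernels (illustration, not XNNPACK code;
 * no handling of out-of-range inputs, which the real kernels add). */
static float exp_p5_sketch(float x) {
  const float vlog2e        = 0x1.715476p+0f;  /* log2(e) */
  const float vmagic_bias   = 0x1.8000FEp23f;  /* 1.5*2^23 with 127 in the low bits (assumed) */
  const float vminus_ln2_hi = -0x1.62E43p-1f;  /* -ln(2), high part */
  const float vminus_ln2_lo = 0x1.05C61p-29f;  /* residual low part */

  /* n = round(x * log2e), via round-to-nearest on the magic bias
     (the vmlaq_f32/vfmaq_f32/_mm_add_ps/wasm_f32x4_add matches above) */
  float vn = x * vlog2e + vmagic_bias;

  /* s = 2^n: shift the integer sitting in n's low mantissa bits into the
     exponent field (the vshlq_n_s32(..., 23)/_mm_slli_epi32(..., 23) matches) */
  uint32_t nbits;
  memcpy(&nbits, &vn, sizeof nbits);
  const uint32_t sbits = nbits << 23;
  float vs;
  memcpy(&vs, &sbits, sizeof vs);

  vn -= vmagic_bias;  /* recover n as a float (the vsubq_f32 matches) */

  /* t = x - n*ln2, done in two steps (Cody-Waite reduction) so the error
     stays far below float precision (the vminus_ln2_hi/vminus_ln2_lo matches) */
  float vt = vn * vminus_ln2_hi + x;
  vt = vn * vminus_ln2_lo + vt;

  /* the real kernels finish with a degree-5 polynomial in t; expf stands in */
  return vs * expf(vt);
}

The two-constant split is the usual Cody-Waite idea: the high part of ln(2) has enough trailing zero bits that n times it rounds exactly for the n values in play, so only the tiny low-part product carries rounding error.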
D | neonfma-lut64-p2-x16-acc4.c |
    69 float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() local
    84 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
    96 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnCDEF), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
    132 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
    139 float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
    144 vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
|
D | neon-lut64-p2-x16-acc4.c |
    70 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() local
    85 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
    97 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnCDEF), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
    133 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
    140 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
    145 vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
|
D | neon-lut64-p2-x16.c |
    67 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() local
    82 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
    94 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnCDEF), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
    130 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
    137 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
    142 vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
|
D | neon-lut64-p2-x16-acc2.c |
    68 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() local
    83 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
    95 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnCDEF), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
    131 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
    138 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
    143 vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
|
D | neonfma-lut64-p2-x16-acc2.c |
    67 float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() local
    82 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
    94 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnCDEF), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
    130 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
    137 float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
    142 vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
|
D | neonfma-lut64-p2-x16.c |
    66 float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() local
    81 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
    93 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vnCDEF), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
    129 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
    136 float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
    141 vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
|
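The lut64-p2 variants above trade polynomial degree for a 64-entry table: n is rounded to multiples of 1/64 of an exponent step, the low 6 bits of n pick a 2^(i/64) table entry (the vandq_s32/vindex_mask matches), and the remaining bits move into the exponent field (the vbicq_s32 plus vshlq_n_s32(..., 17) matches, truncated above). A scalar sketch under the same caveats as before; the table contents and constants are inferred, and the real kernels read a precomputed table instead of building one.

#include <math.h>
#include <stdint.h>
#include <string.h>

static float exp2_k_over_64[64];  /* stand-in for XNNPACK's precomputed table */

static void init_exp2_table(void) {
  for (int i = 0; i < 64; i++) {
    exp2_k_over_64[i] = exp2f((float) i / 64.0f);
  }
}

/* Scalar sketch of one lane of the lut64-p2 kernels (illustration only). */
static float exp_lut64_p2_sketch(float x) {
  const float vlog2e_x64        = 0x1.715476p+6f;  /* 64/ln(2) */
  const float vmagic_bias       = 0x1.800000p23f;  /* plain 1.5*2^23 this time (assumed) */
  const float vminus_ln2_o64_hi = -0x1.62E43p-7f;  /* -ln(2)/64, high part */
  const float vminus_ln2_o64_lo = 0x1.05C61p-35f;  /* residual low part */

  if (exp2_k_over_64[0] == 0.0f) init_exp2_table();  /* lazy one-time init (2^0 != 0) */

  float vn = x * vlog2e_x64 + vmagic_bias;  /* n = round(64*x/ln2) */
  uint32_t nbits;
  memcpy(&nbits, &vn, sizeof nbits);

  const uint32_t vidx = nbits & 0x3F;  /* low 6 bits: table index (the vandq_s32 matches) */
  const uint32_t ve = (nbits & ~UINT32_C(0x3F)) << 17;  /* quotient shifted into the exponent
                                                           field (vbicq_s32 + shift-by-17) */
  uint32_t sbits;
  memcpy(&sbits, &exp2_k_over_64[vidx], sizeof sbits);
  sbits += ve;  /* s = 2^(n>>6) * table[n & 63], scaled in the exponent bits */
  float vs;
  memcpy(&vs, &sbits, sizeof vs);

  vn -= vmagic_bias;
  float vt = vn * vminus_ln2_o64_hi + x;  /* two-step reduction; now |t| <= ln2/128 */
  vt = vn * vminus_ln2_o64_lo + vt;

  /* the real kernels finish with a degree-2 polynomial in t; expf stands in */
  return vs * expf(vt);
}

With |t| bounded by ln2/128, a degree-2 polynomial reaches the accuracy the p5 variants need degree 5 for; the price is the per-lane table loads visible in the vidxCDEF matches.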
D | neonfma-p5-x20.c |
    67 float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() local
    75 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
    82 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
    90 float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
    96 vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
|
D | wasmsimd-p5-x20-acc5.c |
    68 v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vxCDEF, vlog2e)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5() local
    76 const v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5()
    83 vnCDEF = wasm_f32x4_sub(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5()
    91 v128_t vtCDEF = wasm_f32x4_add(vxCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_hi)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5()
    97 vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vminus_ln2_lo)); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5()
|
D | neon-p5-x20-acc5.c |
    72 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() local
    80 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
    87 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
    95 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
    101 vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
|
D | neonfma-p5-x20-acc5.c |
    71 float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() local
    79 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
    86 vnCDEF = vsubq_f32(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
    94 float32x4_t vtCDEF = vfmaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
    100 vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
|
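Aside from the vector width (x16 = 16 floats per main-loop iteration, x20 = 20), the acc2/acc4/acc5 suffixes on the file names above say how many independent partial sums the reduce-add ("raddstore") part of each kernel keeps while summing the exponentials; more accumulators shorten the floating-point add dependency chain. A scalar illustration of the idea (hypothetical helper, not code from the listing):

#include <stddef.h>

/* Sum with 4 independent accumulators, the scalar analogue of an -acc4 kernel. */
static float sum_acc4_sketch(const float* v, size_t n) {
  float acc0 = 0.0f, acc1 = 0.0f, acc2 = 0.0f, acc3 = 0.0f;
  size_t i = 0;
  for (; i + 4 <= n; i += 4) {
    /* four adds with no dependence on each other, so they can overlap in flight */
    acc0 += v[i + 0];
    acc1 += v[i + 1];
    acc2 += v[i + 2];
    acc3 += v[i + 3];
  }
  for (; i < n; i++) {
    acc0 += v[i];  /* remainder */
  }
  return (acc0 + acc1) + (acc2 + acc3);  /* combine the partial sums at the end */
}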
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | wasmsimd-p5-div-x16.c |
    53 v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() local
    58 const v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
    63 vnCDEF = wasm_f32x4_sub(vnCDEF, vmagic_bias); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
    68 v128_t vtCDEF = wasm_f32x4_add(vzCDEF, wasm_f32x4_mul(vnCDEF, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
    73 vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vln2_lo)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
|
D | wasmsimd-lut64-p2-div-x16.c |
    52 v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() local
    57 const v128_t veCDEF = wasm_i32x4_shl(vnCDEF, 17); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16()
    62 const v128_t vidxCDEF = wasm_i32x4_shl(wasm_v128_and(vnCDEF, vindex_mask), 2); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16()
    101 vnCDEF = wasm_f32x4_sub(vnCDEF, vmagic_bias); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16()
    106 v128_t vtCDEF = wasm_f32x4_add(vzCDEF, wasm_f32x4_mul(vnCDEF, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16()
    111 vtCDEF = wasm_f32x4_add(vtCDEF, wasm_f32x4_mul(vnCDEF, vln2_lo)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16()
|
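The f32-sigmoid kernels above reuse the same exp machinery but evaluate it at minus the absolute value of the input, so the exponential stays in (0, 1] and cannot overflow; the sign flip is folded into the constants, which is why these files match vminus_log2e and vln2_hi/vln2_lo instead of vlog2e and vminus_ln2_hi/vminus_ln2_lo. The -div suffix names the final division. A sketch of the construction, reusing exp_p5_sketch from the earlier sketch:

#include <math.h>

/* Scalar sketch of the *-div sigmoid construction (illustration only). */
static float sigmoid_div_sketch(float x) {
  const float vz = -fabsf(x);          /* the kernels keep |x| and negate via the constants */
  const float ve = exp_p5_sketch(vz);  /* e^z with z <= 0 */
  float vf = ve / (ve + 1.0f);         /* sigmoid(z): the -div step */
  if (x > 0.0f) {
    vf = 1.0f - vf;                    /* reflect: sigmoid(x) = 1 - sigmoid(-x) */
  }
  return vf;
}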
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-arm-rr2-p6-x16.c |
    60 v128_t vnCDEF = wasm_f32x4_add(wasm_f32x4_mul(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() local
    65 v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
    70 vnCDEF = wasm_f32x4_sub(vnCDEF, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
    75 v128_t vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
    80 vtCDEF = wasm_f32x4_add(wasm_f32x4_mul(vnCDEF, vminus_ln2_lo), vtCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
|
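The f32-velu kernel above runs the same two-constant reduction ("rr2", as the vminus_ln2_hi/vminus_ln2_lo matches suggest) on the scaled negative branch of ELU and finishes with a degree-6 polynomial ("p6") that evaluates e^t - 1. A sketch of the overall mapping; expm1f stands in for the reduction plus polynomial, and the prescale/alpha/beta parameter names are my reading of the ELU parameter set, not taken from this listing:

#include <math.h>

/* Scalar sketch of the ELU mapping the velu kernels implement (illustration only). */
static float elu_sketch(float x, float prescale, float alpha, float beta) {
  if (x > 0.0f) {
    return x * beta;               /* positive inputs: linear pass-through */
  }
  const float vz = x * prescale;   /* vz in the matches above */
  return alpha * expm1f(vz);       /* negative inputs: alpha * (e^z - 1) */
}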