Searched refs:vn (Results 1 – 25 of 923) sorted by relevance

/external/vixl/src/aarch64/
assembler-aarch64.cc
315 const VRegister& vn, in NEONTable() argument
320 VIXL_ASSERT(vn.Is16B()); in NEONTable()
322 Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd)); in NEONTable()
327 const VRegister& vn, in tbl() argument
330 NEONTable(vd, vn, vm, NEON_TBL_1v); in tbl()
335 const VRegister& vn, in tbl() argument
340 VIXL_ASSERT(AreSameFormat(vn, vn2)); in tbl()
341 VIXL_ASSERT(AreConsecutive(vn, vn2)); in tbl()
342 NEONTable(vd, vn, vm, NEON_TBL_2v); in tbl()
347 const VRegister& vn, in tbl() argument
[all …]
assembler-aarch64.h
565 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
569 const VRegister& vn,
575 const VRegister& vn,
582 const VRegister& vn,
589 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
593 const VRegister& vn,
599 const VRegister& vn,
606 const VRegister& vn,
2213 void fmov(const Register& rd, const VRegister& vn, int index);
2216 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
[all …]
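
The tbl/tbx declarations above are the AArch64 NEON table-lookup instructions: vn (plus up to three consecutive registers in the multi-register overloads, hence the AreConsecutive check) supplies the byte table, and vm the per-lane indices. A scalar model of the single-register form, in illustrative C rather than vixl code:

#include <stdint.h>

/* Model of the one-register TBL/TBX: each index byte in vm selects a
 * byte of the 16-byte table vn (the VIXL_ASSERT(vn.Is16B()) above).
 * Out-of-range indices yield 0 for TBL; TBX leaves the lane unchanged. */
static void tbl1_model(uint8_t vd[16], const uint8_t vn[16],
                       const uint8_t vm[16], int is_tbx) {
  for (int i = 0; i < 16; i++) {
    const uint8_t idx = vm[i];
    if (idx < 16) {
      vd[i] = vn[idx];
    } else if (!is_tbx) {
      vd[i] = 0;
    }
  }
}
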
/external/XNNPACK/src/f32-sigmoid/gen/
avx512f-rr2-lut32-p2-perm2-scalef-div-x16.c
52 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16() local
53 const __m512 vl = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn), vtable_hi); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16()
54 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16()
56 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vz); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16()
57 vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16()
63 const __m512 ve = _mm512_scalef_ps(vp, vn); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16()
84 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16() local
85 const __m512 vl = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn), vtable_hi); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16()
86 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16()
88 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vz); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16()
[all …]
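
All of these AVX-512 sigmoid kernels share one skeleton: round z·log2(e) to an integer n with the magic-bias trick, reduce t = z − n·ln2, approximate the exponential of the small remainder with a short polynomial, and apply 2^n with _mm512_scalef_ps. A minimal sketch of that skeleton, without the lookup table and with plain Taylor coefficients instead of XNNPACK's tuned minimax ones; it assumes z has already been clamped so the magic-bias rounding stays exact:

#include <immintrin.h>

/* n = round(z*log2(e)) via magic bias, t = z - n*ln2, short polynomial
 * for exp(t), then exp(z) = 2^n * exp(t) via scalef. Illustrative only. */
static __m512 exp_sketch(__m512 vz) {
  const __m512 vlog2e      = _mm512_set1_ps(0x1.715476p+0f);
  const __m512 vmagic_bias = _mm512_set1_ps(0x1.8p23f);   /* 2^23 + 2^22 */
  const __m512 vminus_ln2  = _mm512_set1_ps(-0x1.62E430p-1f);

  /* Adding 1.5*2^23 forces round-to-nearest-integer in the mantissa. */
  __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias);
  vn = _mm512_sub_ps(vn, vmagic_bias);              /* n = round(z*log2e) */
  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz);  /* t = z - n*ln2      */

  __m512 vp = _mm512_set1_ps(0x1.555556p-5f);                   /* 1/24 */
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(0x1.555556p-3f)); /* 1/6  */
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(0.5f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));
  return _mm512_scalef_ps(vp, vn);                  /* 2^n * exp(t)       */
}
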
avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x16.c
52 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16() local
53 const __m512 vl = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn), vtable_hi); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16()
54 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16()
56 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vz); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16()
57 vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16()
63 const __m512 ve = _mm512_scalef_ps(vp, vn); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16()
87 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16() local
88 const __m512 vl = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn), vtable_hi); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16()
89 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16()
91 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vz); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16()
[all …]
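
The -div and -nr1fma suffixes name how the final sigmoid quotient is formed: -div issues _mm512_div_ps, while -nr1fma takes a reciprocal estimate and refines it with one FMA-based Newton-Raphson step. A sketch of that refinement (function name illustrative):

#include <immintrin.h>

/* One Newton-Raphson step on a reciprocal estimate: r' = r + r*(1 - d*r),
 * roughly doubling the number of correct bits of 1/d. */
static __m512 recip_nr1fma(__m512 vd) {
  __m512 vr = _mm512_rcp14_ps(vd);  /* ~14-bit reciprocal estimate */
  const __m512 verr = _mm512_fnmadd_ps(vd, vr, _mm512_set1_ps(1.0f));
  return _mm512_fmadd_ps(verr, vr, vr);
}
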
avx512f-rr1-lut16-p3-perm-scalef-div-x16.c
46 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16() local
47 const __m512 vl = _mm512_permutexvar_ps(_mm512_castps_si512(vn), vtable); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16()
48 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16()
50 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16()
57 const __m512 ve = _mm512_scalef_ps(vp, vn); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16()
78 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16() local
79 const __m512 vl = _mm512_permutexvar_ps(_mm512_castps_si512(vn), vtable); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16()
80 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16()
82 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16()
89 const __m512 ve = _mm512_scalef_ps(vp, vn); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16()
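
The perm kernels keep the whole lookup table in registers: _mm512_permutexvar_ps uses only the low four bits of each 32-bit index lane, so the magic-biased vn can be fed to it directly, with no masking, to select among 16 lanes (the lut32 kernels above use _mm512_permutex2var_ps to index across two table registers). Assuming vtable holds 2^(i/16) in lane i:

#include <immintrin.h>

/* In-register 16-entry LUT: vpermps ignores index bits above [3:0], so
 * the biased vn needs no mask before use as the lane selector. */
static __m512 lut16_lookup(__m512 vn_biased, __m512 vtable) {
  return _mm512_permutexvar_ps(_mm512_castps_si512(vn_biased), vtable);
}
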
wasmsimd-lut64-p2-div-x4.c
43 v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4() local
44 const v128_t ve = wasm_i32x4_shl(vn, 17); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
46 const v128_t vidx = wasm_i32x4_shl(wasm_v128_and(vn, vindex_mask), 2); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
56 vn = wasm_f32x4_sub(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
58 v128_t vt = wasm_f32x4_add(vz, wasm_f32x4_mul(vn, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
59 vt = wasm_f32x4_add(vt, wasm_f32x4_mul(vn, vln2_lo)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
79 v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4() local
80 const v128_t ve = wasm_i32x4_shl(vn, 17); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
82 const v128_t vidx = wasm_i32x4_shl(wasm_v128_and(vn, vindex_mask), 2); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
92 vn = wasm_f32x4_sub(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
[all …]
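
Here the magic-biased vn is reinterpreted as an integer: the low 6 bits of the rounded multiple index the 64-entry table (vindex_mask selects them; the extra shl by 2 turns the index into a byte offset), while the bits above them, shifted left by 23 − 6 = 17, become an adjustment to the float exponent of the looked-up entry. In the scalar templates further down, the reinterpretation is the fp32_to_bits/fp32_from_bits pair, which is just a bit cast (XNNPACK's own helpers may use a union; both forms are well-defined C):

#include <stdint.h>
#include <string.h>

/* memcpy-based float<->bits casts, the scalar counterpart of the
 * wasm_i32x4_* / vreinterpretq_* reinterpretations above. */
static uint32_t fp32_to_bits(float f) {
  uint32_t u;
  memcpy(&u, &f, sizeof u);
  return u;
}

static float fp32_from_bits(uint32_t u) {
  float f;
  memcpy(&f, &u, sizeof f);
  return f;
}
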
avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x16.c
46 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16() local
47 const __m512 vl = _mm512_permutexvar_ps(_mm512_castps_si512(vn), vtable); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16()
48 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16()
50 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16()
57 const __m512 ve = _mm512_scalef_ps(vp, vn); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16()
81 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16() local
82 const __m512 vl = _mm512_permutexvar_ps(_mm512_castps_si512(vn), vtable); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16()
83 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16()
85 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16()
92 const __m512 ve = _mm512_scalef_ps(vp, vn); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16()
avx-rr2-p5-div-x8.c
47 __m256 vn = _mm256_add_ps(_mm256_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8() local
49 …const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)),… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8()
50 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8()
53 vn = _mm256_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8()
55 __m256 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_hi), vz); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8()
56 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_lo), vt); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8()
84 __m256 vn = _mm256_add_ps(_mm256_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8() local
85 …const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)),… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8()
86 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8()
89 vn = _mm256_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8()
[all …]
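
The rr2 in these names is a two-step (Cody-Waite) range reduction: −ln2 is split into vminus_ln2_hi, whose trailing mantissa bits are zero so that vn*vminus_ln2_hi rounds exactly, and vminus_ln2_lo, which restores the truncated tail; a single-step rr1 reduction would lose low bits of t for larger |n|. A scalar sketch with the customary split (treat the exact constants as assumptions):

/* Two-word reduction t = z - n*ln2: the first product is exact because
 * the hi word keeps only leading bits of ln2; the lo word adds the rest. */
static float reduce_two_word(float vz, float vn) {
  const float vminus_ln2_hi = -0x1.62E400p-1f;   /* -ln2, leading bits */
  const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;  /* -ln2, remainder    */
  float vt = vn * vminus_ln2_hi + vz;
  return vn * vminus_ln2_lo + vt;
}
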
wasmsimd-p5-div-x4.c
44 v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4() local
45 const v128_t vs = wasm_i32x4_shl(vn, 23); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4()
46 vn = wasm_f32x4_sub(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4()
48 v128_t vt = wasm_f32x4_add(vz, wasm_f32x4_mul(vn, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4()
49 vt = wasm_f32x4_add(vt, wasm_f32x4_mul(vn, vln2_lo)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4()
72 v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4() local
73 const v128_t vs = wasm_i32x4_shl(vn, 23); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4()
74 vn = wasm_f32x4_sub(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4()
76 v128_t vt = wasm_f32x4_add(vz, wasm_f32x4_mul(vn, vln2_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4()
77 vt = wasm_f32x4_add(vt, wasm_f32x4_mul(vn, vln2_lo)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4()
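
Across all of these ISAs the sigmoid kernels evaluate the exponential only for z = −|x|, which keeps its result in (0, 1] and avoids overflow, then form f = e/(e + 1) = σ(−|x|) and reflect for positive inputs using σ(x) = 1 − σ(−x). A scalar sketch of that outer structure, with libm's expf standing in for the inlined approximations:

#include <math.h>

/* Range-safe sigmoid: exponential on the non-positive half, one divide,
 * then reflection for x > 0. */
static float sigmoid_sketch(float x) {
  const float vz = -fabsf(x);
  const float ve = expf(vz);          /* in (0, 1]                */
  const float vf = ve / (ve + 1.0f);  /* sigma(-|x|)              */
  return x > 0.0f ? 1.0f - vf : vf;   /* sigma(x) = 1 - sigma(-x) */
}
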
neon-rr2-lut2048-p1-nr2recps-x4.c
42 float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
43 const int32x4_t ve = vshlq_n_s32(vreinterpretq_s32_f32(vn), 12); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
45 … const uint64x2_t vidx = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn), vindex_mask)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
55 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
56 float32x4_t vt = vmlaq_f32(vz, vn, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
57 vt = vmlaq_f32(vt, vn, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
80 float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
81 const int32x4_t ve = vshlq_n_s32(vreinterpretq_s32_f32(vn), 12); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
83 … const uint64x2_t vidx = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn), vindex_mask)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
93 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
[all …]
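
The nr2recps suffix names the NEON division-free ending: vrecpeq_f32 gives a coarse reciprocal estimate, and vrecpsq_f32(d, r), which computes 2 − d·r, drives two Newton-Raphson refinements:

#include <arm_neon.h>

/* 1/vd without a divide: estimate plus two NR steps. Each
 * vmulq_f32(vr, vrecpsq_f32(vd, vr)) is one iteration toward 1/vd. */
static float32x4_t recip_nr2recps(float32x4_t vd) {
  float32x4_t vr = vrecpeq_f32(vd);
  vr = vmulq_f32(vr, vrecpsq_f32(vd, vr));
  vr = vmulq_f32(vr, vrecpsq_f32(vd, vr));
  return vr;
}
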
avx-rr2-p5-nr2-x8.c
48 __m256 vn = _mm256_add_ps(_mm256_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8() local
50 …const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)),… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8()
51 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8()
54 vn = _mm256_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8()
56 __m256 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_hi), vz); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8()
57 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_lo), vt); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8()
88 __m256 vn = _mm256_add_ps(_mm256_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8() local
89 …const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)),… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8()
90 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8()
93 vn = _mm256_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8()
[all …]
neon-rr2-lut64-p2-nr2recps-x4.c
42 float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
43 const int32x4_t ve = vshlq_n_s32(vreinterpretq_s32_f32(vn), 17); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
45 … const uint64x2_t vidx = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn), vindex_mask)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
55 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
56 float32x4_t vt = vmlaq_f32(vz, vn, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
57 vt = vmlaq_f32(vt, vn, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
81 float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
82 const int32x4_t ve = vshlq_n_s32(vreinterpretq_s32_f32(vn), 17); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
84 … const uint64x2_t vidx = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn), vindex_mask)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
94 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
[all …]
sse41-p5-div-x4.c
44 __m128 vn = _mm_add_ps(_mm_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4() local
45 const __m128 vs = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn), 23)); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4()
46 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4()
48 __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4()
49 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4()
75 __m128 vn = _mm_add_ps(_mm_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4() local
76 const __m128 vs = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn), 23)); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4()
77 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4()
79 __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4()
80 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4()
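
The table-free p5 kernels rebuild 2^n with a single shift: vs = bits(vn) << 23. That works because their magic bias is 2^23 + 2^22 + 127 (0x1.8000FEp23f in XNNPACK's scalar p5 kernels; treat the exact value as an assumption here), so after the add the low mantissa bits of vn hold n + 127, already in IEEE biased-exponent form, and the shift moves them into the exponent field. A scalar model, reusing the fp32 bit casts defined earlier:

#include <stdint.h>

/* Returns 2^round(vz*log2e): the shift moves the biased exponent
 * n + 127 from the mantissa field into the exponent field. */
static float exp2_by_shift(float vz) {
  const float vmagic_bias = 0x1.8000FEp23f;  /* 2^23 + 2^22 + 127 */
  const float vn = vz * 0x1.715476p+0f + vmagic_bias;
  return fp32_from_bits(fp32_to_bits(vn) << 23);
}
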
/external/XNNPACK/src/f32-sigmoid/
scalar-lut2048-p1-div.c.in
47 float vn${N} = vz${N} * vminus_log2e + vmagic_bias;
50 const uint32_t ve${N} = fp32_to_bits(vn${N}) << 12;
53 const uint32_t vidx${N} = fp32_to_bits(vn${N}) & vindex_mask;
57 vn${N} -= vmagic_bias;
60 float vt${N} = vn${N} * vln2_hi + vz${N};
63 vt${N} = vn${N} * vln2_lo + vt${N};
97 float vn = vz * vminus_log2e + vmagic_bias; variable
98 const uint32_t ve = fp32_to_bits(vn) << 12;
99 const uint32_t vidx = fp32_to_bits(vn) & vindex_mask;
101 vn -= vmagic_bias;
[all …]
scalar-lut64-p2-div.c.in
47 float vn${N} = vz${N} * vminus_log2e + vmagic_bias;
50 const uint32_t ve${N} = fp32_to_bits(vn${N}) << 17;
53 const uint32_t vidx${N} = fp32_to_bits(vn${N}) & vindex_mask;
57 vn${N} -= vmagic_bias;
60 float vt${N} = vn${N} * vln2_hi + vz${N};
63 vt${N} = vn${N} * vln2_lo + vt${N};
100 float vn = vz * vminus_log2e + vmagic_bias; variable
101 const uint32_t ve = fp32_to_bits(vn) << 17;
102 const uint32_t vidx = fp32_to_bits(vn) & vindex_mask;
104 vn -= vmagic_bias;
[all …]
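
De-templating one element of the lut64-p2 sigmoid (the ${N} placeholders expand per unrolled element) and combining the pieces shown so far gives roughly the following. The table is assumed to hold the float bit patterns of 2^(i/64); the constants follow the template's names but should be treated as assumptions, and unlike the template this sketch clears the index bits before shifting (as the neonfma kernel below does with vbicq_s32), which is what lets a plain 2^(i/64) table be added to directly. The real kernels also clamp z at a saturation cutoff first.

#include <math.h>
#include <stdint.h>

/* One scalar lut64-p2 sigmoid element, assembled from the earlier
 * sketches (uses the fp32 bit casts defined above). */
static float sigmoid_lut64_p2(float x, const uint32_t vtable[64]) {
  const float vz = -fabsf(x);                   /* keep exp() in (0,1] */
  /* m = round(vz * 64 * log2(e)) via the magic bias. */
  float vn = vz * 0x1.715476p+6f + 0x1.800000p+23f;
  const uint32_t vb = fp32_to_bits(vn);
  const uint32_t vidx = vb & UINT32_C(63);      /* table index          */
  /* Clear index bits, then shift the quotient into the exponent field. */
  const float vs = fp32_from_bits(vtable[vidx] + ((vb & ~UINT32_C(63)) << 17));
  vn -= 0x1.800000p+23f;                        /* vn = m               */
  /* t = vz - m*(ln2/64), two-word reduction; |t| <= ln2/128. */
  float vt = vn * -0x1.630000p-7f + vz;
  vt = vn * 0x1.BD0106p-19f + vt;
  const float vp = (0.5f * vt + 1.0f) * vt + 1.0f;  /* exp(t), degree 2 */
  const float ve = vs * vp;                     /* exp(-|x|)            */
  const float vf = ve / (ve + 1.0f);
  return x > 0.0f ? 1.0f - vf : vf;
}
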
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
neonfma-lut64-p2-x4.c
57 float32x4_t vn = vfmaq_f32(vmagic_bias, vx, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local
69 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
72 … const uint64x2_t vidx = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn), vindex_mask)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
84 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
88 float32x4_t vt = vfmaq_f32(vx, vn, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
89 vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
133 float32x4_t vn = vfmaq_f32(vmagic_bias, vx, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local
145 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
148 … const uint64x2_t vidx = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn), vindex_mask)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
160 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
[all …]
neon-lut64-p2-x4.c
58 float32x4_t vn = vmlaq_f32(vmagic_bias, vx, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local
70 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
73 … const uint64x2_t vidx = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn), vindex_mask)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
85 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
89 float32x4_t vt = vmlaq_f32(vx, vn, vminus_ln2_o64_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
90 vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
134 float32x4_t vn = vmlaq_f32(vmagic_bias, vx, vlog2e_x64); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local
146 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
149 … const uint64x2_t vidx = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn), vindex_mask)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
161 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
[all …]
neonfma-p5-x4.c
56 float32x4_t vn = vfmaq_f32(vmagic_bias, vx, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4() local
60 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4()
63 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4()
67 float32x4_t vt = vfmaq_f32(vx, vn, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4()
68 vt = vfmaq_f32(vt, vn, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4()
113 float32x4_t vn = vfmaq_f32(vmagic_bias, vx, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4() local
117 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4()
120 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4()
124 float32x4_t vt = vfmaq_f32(vx, vn, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4()
125 vt = vfmaq_f32(vt, vn, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4()
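
The raddstoreexpminusmax ukernels fuse the middle pass of a softmax: for every element they compute exp(x[i] − max), store it, and accumulate the running sum (reduce-add + store + exp-minus-max). A scalar statement of that contract, with expf standing in for the vectorized approximation and an illustrative signature rather than the XNNPACK prototype:

#include <math.h>
#include <stddef.h>

/* vy[i] = exp(vx[i] - vmax); *vsum = sum of all vy[i]. Subtracting the
 * row max keeps every exponent non-positive, so nothing overflows. */
static void raddstoreexpminusmax_sketch(size_t n, const float* vx,
                                        float vmax, float* vy,
                                        float* vsum) {
  float vacc = 0.0f;
  for (size_t i = 0; i < n; i++) {
    const float ve = expf(vx[i] - vmax);
    vy[i] = ve;
    vacc += ve;
  }
  *vsum = vacc;
}
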
/external/XNNPACK/src/f32-velu/gen/
velu-avx-rr2-p6-x8.c
50 __m256 vn = _mm256_add_ps(_mm256_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__avx_rr2_p6_x8() local
51 …const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)),… in xnn_f32_velu_ukernel__avx_rr2_p6_x8()
52 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x8()
53 vn = _mm256_sub_ps(vn, vmagic_bias); in xnn_f32_velu_ukernel__avx_rr2_p6_x8()
55 __m256 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_hi), vz); in xnn_f32_velu_ukernel__avx_rr2_p6_x8()
57 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_lo), vt); in xnn_f32_velu_ukernel__avx_rr2_p6_x8()
85 __m256 vn = _mm256_add_ps(_mm256_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__avx_rr2_p6_x8() local
86 …const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)),… in xnn_f32_velu_ukernel__avx_rr2_p6_x8()
87 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x8()
88 vn = _mm256_sub_ps(vn, vmagic_bias); in xnn_f32_velu_ukernel__avx_rr2_p6_x8()
[all …]
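
The velu kernels compute the ELU activation, y = x for positive inputs and α(exp(x) − 1) otherwise, reusing the same magic-bias exponential machinery (here with a degree-6 polynomial, hence p6). XNNPACK's operator also carries prescale and beta factors; a scalar sketch with simplified parameter plumbing, using libm's expm1f for the exp(x) − 1 that the polynomial approximates:

#include <math.h>

/* ELU with the prescale/alpha/beta parameters of the f32-velu kernels
 * (plumbing simplified; the real params arrive packed in a struct). */
static float elu_sketch(float x, float prescale, float alpha, float beta) {
  return x > 0.0f ? beta * x : alpha * expm1f(x * prescale);
}
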
velu-avx512f-rr1-lut16-p3-perm-x16.c
49 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16() local
50 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
51 const __m512i vl = _mm512_permutexvar_epi32(_mm512_castps_si512(vn), vtable); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
53 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
55 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
82 __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16() local
83 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
84 const __m512i vl = _mm512_permutexvar_epi32(_mm512_castps_si512(vn), vtable); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
86 vn = _mm512_sub_ps(vn, vmagic_bias); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
88 __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
velu-neon-rr2-lut16-p3-x4.c
50 float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
51 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
52 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
61 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
64 float32x4_t vt = vmlaq_f32(vz, vn, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
65 vt = vmlaq_f32(vt, vn, vminus_ln2_lo); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
87 float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
88 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
89 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
98 vn = vsubq_f32(vn, vmagic_bias); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
[all …]
velu-avx-rr2-lut4-p4-perm-x8.c
52 __m256 vn = _mm256_add_ps(_mm256_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local
53 __m256 ven = _mm256_andnot_ps(vindex_mask, vn); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
54 const __m256 vl = _mm256_permutevar_ps(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
56 vn = _mm256_sub_ps(vn, vmagic_bias); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
59 __m256 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_hi), vz); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
61 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_lo), vt); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
88 __m256 vn = _mm256_add_ps(_mm256_mul_ps(vz, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local
89 __m256 ven = _mm256_andnot_ps(vindex_mask, vn); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
90 const __m256 vl = _mm256_permutevar_ps(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
92 vn = _mm256_sub_ps(vn, vmagic_bias); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
[all …]
velu-wasmsimd-arm-rr2-lut16-p3-x4.c
51 v128_t vn = wasm_f32x4_add(wasm_f32x4_mul(vz, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local
52 const v128_t vidx = wasm_i32x4_shl(wasm_v128_and(vn, vindex_mask), 2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
53 const v128_t ven = wasm_i32x4_shl(vn, 19); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
64 vn = wasm_f32x4_sub(vn, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
66 v128_t vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_hi), vz); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
67 vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_lo), vt); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
89 v128_t vn = wasm_f32x4_add(wasm_f32x4_mul(vz, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local
90 const v128_t vidx = wasm_i32x4_shl(wasm_v128_and(vn, vindex_mask), 2); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
91 const v128_t ven = wasm_i32x4_shl(vn, 19); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
102 vn = wasm_f32x4_sub(vn, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
[all …]
velu-wasmsimd-x86-rr2-lut16-p3-x4.c
51 v128_t vn = wasm_f32x4_add(wasm_f32x4_mul(vz, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local
52 const v128_t vidx = wasm_i32x4_shl(wasm_v128_and(vn, vindex_mask), 2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
53 const v128_t ven = wasm_i32x4_shl(vn, 19); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
64 vn = wasm_f32x4_sub(vn, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
66 v128_t vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_hi), vz); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
68 vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_lo), vt); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
92 v128_t vn = wasm_f32x4_add(wasm_f32x4_mul(vz, vlog2e), vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local
93 const v128_t vidx = wasm_i32x4_shl(wasm_v128_and(vn, vindex_mask), 2); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
94 const v128_t ven = wasm_i32x4_shl(vn, 19); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
105 vn = wasm_f32x4_sub(vn, vmagic_bias); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
[all …]
/external/XNNPACK/src/f32-velu/
scalar-rr2-lut16-p3.c.in
53 float vn${N} = vz${N} * vlog2e + vmagic_bias;
56 const uint32_t ven${N} = fp32_to_bits(vn${N}) << 19;
57 const uint32_t vidx${N} = fp32_to_bits(vn${N}) & vindex_mask;
58 vn${N} -= vmagic_bias;
61 float vt${N} = vn${N} * vminus_ln2_hi + vz${N};
65 vt${N} = vn${N} * vminus_ln2_lo + vt${N};
114 float vn = vz * vlog2e + vmagic_bias;
115 const uint32_t ven = fp32_to_bits(vn) << 19;
116 const uint32_t vidx = fp32_to_bits(vn) & vindex_mask;
117 vn -= vmagic_bias;
[all …]
