Search for refs:vn0 (results 1 – 25 of 310), sorted by relevance

/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
scalar-lut64-p2-x2.c:60 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2() local
73 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
77 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
84 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
89 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
92 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
scalar-lut64-p2-x2-acc2.c:61 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2() local
74 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
78 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
85 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
90 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
93 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
scalar-p5-x2.c:57 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2() local
62 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
66 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
71 float vt0 = vn0 * vminus_ln2_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
74 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
scalar-p5-x2-acc2.c:58 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2() local
63 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
67 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
72 float vt0 = vn0 * vminus_ln2_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
75 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
scalar-lut64-p2-x4.c:64 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4() local
79 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
85 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
96 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
103 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
108 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
scalar-lut64-p2-x4-acc2.c:65 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2() local
80 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
86 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
97 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
104 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
109 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
scalar-lut64-p2-x4-acc4.c:67 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4() local
82 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
88 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
99 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
106 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
111 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
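
Every f32-raddstoreexpminusmax match above uses vn0 the same way: round(x * log2(e)) (or x * 64*log2(e) in the lut64 variants) is captured with the magic-bias trick, the bits of vn0 are reused to rebuild the power-of-two scale (or a table index plus exponent bits), and vn0 is then folded back into the reduced argument vt0 through a hi/lo split of ln(2). Below is a minimal, self-contained scalar sketch of the p5 flavour of that exp core. It is not code from these files: the constants are the standard values for this trick, plain Taylor coefficients stand in for the kernels' tuned polynomial, and the names exp_p5_sketch and the memcpy-based fp32_to_bits / fp32_from_bits are mine. The real kernels also subtract the row maximum, flush underflowing results to zero, and accumulate a running sum, which the sketch omits.

#include <stdint.h>
#include <string.h>

/* memcpy-based stand-ins for XNNPACK's fp32_to_bits / fp32_from_bits helpers. */
static inline uint32_t fp32_to_bits(float f) {
  uint32_t u;
  memcpy(&u, &f, sizeof u);
  return u;
}

static inline float fp32_from_bits(uint32_t u) {
  float f;
  memcpy(&f, &u, sizeof f);
  return f;
}

/* Sketch of the scalar p5-style exp core: exp(x) ~= 2^n * q(t), where
 * n = round(x / ln2) is captured with the magic-bias trick and
 * t = x - n*ln2 is computed with a hi/lo (Cody-Waite) split of ln2. */
static float exp_p5_sketch(float vx) {
  const float vlog2e = 0x1.715476p+0f;          /* log2(e) */
  const float vmagic_bias = 0x1.8000FEp23f;     /* 1.5*2^23 + 127: rounds n into the low
                                                   mantissa bits with the IEEE exponent
                                                   bias already folded in */
  const float vminus_ln2_hi = -0x1.62E400p-1f;  /* -ln(2), high part */
  const float vminus_ln2_lo = -0x1.7F7D1Cp-20f; /* -ln(2), low part */

  /* vn now holds round(x/ln2) in its low mantissa bits. */
  float vn = vx * vlog2e + vmagic_bias;

  /* Shift the biased integer into the exponent field: vs == 2^n. */
  const float vs = fp32_from_bits(fp32_to_bits(vn) << 23);
  vn -= vmagic_bias;

  /* Reduced argument t = x - n*ln2, accumulated in two steps for precision. */
  float vt = vn * vminus_ln2_hi + vx;
  vt = vn * vminus_ln2_lo + vt;

  /* exp(t) ~= 1 + t*q(t); plain Taylor coefficients stand in for the tuned
   * degree-5 minimax polynomial used by the generated kernels. */
  float vp = 1.0f / 120.0f;
  vp = vp * vt + 1.0f / 24.0f;
  vp = vp * vt + 1.0f / 6.0f;
  vp = vp * vt + 0.5f;
  vp = vp * vt + 1.0f;

  vt *= vs;
  return vp * vt + vs;  /* vs * (1 + t*q(t)) */
}
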
/external/XNNPACK/src/f32-sigmoid/gen/
scalar-lut2048-p1-div-x2.c:47 float vn0 = vz0 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2() local
50 const uint32_t ve0 = fp32_to_bits(vn0) << 12; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
53 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
58 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
61 float vt0 = vn0 * vln2_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
64 vt0 = vn0 * vln2_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
scalar-lut64-p2-div-x2.c:47 float vn0 = vz0 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2() local
50 const uint32_t ve0 = fp32_to_bits(vn0) << 17; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
53 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
58 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
61 float vt0 = vn0 * vln2_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
64 vt0 = vn0 * vln2_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
scalar-p5-div-x2.c:47 float vn0 = vz0 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2() local
50 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
53 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
56 float vt0 = vn0 * vln2_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
59 vt0 = vn0 * vln2_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
avx512f-rr2-lut32-p2-perm2-scalef-div-x32.c:54 __m512 vn0 = _mm512_fmadd_ps(vz0, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32() local
57 const __m512 vl0 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn0), vtable_hi); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32()
60 vn0 = _mm512_sub_ps(vn0, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32()
63 __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_hi, vz0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32()
66 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32()
78 const __m512 ve0 = _mm512_scalef_ps(vp0, vn0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32()
scalar-lut2048-p1-div-x4.c:51 float vn0 = vz0 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4() local
56 const uint32_t ve0 = fp32_to_bits(vn0) << 12; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
61 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
70 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
75 float vt0 = vn0 * vln2_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
80 vt0 = vn0 * vln2_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
scalar-lut64-p2-div-x4.c:51 float vn0 = vz0 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4() local
56 const uint32_t ve0 = fp32_to_bits(vn0) << 17; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
61 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
70 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
75 float vt0 = vn0 * vln2_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
80 vt0 = vn0 * vln2_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c:54 __m512 vn0 = _mm512_fmadd_ps(vz0, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32() local
57 const __m512 vl0 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn0), vtable_hi); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
60 vn0 = _mm512_sub_ps(vn0, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
63 __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_hi, vz0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
66 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
78 const __m512 ve0 = _mm512_scalef_ps(vp0, vn0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
avx512f-rr2-lut32-p2-perm2-scalef-div-x48.c:56 __m512 vn0 = _mm512_fmadd_ps(vz0, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48() local
60 const __m512 vl0 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn0), vtable_hi); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48()
64 vn0 = _mm512_sub_ps(vn0, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48()
68 __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_hi, vz0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48()
72 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48()
88 const __m512 ve0 = _mm512_scalef_ps(vp0, vn0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48()
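
The f32-sigmoid matches run the same reduction on vz0 = |x| with a negated log2(e), so the reconstructed value approximates exp(-|x|); the scalar `div` variants then finish with a single division, the nr1fma variants with a Newton-Raphson reciprocal step, and the avx512f variants apply 2^vn0 with _mm512_scalef_ps instead of bit shifts. A scalar sketch of the `div` shape, reusing exp_p5_sketch() from the sketch above (sigmoid_div_sketch is a hypothetical name, not one of the kernels):

#include <math.h>

/* Sketch of the scalar "div" sigmoid shape: e = exp(-|x|), f = e / (e + 1),
 * mirrored for positive inputs. Reuses exp_p5_sketch() from the sketch above. */
static float sigmoid_div_sketch(float vx) {
  const float vz = fabsf(vx);
  const float ve = exp_p5_sketch(-vz);  /* exp(-|x|) via the same vn/vt reduction */
  float vf = ve / (ve + 1.0f);          /* sigmoid(x) for x <= 0 */
  if (vx > 0.0f) {
    vf = 1.0f - vf;                     /* sigmoid(x) = 1 - sigmoid(-x) */
  }
  return vf;
}
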
/external/XNNPACK/src/f32-velu/gen/
velu-wasm-rr2-lut16-p3-x2.c:51 float vn0 = vz0 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2() local
54 const uint32_t ven0 = fp32_to_bits(vn0) << 19; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2()
55 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2()
56 vn0 -= vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2()
61 float vt0 = vn0 * vminus_ln2_hi + vz0; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2()
66 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2()
velu-scalar-rr2-lut16-p3-x2.c:51 float vn0 = vz0 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2() local
54 const uint32_t ven0 = fp32_to_bits(vn0) << 19; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2()
55 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2()
56 vn0 -= vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2()
61 float vt0 = vn0 * vminus_ln2_hi + vz0; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2()
66 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2()
velu-wasm-rr2-lut16-p3-x3.c:53 float vn0 = vz0 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3() local
57 const uint32_t ven0 = fp32_to_bits(vn0) << 19; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
58 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
59 vn0 -= vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
67 float vt0 = vn0 * vminus_ln2_hi + vz0; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
74 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
velu-scalar-rr2-lut16-p3-x3.c:53 float vn0 = vz0 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3() local
57 const uint32_t ven0 = fp32_to_bits(vn0) << 19; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
58 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
59 vn0 -= vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
67 float vt0 = vn0 * vminus_ln2_hi + vz0; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
74 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
velu-wasm-rr2-p6-x2.c:51 float vn0 = vz0 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_p6_x2() local
54 float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_velu_ukernel__wasm_rr2_p6_x2()
55 vn0 -= vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_p6_x2()
59 float vt0 = vn0 * vminus_ln2_hi + vz0; in xnn_f32_velu_ukernel__wasm_rr2_p6_x2()
62 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_velu_ukernel__wasm_rr2_p6_x2()
velu-scalar-rr2-p6-x2.c:51 float vn0 = vz0 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_p6_x2() local
54 float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_velu_ukernel__scalar_rr2_p6_x2()
55 vn0 -= vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_p6_x2()
59 float vt0 = vn0 * vminus_ln2_hi + vz0; in xnn_f32_velu_ukernel__scalar_rr2_p6_x2()
62 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_velu_ukernel__scalar_rr2_p6_x2()
velu-scalar-rr2-lut16-p3-x4.c:55 float vn0 = vz0 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() local
60 const uint32_t ven0 = fp32_to_bits(vn0) << 19; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
61 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
62 vn0 -= vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
73 float vt0 = vn0 * vminus_ln2_hi + vz0; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
82 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
velu-wasm-rr2-lut16-p3-x4.c:55 float vn0 = vz0 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() local
60 const uint32_t ven0 = fp32_to_bits(vn0) << 19; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
61 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
62 vn0 -= vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
73 float vt0 = vn0 * vminus_ln2_hi + vz0; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
82 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
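
In the f32-velu matches, vn0 drives only the negative branch of ELU: the lut16-p3 variants split fp32_to_bits(vn0) into a 4-bit table index (vidx0) and exponent bits (ven0, a shift left by 19), while the p6 variants rebuild the scale with the familiar shift by 23. The sketch below keeps only the overall ELU shape on top of exp_p5_sketch(); the table-lookup reconstruction and the kernels' prescale, alpha, and beta parameter handling are simplified, so treat it as an assumption-laden outline rather than a transcription.

/* Sketch of the ELU shape behind the velu kernels:
 *   x >= 0  ->  beta * x
 *   x <  0  ->  alpha * (exp(x) - 1)
 * again reusing exp_p5_sketch(). The generated kernels prescale and saturate
 * the negative input before the reduction; the cutoff below is an assumption. */
static float elu_sketch(float vx, float alpha, float beta) {
  if (vx >= 0.0f) {
    return beta * vx;
  }
  const float vz = vx > -17.0f ? vx : -17.0f;  /* assumed saturation point */
  return alpha * (exp_p5_sketch(vz) - 1.0f);
}
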
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x8.c:54 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8() local
58 const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn0), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8()
61 vn0 = _mm256_sub_ps(vn0, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8()
65 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8()
67 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8()
avx2-p5-x16.c:56 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16() local
61 const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn0), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
65 vn0 = _mm256_sub_ps(vn0, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
70 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
73 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
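
The f32-vscaleexpminusmax matches are the AVX2 form of the same p5 reduction, eight lanes at a time, with _mm256_fmadd_ps for the fused steps and an integer shift of vn0's lanes to rebuild 2^n. Below is a vectorized counterpart of exp_p5_sketch() along those lines, with the same illustrative constants and Taylor stand-in coefficients; the max subtraction and final scaling that give these kernels their name are omitted.

#include <immintrin.h>

/* AVX2/FMA sketch of the same p5 exp core, eight floats per call
 * (compile with -mavx2 -mfma or equivalent). */
static __m256 exp_p5_avx2_sketch(__m256 vx) {
  const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f);
  const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E400p-1f);
  const __m256 vminus_ln2_lo = _mm256_set1_ps(-0x1.7F7D1Cp-20f);

  /* Each lane of vn holds round(x/ln2) in its low mantissa bits. */
  __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);

  /* vs = 2^n, rebuilt by shifting the biased integer into the exponent field. */
  const __m256 vs = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn), 23));
  vn = _mm256_sub_ps(vn, vmagic_bias);

  /* t = x - n*ln2 with the hi/lo split of ln2. */
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);

  /* Taylor coefficients as stand-ins for the tuned degree-5 polynomial. */
  __m256 vp = _mm256_set1_ps(1.0f / 120.0f);
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f / 24.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f / 6.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(0.5f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f));

  vt = _mm256_mul_ps(vt, vs);
  return _mm256_fmadd_ps(vp, vt, vs);  /* 2^n * (1 + t*q(t)) */
}
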
