
Search results for refs:vn0 (results 1 – 25 of 167), sorted by relevance


/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
scalar-lut64-p2-x2-acc2.c
61 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2() local
74 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
78 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
85 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
90 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
93 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
scalar-lut64-p2-x2.c
60 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2() local
73 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
77 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
84 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
89 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
92 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
scalar-p5-x2-acc2.c
58 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2() local
63 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
67 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
72 float vt0 = vn0 * vminus_ln2_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
75 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
scalar-p5-x2.c
57 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2() local
62 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
66 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
71 float vt0 = vn0 * vminus_ln2_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
74 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
scalar-lut64-p2-x4-acc2.c
65 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2() local
80 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
86 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
97 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
104 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
109 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
scalar-lut64-p2-x4.c
64 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4() local
79 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
85 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
96 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
103 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
108 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
scalar-lut64-p2-x4-acc4.c
67 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4() local
82 const uint32_t ve0 = (fp32_to_bits(vn0) & UINT32_C(0xFFFFFFC0)) << 17; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
88 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
99 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
106 float vt0 = vn0 * vminus_ln2_o64_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
111 vt0 = vn0 * vminus_ln2_o64_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
scalar-p5-x4.c
61 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4() local
68 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
74 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
81 float vt0 = vn0 * vminus_ln2_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
86 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
scalar-p5-x4-acc2.c
62 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2() local
69 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
75 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
82 float vt0 = vn0 * vminus_ln2_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
87 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
scalar-p5-x4-acc4.c
64 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4() local
71 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
77 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
84 float vt0 = vn0 * vminus_ln2_hi + vx0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
89 vt0 = vn0 * vminus_ln2_lo + vt0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
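
All of the f32-raddstoreexpminusmax hits above follow the same scalar exp(x - max) pattern: round x*log2(e) to an integer n with a magic-bias addition, rebuild 2^n by shifting the low bits of that float into the exponent field, reduce the argument with a hi/lo split of ln(2), and evaluate a short polynomial ("p5") or a table-backed quadratic ("lut64-p2"). The following is a minimal self-contained sketch of the p5 variant, not the XNNPACK code: the constants and plain Taylor coefficients are illustrative stand-ins for the tuned values in these files, and f32_bits/f32_from replace XNNPACK's fp32_to_bits/fp32_from_bits helpers.

/*
 * Sketch of the scalar "p5" exp pattern (assumes -87 <= x <= 0, as in the
 * expminusmax kernels where x is already shifted by the row maximum).
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t f32_bits(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
static float f32_from(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

static float exp_p5_sketch(float vx) {
  const float vlog2e = 0x1.715476p+0f;          /* log2(e) */
  /* 1.5*2^23 + 127: forces rounding to an integer and pre-adds the IEEE
   * exponent bias, so the plain left shift by 23 below yields the bits of 2^n. */
  const float vmagic_bias = 0x1.8000FEp23f;
  const float vminus_ln2_hi = -0x1.62E400p-1f;  /* upper bits of -ln(2) */
  const float vminus_ln2_lo = (float)(0x1.62E400p-1 - 0.6931471805599453);

  float vn = vx * vlog2e + vmagic_bias;             /* n = round(x / ln2), in low bits */
  const float vs = f32_from(f32_bits(vn) << 23);    /* vs = 2^n */
  vn -= vmagic_bias;                                /* back to the float value of n */

  /* Cody-Waite reduction: t = x - n*ln2 with ln2 split into hi/lo parts. */
  float vt = vn * vminus_ln2_hi + vx;
  vt = vn * vminus_ln2_lo + vt;

  /* p(t) = 1 + t/2 + t^2/6 + t^3/24 + t^4/120, so exp(t) ~= 1 + t*p(t). */
  float vp = 1.0f / 120.0f;
  vp = vp * vt + 1.0f / 24.0f;
  vp = vp * vt + 1.0f / 6.0f;
  vp = vp * vt + 0.5f;
  vp = vp * vt + 1.0f;

  /* exp(x) = 2^n * (1 + t*p(t)) = vs + (t*vs)*p(t), folded as in the kernels. */
  vt *= vs;
  return vt * vp + vs;
}

int main(void) {
  for (float x = 0.0f; x >= -5.0f; x -= 1.25f) {
    printf("x=%6.2f  sketch=%.6e  expf=%.6e\n", x, exp_p5_sketch(x), expf(x));
  }
  return 0;
}

The acc2/acc4 suffixes in the filenames only change how many partial sums the store-and-reduce loop keeps; the per-element exp computation sketched here is the same in each.
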
/external/XNNPACK/src/f32-sigmoid/gen/
scalar-lut2048-p1-div-x2.c
66 float vn0 = vz0 * vminus_log2e_x2048 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2() local
79 const uint32_t ve0 = (fp32_to_bits(vn0) & ~vindex_mask) << 12; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
83 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
90 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
95 float vt0 = vn0 * vln2_o2048_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
98 vt0 = vn0 * vln2_o2048_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
scalar-lut64-p2-div-x2.c
66 float vn0 = vz0 * vminus_log2e_x64 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2() local
79 const uint32_t ve0 = (fp32_to_bits(vn0) & ~vindex_mask) << 17; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
83 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
90 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
95 float vt0 = vn0 * vln2_o64_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
98 vt0 = vn0 * vln2_o64_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
scalar-lut2048-p1-div-x4.c
70 float vn0 = vz0 * vminus_log2e_x2048 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4() local
85 const uint32_t ve0 = (fp32_to_bits(vn0) & ~vindex_mask) << 12; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
91 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
102 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
109 float vt0 = vn0 * vln2_o2048_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
114 vt0 = vn0 * vln2_o2048_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
scalar-p5-div-x2.c
64 float vn0 = vz0 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2() local
69 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
73 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
78 float vt0 = vn0 * vln2_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
81 vt0 = vn0 * vln2_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
scalar-lut64-p2-div-x4.c
70 float vn0 = vz0 * vminus_log2e_x64 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4() local
85 const uint32_t ve0 = (fp32_to_bits(vn0) & ~vindex_mask) << 17; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
91 const uint32_t vidx0 = fp32_to_bits(vn0) & vindex_mask; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
102 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
109 float vt0 = vn0 * vln2_o64_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
114 vt0 = vn0 * vln2_o64_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
scalar-p5-div-x4.c
68 float vn0 = vz0 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4() local
75 const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23); in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
81 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
88 float vt0 = vn0 * vln2_hi + vz0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
93 vt0 = vn0 * vln2_lo + vt0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
avx2-rr1-p5-div-x16.c
64 __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16() local
69 const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn0), 23)); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16()
73 vn0 = _mm256_sub_ps(vn0, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16()
77 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16()
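
The f32-sigmoid "lut64"/"lut2048" hits use the same magic-bias rounding but rebuild 2^(n/64) (or 2^(n/2048)) from a lookup table: the low bits of vn index the table and the remaining bits, shifted left by 17 (or 12), are added straight onto the table entry's bit pattern; the sigmoid itself is then finished with one division, e / (e + 1). The sketch below reconstructs the lut64-p2 flavour under stated assumptions: the table, magic bias, and ln(2)/64 split are chosen here for illustration rather than copied from the kernels, the quadratic coefficient is a plain 0.5, and no clamp against denormal intermediates is included.

/* Sketch of the lut64-p2 sigmoid pattern; compile with -lm. */
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t f32_bits(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
static float f32_from(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

static uint32_t exp2_k_over_64[64];   /* bit patterns of 2^(k/64), k = 0..63 */

static void init_table(void) {
  for (int k = 0; k < 64; k++) {
    exp2_k_over_64[k] = f32_bits((float)pow(2.0, k / 64.0));
  }
}

/* exp(z) for z <= 0, lut64-p2 style (z not so negative that 2^(n/64) underflows). */
static float exp_lut64_p2(float z) {
  const float vlog2e_x64 = 0x1.715476p+6f;      /* 64 * log2(e) */
  const float vmagic_bias = 0x1.800000p+23f;    /* 1.5 * 2^23: round to int */
  const uint32_t vindex_mask = UINT32_C(0x3F);
  const float vminus_ln2_o64_hi = -0x1.630000p-7f;
  const float vminus_ln2_o64_lo = (float)(0x1.630000p-7 - 0.6931471805599453 / 64.0);

  float vn = z * vlog2e_x64 + vmagic_bias;      /* n = round(64 * z / ln2) */
  const uint32_t bits = f32_bits(vn);

  /* Split n into a table index (n mod 64) and a power-of-two exponent. */
  const uint32_t vidx = bits & vindex_mask;
  const uint32_t ve = (bits & ~vindex_mask) << 17;        /* floor(n/64) << 23 */
  const float vs = f32_from(exp2_k_over_64[vidx] + ve);   /* vs = 2^(n/64) */

  vn -= vmagic_bias;

  /* t = z - n * ln(2)/64, so |t| <= ln(2)/128. */
  float vt = vn * vminus_ln2_o64_hi + z;
  vt = vn * vminus_ln2_o64_lo + vt;

  /* exp(t) ~= 1 + t + t^2/2 over this tiny range. */
  const float vp = vt * vt * 0.5f + vt;
  return vs * vp + vs;
}

static float sigmoid_sketch(float x) {
  const float z = fabsf(x);
  const float e = exp_lut64_p2(-z);     /* e^(-|x|) in (0, 1]        */
  const float f = e / (e + 1.0f);       /* sigmoid(-|x|)             */
  return x > 0.0f ? 1.0f - f : f;       /* mirror for positive input */
}

int main(void) {
  init_table();
  printf("%f vs %f\n", sigmoid_sketch(2.0f), 1.0f / (1.0f + expf(-2.0f)));
  return 0;
}

The lut2048-p1 hits trade a larger table (2048 entries, index mask 0x7FF, shift by 12) for a degree-1 remainder polynomial, and the p5-div hits skip the table entirely and reuse the degree-5 polynomial from the previous group.
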
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x8.c
54 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8() local
58 const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn0), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8()
61 vn0 = _mm256_sub_ps(vn0, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8()
65 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8()
67 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8()
avx2-p5-x16.c
56 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16() local
61 const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn0), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
65 vn0 = _mm256_sub_ps(vn0, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
70 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
73 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
avx2-p5-x24.c
58 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24() local
64 const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn0), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24()
69 vn0 = _mm256_sub_ps(vn0, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24()
75 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24()
79 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24()
avx512f-p5-scalef-x16.c
50 __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() local
54 __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16()
56 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16()
72 __m512 vf0 = _mm512_scalef_ps(vp0, vn0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16()
avx2-p5-x32.c
60 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32() local
67 const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn0), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
73 vn0 = _mm256_sub_ps(vn0, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
80 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
85 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
avx512f-p5-scalef-x32.c
52 __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() local
57 __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32()
60 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32()
82 __m512 vf0 = _mm512_scalef_ps(vp0, vn0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32()
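
The avx2-p5 hits above vectorize the same magic-bias/shift trick (_mm256_slli_epi32 by 23), while the avx512f-p5-scalef hits replace the bit-level tricks with two AVX-512 instructions: _mm512_roundscale_ps rounds x*log2(e) to an integer-valued vector n, and _mm512_scalef_ps applies the 2^n factor at the end. A hedged sketch of one vector's worth of that pattern follows; Taylor coefficients again stand in for the kernels' minimax set, and it must be compiled with -mavx512f and run on AVX-512 hardware.

/* Sketch of the avx512f "p5-scalef" exp pattern. */
#include <immintrin.h>
#include <math.h>
#include <stdio.h>

static __m512 exp_p5_scalef_sketch(__m512 vx) {
  const __m512 vlog2e        = _mm512_set1_ps(0x1.715476p+0f);
  const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E400p-1f);
  const __m512 vminus_ln2_lo = _mm512_set1_ps((float)(0x1.62E400p-1 - 0.6931471805599453));
  const __m512 vc5 = _mm512_set1_ps(1.0f / 120.0f);
  const __m512 vc4 = _mm512_set1_ps(1.0f / 24.0f);
  const __m512 vc3 = _mm512_set1_ps(1.0f / 6.0f);
  const __m512 vc2 = _mm512_set1_ps(0.5f);
  const __m512 vone = _mm512_set1_ps(1.0f);

  /* n = rint(x / ln2), kept as a float vector; no magic bias needed. */
  const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);

  /* t = x - n*ln2, two-step (hi/lo) reduction as in the kernels. */
  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);

  /* p(t) ~= exp(t) on |t| <= ln(2)/2 (degree-5 Taylor here). */
  __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
  vp = _mm512_fmadd_ps(vp, vt, vc3);
  vp = _mm512_fmadd_ps(vp, vt, vc2);
  vp = _mm512_fmadd_ps(vp, vt, vone);
  vp = _mm512_fmadd_ps(vp, vt, vone);

  /* exp(x) = 2^n * p(t): scalef applies the exponent in one instruction. */
  return _mm512_scalef_ps(vp, vn);
}

int main(void) {
  float in[16], out[16];
  for (int i = 0; i < 16; i++) in[i] = -0.25f * (float)i;
  _mm512_storeu_ps(out, exp_p5_scalef_sketch(_mm512_loadu_ps(in)));
  for (int i = 0; i < 16; i++) {
    printf("%8.4f -> %.6e (expf %.6e)\n", in[i], out[i], expf(in[i]));
  }
  return 0;
}

Because scalef builds the power of two directly from n, this path has no overflow-prone exponent-field arithmetic, which is why the vscaleexpminusmax and vscaleextexp kernels favour it on AVX-512F.
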
/external/XNNPACK/src/f32-vscaleextexp/gen/
avx512f-p5-scalef-x16.c
48 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() local
52 __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16()
54 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16()
75 const __m512 ve0 = _mm512_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16()
avx2-p5-x8.c
54 …const __m256 vn0 = _mm256_round_ps(_mm256_mul_ps(vx0, vlog2e), _MM_FROUND_TO_NEAREST_INT | _MM_FRO… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8() local
58 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8()
60 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8()
81 __m256 ve0 = _mm256_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8()
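
In the f32-vscaleextexp hits the 2^n factor is not folded into the value right away: the kernel keeps n and adds the caller's scale exponent to it (ve0 = vn0 + vscalee), deferring the power of two so that very large or very small scales cannot overflow intermediate results. The sketch below only illustrates that idea; the function name, parameters, and the short polynomial are assumptions for illustration, not the actual xnn_f32_vscaleextexp API, and it again assumes AVX-512F.

/* Sketch: exp(x) scaled by 2^scalee, with the exponents combined once at the end. */
#include <immintrin.h>

static __m512 scaled_exp_sketch(__m512 vx, __m512 vscalee) {
  const __m512 vlog2e        = _mm512_set1_ps(0x1.715476p+0f);
  const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E400p-1f);
  const __m512 vminus_ln2_lo = _mm512_set1_ps((float)(0x1.62E400p-1 - 0.6931471805599453));

  const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);

  /* Short polynomial for exp(t); degree 2 just to keep the sketch compact,
   * the kernels in the hits use the degree-5 form shown above. */
  __m512 vp = _mm512_fmadd_ps(_mm512_set1_ps(0.5f), vt, _mm512_set1_ps(1.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));

  /* Combine the exp exponent with the caller's extended exponent, then
   * apply the whole power of two with a single scalef. */
  const __m512 ve = _mm512_add_ps(vn, vscalee);
  return _mm512_scalef_ps(vp, ve);
}

Keeping the scale as a separate exponent is presumably what lets these kernels apply normalisation factors that would overflow or underflow an ordinary float multiplier.
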
