Searched refs:vmagic_bias (Results 1 – 25 of 369) sorted by relevance


/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
scalar-p5-x4.c
27 const float vmagic_bias = 0x1.8000FEp23f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4() local
61 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
62 float vn1 = vx1 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
63 float vn2 = vx2 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
64 float vn3 = vx3 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
74 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
75 vn1 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
76 vn2 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
77 vn3 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
168 float vn = vx * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
[all …]
scalar-lut64-p2-x4-acc2.c
30 const float vmagic_bias = 0x1.800000p23f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2() local
65 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
66 float vn1 = vx1 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
67 float vn2 = vx2 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
68 float vn3 = vx3 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
97 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
98 vn1 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
99 vn2 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
100 vn3 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
181 float vn = vx * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
[all …]
scalar-p5-x4-acc2.c
27 const float vmagic_bias = 0x1.8000FEp23f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2() local
62 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
63 float vn1 = vx1 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
64 float vn2 = vx2 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
65 float vn3 = vx3 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
75 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
76 vn1 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
77 vn2 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
78 vn3 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
171 float vn = vx * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
[all …]
scalar-lut64-p2-x4.c
30 const float vmagic_bias = 0x1.800000p23f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4() local
64 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
65 float vn1 = vx1 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
66 float vn2 = vx2 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
67 float vn3 = vx3 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
96 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
97 vn1 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
98 vn2 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
99 vn3 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
178 float vn = vx * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
[all …]
scalar-p5-x4-acc4.c
27 const float vmagic_bias = 0x1.8000FEp23f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4() local
64 float vn0 = vx0 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
65 float vn1 = vx1 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
66 float vn2 = vx2 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
67 float vn3 = vx3 * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
77 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
78 vn1 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
79 vn2 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
80 vn3 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
175 float vn = vx * vlog2e + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
[all …]
scalar-lut64-p2-x4-acc4.c
30 const float vmagic_bias = 0x1.800000p23f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4() local
67 float vn0 = vx0 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
68 float vn1 = vx1 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
69 float vn2 = vx2 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
70 float vn3 = vx3 * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
99 vn0 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
100 vn1 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
101 vn2 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
102 vn3 -= vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
185 float vn = vx * vlog2e_x64 + vmagic_bias; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
[all …]
avx2-p5-x96-acc2.c
28 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() local
76 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
77 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
78 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
79 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
80 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
81 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
82 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
83 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
84 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
[all …]
avx2-p5-x96-acc3.c
28 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() local
77 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
78 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
79 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
80 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
81 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
82 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
83 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
84 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
85 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
[all …]
avx2-p5-x96-acc6.c
28 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() local
80 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
81 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
82 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
83 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
84 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
85 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
86 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
87 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
88 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
[all …]
avx2-p5-x96.c
28 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() local
75 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
76 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
77 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
78 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
79 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
80 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
81 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
82 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
83 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
[all …]
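
Note: the pattern repeated in the f32-raddstoreexpminusmax hits above is the magic-bias rounding trick. Adding 0x1.8000FEp23f to x*log2(e) forces the product to be rounded to an integer n held in the low mantissa bits; subtracting the bias afterwards recovers n as a float. The bias equals 1.5*2^23 + 127, which appears to pre-add the IEEE-754 exponent bias so that a single left shift by 23 rebuilds the float 2**n. The following is a minimal scalar sketch of that mechanism, not XNNPACK's exact kernel: the polynomial is a crude stand-in for the degree-5 one in the "p5" kernels, there is no under/overflow handling, and the function name is illustrative.

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of the magic-bias exp trick; not XNNPACK's exact kernel code. */
static float magic_bias_exp_sketch(float x) {
  const float vmagic_bias = 0x1.8000FEp23f;     /* 1.5*2^23 + 127 */
  const float vlog2e = 0x1.715476p+0f;          /* log2(e) */
  const float vminus_ln2_hi = -0x1.62E43p-1f;   /* -ln(2), high part */
  const float vminus_ln2_lo = 0x1.05C61p-29f;   /* -ln(2), low correction */

  /* Round x*log2(e) to an integer n held in the low mantissa bits. */
  float vn = x * vlog2e + vmagic_bias;

  /* The low bits now hold 127 + n; shifting by 23 builds the float 2**n. */
  uint32_t vn_bits;
  memcpy(&vn_bits, &vn, sizeof vn_bits);
  const uint32_t vs_bits = vn_bits << 23;
  float vs;
  memcpy(&vs, &vs_bits, sizeof vs);

  /* Recover n as a float and reduce the argument: t = x - n*ln(2). */
  vn -= vmagic_bias;
  float vt = vn * vminus_ln2_hi + x;
  vt = vn * vminus_ln2_lo + vt;

  /* Stand-in degree-2 polynomial: exp(t) ~= 1 + t + t*t/2. */
  float vp = 0.5f * vt + 1.0f;
  vp = vp * vt + 1.0f;
  return vs * vp;   /* no under/overflow handling in this sketch */
}

int main(void) {
  printf("%f vs %f\n", magic_bias_exp_sketch(-1.5f), expf(-1.5f));
  return 0;
}
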
/external/XNNPACK/src/f32-sigmoid/gen/
scalar-lut2048-p1-div-x4.c
30 const float vmagic_bias = 0x1.800000p23f; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4() local
70 float vn0 = vz0 * vminus_log2e_x2048 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
71 float vn1 = vz1 * vminus_log2e_x2048 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
72 float vn2 = vz2 * vminus_log2e_x2048 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
73 float vn3 = vz3 * vminus_log2e_x2048 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
102 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
103 vn1 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
104 vn2 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
105 vn3 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
196 float vn = vz * vminus_log2e_x2048 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
[all …]
scalar-p5-div-x4.c
27 const float vmagic_bias = 0x1.8000FEp23f; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4() local
68 float vn0 = vz0 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
69 float vn1 = vz1 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
70 float vn2 = vz2 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
71 float vn3 = vz3 * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
81 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
82 vn1 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
83 vn2 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
84 vn3 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
194 float vn = vz * vminus_log2e + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
[all …]
scalar-lut64-p2-div-x4.c
30 const float vmagic_bias = 0x1.800000p23f; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4() local
70 float vn0 = vz0 * vminus_log2e_x64 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
71 float vn1 = vz1 * vminus_log2e_x64 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
72 float vn2 = vz2 * vminus_log2e_x64 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
73 float vn3 = vz3 * vminus_log2e_x64 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
102 vn0 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
103 vn1 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
104 vn2 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
105 vn3 -= vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
202 float vn = vz * vminus_log2e_x64 + vmagic_bias; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
[all …]
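
Note: the sigmoid lut64/lut2048 hits use the plain bias 0x1.800000p23f together with a scaled multiplier (log2(e)*64 or *2048), so the low bits of vn become a table index while the higher bits carry the integer exponent; the negated multipliers (vminus_log2e_*) suggest these kernels evaluate exp of a non-positive argument. Below is a hedged sketch of a 64-entry variant under those assumptions: the table, helper names, constant split for ln(2)/64, and the degree-2 polynomial are my own illustrative choices, not XNNPACK's.

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative names; XNNPACK's table and helpers are not spelled this way. */
static float exp2_k_over_64[64];

static uint32_t as_bits(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
static float from_bits(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

static float lut64_exp_sketch(float x) {
  const float vmagic_bias = 0x1.800000p23f;        /* 1.5*2^23, no +127 here */
  const float vlog2e_x64 = 0x1.715476p+6f;         /* log2(e) * 64 */
  const float vminus_ln2_o64_hi = -0x1.62E43p-7f;  /* -ln(2)/64, split */
  const float vminus_ln2_o64_lo = 0x1.05C61p-35f;

  /* m = round(x*log2(e)*64) lands in the low mantissa bits of vn. */
  float vn = x * vlog2e_x64 + vmagic_bias;

  const uint32_t vn_bits = as_bits(vn);
  const uint32_t vidx = vn_bits & 0x3F;                   /* k: table index */
  const uint32_t ve = (vn_bits & ~UINT32_C(0x3F)) << 17;  /* n moved into the exponent field */
  const float vs = from_bits(as_bits(exp2_k_over_64[vidx]) + ve);  /* 2^n * 2^(k/64) */

  vn -= vmagic_bias;
  float vt = vn * vminus_ln2_o64_hi + x;   /* t = x - m*ln(2)/64 */
  vt = vn * vminus_ln2_o64_lo + vt;

  /* Stand-in for the "p2" polynomial: exp(t) ~= 1 + t + t*t/2. */
  const float vp = vt * vt * 0.5f + vt;
  return vs + vs * vp;
}

int main(void) {
  for (int k = 0; k < 64; k++) exp2_k_over_64[k] = exp2f((float) k / 64.0f);
  printf("%f vs %f\n", lut64_exp_sketch(-1.5f), expf(-1.5f));
  return 0;
}
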
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x88.c
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() local
74 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
75 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
76 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
77 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
78 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
79 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
80 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
81 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
82 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
[all …]
avx2-p5-x96.c
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() local
76 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
77 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
78 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
79 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
80 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
81 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
82 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
83 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
84 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
[all …]
avx2-p5-x80.c
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80() local
72 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
73 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
74 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
75 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
76 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
77 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
78 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
79 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
80 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
[all …]
avx2-p5-x72.c
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72() local
70 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
71 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
72 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
73 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
74 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
75 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
76 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
77 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
78 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
[all …]
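
Note: in the AVX2 hits above, _mm256_fmadd_ps fuses the multiply by log2(e) with the magic-bias add; the snippets only show that step, but presumably the bias subtraction and the shift of the rounded integer into the exponent field follow as in the scalar case. Below is a rough intrinsics sketch under those assumptions (compile with -mavx2 -mfma); the degree-2 polynomial again stands in for the kernels' "p5" polynomial and edge cases are ignored.

#include <immintrin.h>
#include <math.h>
#include <stdio.h>

/* Rough sketch only; not XNNPACK's kernel code. */
static __m256 avx2_exp_sketch(__m256 vx) {
  const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f);
  const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);
  const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);
  const __m256 vone = _mm256_set1_ps(1.0f);

  /* Fused multiply-add: round(x*log2(e)) ends up in the low mantissa bits. */
  __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);

  /* Shift the rounded integer (127 + n) into the exponent field: vs = 2**n. */
  const __m256 vs = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_castps_si256(vn), 23));

  vn = _mm256_sub_ps(vn, vmagic_bias);

  /* t = x - n*ln(2), in two FMAs for extra precision. */
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);

  /* Stand-in polynomial: exp(t) ~= 1 + t + t*t/2. */
  __m256 vp = _mm256_fmadd_ps(_mm256_set1_ps(0.5f), vt, vone);
  vp = _mm256_fmadd_ps(vp, vt, vone);
  return _mm256_mul_ps(vs, vp);
}

int main(void) {
  float in[8] = {-2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 1.5f, 2.0f};
  float out[8];
  _mm256_storeu_ps(out, avx2_exp_sketch(_mm256_loadu_ps(in)));
  for (int i = 0; i < 8; i++) printf("%f vs %f\n", out[i], expf(in[i]));
  return 0;
}
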
/external/XNNPACK/src/f32-raddexpminusmax/gen/
avx2-p5-x96-acc3.c
27 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() local
76 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
77 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
78 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
79 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
80 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
81 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
82 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
83 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
84 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
[all …]
avx2-p5-x96-acc6.c
27 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() local
79 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
80 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
81 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
82 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
83 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
84 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
85 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
86 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
87 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
[all …]
avx2-p5-x96.c
27 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() local
74 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
75 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
76 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
77 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
78 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
79 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
80 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
81 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
82 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
[all …]
avx2-p5-x96-acc2.c
27 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() local
75 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
76 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
77 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
78 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
79 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
80 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
81 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
82 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
83 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
[all …]
avx2-p5-x80.c
27 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80() local
70 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
71 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
72 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
73 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
74 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
75 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
76 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
77 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
78 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
[all …]
avx2-p5-x80-acc2.c
27 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2() local
71 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
72 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
73 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
74 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
75 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
76 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
77 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
78 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
79 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
[all …]
avx2-p5-x72.c
27 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72() local
68 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
69 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
70 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
71 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
72 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
73 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
74 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
75 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
76 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
[all …]
avx2-p5-x72-acc3.c
27 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3() local
70 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
71 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
72 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
73 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
74 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
75 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
76 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
77 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
78 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
[all …]
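
Note: going by the kernel names, f32-raddexpminusmax appears to reduce (add up) exp(x[i] - max), and the "-accN" suffixes split the sum across N independent accumulators. The sketch below shows only that structure; expf() stands in for the magic-bias approximation visible in the snippets, and the function name is illustrative.

#include <math.h>
#include <stddef.h>
#include <stdio.h>

/* Structural sketch based on the kernel names above, not XNNPACK's code. */
static float raddexpminusmax_sketch(const float* x, size_t n, float max) {
  float vacc0 = 0.0f, vacc1 = 0.0f;  /* two accumulators, as in "-acc2" */
  size_t i = 0;
  for (; i + 1 < n; i += 2) {
    vacc0 += expf(x[i] - max);
    vacc1 += expf(x[i + 1] - max);
  }
  if (i < n) {
    vacc0 += expf(x[i] - max);  /* remainder element */
  }
  return vacc0 + vacc1;
}

int main(void) {
  const float x[5] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
  /* Softmax-style usage: subtract the running max before exponentiating. */
  printf("%f\n", raddexpminusmax_sketch(x, 5, 5.0f));
  return 0;
}
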
