
Searched refs:vt4 (Results 1 – 25 of 292) sorted by relevance


/external/XNNPACK/src/f32-velu/gen/
velu-scalar-rr2-p6-x5.c:76 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() local
82 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
102 vt4 = 0.0f; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
109 float vp4 = vc6 * vt4 + vc5; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
115 vp4 = vp4 * vt4 + vc4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
121 vp4 = vp4 * vt4 + vc3; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
127 vp4 = vp4 * vt4 + vc2; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
133 vp4 *= vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
143 vt4 *= vs4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
150 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
velu-wasm-rr2-p6-x5.c:76 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() local
82 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
89 float vp4 = vc6 * vt4 + vc5; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
95 vp4 = vp4 * vt4 + vc4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
101 vp4 = vp4 * vt4 + vc3; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
107 vp4 = vp4 * vt4 + vc2; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
113 vp4 *= vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
123 vt4 *= vs4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
130 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
velu-scalar-rr2-p6-x6.c:81 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() local
88 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
109 vt4 = 0.0f; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
120 float vp4 = vc6 * vt4 + vc5; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
127 vp4 = vp4 * vt4 + vc4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
134 vp4 = vp4 * vt4 + vc3; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
141 vp4 = vp4 * vt4 + vc2; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
148 vp4 *= vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
159 vt4 *= vs4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
168 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
velu-wasm-rr2-p6-x6.c:81 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() local
88 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
96 float vp4 = vc6 * vt4 + vc5; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
103 vp4 = vp4 * vt4 + vc4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
110 vp4 = vp4 * vt4 + vc3; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
117 vp4 = vp4 * vt4 + vc2; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
124 vp4 *= vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
135 vt4 *= vs4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
144 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
velu-scalar-rr2-lut16-p3-x5.c:85 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() local
108 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
111 vt4 = 0.0f; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
118 float vp4 = vc3 * vt4 + vc2; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
124 vp4 *= vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
134 vt4 *= vs4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
141 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
velu-scalar-rr2-lut16-p3-x6.c:91 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local
116 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
119 vt4 = 0.0f; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
131 float vp4 = vc3 * vt4 + vc2; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
138 vp4 *= vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
149 vt4 *= vs4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
158 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
velu-wasm-rr2-lut16-p3-x5.c:85 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() local
92 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
98 float vp4 = vc3 * vt4 + vc2; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
104 vp4 *= vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
114 vt4 *= vs4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
121 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
velu-wasm-rr2-lut16-p3-x6.c:91 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() local
100 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
107 float vp4 = vc3 * vt4 + vc2; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
114 vp4 *= vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
125 vt4 *= vs4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
134 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
velu-avx-rr2-p6-x40.c:85 __m256 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_hi), vz4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40() local
92 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_lo), vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
98 __m256 vp4 = _mm256_add_ps(_mm256_mul_ps(vc6, vt4), vc5); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
104 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
110 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc3); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
116 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc2); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
122 vp4 = _mm256_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
132 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
139 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
velu-avx512f-rr1-p6-x80.c:76 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() local
82 __m512 vp4 = _mm512_fmadd_ps(vc6, vt4, vc5); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
88 vp4 = _mm512_fmadd_ps(vp4, vt4, vc4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
94 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
100 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
110 vp4 = _mm512_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
111 vt4 = _mm512_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
123 vp4 = _mm512_fmadd_ps(vp4, vt4, vt4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
velu-avx2-rr1-p6-x40.c:74 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40() local
80 __m256 vp4 = _mm256_fmadd_ps(vc6, vt4, vc5); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
86 vp4 = _mm256_fmadd_ps(vp4, vt4, vc4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
92 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
98 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
108 vp4 = _mm256_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
109 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
120 vp4 = _mm256_fmadd_ps(vp4, vt4, vt4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
velu-avx-rr2-p6-x48.c:91 __m256 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_hi), vz4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48() local
100 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_lo), vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
107 __m256 vp4 = _mm256_add_ps(_mm256_mul_ps(vc6, vt4), vc5); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
114 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
121 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc3); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
128 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc2); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
135 vp4 = _mm256_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
146 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
155 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
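The f32-velu hits above all belong to one recurring expm1 evaluation inside the ELU kernels: reduce the input with a split ln2, evaluate a polynomial in vt, then recombine with the 2^n factor vs. Below is a minimal scalar sketch of that pattern, assuming the usual exp range reduction; the function name, the single-constant ln2 step, rintf/ldexpf, and the Taylor coefficients are illustrative stand-ins, not the magic-bias trick, hi/lo ln2 split, or tuned minimax coefficients the generated kernels actually use.

#include <math.h>

// Sketch of the ELU negative branch: alpha * (exp(z) - 1) for z <= 0.
float elu_negative_branch(float vz, float valpha) {
  // n = round(z / log(2)); the kernels do this with a magic-bias trick instead of rintf.
  const float vn = rintf(vz * 1.44269504f);        // 1.44269504f ~= log2(e)
  const float vs = ldexpf(1.0f, (int) vn);         // vs = 2^n; rebuilt from exponent bits in the kernels
  float vt = vn * -0.6931472f + vz;                // vt = z - n*ln2 (kernels split ln2 into hi/lo parts)
  // Scalar kernels also flush vt to 0 for very negative z (the "vt4 = 0.0f" hits) to avoid denormals.

  // Degree-6 polynomial for exp(t) - 1 ~= t + c2*t^2 + ... + c6*t^6 (plain Taylor terms here).
  float vp = (1.0f/720.0f) * vt + (1.0f/120.0f);   // vc6*vt + vc5
  vp = vp * vt + (1.0f/24.0f);                     // + vc4
  vp = vp * vt + (1.0f/6.0f);                      // + vc3
  vp = vp * vt + 0.5f;                             // + vc2
  vp *= vt;                                        // vp = c2*t + c3*t^2 + ... + c6*t^5

  vt *= vs;                                        // vt = s*t
  vp = vp * vt + vt;                               // vp = s*(exp(t) - 1), the final hit line in each file
  return valpha * (vp + (vs - 1.0f));              // s*exp(t) - 1 = exp(z) - 1, then scaled by alpha
}

The unrolled variants (x5, x6, x40, ...) simply carry several such vt/vp lanes side by side, which is why the search matches the fifth copy, vt4.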
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x40.c:89 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40() local
95 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
102 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
108 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
114 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
120 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
130 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
136 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
avx2-p5-x48.c:94 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48() local
101 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
109 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
116 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
123 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
130 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
141 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
148 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
avx512f-p5-scalef-x80.c:70 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80() local
76 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
83 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
89 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
107 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
avx512f-p5-scalef-x96.c:73 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96() local
80 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
88 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
102 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
116 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
avx2-p5-x56.c:99 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56() local
107 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
116 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
124 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
132 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
140 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
152 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
160 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
avx512f-p5-scalef-x112.c:76 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112() local
84 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
93 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
117 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
125 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
avx2-p5-x64.c:104 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64() local
113 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
123 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
132 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
141 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
150 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
163 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
172 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
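The f32-vscaleexpminusmax results (and the f32-raddexpminusmax ones further down) share a degree-5 evaluation of exp(x - max) rather than the ELU expm1 form: the polynomial ends at vc1 and the result is vf = vt*vp + vs. A rough scalar equivalent follows, again with illustrative Taylor coefficients, a single ln2 constant, and a hypothetical function name standing in for the generated AVX2/AVX-512 code.

#include <math.h>

// Sketch of one lane of the scale-exp-minus-max pattern: scale * exp(x - max).
float scale_exp_minus_max(float vx, float vmax, float vscale) {
  const float vz = vx - vmax;                      // the kernels subtract the row maximum up front
  const float vn = rintf(vz * 1.44269504f);        // n = round(z / ln2); magic-bias rounding in the kernels
  const float vs = ldexpf(1.0f, (int) vn);         // vs = 2^n
  float vt = vn * -0.6931472f + vz;                // vt = z - n*ln2 (the hi/lo split is omitted here)

  // Degree-5 polynomial: exp(t) ~= 1 + t*(c1 + c2*t + c3*t^2 + c4*t^3 + c5*t^4).
  float vp = (1.0f/120.0f) * vt + (1.0f/24.0f);    // vc5*vt + vc4
  vp = vp * vt + (1.0f/6.0f);                      // + vc3
  vp = vp * vt + 0.5f;                             // + vc2
  vp = vp * vt + 1.0f;                             // + vc1

  vt *= vs;                                        // vt = s*t
  const float vf = vt * vp + vs;                   // vf = s*(1 + t*p(t)) ~= exp(z), as in the last hit lines
  return vf * vscale;                              // scaling step implied by the ukernel name
}

In the vectorized kernels each of these lines is a fused multiply-add over a full __m256 or __m512 register, and vt4 is just the fifth of the unrolled accumulator lanes.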
/external/XNNPACK/src/f32-vscaleextexp/gen/
avx512f-p5-scalef-x80.c:64 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() local
70 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
77 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
83 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
89 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
avx512f-p5-scalef-x96.c:66 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() local
73 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
81 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
88 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
102 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
avx2-p5-x40.c:70 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() local
76 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
83 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
89 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
95 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
101 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
107 vp4 = _mm256_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
avx512f-p5-scalef-x112.c:68 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() local
76 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
85 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
93 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
117 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
/external/XNNPACK/src/f32-raddexpminusmax/gen/
avx2-p5-x64-acc2.c:103 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2() local
112 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
122 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
131 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
140 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
149 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
162 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
171 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
avx2-p5-x64.c:102 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64() local
111 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
121 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
130 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
139 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
148 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
161 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
170 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
