
Searched refs:vt8 (Results 1 – 25 of 90) sorted by relevance
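The matches below are all variants of the same computation: vt8 is the ninth unrolled vector in each kernel's evaluation of exp(x - max). Each kernel reduces the input to t = x - n*ln2 with n = round(x*log2(e)), splitting ln2 into hi and lo parts for extra precision, evaluates a degree-5 polynomial in t with FMAs in Horner form, and reconstructs the result from s = 2^n. A minimal scalar C sketch of the AVX2 pattern follows; the helper name is illustrative, and the Taylor coefficients stand in for the tuned minimax coefficients vc1..vc5 in the generated sources.

    #include <math.h>

    /* Scalar model of the AVX2 p5 pattern in the matches below
       (illustrative helper, not XNNPACK code). */
    static float exp_p5(float x) {
      const float log2e        =  0x1.715476p+0f;  /* log2(e) */
      const float minus_ln2_hi = -0x1.62E43p-1f;   /* hi part of -ln(2) */
      const float minus_ln2_lo =  0x1.05C61p-29f;  /* lo correction */

      const float n = rintf(x * log2e);  /* vn8: nearest integer */
      float t = n * minus_ln2_hi + x;    /* vt8 = fmadd(vn8, vminus_ln2_hi, vx8) */
      t = n * minus_ln2_lo + t;          /* vt8 = fmadd(vn8, vminus_ln2_lo, vt8) */

      /* Horner steps, one per vp8 = fmadd(vp8, vt8, vc_k) match; Taylor
         coefficients stand in for the tuned vc5..vc1. */
      float p = 1.0f / 120.0f;           /* ~vc5 */
      p = p * t + 1.0f / 24.0f;          /* ~vc4 */
      p = p * t + 1.0f / 6.0f;           /* ~vc3 */
      p = p * t + 0.5f;                  /* ~vc2 */
      p = p * t + 1.0f;                  /* ~vc1 */

      const float s = ldexpf(1.0f, (int)n);  /* vs8 = 2^n */
      t *= s;                                /* vt8 = mul(vt8, vs8) */
      return t * p + s;                      /* vf8 = fmadd(vt8, vp8, vs8) */
    }

The acc2/acc3/acc5 suffixes only change how many partial sums the surrounding reduction keeps; the per-vector math shown above is identical across them.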


/external/XNNPACK/src/f32-raddexpminusmax/gen/
avx2-p5-x72-acc3.c (in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3):
  113: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  123: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  134: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  144: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  154: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  164: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  178: vt8 = _mm256_mul_ps(vt8, vs8);
  188: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
avx2-p5-x72.c (in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72):
  111: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  121: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  132: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  142: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  152: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  162: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  176: vt8 = _mm256_mul_ps(vt8, vs8);
  186: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
avx512f-p5-scalef-x144.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144):
  85: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  95: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  106: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  116: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  126: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  136: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  146: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
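The avx512f "scalef" kernels end differently from the AVX2 ones: there is no separate vf8/vs8 reconstruction among their matches. Instead the constant term is folded into the polynomial (the extra vc0 step above) and the exponent is applied in one step, f = p(t) * 2^n, presumably via _mm512_scalef_ps as the kernel names suggest (that call sits outside the matched lines). A scalar sketch of this variant, reusing <math.h> from the sketch above; the helper name is illustrative:

    /* Scalar model of the avx512f "scalef" tail: c0 is folded into the
       polynomial, then the exponent is applied in a single step. */
    static float exp_p5_scalef(float x) {
      const float n = rintf(x * 0x1.715476p+0f);  /* log2(e) */
      float t = n * -0x1.62E43p-1f + x;           /* -ln(2), hi part */
      t = n * 0x1.05C61p-29f + t;                 /* lo correction */
      float p = 1.0f / 120.0f;                    /* ~vc5 */
      p = p * t + 1.0f / 24.0f;                   /* ~vc4 */
      p = p * t + 1.0f / 6.0f;                    /* ~vc3 */
      p = p * t + 0.5f;                           /* ~vc2 */
      p = p * t + 1.0f;                           /* ~vc1 */
      p = p * t + 1.0f;                           /* ~vc0: constant folded in */
      return ldexpf(p, (int)n);  /* stands in for _mm512_scalef_ps(vp8, vn8) */
    }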
avx512f-p5-scalef-x144-acc3.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3):
  87: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  97: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  108: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  118: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  128: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  138: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  148: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
avx2-p5-x80-acc2.c (in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2):
  117: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  128: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  140: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  151: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  162: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  173: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  188: vt8 = _mm256_mul_ps(vt8, vs8);
  199: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
avx2-p5-x80.c (in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80):
  116: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  127: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  139: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  150: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  161: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  172: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  187: vt8 = _mm256_mul_ps(vt8, vs8);
  198: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
avx512f-p5-scalef-x160-acc5.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5):
  92: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  103: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  115: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  126: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  137: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  148: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  159: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
avx512f-p5-scalef-x160.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160):
  88: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  99: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  111: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  122: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  133: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  144: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  155: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
avx512f-p5-scalef-x160-acc2.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2):
  89: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  100: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  112: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  123: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  134: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  145: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  156: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x72.c (in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72):
  113: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  123: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  134: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  144: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  154: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  164: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  178: vt8 = _mm256_mul_ps(vt8, vs8);
  188: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
avx512f-p5-scalef-x144.c (in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144):
  86: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  96: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  107: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  117: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  127: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  137: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  147: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
avx2-p5-x80.c (in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80):
  118: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  129: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  141: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  152: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  163: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  174: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  189: vt8 = _mm256_mul_ps(vt8, vs8);
  200: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
avx512f-p5-scalef-x160.c (in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160):
  89: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  100: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  112: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  123: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  134: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  145: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  156: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx512f-p5-scalef-x144.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144):
  86: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  96: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  107: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  117: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  127: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  137: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  147: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
avx512f-p5-scalef-x144-acc3.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3):
  88: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  98: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  109: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  119: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  129: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  139: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  149: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
avx2-p5-x72-acc3.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72_acc3):
  114: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  124: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  135: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  145: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  155: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  165: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  179: vt8 = _mm256_mul_ps(vt8, vs8);
  189: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
avx2-p5-x72.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72):
  112: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  122: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  133: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  143: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  153: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  163: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  177: vt8 = _mm256_mul_ps(vt8, vs8);
  187: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
avx512f-p5-scalef-x160-acc2.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2):
  90: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  101: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  113: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  124: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  135: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  146: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  157: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
avx512f-p5-scalef-x160.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160):
  89: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  100: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  112: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  123: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  134: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  145: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  156: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
avx2-p5-x80.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80):
  117: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  128: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  140: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  151: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  162: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  173: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  188: vt8 = _mm256_mul_ps(vt8, vs8);
  199: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
avx2-p5-x80-acc5.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5):
  121: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  132: vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  144: __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);
  155: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  166: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  177: vp8 = _mm256_fmadd_ps(vp8, vt8, vc1);
  192: vt8 = _mm256_mul_ps(vt8, vs8);
  203: __m256 vf8 = _mm256_fmadd_ps(vt8, vp8, vs8);
/external/XNNPACK/src/f32-vscaleextexp/gen/
avx512f-p5-scalef-x144.c (in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144):
  76: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  86: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  97: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  107: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  117: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  127: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  137: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
avx512f-p5-scalef-x160.c (in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160):
  78: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  89: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  101: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  112: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  123: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  134: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  145: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
/external/XNNPACK/src/f32-raddextexp/gen/
avx512f-p5-scalef-x144.c (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144):
  76: __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8);
  86: vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
  97: __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  107: vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  117: vp8 = _mm512_fmadd_ps(vp8, vt8, vc2);
  127: vp8 = _mm512_fmadd_ps(vp8, vt8, vc1);
  137: vp8 = _mm512_fmadd_ps(vp8, vt8, vc0);
/external/XNNPACK/src/f32-velu/gen/
velu-avx2-rr1-p6-x72.c (in xnn_f32_velu_ukernel__avx2_rr1_p6_x72):
  101: __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vz8);
  111: __m256 vp8 = _mm256_fmadd_ps(vc6, vt8, vc5);
  121: vp8 = _mm256_fmadd_ps(vp8, vt8, vc4);
  131: vp8 = _mm256_fmadd_ps(vp8, vt8, vc3);
  141: vp8 = _mm256_fmadd_ps(vp8, vt8, vc2);
  159: vp8 = _mm256_mul_ps(vp8, vt8);
  160: vt8 = _mm256_mul_ps(vt8, vs8);
  179: vp8 = _mm256_fmadd_ps(vp8, vt8, vt8);
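The velu matches follow the same recipe with two differences: "rr1" reduces with a single vminus_ln2 constant instead of a hi/lo split, and "p6" runs a degree-6 polynomial only down to vc2, because ELU needs e^z - 1 rather than e^z. A scalar sketch, again with an illustrative helper name and Taylor placeholder coefficients; the final (s - 1) and alpha steps are assumptions, since they fall outside the matched lines:

    /* Scalar model of the ELU negative branch in the rr1-p6 kernel:
       e^z - 1 = (s - 1) + s*t + s*t^2*q(t), with s = 2^n. */
    static float elu_expm1_p6(float z) {
      const float n = rintf(z * 0x1.715476p+0f);
      float t = n * -0x1.62E43p-1f + z;  /* rr1: single -ln(2) constant */
      float p = 1.0f / 720.0f;           /* ~vc6 */
      p = p * t + 1.0f / 120.0f;         /* ~vc5 */
      p = p * t + 1.0f / 24.0f;          /* ~vc4 */
      p = p * t + 1.0f / 6.0f;           /* ~vc3 */
      p = p * t + 0.5f;                  /* ~vc2 */
      p = p * t;                         /* vp8 = mul(vp8, vt8): t*q(t) */
      const float s = ldexpf(1.0f, (int)n);
      t *= s;                            /* vt8 = mul(vt8, vs8) */
      p = p * t + t;                     /* vp8 = fmadd(vp8, vt8, vt8) */
      return p + (s - 1.0f);             /* assumed tail: yields e^z - 1 */
    }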
