
Searched refs:vf7 (Results 1 – 25 of 91) sorted by relevance


/external/XNNPACK/src/f32-sigmoid/gen/
avx2-rr1-p5-div-x64.c
166 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() local
175 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
184 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
193 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
avx2-rr1-p5-div-x72.c
179 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() local
189 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
199 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
209 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
avx2-rr1-p5-nr1fma-x64.c
185 __m256 vf7 = _mm256_mul_ps(ve7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() local
194 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
203 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
212 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
avx2-rr1-p5-div-x80.c
192 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() local
203 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
214 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
225 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
avx-rr2-p5-div-x64.c
192 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() local
201 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
210 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
219 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
avx2-rr1-p5-nr2fma-x64.c
193 __m256 vf7 = _mm256_mul_ps(ve7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local
202 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
211 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
220 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
avx2-rr1-p5-nr2fma-x72.c
209 __m256 vf7 = _mm256_mul_ps(ve7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local
219 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
229 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
239 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
avx2-rr1-p5-nr1fma-x72.c
200 __m256 vf7 = _mm256_mul_ps(ve7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() local
210 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
220 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
230 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
avx2-rr1-p5-nr1fma-x80.c
215 __m256 vf7 = _mm256_mul_ps(ve7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() local
226 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
237 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
248 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
avx512f-rr1-p5-scalef-div-x128.c
154 __m512 vf7 = _mm512_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() local
163 vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone,… in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
172 _mm512_storeu_ps(y + 112, vf7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c
163 __m512 vf7 = _mm512_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() local
172 vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone,… in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
181 _mm512_storeu_ps(y + 112, vf7); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
avx512f-rr1-lut16-p3-perm-scalef-div-x128.c
157 __m512 vf7 = _mm512_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() local
166 vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone,… in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
175 _mm512_storeu_ps(y + 112, vf7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
avx-rr2-p5-div-x72.c
208 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() local
218 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
228 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
238 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
avx2-rr1-p5-nr2fma-x80.c
225 __m256 vf7 = _mm256_mul_ps(ve7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() local
236 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
247 vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
258 _mm256_storeu_ps(y + 56, vf7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c
175 __m512 vf7 = _mm512_mul_ps(ve7, vr7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() local
185 vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone,… in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
194 _mm512_storeu_ps(y + 112, vf7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
avx512f-rr1-p5-scalef-nr1fma-x128.c
172 __m512 vf7 = _mm512_mul_ps(ve7, vr7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() local
182 vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone,… in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128()
191 _mm512_storeu_ps(y + 112, vf7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128()
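
All of the f32-sigmoid hits above are the eighth unrolled lane (index 7) of the kernels' main-loop tail: the ratio e/d reconstructs sigmoid(z) for z = -|x| (the nr1fma/nr2fma variants replace the division with e times a Newton-Raphson reciprocal of d), lanes whose z falls below the denormal cutoff are flushed to zero, and the final blend selects f or 1 - f from the sign of x, using the identity sigmoid(x) = 1 - sigmoid(-x). A minimal scalar sketch of that tail, with hypothetical names (sigmoid_tail, recip_nr1) that are not XNNPACK's:

    #include <math.h>

    /* Scalar sketch of the per-lane tail the f32-sigmoid kernels above apply.
     * The vector intrinsics each step mirrors are noted inline. */
    float sigmoid_tail(float x, float e, float d, float denorm_cutoff) {
      const float z = -fabsf(x);        /* kernels evaluate on z = -|x|                */
      float f = e / d;                  /* _mm256_div_ps(ve, vd) / _mm512_div_ps       */
      if (z < denorm_cutoff) {          /* andnot(cmp_ps(vz, vdenorm_cutoff, LT), vf)  */
        f = 0.0f;                       /* flush lanes that would underflow            */
      }
      return x < 0.0f ? f : 1.0f - f;   /* blendv(1 - vf, vf, vx): sign bit picks vf   */
    }

    /* The *_nr1fma variants avoid the division: start from an approximate
     * reciprocal r ~ 1/d and refine it with one Newton step before f = e * r. */
    float recip_nr1(float d, float r) {
      return r + r * (1.0f - r * d);    /* fmadd(fnmadd(vr, vd, vone), vr, vr)         */
    }

Working on z = -|x| keeps the exponential from overflowing; the blend (or the AVX-512 masked subtract in the hits above) maps the result back to sigmoid(x) for non-negative inputs.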
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x64.c
175 __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64() local
186 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
196 vf7 = _mm256_mul_ps(vf7, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
206 _mm256_storeu_ps(output + 56, vf7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
avx2-p5-x72.c
187 __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72() local
199 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
210 vf7 = _mm256_mul_ps(vf7, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
221 _mm256_storeu_ps(output + 56, vf7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
avx2-p5-x80.c
199 __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80() local
212 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
224 vf7 = _mm256_mul_ps(vf7, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
236 _mm256_storeu_ps(output + 56, vf7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
avx2-p5-x88.c
211 __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() local
225 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
238 vf7 = _mm256_mul_ps(vf7, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
251 _mm256_storeu_ps(output + 56, vf7); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
avx512f-p5-scalef-x128.c
149 __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128() local
159 vf7 = _mm512_mul_ps(vf7, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
170 _mm512_storeu_ps(output + 112, vf7); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
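
The f32-vscaleexpminusmax hits are the same lane of a fused softmax-style pass: the range-reduced pieces are recombined into exp(x - max) with an FMA (or scalef on AVX-512), underflowing lanes are zeroed against the denormal cutoff, and the result is multiplied by a caller-supplied scale before the store. A scalar sketch under those assumptions; expf stands in for the kernels' p5 polynomial reconstruction and the names are illustrative, not XNNPACK's:

    #include <math.h>

    /* Scalar sketch of one lane of the f32-vscaleexpminusmax tail above. */
    float scale_exp_minus_max(float x, float max, float scale,
                              float denorm_cutoff) {
      const float xm = x - max;     /* vx in the kernels (max subtracted earlier)   */
      float f = expf(xm);           /* vf = _mm256_fmadd_ps(vt, vp, vs)             */
      if (xm < denorm_cutoff) {     /* andnot(cmp_ps(vx, vdenorm_cutoff, LT), vf)   */
        f = 0.0f;
      }
      return f * scale;             /* vf = _mm256_mul_ps(vf, vscale)               */
    }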
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx2-p5-x64.c
174 __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64() local
185 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64()
195 _mm256_storeu_ps(output + 56, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64()
206 vacc0 = _mm256_add_ps(vacc0, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64()
avx2-p5-x64-acc2.c
175 __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2() local
186 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
196 _mm256_storeu_ps(output + 56, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
207 vacc1 = _mm256_add_ps(vacc1, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
avx2-p5-x64-acc4.c
177 __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc4() local
188 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc4()
198 _mm256_storeu_ps(output + 56, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc4()
209 vacc3 = _mm256_add_ps(vacc3, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc4()
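
The f32-raddstoreexpminusmax hits perform the same exp(x - max) reconstruction, but each value is both stored and added to a running sum; the _acc2/_acc4 files rotate that addition across two or four accumulators (vacc0…vacc3) to shorten the dependency chain. A scalar sketch of the pattern, with illustrative names and expf again standing in for the polynomial:

    #include <stddef.h>
    #include <math.h>

    /* Scalar sketch of the f32-raddstoreexpminusmax pattern above: write
     * exp(x - max) to the output and accumulate its sum in one pass. */
    float radd_store_exp_minus_max(const float* input, float* output,
                                   size_t n, float max, float denorm_cutoff) {
      float acc = 0.0f;               /* the _acc2/_acc4 kernels keep 2 or 4 of these */
      for (size_t i = 0; i < n; i++) {
        const float xm = input[i] - max;
        float f = expf(xm);           /* vf = fma(vt, vp, vs)                         */
        if (xm < denorm_cutoff) {
          f = 0.0f;                   /* andnot(cmp_lt(vx, vdenorm_cutoff), vf)       */
        }
        output[i] = f;                /* _mm256_storeu_ps(output + ..., vf)           */
        acc += f;                     /* vacc = _mm256_add_ps(vacc, vf)               */
      }
      return acc;
    }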
/external/XNNPACK/src/f32-vscaleextexp/gen/
avx512f-p5-scalef-x128.c
143 __m512 vf7 = _mm512_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() local
162 vf7 = _mm512_scalef_ps(vf7, ve7); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
173 _mm512_storeu_ps(y + 112, vf7); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
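
The f32-vscaleextexp hits keep the result in extended-exponent form: the polynomial value is multiplied by the scale's mantissa, and _mm512_scalef_ps then applies a power-of-two exponent. A scalar sketch with ldexpf standing in for scalef and illustrative names; it assumes the exponent argument already folds the value's and the scale's exponents together, as the kernel does before these lines:

    #include <math.h>

    /* Scalar sketch of the f32-vscaleextexp tail above: multiply by the scale's
     * mantissa, then apply the combined power-of-two exponent. ldexpf
     * approximates _mm512_scalef_ps, which scales by 2^floor(e). */
    float scale_ext_exp(float p, float e, float scale_mantissa) {
      float f = p * scale_mantissa;      /* vf = _mm512_mul_ps(vp, vscalev) */
      return ldexpf(f, (int)floorf(e));  /* vf = _mm512_scalef_ps(vf, ve)   */
    }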
