/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx2-p5-x88.c
    214  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() local
    228  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
    241  vf10 = _mm256_mul_ps(vf10, vscale);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
    254  _mm256_storeu_ps(output + 80, vf10);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()

D | avx2-p5-x96.c
    226  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() local
    241  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
    255  vf10 = _mm256_mul_ps(vf10, vscale);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
    269  _mm256_storeu_ps(output + 80, vf10);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()

D | avx512f-p5-scalef-x176.c
    182  __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() local
    195  vf10 = _mm512_mul_ps(vf10, vscale);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176()
    209  _mm512_storeu_ps(output + 160, vf10);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176()

D | avx512f-p5-scalef-x192.c
    192  __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local
    206  vf10 = _mm512_mul_ps(vf10, vscale);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
    221  _mm512_storeu_ps(output + 160, vf10);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
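All four files above share the same tail for vector 10 (the x88/x96/x176/x192 suffix is the unroll width in floats, so vf10 is the eleventh of 11 or 12 vectors per iteration, hence the output + 80 and output + 160 offsets): the degree-5 ("p5") polynomial is folded back into exp(x - max) with an FMA, lanes whose input lies below vdenorm_cutoff are flushed to zero (AVX2 only; the scalef variants get well-behaved underflow from _mm512_scalef_ps), the scale is applied, and the vector is stored. Below is a minimal single-vector sketch of the AVX2 pattern, assuming the standard magic-bias range reduction these kernels use; the function name is hypothetical and the coefficient values are illustrative, not quoted from upstream.

#include <immintrin.h>

/* Hypothetical single-vector (8-float) version of the avx2-p5 pattern.
 * Compile with -mavx2 -mfma. */
static void scaleexpminusmax_x8(const float* input, float* output,
                                float scale, float max) {
  const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f);
  /* Magic bias: after the fmadd below, the low mantissa bits of vn hold
   * n + 127, so shifting the raw bits left by 23 yields the float 2**n. */
  const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
  /* ln(2) split into a high part and a low correction for accuracy. */
  const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);
  const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);
  /* Degree-5 polynomial coefficients for exp(t) on |t| <= ln(2)/2;
   * illustrative values close to 1/120, 1/24, 1/6, 1/2, 1. */
  const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f);
  const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f);
  const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f);
  const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f);
  const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f);
  /* ~ln(2**-126): below this the result is denormal and is flushed. */
  const __m256 vdenorm_cutoff = _mm256_set1_ps(-0x1.5D589Ep6f);
  const __m256 vscale = _mm256_set1_ps(scale);

  const __m256 vx = _mm256_sub_ps(_mm256_loadu_ps(input), _mm256_set1_ps(max));
  /* n = round(x * log2(e)); vs = 2**n reconstructed from vn's bits. */
  __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
  const __m256 vs = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_castps_si256(vn), 23));
  vn = _mm256_sub_ps(vn, vmagic_bias);
  /* t = x - n*ln(2), subtracted in two steps. */
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);
  /* p = c1 + t*(c2 + t*(c3 + t*(c4 + t*c5))) */
  __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
  vp = _mm256_fmadd_ps(vp, vt, vc3);
  vp = _mm256_fmadd_ps(vp, vt, vc2);
  vp = _mm256_fmadd_ps(vp, vt, vc1);
  vt = _mm256_mul_ps(vt, vs);
  /* The four matched lines: f = s + (t*s)*p = s*exp(t) = exp(x - max);
   * the cmp mask is all-ones where x < cutoff, so andnot zeroes those
   * lanes; then scale and store. */
  __m256 vf = _mm256_fmadd_ps(vt, vp, vs);
  vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
  vf = _mm256_mul_ps(vf, vscale);
  _mm256_storeu_ps(output, vf);
}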
/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx512f-p5-scalef-x176.c
    173  __m512 vf10 = _mm512_mul_ps(vp10, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() local
    198  vf10 = _mm512_scalef_ps(vf10, ve10);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
    212  _mm512_storeu_ps(y + 160, vf10);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()

D | avx512f-p5-scalef-x192.c
    182  __m512 vf10 = _mm512_mul_ps(vp10, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local
    209  vf10 = _mm512_scalef_ps(vf10, ve10);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
    224  _mm512_storeu_ps(y + 160, vf10);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()

D | avx2-p5-x88.c
    179  __m256 vf10 = _mm256_mul_ps(vp10, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() local
    235  vf10 = _mm256_mul_ps(vf10, vs10);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
    248  _mm256_storeu_ps(y + 80, vf10);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()

D | avx2-p5-x96.c
    188  __m256 vf10 = _mm256_mul_ps(vp10, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() local
    248  vf10 = _mm256_mul_ps(vf10, vs10);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
    262  _mm256_storeu_ps(y + 80, vf10);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
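vscaleextexp differs from vscaleexpminusmax in that the scale arrives in "extended exponent" form, split into a mantissa part (vscalev, visible above) and an exponent part that is applied only at the very end. On AVX-512 that final step is a single _mm512_scalef_ps with exponent ve10; AVX2 has no scalef, so those kernels instead multiply by a reconstructed power of two (vs10 above). A minimal sketch of the AVX-512 tail, assuming vp holds the polynomial value and vn the range-reduction exponent; the wrapper name and the name vscalee for the exponent part of the scale are assumptions:

#include <immintrin.h>

/* Hypothetical tail of one 16-float vector in the AVX-512 vscaleextexp
 * kernels; compile with -mavx512f. */
static inline void scaleextexp_tail_x16(float* y, __m512 vp, __m512 vn,
                                        __m512 vscalev, __m512 vscalee) {
  __m512 vf = _mm512_mul_ps(vp, vscalev);        /* mantissa: p * scale_v */
  const __m512 ve = _mm512_add_ps(vn, vscalee);  /* exponent: n + scale_e */
  vf = _mm512_scalef_ps(vf, ve);                 /* f = f * 2**floor(e)   */
  _mm512_storeu_ps(y, vf);
}

Because scalef underflows and overflows cleanly to 0 and infinity, no separate denormal flush is needed, which is why the cmp/andnot pair from the previous section is absent in the AVX-512 matches here.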
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | avx2-rr1-p5-x96-acc2.c
    195  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2() local
    208  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
    221  _mm256_storeu_ps(output + 80, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
    235  vacc0 = _mm256_add_ps(vacc0, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()

D | avx2-rr1-p5-x96.c
    194  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96() local
    207  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
    220  _mm256_storeu_ps(output + 80, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
    234  vacc0 = _mm256_add_ps(vacc0, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()

D | avx2-rr1-p5-x96-acc3.c
    196  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3() local
    209  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
    222  _mm256_storeu_ps(output + 80, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
    236  vacc1 = _mm256_add_ps(vacc1, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()

D | avx2-rr1-p5-x96-acc6.c
    199  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6() local
    212  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
    225  _mm256_storeu_ps(output + 80, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
    239  vacc4 = _mm256_add_ps(vacc4, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()

D | avx512f-rr1-p5-scalef-x192-acc2.c
    169  const __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2() local
    182  _mm512_storeu_ps(output + 160, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
    196  vacc0 = _mm512_add_ps(vacc0, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()

D | avx512f-rr1-p5-scalef-x192.c
    168  const __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192() local
    181  _mm512_storeu_ps(output + 160, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
    195  vacc0 = _mm512_add_ps(vacc0, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()

D | avx512f-rr1-p5-scalef-x192-acc3.c
    170  const __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3() local
    183  _mm512_storeu_ps(output + 160, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
    197  vacc1 = _mm512_add_ps(vacc1, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()

D | avx512f-rr1-p5-scalef-x192-acc6.c
    173  const __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6() local
    186  _mm512_storeu_ps(output + 160, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
    200  vacc4 = _mm512_add_ps(vacc4, vf10);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
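The raddstoreexpminusmax kernels fuse two softmax passes: every exp(x - max) vector is stored to output and also added into a running sum for the denominator. The accN suffix is the number of independent accumulators, which is why vector 10 lands in vacc0 (one or two accumulators), vacc1 (three), or vacc4 (six): the accumulator index is 10 mod N. A minimal sketch of the AVX-512 per-vector tail; the wrapper name is an assumption:

#include <immintrin.h>

/* Hypothetical per-vector tail of the avx512f raddstoreexpminusmax kernels:
 * compute exp(x - max), store it, and fold it into one accumulator. */
static inline __m512 raddstore_tail_x16(float* output, __m512 vp, __m512 vn,
                                        __m512 vacc) {
  const __m512 vf = _mm512_scalef_ps(vp, vn);  /* exp(x - max) = p * 2**n */
  _mm512_storeu_ps(output, vf);                /* store the element-wise result */
  return _mm512_add_ps(vacc, vf);              /* accumulate the sum of exponentials */
}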
/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx2-p5-x96.c
    224  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() local
    239  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
    253  vacc0 = _mm256_add_ps(vacc0, vf10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()

D | avx2-p5-x96-acc6.c
    229  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() local
    244  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
    258  vacc4 = _mm256_add_ps(vacc4, vf10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()

D | avx2-p5-x96-acc2.c
    225  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() local
    240  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
    254  vacc0 = _mm256_add_ps(vacc0, vf10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()

D | avx2-p5-x96-acc3.c
    226  __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() local
    241  vf10 = _mm256_andnot_ps(_mm256_cmp_ps(vx10, vdenorm_cutoff, _CMP_LT_OS), vf10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
    255  vacc1 = _mm256_add_ps(vacc1, vf10);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()

D | avx512f-p5-scalef-x192-acc2.c
    192  const __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local
    206  vacc0 = _mm512_add_ps(vacc0, vf10);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()

D | avx512f-p5-scalef-x192-acc3.c
    193  const __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local
    207  vacc1 = _mm512_add_ps(vacc1, vf10);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()

D | avx512f-p5-scalef-x192.c
    191  const __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local
    205  vacc0 = _mm512_add_ps(vacc0, vf10);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()

D | avx512f-p5-scalef-x192-acc6.c
    196  const __m512 vf10 = _mm512_scalef_ps(vp10, vn10);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local
    210  vacc4 = _mm512_add_ps(vacc4, vf10);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
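raddexpminusmax is the reduce-only sibling: the same exp tail, but vf10 is only accumulated, never stored. Spreading the sum over 2, 3, or 6 accumulators shortens the loop-carried floating-point add dependency chain; the partial sums are folded together after the loop. A minimal sketch of the per-vector step plus a hypothetical final reduction for an acc2 variant (the helper names are assumptions):

#include <immintrin.h>

/* Hypothetical per-vector step: exp(x - max) goes straight into an
 * accumulator, with no store. */
static inline __m512 radd_step_x16(__m512 vp, __m512 vn, __m512 vacc) {
  return _mm512_add_ps(vacc, _mm512_scalef_ps(vp, vn));
}

/* Hypothetical final reduction for an acc2 variant: fold the two partial
 * sums, then sum the 16 lanes down to a scalar. */
static inline float radd_finalize_acc2(__m512 vacc0, __m512 vacc1) {
  return _mm512_reduce_add_ps(_mm512_add_ps(vacc0, vacc1));
}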