/external/XNNPACK/src/f32-velu/gen/

velu-avx2-rr1-lut8-p4-perm-x80.c  (matches in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80)
    52  __m256 vx9 = _mm256_loadu_ps(x + 72);   (local)
    64  const __m256 vz9 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx9, vprescale));
   212  vx9 = _mm256_mul_ps(vx9, vbeta);
   223  const __m256 vy9 = _mm256_blendv_ps(vx9, ve9, vx9);

velu-avx2-rr1-p6-x80.c  (matches in xnn_f32_velu_ukernel__avx2_rr1_p6_x80)
    52  __m256 vx9 = _mm256_loadu_ps(x + 72);   (local)
    64  const __m256 vz9 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx9, vprescale));
   214  vx9 = _mm256_mul_ps(vx9, vbeta);
   225  const __m256 vy9 = _mm256_blendv_ps(vx9, ve9, vx9);

velu-avx2-rr1-lut4-p4-perm-x80.c  (matches in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80)
    53  __m256 vx9 = _mm256_loadu_ps(x + 72);   (local)
    65  const __m256 vz9 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx9, vprescale));
   213  vx9 = _mm256_mul_ps(vx9, vbeta);
   224  const __m256 vy9 = _mm256_blendv_ps(vx9, ve9, vx9);

velu-avx2-rr1-lut16-p3-gather-x80.c  (matches in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80)
    52  __m256 vx9 = _mm256_loadu_ps(x + 72);   (local)
    64  const __m256 vz9 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx9, vprescale));
   212  vx9 = _mm256_mul_ps(vx9, vbeta);
   223  const __m256 vy9 = _mm256_blendv_ps(vx9, ve9, vx9);
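
The four hits per file above trace the same ELU tail: the input is prescaled and saturated (vz), fed through an exp approximation, and the sign of vx finally selects between beta*x and alpha*(exp(z) - 1). Below is a condensed one-block sketch of that pattern, assuming the usual magic-bias exp core; the polynomial uses plain Taylor terms and round-number constants where the generated lut8/lut4/lut16/p6 variants use table lookups or fitted coefficients, and elu_block is an illustrative name, not an XNNPACK symbol.

#include <immintrin.h>

/* One 8-lane ELU block: y = x > 0 ? beta*x : alpha*(exp(prescale*x) - 1). */
static inline __m256 elu_block(__m256 vx, __m256 vprescale, __m256 valpha, __m256 vbeta) {
  const __m256 vsat_cutoff = _mm256_set1_ps(-17.328680f);     /* below this, exp(z) - 1 == -1 in float */
  const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);  /* rounds n and pre-positions the exponent bits */
  const __m256 vlog2e      = _mm256_set1_ps(1.442695f);
  const __m256 vminus_ln2  = _mm256_set1_ps(-0.6931472f);     /* rr1: single-constant range reduction */
  const __m256 vone        = _mm256_set1_ps(1.0f);

  const __m256 vz = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx, vprescale));  /* saturate very negative inputs */
  __m256 vn = _mm256_fmadd_ps(vz, vlog2e, vmagic_bias);                        /* n = round(z * log2(e)), still biased */
  const __m256 vs = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn), 23));  /* s = 2^n */
  vn = _mm256_sub_ps(vn, vmagic_bias);                                         /* n as a plain float */
  const __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);                       /* t = z - n*ln(2) */
  __m256 vp = _mm256_fmadd_ps(_mm256_set1_ps(1.0f/24.0f), vt, _mm256_set1_ps(1.0f/6.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(0.5f));
  vp = _mm256_fmadd_ps(vp, vt, vone);
  vp = _mm256_mul_ps(vp, vt);                                                  /* p ~= exp(t) - 1 */
  const __m256 vexpz = _mm256_fmadd_ps(vs, vp, vs);                            /* exp(z) ~= s*(1 + p) */
  const __m256 ve = _mm256_mul_ps(_mm256_sub_ps(vexpz, vone), valpha);         /* alpha*(exp(z) - 1); garbage for large positive x */
  vx = _mm256_mul_ps(vx, vbeta);                                               /* positive branch: beta*x (sign unchanged for beta > 0) */
  return _mm256_blendv_ps(vx, ve, vx);                                         /* sign bit of x picks ve, discarding the garbage lanes */
}
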
/external/XNNPACK/src/f32-raddexpminusmax/gen/

avx2-p5-x80-acc2.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2)
    68  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    80  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   118  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   213  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx2-p5-x80.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80)
    67  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    79  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   117  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   212  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx2-p5-x80-acc5.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5)
    71  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    83  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   121  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   216  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx512f-p5-scalef-x160-acc5.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5)
    68  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);   (local)
    80  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
    93  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);

avx512f-p5-scalef-x160.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160)
    64  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);   (local)
    76  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
    89  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);

avx512f-p5-scalef-x160-acc2.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2)
    65  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);   (local)
    77  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
    90  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);

avx2-p5-x96.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96)
    69  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    83  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   127  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   238  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx2-p5-x96-acc2.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2)
    70  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    84  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   128  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   239  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx2-p5-x96-acc3.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3)
    71  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    85  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   129  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   240  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
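
The AVX2 p5 entries above all share one exp(x - max) block: subtract the row maximum, round x*log2(e) with a magic bias, rebuild 2^n from the rounded bits, reduce to t = x - n*ln(2) in two steps, evaluate a degree-5 polynomial, zero any lane whose result would be denormal, and add into a running vector sum. A single 8-lane sketch of that block follows; exp_block_acc and the constants are illustrative, and the Taylor terms stand in for the generated kernels' fitted minimax coefficients.

#include <immintrin.h>

/* One 8-lane block of vacc += exp(input[i] - max); the x80/x96 kernels unroll 10 or 12 of these. */
static inline __m256 exp_block_acc(const float* input, __m256 vi_max, __m256 vacc) {
  const __m256 vlog2e         = _mm256_set1_ps(1.442695f);      /* log2(e) */
  const __m256 vmagic_bias    = _mm256_set1_ps(0x1.8000FEp23f); /* rounds n and pre-positions the exponent bits */
  const __m256 vminus_ln2_hi  = _mm256_set1_ps(-0.693359375f);  /* -ln(2), high part */
  const __m256 vminus_ln2_lo  = _mm256_set1_ps(2.12194440e-4f); /* -ln(2), low part */
  const __m256 vdenorm_cutoff = _mm256_set1_ps(-87.33655f);     /* exp(x) is denormal/zero below roughly this */

  const __m256 vx = _mm256_sub_ps(_mm256_loadu_ps(input), vi_max);  /* x = i - max, so x <= 0 */
  __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);             /* n = round(x * log2(e)), still biased */
  const __m256 vs = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn), 23));  /* s = 2^n */
  vn = _mm256_sub_ps(vn, vmagic_bias);                              /* n as a plain float */
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);               /* t = x - n*ln(2), high part... */
  vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);                      /* ...plus the low part */
  __m256 vp = _mm256_fmadd_ps(_mm256_set1_ps(1.0f/120.0f), vt, _mm256_set1_ps(1.0f/24.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f/6.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(0.5f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f));               /* p ~= (exp(t) - 1) / t */
  vt = _mm256_mul_ps(vt, vs);
  __m256 vf = _mm256_fmadd_ps(vt, vp, vs);                          /* exp(x) = s + s*t*p */
  vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);  /* flush would-be denormals to 0 */
  return _mm256_add_ps(vacc, vf);                                   /* running vector sum */
}

The _acc2/_acc3/_acc5 suffixes name the number of independent vacc registers the unrolled loop keeps before a final fold, and the avx512f-p5-scalef files replace the magic-bias step with _mm512_roundscale_ps and apply 2^n through _mm512_scalef_ps, as sketched after the next directory.
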
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

avx2-p5-x80.c  (matches in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80)
    69  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    81  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   119  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   214  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx2-p5-x88.c  (matches in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88)
    70  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    83  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   124  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   227  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx512f-p5-scalef-x160.c  (matches in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160)
    65  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);   (local)
    77  __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
    90  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);

avx2-p5-x96.c  (matches in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96)
    71  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    85  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   129  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   240  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
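
The vscaleexpminusmax kernels compute the same exp(x - max) but multiply by a caller-supplied scale and store the result rather than only reducing it. The sketch below follows the AVX-512 scalef flavor visible in avx512f-p5-scalef-x160.c: _mm512_roundscale_ps does the rounding and _mm512_scalef_ps applies 2^n at the end, which underflows gracefully and so needs no explicit denormal cutoff. Names and the Taylor polynomial are illustrative stand-ins for the generated code.

#include <immintrin.h>

/* One 16-lane block of output[i] = scale * exp(input[i] - max), AVX-512 scalef style. */
static inline void scaleexp_block(const float* input, float* output, __m512 vi_max, __m512 vscale) {
  const __m512 vlog2e        = _mm512_set1_ps(1.442695f);
  const __m512 vminus_ln2_hi = _mm512_set1_ps(-0.693359375f);
  const __m512 vminus_ln2_lo = _mm512_set1_ps(2.12194440e-4f);

  const __m512 vx = _mm512_sub_ps(_mm512_loadu_ps(input), vi_max);
  const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);  /* n = round(x * log2(e)) */
  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);                    /* t = x - n*ln(2), in two steps */
  vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);
  __m512 vp = _mm512_fmadd_ps(_mm512_set1_ps(1.0f/120.0f), vt, _mm512_set1_ps(1.0f/24.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f/6.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(0.5f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));                    /* p ~= exp(t) on |t| <= ln(2)/2 */
  const __m512 vf = _mm512_scalef_ps(vp, vn);                            /* exp(x) = p * 2^n; underflows to 0 for very negative x */
  _mm512_storeu_ps(output, _mm512_mul_ps(vf, vscale));                   /* scale and store */
}

The AVX2 x80/x88/x96 files in this directory keep the magic-bias form from the previous sketch and simply multiply by the scale before storing.
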
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

avx2-p5-x80.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80)
    68  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    80  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   118  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   213  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx2-p5-x80-acc5.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5)
    72  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    84  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   122  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   217  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx2-p5-x80-acc2.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2)
    69  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    81  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   119  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   214  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx512f-p5-scalef-x160-acc2.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2)
    66  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);   (local)
    78  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
    91  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);

avx512f-p5-scalef-x160.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160)
    65  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);   (local)
    77  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
    90  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);

avx2-p5-x96-acc6.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6)
    75  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    89  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   133  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   244  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);

avx2-p5-x96-acc3.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3)
    72  const __m256 vx9 = _mm256_sub_ps(vi9, vi_max);   (local)
    86  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
   130  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
   241  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
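
raddstoreexpminusmax combines the two jobs above: it stores exp(x - max) and also returns its sum, i.e. the numerator-and-denominator pass of a softmax. A scalar reference of that contract is short enough to state outright; the signature is simplified relative to the generated ukernels, and the _accN suffixes again count the independent vector accumulators used to hide add latency in the unrolled loops.

#include <math.h>
#include <stddef.h>

/* Scalar reference for the raddstoreexpminusmax contract: write exp(input[i] - max)
 * to output and return the sum of the written values. The generated AVX2/AVX-512
 * kernels above vectorize this 10 or 12 blocks per iteration. */
static float raddstoreexpminusmax_ref(size_t n, const float* input, float max, float* output) {
  float sum = 0.0f;
  for (size_t i = 0; i < n; i++) {
    const float f = expf(input[i] - max);
    output[i] = f;   /* store exp(x - max) ... */
    sum += f;        /* ... and accumulate it */
  }
  return sum;
}
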
/external/XNNPACK/src/f32-vscaleextexp/gen/

avx512f-p5-scalef-x160.c  (matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160)
    53  const __m512 vx9 = _mm512_loadu_ps(x + 144);   (local)
    66  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
    79  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);
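
vscaleextexp is the extended-exponent variant: judging by the kernel name and the roundscale/scalef lines above, exp(x) is carried as a mantissa-range value p plus a separate exponent n, the scale is likewise a (value, exponent) pair, and the two are only combined at the very end with _mm512_scalef_ps so nothing overflows along the way. A hypothetical helper for that final combination (all identifiers are illustrative, not the kernel's own):

#include <immintrin.h>

/* Combine an extended-exponent exp result (p, n) with an extended-exponent scale
 * (scale_v, scale_e): out = (p * scale_v) * 2^(n + scale_e). */
static inline __m512 apply_ext_scale(__m512 vp, __m512 vn, __m512 vscale_v, __m512 vscale_e) {
  const __m512 vm = _mm512_mul_ps(vp, vscale_v);  /* mantissa-range parts multiply */
  const __m512 ve = _mm512_add_ps(vn, vscale_e);  /* exponents add */
  return _mm512_scalef_ps(vm, ve);                /* apply 2^e in one step, no intermediate overflow */
}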