/external/XNNPACK/src/x8-lut/gen/

D | lut-scalar-x16.c | in xnn_x8_lut_ukernel__scalar_x16():
      38  const size_t vx11 = (size_t) x[11];  local
      56  const uint32_t vt11 = (uint32_t) t[vx11];
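The two lines indexed above are the whole x8-lut kernel pattern: widen a byte to an index, gather one entry from a 256-entry table. A minimal scalar sketch (the function name is invented for illustration; the generated file unrolls this loop by 16, which is where the lane-11 names vx11/vt11 come from):

#include <stddef.h>
#include <stdint.h>

/* Sketch of the scalar x8-lut pattern: each input byte is widened to a
 * size_t index (vx) and used to gather one entry (vt) from the 256-entry
 * lookup table t. */
void x8_lut_scalar_sketch(size_t n, const uint8_t* x, uint8_t* y,
                          const uint8_t t[256]) {
  for (size_t i = 0; i < n; i++) {
    const size_t vx = (size_t) x[i];       /* "const size_t vx11 = (size_t) x[11];"       */
    const uint32_t vt = (uint32_t) t[vx];  /* "const uint32_t vt11 = (uint32_t) t[vx11];" */
    y[i] = (uint8_t) vt;
  }
}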
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx2-p5-x96.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96():
      73  const __m256 vx11 = _mm256_sub_ps(vi11, vi_max);  local
      87  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);
     131  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);
     242  vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11);

D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192():
      69  const __m512 vx11 = _mm512_sub_ps(vi11, vi_max);  local
      83  __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      98  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);
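Across this family, vx11/vn11/vt11 are one lane group of the same exp(x - max) evaluation: subtract the running maximum, compute n = round(x * log2(e)) via the magic-bias trick (AVX2) or roundscale (AVX-512), reduce t = x - n*ln2 in two steps (the _hi/_lo constants), evaluate a degree-5 polynomial (the "p5" in the file names), and scale by 2^n. A scalar sketch of that pipeline; the constants are the standard single-precision values for this scheme rather than copied from the generated files, the function name is made up, and it assumes x - max stays above the flush-to-zero cutoff:

#include <stdint.h>
#include <string.h>

static float scaleexpminusmax_sketch(float vi, float vi_max, float vscale) {
  const float vmagic_bias   =  0x1.8000FEp23f;  /* pushes round(n) into the low mantissa bits */
  const float vlog2e        =  0x1.715476p+0f;
  const float vminus_ln2_hi = -0x1.62E430p-1f;  /* two-step (hi/lo) Cody-Waite reduction */
  const float vminus_ln2_lo =  0x1.05C610p-29f;

  const float vx = vi - vi_max;          /* "vx11 = _mm256_sub_ps(vi11, vi_max)" */
  float vn = vx * vlog2e + vmagic_bias;  /* "vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias)" */

  /* With the magic bias added, the integer n sits in the low mantissa bits;
   * shifting it into the exponent field yields vs = 2^n. */
  uint32_t vn_bits;
  memcpy(&vn_bits, &vn, sizeof vn_bits);
  const uint32_t vs_bits = vn_bits << 23;
  float vs;
  memcpy(&vs, &vs_bits, sizeof vs);

  vn -= vmagic_bias;                     /* recover n as a float */
  float vt = vn * vminus_ln2_hi + vx;    /* "vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11)" */
  vt = vn * vminus_ln2_lo + vt;

  /* Degree-5 polynomial exp(t) ~= 1 + t*p(t); coefficient values hedged. */
  float vp = 0x1.0F9F9Cp-7f;
  vp = vp * vt + 0x1.573A1Ap-5f;
  vp = vp * vt + 0x1.555A80p-3f;
  vp = vp * vt + 0x1.FFFDC6p-2f;
  vp = vp * vt + 0x1.FFFFF6p-1f;
  vt *= vs;
  const float vf = vt * vp + vs;         /* exp(x - max) */
  return vf * vscale;                    /* the "vscale" part of this kernel family */
}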
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | avx2-rr1-p5-x96-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2():
      66  const __m256 vx11 = _mm256_sub_ps(vi11, vi_max);  local
      79  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);
     118  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2, vx11);
     209  vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11);

D | avx2-rr1-p5-x96.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96():
      65  const __m256 vx11 = _mm256_sub_ps(vi11, vi_max);  local
      78  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);
     117  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2, vx11);
     208  vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11);

D | avx2-rr1-p5-x96-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3():
      67  const __m256 vx11 = _mm256_sub_ps(vi11, vi_max);  local
      80  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);
     119  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2, vx11);
     210  vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11);

D | avx2-rr1-p5-x96-acc6.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6():
      70  const __m256 vx11 = _mm256_sub_ps(vi11, vi_max);  local
      83  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);
     122  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2, vx11);
     213  vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11);

D | avx512f-rr1-p5-scalef-x192-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2():
      66  const __m512 vx11 = _mm512_sub_ps(vi11, vi_max);  local
      79  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      92  const __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2, vx11);

D | avx512f-rr1-p5-scalef-x192.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192():
      65  const __m512 vx11 = _mm512_sub_ps(vi11, vi_max);  local
      78  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      91  const __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2, vx11);

D | avx512f-rr1-p5-scalef-x192-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3():
      67  const __m512 vx11 = _mm512_sub_ps(vi11, vi_max);  local
      80  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      93  const __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2, vx11);

D | avx512f-rr1-p5-scalef-x192-acc6.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6():
      70  const __m512 vx11 = _mm512_sub_ps(vi11, vi_max);  local
      83  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      96  const __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2, vx11);
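The rr1 variants here reduce with a single fused -ln2 constant (one rounding step) instead of the _hi/_lo pair above, and the -accN suffix is the number of independent partial sums the reduction keeps to break the floating-point add dependency chain. A scalar sketch of the acc2 shape, with expf() standing in for the vectorized p5 evaluation and an invented function name:

#include <math.h>
#include <stddef.h>

/* K = 2 independent partial sums, as in *-acc2.c; more accumulators
 * (acc3, acc6) shorten the serial add chain further. */
static float raddstoreexpminusmax_sketch(size_t n, const float* x, float* y,
                                         float x_max) {
  float acc0 = 0.0f, acc1 = 0.0f;
  size_t i = 0;
  for (; i + 2 <= n; i += 2) {
    const float f0 = expf(x[i + 0] - x_max);  /* kernels use the p5 polynomial */
    const float f1 = expf(x[i + 1] - x_max);
    y[i + 0] = f0;                            /* the "store" part of raddstore */
    y[i + 1] = f1;
    acc0 += f0;                               /* independent accumulation chains */
    acc1 += f1;
  }
  for (; i < n; i++) {
    const float f = expf(x[i] - x_max);
    y[i] = f;
    acc0 += f;
  }
  return acc0 + acc1;                         /* combine partial sums at the end */
}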
/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx2-p5-x96.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96():
      71  const __m256 vx11 = _mm256_sub_ps(vi11, vi_max);  local
      85  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);
     129  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);
     240  vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11);

D | avx2-p5-x96-acc6.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6():
      76  const __m256 vx11 = _mm256_sub_ps(vi11, vi_max);  local
      90  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);
     134  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);
     245  vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11);

D | avx2-p5-x96-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2():
      72  const __m256 vx11 = _mm256_sub_ps(vi11, vi_max);  local
      86  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);
     130  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);
     241  vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11);

D | avx2-p5-x96-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3():
      73  const __m256 vx11 = _mm256_sub_ps(vi11, vi_max);  local
      87  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);
     131  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);
     242  vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11);

D | avx512f-p5-scalef-x192-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2():
      69  const __m512 vx11 = _mm512_sub_ps(vi11, vi_max);  local
      83  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      98  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);

D | avx512f-p5-scalef-x192-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3():
      70  const __m512 vx11 = _mm512_sub_ps(vi11, vi_max);  local
      84  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      99  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);

D | avx512f-p5-scalef-x192.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192():
      68  const __m512 vx11 = _mm512_sub_ps(vi11, vi_max);  local
      82  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      97  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);

D | avx512f-p5-scalef-x192-acc6.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6():
      73  const __m512 vx11 = _mm512_sub_ps(vi11, vi_max);  local
      87  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
     102  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);
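The vf11 lines around 240 in the AVX2 entries show the guard applied before accumulating: lanes whose input sits below vdenorm_cutoff would need a 2^n that underflows, so a compare-and-andnot mask flushes them to zero. A minimal AVX2 illustration of that idiom; the cutoff value here is the usual single-precision exp() underflow bound for this scheme and should be treated as approximate:

#include <immintrin.h>

/* Lanes with vx < cutoff are zeroed in vf: the compare produces an
 * all-ones mask for those lanes, and andnot clears them. */
static __m256 flush_denorm_sketch(__m256 vx, __m256 vf) {
  const __m256 vdenorm_cutoff = _mm256_set1_ps(-0x1.5D589Ep6f);  /* ~ ln(2^-126) */
  return _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
}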
/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192():
      55  const __m512 vx11 = _mm512_loadu_ps(x + 176);  local
      70  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      85  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);
D | avx2-p5-x96.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96():
      61  const __m256 vx11 = _mm256_loadu_ps(x + 88);  local
      76  const __m256 vn11 = _mm256_round_ps(_mm256_mul_ps(vx11, vlog2e), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
      91  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);
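Both vscaleextexp kernels round n directly (roundscale on AVX-512, _mm256_round_ps on AVX2) rather than via the magic bias, since the extended-exponent scheme keeps n in a float register instead of folding it into the exponent field. A small AVX-512 sketch of the roundscale + scalef pairing the scalef-named files rely on; a truncated Taylor polynomial stands in for the real degree-5 one, and the function name is invented:

#include <immintrin.h>

/* _mm512_roundscale_ps(v, 0): imm8 = 0 selects zero fraction bits and
 * round-to-nearest-even, i.e. round each lane to the nearest integer.
 * _mm512_scalef_ps(p, n): p * 2^n per lane. Requires AVX512F. */
static __m512 exp_avx512_sketch(__m512 vx) {
  const __m512 vlog2e     = _mm512_set1_ps(0x1.715476p+0f);
  const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E430p-1f);
  const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
  const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vx);
  /* 1 + t + t^2/2, a stand-in for the degree-5 minimax polynomial */
  __m512 vp = _mm512_fmadd_ps(vt, _mm512_set1_ps(0.5f), _mm512_set1_ps(1.0f));
  vp = _mm512_fmadd_ps(vt, vp, _mm512_set1_ps(1.0f));
  return _mm512_scalef_ps(vp, vn);  /* p * 2^n */
}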
/external/XNNPACK/src/f32-raddextexp/gen/

D | avx512f-p5-scalef-x192-acc2.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2():
      57  const __m512 vx11 = _mm512_loadu_ps(x + 176);  local
      72  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      87  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);

D | avx512f-p5-scalef-x192.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192():
      55  const __m512 vx11 = _mm512_loadu_ps(x + 176);  local
      70  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      85  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);

D | avx512f-p5-scalef-x192-acc6.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6():
      65  const __m512 vx11 = _mm512_loadu_ps(x + 176);  local
      80  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      95  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);

D | avx512f-p5-scalef-x192-acc3.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3():
      59  const __m512 vx11 = _mm512_loadu_ps(x + 176);  local
      74  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);
      89  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);
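Unlike the *expminusmax families, raddextexp sums exp(x) terms with no subtracted maximum, so the running sum itself is held in extended-exponent form: a value plus a separately tracked power-of-two exponent, rescaled as larger terms arrive. A scalar sketch of that accumulation idea; the struct and names are invented, and the real kernels vectorize the rescaling with _mm512_scalef_ps:

#include <math.h>

/* Running sum represented as val * 2^exp, with exp kept as a float.
 * Keeping the exponent outside the value means the sum never overflows
 * or underflows float range, whatever the magnitudes of the terms. */
typedef struct { float val; float exp; } extexp_acc;

static void extexp_add(extexp_acc* acc, float val, float e) {
  /* Rescale both operands onto the larger of the two exponents;
   * exp2f() of a very negative difference correctly flushes to 0. */
  const float max_e = fmaxf(acc->exp, e);
  acc->val = acc->val * exp2f(acc->exp - max_e) + val * exp2f(e - max_e);
  acc->exp = max_e;
}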