/external/XNNPACK/src/f32-sigmoid/gen/
avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c
  in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112():
    70  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    78  const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
    86  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    94  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
   102  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   134  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);

avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c
  in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112():
    70  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    78  const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
    86  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    94  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
   102  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   134  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);

avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c
  in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128():
    72  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    81  const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
    90  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    99  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
   108  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   144  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);

avx512f-rr1-lut16-p3-perm-scalef-div-x112.c
  in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112():
    64  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    72  const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
    80  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    88  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
   128  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);

avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c
  in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128():
    72  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    81  const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
    90  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    99  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
   108  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   144  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c
  in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112():
    64  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    72  const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
    80  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    88  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
   128  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);

avx512f-rr1-lut16-p3-perm-scalef-div-x128.c
  in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128():
    66  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    75  const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
    84  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    93  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
   138  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);

avx-rr2-p5-div-x56.c
  in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56():
    65  __m256 vn6 = _mm256_add_ps(_mm256_mul_ps(vz6, vlog2e), vmagic_bias);   (local declaration)
    85  …_m128 vs_lo6 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn6)), 23));
    86  …128 vs_hi6 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn6, 1)), 23));
    95  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   103  __m256 vt6 = _mm256_add_ps(_mm256_mul_ps(vn6, vminus_ln2_hi), vz6);
   111  vt6 = _mm256_add_ps(_mm256_mul_ps(vn6, vminus_ln2_lo), vt6);

avx512f-rr1-p5-scalef-div-x112.c
  in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112():
    61  __m512 vn6 = _mm512_mul_ps(vz6, vlog2e);   (local declaration)
    69  vn6 = _mm512_roundscale_ps(vn6, 0);
    77  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
   125  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c
  in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128():
    66  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    75  const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
    84  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    93  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
   138  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);

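Most of the sigmoid matches above are AVX-512 kernels sharing one exponential range reduction: vn6 holds round(z*log2e) (plus, in the lut variants, fractional index bits), vt6 is the reduced argument z - n*ln2, and ve6 applies the 2^n scale with _mm512_scalef_ps; the one AVX file (avx-rr2-p5-div-x56.c) rebuilds 2^n by bit-shifting two 128-bit halves instead. As a reading aid, here is a minimal single-vector sketch of the rr1-p5-scalef-div scheme, assuming AVX-512F, with illustrative constants not quoted from any particular file. The lut16-p3 and lut32-p2 variants replace part of the polynomial with a _mm512_permutexvar_ps or _mm512_permutex2var_ps table lookup indexed by the low bits of vn6, and the -nr1fma variants replace the final division with a Newton-Raphson reciprocal step.

    #include <immintrin.h>

    // Minimal single-vector sketch of the rr1-p5-scalef-div sigmoid scheme
    // (compile with -mavx512f). Not the generated kernel: the files above
    // unroll this over 7 or 8 registers, which is where the "6" suffix of
    // vn6/vt6/ve6 comes from.
    static __m512 sigmoid_rr1_p5_scalef_div_sketch(__m512 vx) {
      const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f);
      const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E430p-1f);
      // Degree-5 polynomial for exp(t) on |t| <= ln2/2 (illustrative values).
      const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f);
      const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);
      const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);
      const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f);
      const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f);
      const __m512 vone = _mm512_set1_ps(1.0f);
      const __m512 vzero = _mm512_setzero_ps();

      // Work on z = -|x| so exp(z) never overflows.
      const __m512 vz = _mm512_min_ps(vx, _mm512_sub_ps(vzero, vx));

      // vn plays the role of vn6: n = round(z * log2(e)).
      const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vz, vlog2e), 0);
      // vt plays the role of vt6: t = z - n*ln(2).
      const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz);

      // p(t) ~= exp(t), evaluated by Horner's rule.
      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vc1);
      vp = _mm512_fmadd_ps(vp, vt, vone);

      // ve plays the role of ve6: scalef multiplies by 2^n in one instruction.
      const __m512 ve = _mm512_scalef_ps(vp, vn);

      // sigmoid(z) = e / (e + 1) for z <= 0, then mirror for positive inputs.
      const __m512 vf = _mm512_div_ps(ve, _mm512_add_ps(ve, vone));
      const __mmask16 vpos = _mm512_cmp_ps_mask(vx, vzero, _CMP_GT_OQ);
      return _mm512_mask_sub_ps(vf, vpos, vone, vf);
    }
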
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x56.c
  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56():
    72  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);   (local declaration)
    82  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    91  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   101  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   109  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);

avx2-p5-x64.c
  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
    74  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);   (local declaration)
    85  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    95  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   106  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   115  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);

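Both vscaleexpminusmax files (and the raddexpminusmax and raddstoreexpminusmax files below) share the prologue these vn6 hits show: a magic-bias constant turns the fmadd into a round-to-nearest, a 23-bit left shift rebuilds 2^n, and ln(2) is subtracted in hi/lo halves (Cody-Waite) for accuracy. A sketch of just that prologue follows, assuming AVX2 and FMA; treat the constant values as illustrative.

    #include <immintrin.h>

    // Shared exp() prologue of the avx2-p5 kernels above (compile with
    // -mavx2 -mfma). Sketch only; the generated kernels unroll it 7x-9x.
    struct exp_prologue { __m256 vn, vt, vs; };

    static struct exp_prologue exp_prologue_avx2_sketch(__m256 vx) {
      // 1.5*2^23 with the IEEE-754 exponent bias (127) folded into the low
      // mantissa bits, so the plain left shift below already yields 2^n.
      const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
      const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f);
      // -ln(2) split into hi/lo parts so n*ln2 is subtracted almost exactly.
      const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);
      const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);

      struct exp_prologue r;
      // n = round(x * log2(e)), still carrying the magic bias.
      r.vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
      // vs = 2^n, built by shifting the low bits of vn into the exponent field.
      r.vs = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(r.vn), 23));
      // Drop the bias to recover n as an ordinary float.
      r.vn = _mm256_sub_ps(r.vn, vmagic_bias);
      // t = x - n*ln(2), in two steps.
      r.vt = _mm256_fmadd_ps(r.vn, vminus_ln2_hi, vx);
      r.vt = _mm256_fmadd_ps(r.vn, vminus_ln2_lo, r.vt);
      // The kernels then evaluate a degree-5 polynomial p(t) and form
      // vs * p(t), scaling or accumulating the result as the ukernel requires.
      return r;
    }
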
/external/XNNPACK/src/f32-raddexpminusmax/gen/
avx2-p5-x64-acc2.c
  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2():
    73  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);   (local declaration)
    84  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    94  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   105  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   114  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);

avx2-p5-x64-acc4.c
  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4():
    75  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);   (local declaration)
    86  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    96  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   107  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   116  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);

avx2-p5-x64.c
  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64():
    72  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);   (local declaration)
    83  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    93  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   104  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   113  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);

avx2-p5-x72-acc3.c
  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
    76  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);   (local declaration)
    88  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    99  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   111  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   121  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);

avx2-p5-x72.c
  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
    74  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);   (local declaration)
    86  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    97  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   109  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   119  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);

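The -acc2/-acc3/-acc4 suffixes above only change how many independent partial sums the reduction keeps before folding them, which shortens the dependency chain on the accumulating adds. Below is a hedged sketch of that idea, with a caller-supplied exp8() standing in for the vectorized exp whose vn6/vt6 prologue the hits trace; the real ukernels have a different signature and also handle the remainder elements.

    #include <immintrin.h>
    #include <stddef.h>

    // Sum exp(x[i] - vmax) with two independent accumulators (the "acc2" idea).
    // exp8 is a stand-in for the vectorized exp; tail handling is omitted.
    static float raddexp_acc2_sketch(const float* x, size_t n, float vmax,
                                     __m256 (*exp8)(__m256)) {
      const __m256 vminus_max = _mm256_set1_ps(-vmax);
      __m256 vacc0 = _mm256_setzero_ps();
      __m256 vacc1 = _mm256_setzero_ps();
      size_t i = 0;
      for (; i + 16 <= n; i += 16) {
        const __m256 vx0 = _mm256_add_ps(_mm256_loadu_ps(x + i + 0), vminus_max);
        const __m256 vx1 = _mm256_add_ps(_mm256_loadu_ps(x + i + 8), vminus_max);
        vacc0 = _mm256_add_ps(vacc0, exp8(vx0));  // two add chains that do not
        vacc1 = _mm256_add_ps(vacc1, exp8(vx1));  // depend on each other
      }
      vacc0 = _mm256_add_ps(vacc0, vacc1);  // fold the accumulators
      // Horizontal sum of the 8 lanes.
      __m128 vsum = _mm_add_ps(_mm256_castps256_ps128(vacc0),
                               _mm256_extractf128_ps(vacc0, 1));
      vsum = _mm_add_ps(vsum, _mm_movehl_ps(vsum, vsum));
      vsum = _mm_add_ss(vsum, _mm_movehdup_ps(vsum));
      return _mm_cvtss_f32(vsum);
    }
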
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx2-p5-x64.c
  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64():
    73  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);   (local declaration)
    84  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    94  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   105  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   114  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);

avx2-p5-x64-acc2.c
  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2():
    74  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);   (local declaration)
    85  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    95  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   106  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   115  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);

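For orientation, the operation these raddstoreexpminusmax kernels vectorize is small enough to state as scalar C. The real ukernel signature and parameter passing differ; only the math is shown here.

    #include <math.h>
    #include <stddef.h>

    // Scalar reference: store exp(x[i] - max) and return the running sum,
    // which a softmax later uses to normalize. The vn6/vt6 lines above are
    // the vectorized version of this expf().
    static float raddstoreexpminusmax_ref(const float* x, float* y,
                                          size_t n, float vmax) {
      float sum = 0.0f;
      for (size_t i = 0; i < n; i++) {
        const float e = expf(x[i] - vmax);
        y[i] = e;
        sum += e;
      }
      return sum;
    }
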
/external/XNNPACK/src/f32-velu/gen/
velu-avx512f-rr1-lut16-p3-perm-x112.c
  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112():
    66  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    80  const __m512i ven6 = _mm512_slli_epi32(_mm512_castps_si512(vn6), 19);
    81  const __m512i vl6 = _mm512_permutexvar_epi32(_mm512_castps_si512(vn6), vtable);
    96  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   104  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);

velu-avx512f-rr1-lut16-p3-perm-x128.c
  in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128():
    68  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    83  const __m512i ven6 = _mm512_slli_epi32(_mm512_castps_si512(vn6), 19);
    84  const __m512i vl6 = _mm512_permutexvar_epi32(_mm512_castps_si512(vn6), vtable);
   101  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   111  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);

velu-avx2-rr1-lut4-p4-perm-x56.c
  in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56():
    67  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    87  const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 21);
    88  const __m256i vl6 = _mm256_castps_si256(_mm256_permutevar_ps(vtable, _mm256_castps_si256(vn6)));
    89  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   104  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6);

velu-avx2-rr1-lut8-p4-perm-x56.c
  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56():
    66  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias);   (local declaration)
    86  const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 20);
    87  const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6));
    88  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   103  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6);

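In the ELU kernels the vn6/vt6 lines are the same exp() range reduction, applied only to the negative inputs. The shift counts in the ven6 lines (21, 20, 19) equal 23 minus log2 of the table size (lut4, lut8, lut16): the integer part of vn6 is moved into the float exponent field while its low bits select an entry from a small table of fractional powers of two. Below is a scalar reference for the branch structure only; any extra scaling parameters the real kernels take are collapsed into alpha here.

    #include <math.h>

    // Scalar reference for ELU: identity for positive inputs, a scaled
    // exp(x) - 1 for the rest. The vectorized kernels above compute that
    // exp via the vn6/vt6 range reduction plus a small 2^(i/N) table.
    static float elu_ref(float x, float alpha) {
      return x > 0.0f ? x : alpha * expm1f(x);
    }
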
/external/XNNPACK/src/f32-raddextexp/gen/
avx512f-p5-scalef-x128-acc2.c
  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2():
    63  const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);   (local declaration)
    74  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    83  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   148  vmax_e0 = _mm512_max_ps(vmax_e0, vn6);
   159  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);

avx512f-p5-scalef-x128.c
  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128():
    61  const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);   (local declaration)
    72  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    81  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   146  vmax_e0 = _mm512_max_ps(vmax_e0, vn6);
   156  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);

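The raddextexp kernels sum exponentials without first subtracting a precomputed maximum: each term is carried as a mantissa-like value with its base-2 exponent vn6 kept separately as a float, and the running maximum exponent (the vmax_e0 hits) together with per-term deltas (the vdelta_e6 hits) keeps every rescale in range. Here is a sketch of one accumulation step, assuming AVX-512F; the names and the surrounding loop structure are illustrative.

    #include <immintrin.h>

    // Extended-exponent accumulation: the pair (vaccv, vacce) represents
    // vaccv * 2^vacce. Fold in a new term vp * 2^vn without overflow by
    // rescaling both onto the larger of the two exponents. The exponents
    // are integral floats, so scalef applies them exactly.
    static void raddextexp_step_sketch(__m512 vp, __m512 vn,
                                       __m512* vaccv, __m512* vacce) {
      const __m512 vmax_e = _mm512_max_ps(*vacce, vn);   // new common exponent
      const __m512 vdelta_acc_e = _mm512_sub_ps(*vacce, vmax_e);
      const __m512 vdelta_e = _mm512_sub_ps(vn, vmax_e);
      *vaccv = _mm512_add_ps(_mm512_scalef_ps(*vaccv, vdelta_acc_e),
                             _mm512_scalef_ps(vp, vdelta_e));
      *vacce = vmax_e;
    }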