/external/XNNPACK/src/f32-velu/gen/ |
D | velu-scalar-rr2-lut16-p3-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6():
      64  float vn5 = vz5 * vlog2e + vmagic_bias;   (local)
      81  const uint32_t ven5 = fp32_to_bits(vn5) << 19;
      82  const uint32_t vidx5 = fp32_to_bits(vn5) & vindex_mask;
      83  vn5 -= vmagic_bias;
      95  float vt5 = vn5 * vminus_ln2_hi + vz5;
     123  vt5 = vn5 * vminus_ln2_lo + vt5;
|
D | velu-wasm-rr2-lut16-p3-x6.c | in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6():
      64  float vn5 = vz5 * vlog2e + vmagic_bias;   (local)
      81  const uint32_t ven5 = fp32_to_bits(vn5) << 19;
      82  const uint32_t vidx5 = fp32_to_bits(vn5) & vindex_mask;
      83  vn5 -= vmagic_bias;
      95  float vt5 = vn5 * vminus_ln2_hi + vz5;
     103  vt5 = vn5 * vminus_ln2_lo + vt5;
|
D | velu-wasm-rr2-p6-x6.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x6():
      64  float vn5 = vz5 * vlog2e + vmagic_bias;   (local)
      76  float vs5 = fp32_from_bits(fp32_to_bits(vn5) << 23);
      77  vn5 -= vmagic_bias;
      84  float vt5 = vn5 * vminus_ln2_hi + vz5;
      91  vt5 = vn5 * vminus_ln2_lo + vt5;
|
D | velu-scalar-rr2-p6-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x6():
      64  float vn5 = vz5 * vlog2e + vmagic_bias;   (local)
      76  float vs5 = fp32_from_bits(fp32_to_bits(vn5) << 23);
      77  vn5 -= vmagic_bias;
      84  float vt5 = vn5 * vminus_ln2_hi + vz5;
      91  vt5 = vn5 * vminus_ln2_lo + vt5;
|
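All of the scalar and WAsm ELU kernels above share one exp() skeleton: round z*log2(e) by adding a magic bias, rebuild the 2**n scale factor by moving the rounded integer into the float exponent field (the << 23 in the p6 variants), and reduce z against -ln(2) split into a high and a low constant (the "rr2" in the kernel names). A minimal standalone sketch of that skeleton, with representative constants and expf() standing in for the kernels' own p3/p6 polynomial and ELU-specific tail:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    static uint32_t fp32_to_bits(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
    static float fp32_from_bits(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

    /* exp(z) ~= 2**n * exp(t), n = round(z*log2(e)), t = z - n*ln(2), |t| <= ln(2)/2.
     * Valid for n in [-126, 127]; the generated kernels saturate outside that range. */
    static float exp_rr2_sketch(float z) {
      const float vmagic_bias   = 0x1.8000FEp23f;   /* its low mantissa bits hold the FP32 exponent bias, 127 */
      const float vlog2e        = 0x1.715476p+0f;
      const float vminus_ln2_hi = -0x1.62E400p-1f;  /* -ln(2), high part... */
      const float vminus_ln2_lo = -0x1.7F7D1Cp-20f; /* ...and low correction term ("rr2") */

      /* Adding the magic bias lands the sum in a binade whose ulp is 1.0,
       * so z*log2(e) is rounded to an integer as a side effect. */
      float vn = z * vlog2e + vmagic_bias;

      /* The rounded integer sits in the low mantissa bits; shifting it into the
       * exponent field reconstructs 2**n without an int->float conversion. */
      const float vs = fp32_from_bits(fp32_to_bits(vn) << 23);
      vn -= vmagic_bias;

      /* Two-step Cody-Waite reduction: t = z - n*ln(2) stays accurate even though
       * ln(2) is not exactly representable as a float. */
      float vt = vn * vminus_ln2_hi + z;
      vt = vn * vminus_ln2_lo + vt;

      /* The kernels evaluate a degree-3 or degree-6 polynomial here. */
      return vs * expf(vt);
    }

The lut16-p3 variants quoted first use the same reduction, but round to multiples of 1/16 and keep the low four bits as a table index; see the sketch after the SIMD LUT entries below.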
D | velu-avx2-rr1-lut16-p3-gather-x48.c | in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48():
      63  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      75  const __m256i vidx5 = _mm256_and_si256(_mm256_castps_si256(vn5), vindex_mask);
      88  const __m256i ven5 = _mm256_slli_epi32(_mm256_castps_si256(vn5), 19);
      89  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
     102  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
|
D | velu-avx512f-rr1-lut16-p3-perm-x96.c | in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96():
      63  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      75  const __m512i ven5 = _mm512_slli_epi32(_mm512_castps_si512(vn5), 19);
      76  const __m512i vl5 = _mm512_permutexvar_epi32(_mm512_castps_si512(vn5), vtable);
      89  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      96  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
|
D | velu-avx2-rr1-lut8-p4-perm-x48.c | in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48():
      63  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      80  const __m256i ven5 = _mm256_slli_epi32(_mm256_castps_si256(vn5), 20);
      81  const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5));
      82  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
      95  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
|
D | velu-avx2-rr1-lut4-p4-perm-x48.c | in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48():
      64  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      81  const __m256i ven5 = _mm256_slli_epi32(_mm256_castps_si256(vn5), 21);
      82  const __m256i vl5 = _mm256_castps_si256(_mm256_permutevar_ps(vtable, _mm256_castps_si256(vn5)));
      83  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
      96  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
|
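The lut16/lut8/lut4 kernels above trade polynomial degree for a small table of 2**(k/N): n is rounded to a multiple of 1/N, the low bits of the magic-biased value select a table entry (via an AND plus scalar load, a gather, or a register permute, as the quoted lines show), and the integer part of n is folded into the entry's exponent field. A portable sketch of the 16-entry case, with a table built at runtime instead of XNNPACK's precomputed xnn_table_exp2minus_k_over_16 and without the kernels' exact bit packing:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    static uint32_t fp32_to_bits(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
    static float fp32_from_bits(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

    /* exp(z) ~= 2**q * 2**(k/16) * exp(t), where n = q + k/16 = round(z*log2(e)*16)/16
     * and t = z - n*ln(2), |t| <= ln(2)/32.  Valid for q in [-126, 127]. */
    static float exp_lut16_sketch(float z) {
      static uint32_t table[16];  /* bit patterns of 2**(k/16); lazy init, not thread-safe */
      if (table[1] == 0) {
        for (int k = 0; k < 16; k++) table[k] = fp32_to_bits(exp2f((float) k * 0x1.0p-4f));
      }

      const float vmagic_bias   = 0x1.800000p19f;   /* the ulp in this binade is 1/16 */
      const float vlog2e        = 0x1.715476p+0f;
      const float vminus_ln2_hi = -0x1.62E400p-1f;
      const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;

      float vn = z * vlog2e + vmagic_bias;   /* rounds z*log2(e) to the nearest 1/16 */
      const int32_t vbits =
          (int32_t) fp32_to_bits(vn) - (int32_t) fp32_to_bits(vmagic_bias);  /* = 16*n */
      vn -= vmagic_bias;

      const int32_t vidx = vbits & 15;          /* fractional sixteenths: the table index */
      const int32_t vq = (vbits - vidx) / 16;   /* integer part of n */
      /* 2**n = 2**q * 2**(k/16): add q to the exponent field of the table entry
       * (unsigned wraparound handles negative q). */
      const float vs = fp32_from_bits(table[vidx] + ((uint32_t) vq << 23));

      float vt = vn * vminus_ln2_hi + z;        /* two-step reduction, as in the kernels */
      vt = vn * vminus_ln2_lo + vt;
      return vs * expf(vt);                     /* kernels use a degree-3/4 polynomial here */
    }

The smaller the table, the higher the polynomial degree needed for the same accuracy, which is the lut16-p3 / lut8-p4 / lut4-p4 trade-off visible in the file names (and in the shift amounts 19/20/21 in the quoted lines).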
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96():
      67  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      74  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
      81  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      88  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
      95  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     123  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
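The AVX-512 sigmoid kernels in this directory follow one outline: for z = -|x|, compute e = exp(z) with the same magic-bias (or roundscale) reduction, rebuild the 2**n factor with _mm512_scalef_ps instead of exponent-bit arithmetic, form sigmoid(z) = e / (e + 1), and mirror the result to 1 - f for positive x. A simplified one-vector sketch of the scalef+div flavor (compile with -mavx512f); degree-5 Taylor coefficients stand in for the kernels' minimax polynomials, and the lut32/lut16 step is omitted:

    #include <immintrin.h>

    /* Sigmoid of 16 floats: f = e/(e + 1) with e = exp(-|x|), mirrored for x > 0. */
    static __m512 sigmoid_avx512_sketch(__m512 vx) {
      const __m512 vlog2e     = _mm512_set1_ps(0x1.715476p+0f);
      const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E430p-1f);  /* single-constant ("rr1") reduction */
      const __m512 vone       = _mm512_set1_ps(1.0f);
      /* Taylor coefficients of exp(t); the kernels use minimax coefficients. */
      const __m512 vc5 = _mm512_set1_ps(0x1.111112p-7f);   /* ~1/120 */
      const __m512 vc4 = _mm512_set1_ps(0x1.555556p-5f);   /* ~1/24  */
      const __m512 vc3 = _mm512_set1_ps(0x1.555556p-3f);   /* ~1/6   */
      const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f);   /*  1/2   */

      const __m512 vz = _mm512_sub_ps(_mm512_setzero_ps(), _mm512_abs_ps(vx));  /* z = -|x| */

      __m512 vn = _mm512_mul_ps(vz, vlog2e);
      vn = _mm512_roundscale_ps(vn, 0);                       /* n = round(z*log2(e)) */
      const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz);  /* t = z - n*ln(2) */

      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);              /* exp(t) by Horner's rule */
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vone);
      vp = _mm512_fmadd_ps(vp, vt, vone);

      const __m512 ve = _mm512_scalef_ps(vp, vn);             /* e = 2**n * exp(t) */
      const __m512 vd = _mm512_add_ps(ve, vone);              /* d = e + 1 */
      __m512 vf = _mm512_div_ps(ve, vd);                      /* f = sigmoid(z) */

      /* For x > 0, sigmoid(x) = 1 - sigmoid(-x). */
      const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_GT_OQ);
      vf = _mm512_mask_sub_ps(vf, vsign, vone, vf);
      return vf;
    }

One reason for _mm512_scalef_ps here: it handles very negative n gracefully (e simply underflows toward zero), whereas the << 23 bit trick needs explicit cutoff handling.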
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112():
      69  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      77  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
      85  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      93  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
     101  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     133  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96():
      67  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      74  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
      81  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      88  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
      95  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     123  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96():
      61  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      68  const __m512 vl5 = _mm512_permutexvar_ps(_mm512_castps_si512(vn5), vtable);
      75  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      82  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
     117  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112():
      69  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      77  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
      85  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      93  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
     101  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     133  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128():
      71  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      80  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
      89  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      98  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
     107  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     143  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x112.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112():
      63  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      71  const __m512 vl5 = _mm512_permutexvar_ps(_mm512_castps_si512(vn5), vtable);
      79  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      87  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
     127  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96():
      61  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      68  const __m512 vl5 = _mm512_permutexvar_ps(_mm512_castps_si512(vn5), vtable);
      75  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      82  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
     117  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128():
      71  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      80  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
      89  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      98  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
     107  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     143  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
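The *-nr1fma-* kernels differ from their *-div-* twins only in the last step: the divide that forms e / (e + 1) is replaced by an _mm512_rcp14_ps estimate refined with one Newton-Raphson iteration written as two FMAs (hence "nr1fma"). A sketch of that step, not the kernels' exact code:

    #include <immintrin.h>

    /* f = ve / vd without a hardware divide: r1 = r0 + r0*(1 - d*r0) recovers
     * roughly full single precision from the ~14-bit rcp14 estimate. */
    static __m512 div_via_nr1fma(__m512 ve, __m512 vd) {
      const __m512 vone = _mm512_set1_ps(1.0f);
      __m512 vr = _mm512_rcp14_ps(vd);                     /* r0 ~= 1/d */
      const __m512 verr = _mm512_fnmadd_ps(vr, vd, vone);  /* 1 - d*r0 */
      vr = _mm512_fmadd_ps(vr, verr, vr);                  /* r1 */
      return _mm512_mul_ps(ve, vr);                        /* e * (1/d) */
    }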
D | avx-rr2-p5-div-x48.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48():
      62  __m256 vn5 = _mm256_add_ps(_mm256_mul_ps(vz5, vlog2e), vmagic_bias);   (local)
      79  const __m128 vs_lo5 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn5)), 23));
      80  const __m128 vs_hi5 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn5, 1)), 23));
      88  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
      95  __m256 vt5 = _mm256_add_ps(_mm256_mul_ps(vn5, vminus_ln2_hi), vz5);
     102  vt5 = _mm256_add_ps(_mm256_mul_ps(vn5, vminus_ln2_lo), vt5);
|
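avx-rr2-p5-div-x48.c targets plain AVX, which has neither 256-bit integer shifts nor FMA: the exponent reconstruction therefore runs on the two 128-bit halves separately (lines 79-80 above) and each fmadd becomes a mul+add pair. A sketch of just the 2**n reconstruction for a magic-biased vn:

    #include <immintrin.h>

    /* Rebuild vs = 2**n from a magic-biased vn on AVX without AVX2:
     * _mm_slli_epi32 only exists for 128-bit vectors here, so shift each
     * half and stitch the result back together. */
    static __m256 exp2_from_biased_n_avx(__m256 vn) {
      const __m128 vs_lo = _mm_castsi128_ps(
          _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 23));
      const __m128 vs_hi = _mm_castsi128_ps(
          _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 23));
      return _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1);
    }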
D | avx512f-rr1-p5-scalef-div-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96():
      58  __m512 vn5 = _mm512_mul_ps(vz5, vlog2e);   (local)
      65  vn5 = _mm512_roundscale_ps(vn5, 0);
      72  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
     114  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112():
      63  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);   (local)
      71  const __m512 vl5 = _mm512_permutexvar_ps(_mm512_castps_si512(vn5), vtable);
      79  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
      87  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
     127  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x48.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48():
      69  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);   (local)
      78  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
      86  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
      95  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     102  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x56.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56():
      71  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);   (local)
      81  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
      90  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
     100  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     108  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x64.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
      73  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);   (local)
      84  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
      94  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
     105  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     114  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
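The f32-vscaleexpminusmax kernels above and the f32-radd*expminusmax kernels below share the same AVX2 exp(x - max) core; they differ only in what happens to the result (scale and store, accumulate a sum, or both). A one-vector sketch of that core (compile with -mavx2 -mfma); Taylor coefficients stand in for the kernels' p5 minimax polynomial, and the underflow guard for lanes far below the maximum is omitted:

    #include <immintrin.h>

    /* exp(x - max) for 8 floats: magic-bias rounding, << 23 exponent
     * reconstruction, two-step -ln(2) reduction, degree-5 polynomial.
     * Assumes x <= max, so the result never overflows. */
    static __m256 exp_minus_max_avx2_sketch(__m256 vx, __m256 vmax) {
      const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp23f);
      const __m256 vlog2e        = _mm256_set1_ps(0x1.715476p+0f);
      const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E400p-1f);
      const __m256 vminus_ln2_lo = _mm256_set1_ps(-0x1.7F7D1Cp-20f);
      const __m256 vone          = _mm256_set1_ps(1.0f);
      /* Taylor coefficients of exp(t); the generated kernels use minimax values. */
      const __m256 vc5 = _mm256_set1_ps(0x1.111112p-7f);
      const __m256 vc4 = _mm256_set1_ps(0x1.555556p-5f);
      const __m256 vc3 = _mm256_set1_ps(0x1.555556p-3f);
      const __m256 vc2 = _mm256_set1_ps(0x1.000000p-1f);

      const __m256 vz = _mm256_sub_ps(vx, vmax);             /* z = x - max <= 0 */

      __m256 vn = _mm256_fmadd_ps(vz, vlog2e, vmagic_bias);  /* round(z*log2(e)) + bias */
      const __m256 vs = _mm256_castsi256_ps(
          _mm256_slli_epi32(_mm256_castps_si256(vn), 23));   /* vs = 2**n */
      vn = _mm256_sub_ps(vn, vmagic_bias);

      __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vz);    /* t = z - n*ln(2) */
      vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);

      __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);             /* exp(t) by Horner's rule */
      vp = _mm256_fmadd_ps(vp, vt, vc3);
      vp = _mm256_fmadd_ps(vp, vt, vc2);
      vp = _mm256_fmadd_ps(vp, vt, vone);
      vp = _mm256_fmadd_ps(vp, vt, vone);

      return _mm256_mul_ps(vs, vp);                          /* exp(z) = 2**n * exp(t) */
    }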
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x64-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2():
      72  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);   (local)
      83  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
      93  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
     104  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     113  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx2-p5-x64.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64():
      72  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);   (local)
      83  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
      93  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
     104  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     113  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
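Reduced to scalar loops, the three ukernel families only differ in their output handling: vscaleexpminusmax scales each exp(x - max) by a caller-provided factor and stores it, raddexpminusmax only accumulates the sum (the -acc2 suffix means two interleaved accumulators), and raddstoreexpminusmax stores the values and accumulates their sum in one pass. Signatures are simplified here and exp_minus_max() is a hypothetical stand-in for the vectorized core sketched above:

    #include <stddef.h>

    float exp_minus_max(float x, float max_value);  /* hypothetical scalar core */

    /* f32-vscaleexpminusmax: y[i] = scale * exp(x[i] - max) */
    void vscaleexpminusmax(size_t n, const float* x, float* y, float scale, float max_value) {
      for (size_t i = 0; i < n; i++) y[i] = scale * exp_minus_max(x[i], max_value);
    }

    /* f32-raddexpminusmax: sum of exp(x[i] - max); two accumulators mirror -acc2 */
    float raddexpminusmax(size_t n, const float* x, float max_value) {
      float acc0 = 0.0f, acc1 = 0.0f;
      for (size_t i = 0; i + 1 < n; i += 2) {
        acc0 += exp_minus_max(x[i], max_value);
        acc1 += exp_minus_max(x[i + 1], max_value);
      }
      if (n % 2 != 0) acc0 += exp_minus_max(x[n - 1], max_value);
      return acc0 + acc1;
    }

    /* f32-raddstoreexpminusmax: store exp(x[i] - max) and return the running sum */
    float raddstoreexpminusmax(size_t n, const float* x, float* y, float max_value) {
      float acc = 0.0f;
      for (size_t i = 0; i < n; i++) {
        y[i] = exp_minus_max(x[i], max_value);
        acc += y[i];
      }
      return acc;
    }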