/external/XNNPACK/src/f32-vsigmoid/gen/ |
D | vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
    62  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    70  const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
    78  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    86  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
    94  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   126  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
|
D | vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
    64  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    73  const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
    82  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    91  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
   100  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   136  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
|
D | vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
    62  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    70  const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
    78  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    86  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
    94  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   126  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
|
D | vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-div-x112.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
    60  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    68  const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
    76  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    84  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
   124  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
|
D | vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
    64  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    73  const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
    82  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    91  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
   100  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
   136  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
|
D | vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
    60  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    68  const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
    76  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    84  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
   124  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
|
D | vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-div-x128.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
    62  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    71  const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
    80  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
    89  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
   134  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
|
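Note: every avx512f entry above follows the same exp skeleton — n = round(z*log2e) via the vmagic_bias add/subtract, a small 2^fraction table read with a permute, t = z - n*ln2 (split into hi/lo parts in the rr2 variants), a short polynomial in t, and finally _mm512_scalef_ps to apply 2^n; the -div kernels then form e/(e+1) with a divide, while -nr1fma refines a reciprocal with one Newton-Raphson FMA. The single-vector sketch below only illustrates that shape: it drops the table refinement, and the function name, clamp value and Taylor coefficients are choices made for this example, not XNNPACK's constants.

/* A single-vector sketch of the scalef-based sigmoid above; compile with -mavx512f. */
#include <immintrin.h>

static void sigmoid16_sketch(const float* x, float* y) {
  const __m512 vx = _mm512_loadu_ps(x);
  /* work with z = -|x| so exp never overflows; clamp so the magic-bias
   * rounding below stays exact and 2^n stays a normal float */
  __m512 vz = _mm512_sub_ps(_mm512_setzero_ps(), _mm512_abs_ps(vx));
  vz = _mm512_max_ps(vz, _mm512_set1_ps(-87.336f));

  /* n = round(z * log2(e)): adding 1.5*2^23 leaves only the integer part in
   * the mantissa; subtracting it back recovers the rounded value (the
   * vmagic_bias idiom in the listings above) */
  const __m512 vmagic_bias = _mm512_set1_ps(0x1.8p23f);
  __m512 vn = _mm512_fmadd_ps(vz, _mm512_set1_ps(0x1.715476p+0f), vmagic_bias);
  vn = _mm512_sub_ps(vn, vmagic_bias);

  /* t = z - n*ln2 (one step; the rr2 kernels split ln2 into hi/lo parts) */
  const __m512 vt = _mm512_fmadd_ps(vn, _mm512_set1_ps(-0x1.62E43p-1f), vz);

  /* p ~= e^t on [-ln2/2, ln2/2]; plain degree-5 Taylor polynomial */
  __m512 vp = _mm512_set1_ps(1.0f / 120.0f);
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f / 24.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f / 6.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(0.5f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));

  /* e = p * 2^n: scalef applies the exponent directly, no bit tricks needed */
  const __m512 ve = _mm512_scalef_ps(vp, vn);

  /* sigmoid(-|x|) = e / (e + 1); where x > 0 the answer is 1 minus that */
  const __m512 vs = _mm512_div_ps(ve, _mm512_add_ps(ve, _mm512_set1_ps(1.0f)));
  const __mmask16 vpos = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_GT_OQ);
  _mm512_storeu_ps(y, _mm512_mask_sub_ps(vs, vpos, _mm512_set1_ps(1.0f), vs));
}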
D | vsigmoid-avx-rr2-p5-div-x56.c | in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56()
    63  __m256 vn6 = _mm256_add_ps(_mm256_mul_ps(vz6, vlog2e), vmagic_bias);  local
    83  …_m128 vs_lo6 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn6)), 23));
    84  …128 vs_hi6 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn6, 1)), 23));
    93  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   101  __m256 vt6 = _mm256_add_ps(_mm256_mul_ps(vn6, vminus_ln2_hi), vz6);
   109  vt6 = _mm256_add_ps(_mm256_mul_ps(vn6, vminus_ln2_lo), vt6);
|
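Note: the plain-AVX kernel above has neither scalef nor 256-bit integer shifts, so it rebuilds the scale 2^n by shifting each 128-bit half of vn's bit pattern left by 23 into the float exponent field — which is why vs_lo6/vs_hi6 are formed while vn6 still carries vmagic_bias (the exponent bias is folded into that constant). A hedged sketch of the same reconstruction with the +127 bias written out explicitly, assuming n is already an integer-valued float in a safe exponent range; the helper name is made up for the example.

/* Compile with -mavx. */
#include <immintrin.h>

static __m256 exp2_int_avx(__m256 vn) {
  const __m128i vbias = _mm_set1_epi32(127);  /* IEEE binary32 exponent bias */
  /* convert each 128-bit half to int32 and add the bias */
  const __m128i ve_lo = _mm_add_epi32(_mm_cvtps_epi32(_mm256_castps256_ps128(vn)), vbias);
  const __m128i ve_hi = _mm_add_epi32(_mm_cvtps_epi32(_mm256_extractf128_ps(vn, 1)), vbias);
  /* shift the biased exponent into bit 23 and reinterpret as float */
  const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(ve_lo, 23));
  const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(ve_hi, 23));
  return _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1);
}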
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x56.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
    72  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  local
    82  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    91  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   101  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   109  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
|
D | avx2-p5-x64.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
    74  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  local
    85  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    95  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   106  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   115  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
|
D | avx2-p5-x72.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
    76  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  local
    88  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    99  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   111  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   121  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
|
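Note: the vminus_ln2_hi/vminus_ln2_lo pair in these rr2 kernels is a Cody-Waite split of ln(2): the hi constant has enough trailing zero mantissa bits that n*ln2_hi is exact for the small integer n computed above, and the lo constant adds back the truncated remainder on the second FMA. A sketch of how such a split can be derived (the generated kernels simply hard-code the two constants, negated); the helper name is made up for the example.

#include <math.h>
#include <stdint.h>
#include <string.h>

static void split_ln2(float* hi, float* lo) {
  const float ln2 = 0x1.62E43p-1f;       /* ln(2) rounded to float */
  uint32_t bits;
  memcpy(&bits, &ln2, sizeof bits);
  bits &= 0xFFFFF000u;                   /* zero the low 12 mantissa bits: n * hi is now exact for small n */
  memcpy(hi, &bits, sizeof bits);
  *lo = (float)(log(2.0) - (double)*hi); /* remainder, computed in double */
}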
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x64-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
    73  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  local
    84  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    94  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   105  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   114  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
|
D | avx2-p5-x64.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
    72  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  local
    83  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    93  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   104  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   113  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
|
D | avx2-p5-x64-acc4.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
    75  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  local
    86  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    96  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   107  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   116  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
|
D | avx2-p5-x72-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
    76  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  local
    88  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    99  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   111  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   121  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
|
D | avx2-p5-x72.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
    74  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  local
    86  const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
    97  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   109  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   119  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
|
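Note: the -acc2/-acc3/-acc4 suffixes above refer to the number of independent partial sums the reduction keeps; with a single accumulator every add waits on the previous one, while several accumulators let the adds overlap and are folded only once at the end. A minimal AVX sketch of the idea, summing values that are assumed to be already computed (the real kernels fuse the exp evaluation into the same loop) with an element count that is a multiple of 16; the helper name is made up for the example.

/* Compile with -mavx. */
#include <immintrin.h>
#include <stddef.h>

static float sum_avx_acc2(const float* v, size_t n) {  /* n must be a multiple of 16 */
  __m256 vacc0 = _mm256_setzero_ps();
  __m256 vacc1 = _mm256_setzero_ps();
  for (size_t i = 0; i < n; i += 16) {
    vacc0 = _mm256_add_ps(vacc0, _mm256_loadu_ps(v + i));      /* chain 0 */
    vacc1 = _mm256_add_ps(vacc1, _mm256_loadu_ps(v + i + 8));  /* chain 1, independent of chain 0 */
  }
  const __m256 vacc = _mm256_add_ps(vacc0, vacc1);             /* fold the partial sums once */
  __m128 vsum = _mm_add_ps(_mm256_castps256_ps128(vacc), _mm256_extractf128_ps(vacc, 1));
  vsum = _mm_add_ps(vsum, _mm_movehl_ps(vsum, vsum));
  vsum = _mm_add_ss(vsum, _mm_movehdup_ps(vsum));
  return _mm_cvtss_f32(vsum);
}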
/external/XNNPACK/src/f16-vsigmoid/gen/ |
D | vsigmoid-neonfp16arith-rr2-p2-div-x56.c | in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
    60  float16x8_t vn6 = vfmaq_f16(vmagic_bias, vz6, vminus_log2e);  local
    68  const float16x8_t vs6 = vreinterpretq_f16_s16(vshlq_n_s16(vreinterpretq_s16_f16(vn6), 10));
    76  vn6 = vsubq_f16(vn6, vmagic_bias);
    84  float16x8_t vt6 = vfmaq_f16(vz6, vn6, vln2_hi);
    92  vt6 = vfmaq_f16(vt6, vn6, vln2_lo);
|
D | vsigmoid-neonfp16arith-rr2-p2-div-x64.c | in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    62  float16x8_t vn6 = vfmaq_f16(vmagic_bias, vz6, vminus_log2e);  local
    71  const float16x8_t vs6 = vreinterpretq_f16_s16(vshlq_n_s16(vreinterpretq_s16_f16(vn6), 10));
    80  vn6 = vsubq_f16(vn6, vmagic_bias);
    89  float16x8_t vt6 = vfmaq_f16(vz6, vn6, vln2_hi);
    98  vt6 = vfmaq_f16(vt6, vn6, vln2_lo);
|
/external/XNNPACK/src/f16-raddstoreexpminusmax/gen/ |
D | neonfp16arith-rr2-p2-x64-acc4.c | in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    68  float16x8_t vn6 = vfmaq_f16(vmagic_bias, vx6, vlog2e);  local
    77  const float16x8_t vs6 = vreinterpretq_f16_s16(vshlq_n_s16(vreinterpretq_s16_f16(vn6), 10));
    86  vn6 = vsubq_f16(vn6, vmagic_bias);
    95  float16x8_t vt6 = vfmaq_f16(vx6, vn6, vminus_ln2_hi);
   104  vt6 = vfmaq_f16(vt6, vn6, vminus_ln2_lo);
|
D | neonfp16arith-rr2-p2-x64-acc2.c | in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    66  float16x8_t vn6 = vfmaq_f16(vmagic_bias, vx6, vlog2e);  local
    75  const float16x8_t vs6 = vreinterpretq_f16_s16(vshlq_n_s16(vreinterpretq_s16_f16(vn6), 10));
    84  vn6 = vsubq_f16(vn6, vmagic_bias);
    93  float16x8_t vt6 = vfmaq_f16(vx6, vn6, vminus_ln2_hi);
   102  vt6 = vfmaq_f16(vt6, vn6, vminus_ln2_lo);
|
D | neonfp16arith-rr2-p2-x64.c | in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    65  float16x8_t vn6 = vfmaq_f16(vmagic_bias, vx6, vlog2e);  local
    74  const float16x8_t vs6 = vreinterpretq_f16_s16(vshlq_n_s16(vreinterpretq_s16_f16(vn6), 10));
    83  vn6 = vsubq_f16(vn6, vmagic_bias);
    92  float16x8_t vt6 = vfmaq_f16(vx6, vn6, vminus_ln2_hi);
   101  vt6 = vfmaq_f16(vt6, vn6, vminus_ln2_lo);
|
D | neonfp16arith-rr2-p2-x72.c | in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    67  float16x8_t vn6 = vfmaq_f16(vmagic_bias, vx6, vlog2e);  local
    77  const float16x8_t vs6 = vreinterpretq_f16_s16(vshlq_n_s16(vreinterpretq_s16_f16(vn6), 10));
    87  vn6 = vsubq_f16(vn6, vmagic_bias);
    97  float16x8_t vt6 = vfmaq_f16(vx6, vn6, vminus_ln2_hi);
   107  vt6 = vfmaq_f16(vt6, vn6, vminus_ln2_lo);
|
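Note: in both float16 listings above the shift count is 10 rather than 23 because IEEE binary16 keeps a 10-bit mantissa, so the exponent field starts at bit 10 and its bias is 15 rather than 127. A sketch of the corresponding 2^n reconstruction for eight half-precision lanes, with the bias added explicitly instead of being folded into vmagic_bias; it assumes an AArch64 target with the fp16 arithmetic extension (e.g. -march=armv8.2-a+fp16), n an integer-valued vector in roughly [-14, 15], and a helper name made up for the example.

#include <arm_neon.h>

static float16x8_t exp2_int_f16(float16x8_t vn) {
  /* biased exponent of binary16 = n + 15 */
  const int16x8_t vexp = vaddq_s16(vcvtnq_s16_f16(vn), vdupq_n_s16(15));
  /* move it into bits 14:10 and reinterpret the pattern as float16 */
  return vreinterpretq_f16_s16(vshlq_n_s16(vexp, 10));
}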
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx512f-rr1-lut16-p3-perm-x112.c | in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
    63  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    77  const __m512i ven6 = _mm512_slli_epi32(_mm512_castps_si512(vn6), 19);
    78  const __m512i vl6 = _mm512_permutexvar_epi32(_mm512_castps_si512(vn6), vtable);
    93  vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   101  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
|
D | velu-avx2-rr1-lut16-p3-gather-x56.c | in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
    63  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    77  const __m256i vidx6 = _mm256_and_si256(_mm256_castps_si256(vn6), vindex_mask);
    92  const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 19);
    93  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
   108  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6);
|
D | velu-avx2-rr1-lut8-p4-perm-x56.c | in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
    62  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias);  local
    82  const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 20);
    83  const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6));
    84  vn6 = _mm256_sub_ps(vn6, vmagic_bias);
    99  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6);
|
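Note: the velu kernels above keep fractional bits in vn — the low 3 (lut8) or 4 (lut16) mantissa bits of the still-biased value index a table holding the bit patterns of 2^(i/8) or 2^(i/16), while the integer part is shifted left by 20 or 19 so that it lands in the float exponent field and can be combined with the table entry to form the scale. A scalar sketch of that reconstruction for the 3-fraction-bit case, assuming n is a multiple of 1/8 within the normal exponent range; the helper and its table are illustrative, not XNNPACK code.

#include <math.h>
#include <stdint.h>
#include <string.h>

static float exp2_frac3_sketch(float n) {        /* n = k/8 for some integer k */
  static uint32_t table[8];
  static int init = 0;
  if (!init) {                                   /* bit patterns of 2^(i/8), i = 0..7 */
    for (int i = 0; i < 8; i++) {
      const float v = exp2f((float)i / 8.0f);
      memcpy(&table[i], &v, sizeof v);
    }
    init = 1;                                    /* not thread-safe; fine for a sketch */
  }
  const int32_t k = (int32_t)lrintf(n * 8.0f);   /* n in units of 1/8 */
  const uint32_t idx = (uint32_t)k & 7u;         /* low 3 bits select the table entry */
  const int32_t q = (k - (int32_t)idx) / 8;      /* integer part of n */
  /* bumping the exponent field of the table entry by q multiplies it by 2^q */
  const uint32_t bits = table[idx] + ((uint32_t)q << 23);
  float result;
  memcpy(&result, &bits, sizeof result);
  return result;
}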