/external/XNNPACK/src/f32-velu/gen/

D | velu-avx2-rr1-lut8-p4-perm-x80.c | in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
    126: __m256 vs9 = _mm256_castsi256_ps(_mm256_add_epi32(vl9, ven9));  (local)
    170: vt9 = _mm256_mul_ps(vt9, vs9);
    190: vs9 = _mm256_fmsub_ps(vs9, valpha, valpha);
    211: const __m256 ve9 = _mm256_fmadd_ps(vp9, valpha, vs9);

D | velu-avx2-rr1-p6-x80.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
    95: __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    172: vt9 = _mm256_mul_ps(vt9, vs9);
    192: vs9 = _mm256_fmsub_ps(vs9, valpha, valpha);
    213: const __m256 ve9 = _mm256_fmadd_ps(vp9, valpha, vs9);

D | velu-avx2-rr1-lut4-p4-perm-x80.c | in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80()
    127: __m256 vs9 = _mm256_castsi256_ps(_mm256_add_epi32(vl9, ven9));  (local)
    171: vt9 = _mm256_mul_ps(vt9, vs9);
    191: vs9 = _mm256_fmsub_ps(vs9, valpha, valpha);
    212: const __m256 ve9 = _mm256_fmadd_ps(vp9, valpha, vs9);

D | velu-avx2-rr1-lut16-p3-gather-x80.c | in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
    137: __m256 vs9 = _mm256_castsi256_ps(_mm256_add_epi32(vl9, ven9));  (local)
    170: vt9 = _mm256_mul_ps(vt9, vs9);
    190: vs9 = _mm256_fmsub_ps(vs9, valpha, valpha);
    211: const __m256 ve9 = _mm256_fmadd_ps(vp9, valpha, vs9);
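In all four ELU variants above, vs9 holds the reconstructed scale s = 2**n: built by a table lookup plus an exponent add in the lut variants, and by shifting the rounded exponent straight into the float's exponent field in the p6 variant. The three later matches are the shared negative-path finish: fold s into the reduced argument t, turn s into alpha*(s - 1), and recombine with the polynomial in one FMA so that ve9 approximates alpha*(exp(x) - 1). A minimal one-vector sketch of that tail follows; elu_negative_tail and its arguments are hypothetical stand-ins for the kernels' vs9/vt9/vp9/valpha values, and vp is assumed to hold the polynomial evaluated down to its quadratic coefficient, as in the listed files.

#include <immintrin.h>

// Sketch (not the generated kernel): ELU negative-path finish matching the
// vs9/vt9 lines listed above.  Assumed inputs: vs = 2**n, vt = reduced
// argument t, vp = poly(t) down to its t^2 coefficient, valpha = ELU alpha.
static inline __m256 elu_negative_tail(__m256 vs, __m256 vt, __m256 vp, __m256 valpha) {
  vp = _mm256_mul_ps(vp, vt);                // p := t * poly(t)
  vt = _mm256_mul_ps(vt, vs);                // "vt9 = _mm256_mul_ps(vt9, vs9)": t := s * t
  vs = _mm256_fmsub_ps(vs, valpha, valpha);  // "vs9 = _mm256_fmsub_ps(vs9, valpha, valpha)": alpha*(s - 1)
  vp = _mm256_fmadd_ps(vp, vt, vt);          // s*t*(1 + t*poly) ~= s*(exp(t) - 1)
  return _mm256_fmadd_ps(vp, valpha, vs);    // "ve9 = _mm256_fmadd_ps(vp9, valpha, vs9)" ~= alpha*(exp(x) - 1)
}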
/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx2-p5-x80-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
    93: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    189: vt9 = _mm256_mul_ps(vt9, vs9);
    200: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x80.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
    92: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    188: vt9 = _mm256_mul_ps(vt9, vs9);
    199: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x80-acc5.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
    96: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    192: vt9 = _mm256_mul_ps(vt9, vs9);
    203: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x96.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
    98: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    210: vt9 = _mm256_mul_ps(vt9, vs9);
    223: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x96-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
    99: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    211: vt9 = _mm256_mul_ps(vt9, vs9);
    224: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x96-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
    100: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    212: vt9 = _mm256_mul_ps(vt9, vs9);
    225: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x96-acc6.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
    103: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    215: vt9 = _mm256_mul_ps(vt9, vs9);
    228: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);
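Every raddexpminusmax match above follows the same three steps: vn9 carries round(z * log2(e)) in its low mantissa bits thanks to a magic-bias addition, so a left shift by 23 drops the pre-biased integer straight into the exponent field and yields vs9 = 2**n; vt9 is then scaled by vs9; and one FMA finishes exp(z) ~= s + (s*t)*p before the vector is folded into the running sum. A hedged, self-contained sketch of one such vector follows; the helper name exp_nonpositive and the Taylor coefficients are illustrative rather than the kernels' fitted constants, and the flush to zero for very negative z is omitted.

#include <immintrin.h>

// Sketch of the exp(z) reconstruction repeated per vector in the kernels above
// (z = x - max, so z <= 0).  Constants are the textbook ones for this trick.
static inline __m256 exp_nonpositive(__m256 vz) {
  // 0x1.8p23 forces round-to-nearest-int into the low mantissa bits; the extra
  // +127 pre-biases n so the shift below produces the float 2**n directly.
  const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vlog2e      = _mm256_set1_ps(0x1.715476p+0f);  // log2(e)
  const __m256 vminus_ln2  = _mm256_set1_ps(-0x1.62E43p-1f);  // -ln(2), one word (rr1-style)
  // Taylor coefficients of (exp(t) - 1)/t; the real kernels use a minimax fit.
  const __m256 vc5 = _mm256_set1_ps(1.0f / 120.0f);
  const __m256 vc4 = _mm256_set1_ps(1.0f / 24.0f);
  const __m256 vc3 = _mm256_set1_ps(1.0f / 6.0f);
  const __m256 vc2 = _mm256_set1_ps(0.5f);
  const __m256 vc1 = _mm256_set1_ps(1.0f);

  __m256 vn = _mm256_fmadd_ps(vz, vlog2e, vmagic_bias);        // n := round(z / ln(2)), biased
  const __m256 vs = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_castps_si256(vn), 23));         // s := 2**n ("vs9 = ..." above)
  vn = _mm256_sub_ps(vn, vmagic_bias);                         // recover the integer n
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);             // t := z - n*ln(2)

  __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);                   // Horner: p(t) ~= (exp(t) - 1)/t
  vp = _mm256_fmadd_ps(vp, vt, vc3);
  vp = _mm256_fmadd_ps(vp, vt, vc2);
  vp = _mm256_fmadd_ps(vp, vt, vc1);

  vt = _mm256_mul_ps(vt, vs);                                  // "vt9 = _mm256_mul_ps(vt9, vs9)"
  return _mm256_fmadd_ps(vt, vp, vs);                          // exp(z) ~= s + (s*t)*p  ("vf9 = ...")
}

The acc2/acc3/acc5/acc6 suffixes only change how many partial accumulators the ten (x80) or twelve (x96) resulting vf vectors are summed into; the handling of vs9 itself is identical across them.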
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx2-p5-x80.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
    94: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    190: vt9 = _mm256_mul_ps(vt9, vs9);
    201: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x88.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
    97: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    201: vt9 = _mm256_mul_ps(vt9, vs9);
    213: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x96.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
    100: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    212: vt9 = _mm256_mul_ps(vt9, vs9);
    225: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);
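The vscaleexpminusmax kernels build vf9 exactly as above; the difference is only in the finishing step, which multiplies each reconstructed exponential by a scale factor before writing it out. A small sketch of that finish, reusing the hypothetical exp_nonpositive() helper from the previous block (assumed to sit in the same file):

// Sketch of the per-vector finish in the vscaleexpminusmax kernels above:
// y = scale * exp(x - max).  exp_nonpositive() is the illustrative helper
// sketched earlier, not an XNNPACK function.
static inline void scale_exp_vector(const float* x, float* y, __m256 vmax, __m256 vscale) {
  const __m256 vz = _mm256_sub_ps(_mm256_loadu_ps(x), vmax);  // z = x - max <= 0
  const __m256 vf = exp_nonpositive(vz);                      // vf ~= exp(z), built as in the listing
  _mm256_storeu_ps(y, _mm256_mul_ps(vf, vscale));             // scale and store
}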
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | avx2-p5-x80.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80()
    93: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    189: vt9 = _mm256_mul_ps(vt9, vs9);
    200: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x80-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5()
    97: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    193: vt9 = _mm256_mul_ps(vt9, vs9);
    204: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x80-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2()
    94: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    190: vt9 = _mm256_mul_ps(vt9, vs9);
    201: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x96-acc6.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
    104: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    216: vt9 = _mm256_mul_ps(vt9, vs9);
    229: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x96-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
    101: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    213: vt9 = _mm256_mul_ps(vt9, vs9);
    226: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x96-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
    100: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    212: vt9 = _mm256_mul_ps(vt9, vs9);
    225: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-p5-x96.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
    99: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    211: vt9 = _mm256_mul_ps(vt9, vs9);
    224: __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);
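The raddstoreexpminusmax kernels combine the two finishes: each reconstructed vf9 is both stored (typically consumed by a later normalization pass, as in softmax) and added into a running accumulator. A sketch of that step, again on top of the hypothetical exp_nonpositive() helper:

// Sketch of the per-vector finish in the raddstoreexpminusmax kernels above:
// store exp(x - max) and fold it into the running sum of exponentials.
static inline __m256 store_and_accumulate(const float* x, float* y, __m256 vmax, __m256 vacc) {
  const __m256 vz = _mm256_sub_ps(_mm256_loadu_ps(x), vmax);
  const __m256 vf = exp_nonpositive(vz);   // same reconstruction as in the listing
  _mm256_storeu_ps(y, vf);                 // store exp(x - max)
  return _mm256_add_ps(vacc, vf);          // accumulate the sum of exponentials
}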
/external/XNNPACK/src/f32-sigmoid/gen/

D | avx2-rr1-p5-div-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
    84: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    161: vt9 = _mm256_mul_ps(vt9, vs9);
    172: const __m256 ve9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-rr1-p5-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
    84: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    161: vt9 = _mm256_mul_ps(vt9, vs9);
    172: const __m256 ve9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx2-rr1-p5-nr2fma-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
    84: const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));  (local)
    161: vt9 = _mm256_mul_ps(vt9, vs9);
    172: const __m256 ve9 = _mm256_fmadd_ps(vt9, vp9, vs9);

D | avx-rr2-p5-div-x80.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
    105: const __m256 vs9 = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo9), vs_hi9, 1);  (local)
    193: vt9 = _mm256_mul_ps(vt9, vs9);
    204: const __m256 ve9 = _mm256_add_ps(_mm256_mul_ps(vt9, vp9), vs9);
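The sigmoid kernels feed the same reconstruction with z = -|x| and then finish with a division (the -div variants) or a Newton-Raphson reciprocal refinement (the -nr1fma/-nr2fma variants). The one plain-AVX file, avx-rr2-p5-div-x80.c, assembles vs9 from two 128-bit halves because the 256-bit integer shift used elsewhere is an AVX2 instruction, and it recombines with a separate multiply and add because FMA is not assumed. Below is a sketch of the -div finish, built on the hypothetical exp_nonpositive() helper from the earlier block; the reflection for positive inputs uses the identity sigmoid(x) = 1 - sigmoid(-x).

// Sketch of the sigmoid finish used by the -div variants above:
// sigmoid(x) = e / (1 + e) with e = exp(-|x|), reflected for x > 0.
static inline __m256 sigmoid_vector(__m256 vx) {
  const __m256 vone       = _mm256_set1_ps(1.0f);
  const __m256 vsign_mask = _mm256_set1_ps(-0.0f);
  const __m256 vz = _mm256_or_ps(vx, vsign_mask);  // z = -|x|: force the sign bit on
  const __m256 ve = exp_nonpositive(vz);           // plays the role of ve9 in the listing
  const __m256 vd = _mm256_add_ps(ve, vone);       // d = 1 + exp(z)
  const __m256 vf = _mm256_div_ps(ve, vd);         // sigmoid(-|x|)
  // Keep sigmoid(-|x|) for negative x, use 1 - sigmoid(-|x|) for positive x,
  // keyed off the sign bit of the original input.
  return _mm256_blendv_ps(_mm256_sub_ps(vone, vf), vf, vx);
}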