/external/XNNPACK/src/f32-vscaleextexp/gen/
D | avx2-p5-x72.c |
    169  __m256 ve8 = _mm256_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() local
    183  ve8 = _mm256_max_ps(ve8, vmin_exponent); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72()
    196  …const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve8, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72()

D | avx2-p5-x80.c |
    179  __m256 ve8 = _mm256_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() local
    194  ve8 = _mm256_max_ps(ve8, vmin_exponent); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80()
    208  …const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve8, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80()

D | avx2-p5-x88.c |
    189  __m256 ve8 = _mm256_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() local
    205  ve8 = _mm256_max_ps(ve8, vmin_exponent); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
    220  …const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve8, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()

D | avx2-p5-x96.c |
    199  __m256 ve8 = _mm256_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() local
    216  ve8 = _mm256_max_ps(ve8, vmin_exponent); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
    232  …const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve8, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()

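Across the four AVX2 kernels above, the matches trace the same three steps for `ve8`: form the extended exponent (`vn8 + vscalee`), clamp it from below at `vmin_exponent`, and turn it into a power-of-two scale factor `vs8` by adding a magic bias and shifting the resulting biased exponent into the float exponent field. The `196`-line matches are truncated by the search tool mid-expression, so the following is a minimal standalone sketch of the idiom rather than a verbatim completion; the function name is mine, and the constants follow the standard form of this trick:

```c
#include <immintrin.h>

/* Sketch: multiply x by 2^e, where each lane of ve holds an integer-valued
 * float e in [-127, 127].  Adding the magic bias 0x1.8000FEp23f
 * (= 1.5*2^23 + 127) places e + 127, the IEEE-754 biased exponent, in the
 * low mantissa bits; shifting the bit pattern left by 23 then yields exactly
 * the encoding of 2^e.  The clamp at -127 makes out-of-range exponents flush
 * the scale to +0.0f instead of wrapping, which is what vmin_exponent does
 * in the kernels above. */
static __m256 scale_by_pow2_avx2(__m256 vx, __m256 ve) {
  const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vmin_exponent = _mm256_set1_ps(-127.0f);

  ve = _mm256_max_ps(ve, vmin_exponent);
  const __m256 vs = _mm256_castsi256_ps(_mm256_slli_epi32(
      _mm256_castps_si256(_mm256_add_ps(ve, vmagic_bias)), 23));
  return _mm256_mul_ps(vx, vs);
}
```

Splitting the scale into this separately clamped exponent is what lets the vscaleextexp kernels represent values far outside the normal float range until the final multiply.
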
D | avx512f-p5-scalef-x144.c |
    163  const __m512 ve8 = _mm512_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() local
    174  vf8 = _mm512_scalef_ps(vf8, ve8); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()

D | avx512f-p5-scalef-x160.c |
    173  const __m512 ve8 = _mm512_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() local
    185  vf8 = _mm512_scalef_ps(vf8, ve8); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()

D | avx512f-p5-scalef-x176.c |
    183  const __m512 ve8 = _mm512_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() local
    196  vf8 = _mm512_scalef_ps(vf8, ve8); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()

D | avx512f-p5-scalef-x192.c |
    193  const __m512 ve8 = _mm512_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local
    207  vf8 = _mm512_scalef_ps(vf8, ve8); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()

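The AVX-512 kernels above show only two matches per file because no bit manipulation is needed there: `_mm512_scalef_ps(x, e)` computes x * 2^floor(e) in hardware and saturates to 0 or ±infinity on under/overflow, so both the `vmin_exponent` clamp and the magic-bias shift disappear. A sketch of the equivalent of the AVX2 helper above (function name mine):

```c
#include <immintrin.h>

/* Sketch: AVX-512F replacement for the magic-bias/shift trick.
 * VSCALEFPS handles out-of-range exponents itself, so no clamp is needed. */
static __m512 scale_by_pow2_avx512(__m512 vx, __m512 ve) {
  return _mm512_scalef_ps(vx, ve);  /* vx * 2^floor(ve) */
}
```
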
/external/XNNPACK/src/f32-sigmoid/gen/
D | avx2-rr1-p5-div-x80.c |
    196  const __m256 ve8 = _mm256_fmadd_ps(vt8, vp8, vs8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() local
    208  const __m256 vd8 = _mm256_add_ps(ve8, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
    220  __m256 vf8 = _mm256_div_ps(ve8, vd8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()

D | avx2-rr1-p5-div-x72.c |
    185  const __m256 ve8 = _mm256_fmadd_ps(vt8, vp8, vs8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() local
    196  const __m256 vd8 = _mm256_add_ps(ve8, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
    207  __m256 vf8 = _mm256_div_ps(ve8, vd8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()

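In the sigmoid kernels `ve8` plays a different role: it holds the reconstructed exponential, formed by an FMA of the degree-5 polynomial value onto the power-of-two factor `vs8`, and the `div` variants above finish with sigmoid(z) = e / (e + 1). A sketch of that final reconstruction, under the assumption (consistent with these kernels) that the exponential was evaluated at z = -|x| so the sign of x is folded back in at the end; the function name is mine:

```c
#include <immintrin.h>

/* Sketch: finish sigmoid given ve = exp(-|x|).
 * f = e / (e + 1) is sigmoid(-|x|); for x >= 0, reflect with
 * sigmoid(x) = 1 - sigmoid(-x).  _mm256_blendv_ps selects its second
 * operand in lanes where the sign bit of the mask (here vx) is set. */
static __m256 sigmoid_from_exp(__m256 vx, __m256 ve) {
  const __m256 vone = _mm256_set1_ps(1.0f);
  const __m256 vd = _mm256_add_ps(ve, vone);   /* d = e + 1        */
  __m256 vf = _mm256_div_ps(ve, vd);           /* f = e / (e + 1)  */
  return _mm256_blendv_ps(_mm256_sub_ps(vone, vf), vf, vx);
}
```

Working on z = -|x| keeps e in (0, 1], so d = e + 1 stays in [1, 2], a property the reciprocal-based variants below rely on.
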
D | avx2-rr1-p5-nr1fma-x72.c |
    185  const __m256 ve8 = _mm256_fmadd_ps(vt8, vp8, vs8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() local
    196  const __m256 vd8 = _mm256_add_ps(ve8, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
    231  __m256 vf8 = _mm256_mul_ps(ve8, vr8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()

D | avx2-rr1-p5-nr1fma-x80.c |
    196  const __m256 ve8 = _mm256_fmadd_ps(vt8, vp8, vs8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() local
    208  const __m256 vd8 = _mm256_add_ps(ve8, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
    246  __m256 vf8 = _mm256_mul_ps(ve8, vr8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()

D | avx2-rr1-p5-nr2fma-x72.c |
    185  const __m256 ve8 = _mm256_fmadd_ps(vt8, vp8, vs8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local
    196  const __m256 vd8 = _mm256_add_ps(ve8, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
    240  __m256 vf8 = _mm256_mul_ps(ve8, vr8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()

D | avx2-rr1-p5-nr2fma-x80.c |
    196  const __m256 ve8 = _mm256_fmadd_ps(vt8, vp8, vs8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() local
    208  const __m256 vd8 = _mm256_add_ps(ve8, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
    256  __m256 vf8 = _mm256_mul_ps(ve8, vr8); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()

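The `nr1fma` and `nr2fma` variants differ from the `div` ones only in the last step: instead of `_mm256_div_ps`, they estimate r ≈ 1/d with `_mm256_rcp_ps` (about 12 bits of accuracy) and refine it with one or two Newton-Raphson iterations, r ← r + r·(1 − d·r), each a pair of FMAs; the final match in each file, `vf8 = _mm256_mul_ps(ve8, vr8)`, then replaces the division with a multiply. A sketch of the refinement (function name mine; the kernels unroll the loop rather than passing a step count):

```c
#include <immintrin.h>

/* Sketch: Newton-Raphson reciprocal in place of a true divide.
 * Each iteration r <- r + r*(1 - d*r) roughly doubles the number of
 * accurate bits, so one step (nr1fma) or two (nr2fma) trades accuracy
 * against the latency of vdivps. */
static __m256 reciprocal_nr(__m256 vd, int steps) {
  const __m256 vone = _mm256_set1_ps(1.0f);
  __m256 vr = _mm256_rcp_ps(vd);                        /* ~12-bit estimate */
  for (int i = 0; i < steps; i++) {
    const __m256 verr = _mm256_fnmadd_ps(vr, vd, vone); /* 1 - d*r          */
    vr = _mm256_fmadd_ps(vr, verr, vr);                 /* r + r*(1 - d*r)  */
  }
  return vr;
}
```

With `ve` and `vd` as in the previous sketch, `_mm256_mul_ps(ve, reciprocal_nr(vd, 2))` reproduces the nr2fma path; since d is confined to [1, 2], the initial estimate is never near a singularity and the iteration converges reliably.
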