| /external/XNNPACK/src/f32-vsqrt/gen/ |
| D | avx512f-nr1fma1adj-x64.c | 47 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local 53 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 62 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 67 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 72 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
|
| D | fma3-nr1fma1adj-x32.c | 46 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local 52 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 61 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 66 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 71 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
|
| D | avx512f-nr1fma1adj-x80.c | 49 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() local 57 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 67 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 74 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 80 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
|
| D | fma3-nr1fma1adj-x40.c | 48 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() local 56 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 66 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 73 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 79 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
|
| D | avx512f-nr1fma1adj-x96.c | 51 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local 61 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 72 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 81 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 88 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
|
| D | fma3-nr1fma1adj-x48.c | 50 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local 60 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 71 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 80 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 87 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
|
| D | avx512f-nr1fma1adj-x112.c | 53 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local 65 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 77 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 88 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 96 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
|
| D | fma3-nr1fma1adj-x56.c | 52 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local 64 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 76 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 87 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 95 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
|
| D | avx512f-nr1fma1adj-x128.c | 55 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() local 69 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 82 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 95 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 104 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
|
| D | fma3-nr1fma1adj-x64.c | 54 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() local 68 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 81 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 94 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 103 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
|