/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx512f-nr1fma1adj-x64.c | 48 __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local 53 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 61 vhalfrsqrtx3 = _mm512_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 72 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
|
D | fma3-nr1fma1adj-x32.c | 49 __m256 vhalfrsqrtx3 = _mm256_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local 54 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 62 vhalfrsqrtx3 = _mm256_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 73 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
|
D | avx512f-nr1fma1adj-x80.c | 50 __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() local 57 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 66 vhalfrsqrtx3 = _mm512_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 80 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
|
D | fma3-nr1fma1adj-x40.c | 51 __m256 vhalfrsqrtx3 = _mm256_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() local 58 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 67 vhalfrsqrtx3 = _mm256_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 81 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
|
D | avx512f-nr1fma1adj-x96.c | 52 __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local 61 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 71 vhalfrsqrtx3 = _mm512_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 88 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
|
D | fma3-nr1fma1adj-x48.c | 53 __m256 vhalfrsqrtx3 = _mm256_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local 62 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 72 vhalfrsqrtx3 = _mm256_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 89 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
|
D | avx512f-nr1fma1adj-x112.c | 54 __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local 65 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 76 vhalfrsqrtx3 = _mm512_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 96 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
|
D | fma3-nr1fma1adj-x56.c | 55 __m256 vhalfrsqrtx3 = _mm256_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local 66 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 77 vhalfrsqrtx3 = _mm256_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 97 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
|
D | avx512f-nr1fma1adj-x128.c | 56 __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() local 69 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 81 vhalfrsqrtx3 = _mm512_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 104 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
|
D | fma3-nr1fma1adj-x64.c | 57 __m256 vhalfrsqrtx3 = _mm256_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() local 70 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 82 vhalfrsqrtx3 = _mm256_fmadd_ps(vhalfrsqrtx3, vresidual3, vhalfrsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 105 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
|