/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx512f-nr1fma1adj-x48.c | 48 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local 54 vhalfrsqrtx2 = _mm512_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() 55 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
|
D | fma3-nr1fma1adj-x24.c | 49 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local 55 vhalfrsqrtx2 = _mm256_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() 56 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
|
D | avx512f-nr1fma1adj-x64.c | 52 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local 59 vhalfrsqrtx2 = _mm512_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 60 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
|
D | fma3-nr1fma1adj-x32.c | 53 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local 60 vhalfrsqrtx2 = _mm256_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 61 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
|
D | avx512f-nr1fma1adj-x80.c | 56 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() local 64 vhalfrsqrtx2 = _mm512_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 65 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
|
D | fma3-nr1fma1adj-x40.c | 57 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() local 65 vhalfrsqrtx2 = _mm256_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 66 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
|
D | avx512f-nr1fma1adj-x96.c | 60 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local 69 vhalfrsqrtx2 = _mm512_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 70 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
|
D | fma3-nr1fma1adj-x48.c | 61 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local 70 vhalfrsqrtx2 = _mm256_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 71 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
|
D | avx512f-nr1fma1adj-x112.c | 64 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local 74 vhalfrsqrtx2 = _mm512_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 75 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
|
D | fma3-nr1fma1adj-x56.c | 65 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local 75 vhalfrsqrtx2 = _mm256_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 76 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
|
D | avx512f-nr1fma1adj-x128.c | 68 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() local 79 vhalfrsqrtx2 = _mm512_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 80 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
|
D | fma3-nr1fma1adj-x64.c | 69 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() local 80 vhalfrsqrtx2 = _mm256_fmadd_ps(vhalfrsqrtx2, vresidual2, vhalfrsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 81 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
|