/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx512f-nr1fma1adj-x48.c | 43 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local 48 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() 55 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() 59 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() 63 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
|
D | fma3-nr1fma1adj-x24.c | 44 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local 49 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() 56 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() 60 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() 64 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
|
D | avx512f-nr1fma1adj-x64.c | 45 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local 52 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 60 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 66 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 71 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
|
D | fma3-nr1fma1adj-x32.c | 46 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local 53 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 61 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 67 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 72 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
|
D | avx512f-nr1fma1adj-x80.c | 47 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() local 56 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 65 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 73 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 79 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
|
D | fma3-nr1fma1adj-x40.c | 48 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() local 57 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 66 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 74 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 80 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
|
D | avx512f-nr1fma1adj-x96.c | 49 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local 60 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 70 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 80 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 87 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
|
D | fma3-nr1fma1adj-x48.c | 50 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local 61 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 71 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 81 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 88 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
|
D | avx512f-nr1fma1adj-x112.c | 51 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local 64 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 75 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 87 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 95 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
|
D | fma3-nr1fma1adj-x56.c | 52 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local 65 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 76 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 88 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 96 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
|
D | avx512f-nr1fma1adj-x128.c | 53 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() local 68 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 80 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 94 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 103 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
|
D | fma3-nr1fma1adj-x64.c | 54 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() local 69 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 81 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 95 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 104 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
|