Searched refs:vsqrtx2 (Results 1 – 12 of 12) sorted by relevance

/external/XNNPACK/src/f32-vsqrt/gen/
avx512f-nr1fma1adj-x48.c
43 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local
48 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
55 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
59 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
63 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
fma3-nr1fma1adj-x24.c
44 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local
49 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
56 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
60 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
64 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
avx512f-nr1fma1adj-x64.c
45 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local
52 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
60 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
66 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
71 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
fma3-nr1fma1adj-x32.c
46 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local
53 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
61 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
67 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
72 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
avx512f-nr1fma1adj-x80.c
47 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() local
56 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
65 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
73 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
79 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
fma3-nr1fma1adj-x40.c
48 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() local
57 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
66 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
74 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
80 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
avx512f-nr1fma1adj-x96.c
49 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local
60 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
70 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
80 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
87 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
fma3-nr1fma1adj-x48.c
50 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local
61 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
71 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
81 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
88 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
avx512f-nr1fma1adj-x112.c
51 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local
64 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
75 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
87 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
95 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
fma3-nr1fma1adj-x56.c
52 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local
65 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
76 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
88 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
96 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
avx512f-nr1fma1adj-x128.c
53 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() local
68 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
80 vsqrtx2 = _mm512_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
94 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
103 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
fma3-nr1fma1adj-x64.c
54 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() local
69 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
81 vsqrtx2 = _mm256_fmadd_ps(vsqrtx2, vresidual2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
95 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
104 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
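All twelve matches are generated variants of the same nr1fma1adj square-root scheme: an rsqrt estimate is turned into an approximate sqrt, refined with one Newton-Raphson step, and corrected with a final FMA-based adjustment. The sketch below is a reconstruction of that sequence for a single 8-float block, not the XNNPACK kernel itself; in particular, the refinement of vhalfrsqrtx is an assumption, since only the lines containing vsqrtx2 appear in the results above. Build with something like gcc -O2 -mavx2 -mfma.

#include <immintrin.h>
#include <stdio.h>

/* One nr1fma1adj sqrt step over 8 floats (positive, normal inputs assumed). */
static void sqrt_nr1fma1adj_block(const float* x, float* y) {
  const __m256 vhalf = _mm256_set1_ps(0.5f);
  const __m256 vx = _mm256_loadu_ps(x);

  /* Low-precision estimate of 1/sqrt(x). */
  const __m256 vrsqrtx = _mm256_rsqrt_ps(vx);
  /* sqrt(x) ~= x * rsqrt(x), matching the `vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2)` lines. */
  __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx);
  __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf);

  /* One Newton-Raphson step: residual = 0.5 - sqrtx * halfrsqrtx. */
  const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf);
  vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx);  /* assumed refinement */
  vsqrtx = _mm256_fmadd_ps(vsqrtx, vresidual, vsqrtx);

  /* Final adjustment: y = sqrtx + halfrsqrtx * (x - sqrtx * sqrtx). */
  const __m256 vadjustment = _mm256_fnmadd_ps(vsqrtx, vsqrtx, vx);
  const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx);

  _mm256_storeu_ps(y, vy);
}

int main(void) {
  const float x[8] = {1.0f, 2.0f, 4.0f, 9.0f, 16.0f, 0.25f, 100.0f, 3.0f};
  float y[8];
  sqrt_nr1fma1adj_block(x, y);
  for (int i = 0; i < 8; i++) {
    printf("sqrt(%g) ~= %.7f\n", x[i], y[i]);
  }
  return 0;
}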