Home
last modified time | relevance | path

Searched refs:vsqrtx3 (Results 1 – 10 of 10) sorted by relevance

/external/XNNPACK/src/f32-vsqrt/gen/
Davx512f-nr1fma1adj-x64.c47 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local
53 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
62 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
67 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
72 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
Dfma3-nr1fma1adj-x32.c46 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local
52 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
61 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
66 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
71 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
Davx512f-nr1fma1adj-x80.c49 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() local
57 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
67 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
74 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
80 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
Dfma3-nr1fma1adj-x40.c48 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() local
56 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
66 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
73 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
79 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
Davx512f-nr1fma1adj-x96.c51 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local
61 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
72 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
81 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
88 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
Dfma3-nr1fma1adj-x48.c50 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local
60 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
71 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
80 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
87 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
Davx512f-nr1fma1adj-x112.c53 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local
65 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
77 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
88 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
96 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
Dfma3-nr1fma1adj-x56.c52 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local
64 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
76 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
87 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
95 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
Davx512f-nr1fma1adj-x128.c55 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() local
69 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
82 vsqrtx3 = _mm512_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
95 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
104 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
Dfma3-nr1fma1adj-x64.c54 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() local
68 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
81 vsqrtx3 = _mm256_fmadd_ps(vsqrtx3, vresidual3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
94 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
103 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()