Lines Matching refs:__m512
28 const __m512 vhalf = _mm512_set1_ps(params->fma.half); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
30 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
31 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
32 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
33 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
34 const __m512 vx4 = _mm512_loadu_ps(x + 64); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
38 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
39 const __m512 vrsqrtx1 = _mm512_rsqrt14_ps(vx1); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
40 const __m512 vrsqrtx2 = _mm512_rsqrt14_ps(vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
41 const __m512 vrsqrtx3 = _mm512_rsqrt14_ps(vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
42 const __m512 vrsqrtx4 = _mm512_rsqrt14_ps(vx4); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
43 const __m512 vrsqrtx5 = _mm512_rsqrt14_ps(vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
45 __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
46 __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
47 __m512 vsqrtx1 = _mm512_mul_ps(vrsqrtx1, vx1); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
48 __m512 vhalfrsqrtx1 = _mm512_mul_ps(vrsqrtx1, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
49 __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
50 __m512 vhalfrsqrtx2 = _mm512_mul_ps(vrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
51 __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
52 __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
53 __m512 vsqrtx4 = _mm512_mul_ps(vrsqrtx4, vx4); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
54 __m512 vhalfrsqrtx4 = _mm512_mul_ps(vrsqrtx4, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
55 __m512 vsqrtx5 = _mm512_mul_ps(vrsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
56 __m512 vhalfrsqrtx5 = _mm512_mul_ps(vrsqrtx5, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
58 const __m512 vresidual0 = _mm512_fnmadd_ps(vsqrtx0, vhalfrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
59 const __m512 vresidual1 = _mm512_fnmadd_ps(vsqrtx1, vhalfrsqrtx1, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
60 const __m512 vresidual2 = _mm512_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
61 const __m512 vresidual3 = _mm512_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
62 const __m512 vresidual4 = _mm512_fnmadd_ps(vsqrtx4, vhalfrsqrtx4, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
63 const __m512 vresidual5 = _mm512_fnmadd_ps(vsqrtx5, vhalfrsqrtx5, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
78 const __m512 vadjustment0 = _mm512_fnmadd_ps(vsqrtx0, vsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
79 const __m512 vadjustment1 = _mm512_fnmadd_ps(vsqrtx1, vsqrtx1, vx1); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
80 const __m512 vadjustment2 = _mm512_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
81 const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
82 const __m512 vadjustment4 = _mm512_fnmadd_ps(vsqrtx4, vsqrtx4, vx4); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
83 const __m512 vadjustment5 = _mm512_fnmadd_ps(vsqrtx5, vsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
85 const __m512 vy0 = _mm512_fmadd_ps(vhalfrsqrtx0, vadjustment0, vsqrtx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
86 const __m512 vy1 = _mm512_fmadd_ps(vhalfrsqrtx1, vadjustment1, vsqrtx1); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
87 const __m512 vy2 = _mm512_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
88 const __m512 vy3 = _mm512_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
89 const __m512 vy4 = _mm512_fmadd_ps(vhalfrsqrtx4, vadjustment4, vsqrtx4); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
90 const __m512 vy5 = _mm512_fmadd_ps(vhalfrsqrtx5, vadjustment5, vsqrtx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
101 const __m512 vx = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
104 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
105 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
106 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
107 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
110 const __m512 vadjustment = _mm512_fnmadd_ps(vsqrtx, vsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
111 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
123 const __m512 vx = _mm512_maskz_loadu_ps(vmask, x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
124 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
125 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
126 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
127 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
130 const __m512 vadjustment = _mm512_fnmadd_ps(vsqrtx, vsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
131 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()