• Home
  • Raw
  • Download

Lines Matching refs:__m256

29   const __m256 vhalf = _mm256_broadcast_ss(&params->fma.half);  in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
31 const __m256 vx0 = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
32 const __m256 vx1 = _mm256_loadu_ps(x + 8); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
33 const __m256 vx2 = _mm256_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
34 const __m256 vx3 = _mm256_loadu_ps(x + 24); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
35 const __m256 vx4 = _mm256_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
36 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
37 const __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
40 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
41 const __m256 vrsqrtx1 = _mm256_rsqrt_ps(vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
42 const __m256 vrsqrtx2 = _mm256_rsqrt_ps(vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
43 const __m256 vrsqrtx3 = _mm256_rsqrt_ps(vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
44 const __m256 vrsqrtx4 = _mm256_rsqrt_ps(vx4); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
45 const __m256 vrsqrtx5 = _mm256_rsqrt_ps(vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
46 const __m256 vrsqrtx6 = _mm256_rsqrt_ps(vx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
48 __m256 vsqrtx0 = _mm256_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
49 __m256 vhalfrsqrtx0 = _mm256_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
50 __m256 vsqrtx1 = _mm256_mul_ps(vrsqrtx1, vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
51 __m256 vhalfrsqrtx1 = _mm256_mul_ps(vrsqrtx1, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
52 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
53 __m256 vhalfrsqrtx2 = _mm256_mul_ps(vrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
54 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
55 __m256 vhalfrsqrtx3 = _mm256_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
56 __m256 vsqrtx4 = _mm256_mul_ps(vrsqrtx4, vx4); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
57 __m256 vhalfrsqrtx4 = _mm256_mul_ps(vrsqrtx4, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
58 __m256 vsqrtx5 = _mm256_mul_ps(vrsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
59 __m256 vhalfrsqrtx5 = _mm256_mul_ps(vrsqrtx5, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
60 __m256 vsqrtx6 = _mm256_mul_ps(vrsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
61 __m256 vhalfrsqrtx6 = _mm256_mul_ps(vrsqrtx6, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
63 const __m256 vresidual0 = _mm256_fnmadd_ps(vsqrtx0, vhalfrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
64 const __m256 vresidual1 = _mm256_fnmadd_ps(vsqrtx1, vhalfrsqrtx1, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
65 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
66 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
67 const __m256 vresidual4 = _mm256_fnmadd_ps(vsqrtx4, vhalfrsqrtx4, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
68 const __m256 vresidual5 = _mm256_fnmadd_ps(vsqrtx5, vhalfrsqrtx5, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
69 const __m256 vresidual6 = _mm256_fnmadd_ps(vsqrtx6, vhalfrsqrtx6, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
86 const __m256 vadjustment0 = _mm256_fnmadd_ps(vsqrtx0, vsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
87 const __m256 vadjustment1 = _mm256_fnmadd_ps(vsqrtx1, vsqrtx1, vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
88 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
89 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
90 const __m256 vadjustment4 = _mm256_fnmadd_ps(vsqrtx4, vsqrtx4, vx4); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
91 const __m256 vadjustment5 = _mm256_fnmadd_ps(vsqrtx5, vsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
92 const __m256 vadjustment6 = _mm256_fnmadd_ps(vsqrtx6, vsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
94 const __m256 vy0 = _mm256_fmadd_ps(vhalfrsqrtx0, vadjustment0, vsqrtx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
95 const __m256 vy1 = _mm256_fmadd_ps(vhalfrsqrtx1, vadjustment1, vsqrtx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
96 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
97 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
98 const __m256 vy4 = _mm256_fmadd_ps(vhalfrsqrtx4, vadjustment4, vsqrtx4); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
99 const __m256 vy5 = _mm256_fmadd_ps(vhalfrsqrtx5, vadjustment5, vsqrtx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
100 const __m256 vy6 = _mm256_fmadd_ps(vhalfrsqrtx6, vadjustment6, vsqrtx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
112 const __m256 vx = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
115 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
116 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
117 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
118 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
121 const __m256 vadjustment = _mm256_fnmadd_ps(vsqrtx, vsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
122 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
132 const __m256 vx = _mm256_maskload_ps(x, vmask); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
134 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
135 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
136 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
137 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
140 const __m256 vadjustment = _mm256_fnmadd_ps(vsqrtx, vsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
141 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()