• Home
  • Raw
  • Download

Lines Matching refs:__m256

29   const __m256 vhalf = _mm256_broadcast_ss(&params->fma.half);  in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
31 const __m256 vx0 = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
32 const __m256 vx1 = _mm256_loadu_ps(x + 8); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
33 const __m256 vx2 = _mm256_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
34 const __m256 vx3 = _mm256_loadu_ps(x + 24); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
35 const __m256 vx4 = _mm256_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
36 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
39 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
40 const __m256 vrsqrtx1 = _mm256_rsqrt_ps(vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
41 const __m256 vrsqrtx2 = _mm256_rsqrt_ps(vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
42 const __m256 vrsqrtx3 = _mm256_rsqrt_ps(vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
43 const __m256 vrsqrtx4 = _mm256_rsqrt_ps(vx4); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
44 const __m256 vrsqrtx5 = _mm256_rsqrt_ps(vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
46 __m256 vsqrtx0 = _mm256_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
47 __m256 vhalfrsqrtx0 = _mm256_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
48 __m256 vsqrtx1 = _mm256_mul_ps(vrsqrtx1, vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
49 __m256 vhalfrsqrtx1 = _mm256_mul_ps(vrsqrtx1, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
50 __m256 vsqrtx2 = _mm256_mul_ps(vrsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
51 __m256 vhalfrsqrtx2 = _mm256_mul_ps(vrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
52 __m256 vsqrtx3 = _mm256_mul_ps(vrsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
53 __m256 vhalfrsqrtx3 = _mm256_mul_ps(vrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
54 __m256 vsqrtx4 = _mm256_mul_ps(vrsqrtx4, vx4); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
55 __m256 vhalfrsqrtx4 = _mm256_mul_ps(vrsqrtx4, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
56 __m256 vsqrtx5 = _mm256_mul_ps(vrsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
57 __m256 vhalfrsqrtx5 = _mm256_mul_ps(vrsqrtx5, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
59 const __m256 vresidual0 = _mm256_fnmadd_ps(vsqrtx0, vhalfrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
60 const __m256 vresidual1 = _mm256_fnmadd_ps(vsqrtx1, vhalfrsqrtx1, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
61 const __m256 vresidual2 = _mm256_fnmadd_ps(vsqrtx2, vhalfrsqrtx2, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
62 const __m256 vresidual3 = _mm256_fnmadd_ps(vsqrtx3, vhalfrsqrtx3, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
63 const __m256 vresidual4 = _mm256_fnmadd_ps(vsqrtx4, vhalfrsqrtx4, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
64 const __m256 vresidual5 = _mm256_fnmadd_ps(vsqrtx5, vhalfrsqrtx5, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
79 const __m256 vadjustment0 = _mm256_fnmadd_ps(vsqrtx0, vsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
80 const __m256 vadjustment1 = _mm256_fnmadd_ps(vsqrtx1, vsqrtx1, vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
81 const __m256 vadjustment2 = _mm256_fnmadd_ps(vsqrtx2, vsqrtx2, vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
82 const __m256 vadjustment3 = _mm256_fnmadd_ps(vsqrtx3, vsqrtx3, vx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
83 const __m256 vadjustment4 = _mm256_fnmadd_ps(vsqrtx4, vsqrtx4, vx4); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
84 const __m256 vadjustment5 = _mm256_fnmadd_ps(vsqrtx5, vsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
86 const __m256 vy0 = _mm256_fmadd_ps(vhalfrsqrtx0, vadjustment0, vsqrtx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
87 const __m256 vy1 = _mm256_fmadd_ps(vhalfrsqrtx1, vadjustment1, vsqrtx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
88 const __m256 vy2 = _mm256_fmadd_ps(vhalfrsqrtx2, vadjustment2, vsqrtx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
89 const __m256 vy3 = _mm256_fmadd_ps(vhalfrsqrtx3, vadjustment3, vsqrtx3); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
90 const __m256 vy4 = _mm256_fmadd_ps(vhalfrsqrtx4, vadjustment4, vsqrtx4); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
91 const __m256 vy5 = _mm256_fmadd_ps(vhalfrsqrtx5, vadjustment5, vsqrtx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
102 const __m256 vx = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
105 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
106 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
107 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
108 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
111 const __m256 vadjustment = _mm256_fnmadd_ps(vsqrtx, vsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
112 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
122 const __m256 vx = _mm256_maskload_ps(x, vmask); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
124 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
125 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
126 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
127 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
130 const __m256 vadjustment = _mm256_fnmadd_ps(vsqrtx, vsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
131 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()