• Home
  • Raw
  • Download

Lines Matching refs:sum

34   __m128 sum = _mm_setzero_ps ();  in inner_product_gfloat_full_1_sse()  local
37 sum = in inner_product_gfloat_full_1_sse()
38 _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + i + 0), in inner_product_gfloat_full_1_sse()
40 sum = in inner_product_gfloat_full_1_sse()
41 _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + i + 4), in inner_product_gfloat_full_1_sse()
44 sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum)); in inner_product_gfloat_full_1_sse()
45 sum = _mm_add_ss (sum, _mm_shuffle_ps (sum, sum, 0x55)); in inner_product_gfloat_full_1_sse()
46 _mm_store_ss (o, sum); in inner_product_gfloat_full_1_sse()
54 __m128 sum[2], t; in inner_product_gfloat_linear_1_sse() local
59 sum[0] = sum[1] = _mm_setzero_ps (); in inner_product_gfloat_linear_1_sse()
63 sum[0] = _mm_add_ps (sum[0], _mm_mul_ps (t, _mm_load_ps (c[0] + i + 0))); in inner_product_gfloat_linear_1_sse()
64 sum[1] = _mm_add_ps (sum[1], _mm_mul_ps (t, _mm_load_ps (c[1] + i + 0))); in inner_product_gfloat_linear_1_sse()
66 sum[0] = _mm_add_ps (sum[0], _mm_mul_ps (t, _mm_load_ps (c[0] + i + 4))); in inner_product_gfloat_linear_1_sse()
67 sum[1] = _mm_add_ps (sum[1], _mm_mul_ps (t, _mm_load_ps (c[1] + i + 4))); in inner_product_gfloat_linear_1_sse()
69 sum[0] = _mm_mul_ps (_mm_sub_ps (sum[0], sum[1]), _mm_load1_ps (icoeff)); in inner_product_gfloat_linear_1_sse()
70 sum[0] = _mm_add_ps (sum[0], sum[1]); in inner_product_gfloat_linear_1_sse()
71 sum[0] = _mm_add_ps (sum[0], _mm_movehl_ps (sum[0], sum[0])); in inner_product_gfloat_linear_1_sse()
72 sum[0] = _mm_add_ss (sum[0], _mm_shuffle_ps (sum[0], sum[0], 0x55)); in inner_product_gfloat_linear_1_sse()
73 _mm_store_ss (o, sum[0]); in inner_product_gfloat_linear_1_sse()
81 __m128 sum[4]; in inner_product_gfloat_cubic_1_sse() local
89 sum[0] = sum[1] = sum[2] = sum[3] = _mm_setzero_ps (); in inner_product_gfloat_cubic_1_sse()
93 sum[0] = _mm_add_ps (sum[0], _mm_mul_ps (t, _mm_load_ps (c[0] + i))); in inner_product_gfloat_cubic_1_sse()
94 sum[1] = _mm_add_ps (sum[1], _mm_mul_ps (t, _mm_load_ps (c[1] + i))); in inner_product_gfloat_cubic_1_sse()
95 sum[2] = _mm_add_ps (sum[2], _mm_mul_ps (t, _mm_load_ps (c[2] + i))); in inner_product_gfloat_cubic_1_sse()
96 sum[3] = _mm_add_ps (sum[3], _mm_mul_ps (t, _mm_load_ps (c[3] + i))); in inner_product_gfloat_cubic_1_sse()
98 sum[0] = _mm_mul_ps (sum[0], _mm_shuffle_ps (f, f, 0x00)); in inner_product_gfloat_cubic_1_sse()
99 sum[1] = _mm_mul_ps (sum[1], _mm_shuffle_ps (f, f, 0x55)); in inner_product_gfloat_cubic_1_sse()
100 sum[2] = _mm_mul_ps (sum[2], _mm_shuffle_ps (f, f, 0xaa)); in inner_product_gfloat_cubic_1_sse()
101 sum[3] = _mm_mul_ps (sum[3], _mm_shuffle_ps (f, f, 0xff)); in inner_product_gfloat_cubic_1_sse()
102 sum[0] = _mm_add_ps (sum[0], sum[1]); in inner_product_gfloat_cubic_1_sse()
103 sum[2] = _mm_add_ps (sum[2], sum[3]); in inner_product_gfloat_cubic_1_sse()
104 sum[0] = _mm_add_ps (sum[0], sum[2]); in inner_product_gfloat_cubic_1_sse()
105 sum[0] = _mm_add_ps (sum[0], _mm_movehl_ps (sum[0], sum[0])); in inner_product_gfloat_cubic_1_sse()
106 sum[0] = _mm_add_ss (sum[0], _mm_shuffle_ps (sum[0], sum[0], 0x55)); in inner_product_gfloat_cubic_1_sse()
107 _mm_store_ss (o, sum[0]); in inner_product_gfloat_cubic_1_sse()