• Home
  • Raw
  • Download

Lines Matching refs:sum

43   __m128i sum, ta, tb;  in inner_product_gint32_full_1_sse41()  local
46 sum = _mm_setzero_si128 (); in inner_product_gint32_full_1_sse41()
52 sum = in inner_product_gint32_full_1_sse41()
53 _mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta), in inner_product_gint32_full_1_sse41()
55 sum = in inner_product_gint32_full_1_sse41()
56 _mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta), in inner_product_gint32_full_1_sse41()
62 sum = in inner_product_gint32_full_1_sse41()
63 _mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta), in inner_product_gint32_full_1_sse41()
65 sum = in inner_product_gint32_full_1_sse41()
66 _mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta), in inner_product_gint32_full_1_sse41()
69 sum = _mm_add_epi64 (sum, _mm_unpackhi_epi64 (sum, sum)); in inner_product_gint32_full_1_sse41()
70 res = _mm_cvtsi128_si64 (sum); in inner_product_gint32_full_1_sse41()
82 __m128i sum[2], ta, tb; in inner_product_gint32_linear_1_sse41() local
88 sum[0] = sum[1] = _mm_setzero_si128 (); in inner_product_gint32_linear_1_sse41()
94 sum[0] = _mm_add_epi64 (sum[0], _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta), in inner_product_gint32_linear_1_sse41()
96 sum[0] = _mm_add_epi64 (sum[0], _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta), in inner_product_gint32_linear_1_sse41()
100 sum[1] = _mm_add_epi64 (sum[1], _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta), in inner_product_gint32_linear_1_sse41()
102 sum[1] = _mm_add_epi64 (sum[1], _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta), in inner_product_gint32_linear_1_sse41()
105 sum[0] = _mm_srli_epi64 (sum[0], PRECISION_S32); in inner_product_gint32_linear_1_sse41()
106 sum[1] = _mm_srli_epi64 (sum[1], PRECISION_S32); in inner_product_gint32_linear_1_sse41()
107 sum[0] = in inner_product_gint32_linear_1_sse41()
108 _mm_mul_epi32 (sum[0], _mm_shuffle_epi32 (f, _MM_SHUFFLE (0, 0, 0, 0))); in inner_product_gint32_linear_1_sse41()
109 sum[1] = in inner_product_gint32_linear_1_sse41()
110 _mm_mul_epi32 (sum[1], _mm_shuffle_epi32 (f, _MM_SHUFFLE (1, 1, 1, 1))); in inner_product_gint32_linear_1_sse41()
111 sum[0] = _mm_add_epi64 (sum[0], sum[1]); in inner_product_gint32_linear_1_sse41()
112 sum[0] = _mm_add_epi64 (sum[0], _mm_unpackhi_epi64 (sum[0], sum[0])); in inner_product_gint32_linear_1_sse41()
113 res = _mm_cvtsi128_si64 (sum[0]); in inner_product_gint32_linear_1_sse41()
125 __m128i sum[4], ta, tb; in inner_product_gint32_cubic_1_sse41() local
133 sum[0] = sum[1] = sum[2] = sum[3] = _mm_setzero_si128 (); in inner_product_gint32_cubic_1_sse41()
139 sum[0] = _mm_add_epi64 (sum[0], _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta), in inner_product_gint32_cubic_1_sse41()
141 sum[0] = _mm_add_epi64 (sum[0], _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta), in inner_product_gint32_cubic_1_sse41()
145 sum[1] = _mm_add_epi64 (sum[1], _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta), in inner_product_gint32_cubic_1_sse41()
147 sum[1] = _mm_add_epi64 (sum[1], _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta), in inner_product_gint32_cubic_1_sse41()
151 sum[2] = _mm_add_epi64 (sum[2], _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta), in inner_product_gint32_cubic_1_sse41()
153 sum[2] = _mm_add_epi64 (sum[2], _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta), in inner_product_gint32_cubic_1_sse41()
157 sum[3] = _mm_add_epi64 (sum[3], _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta), in inner_product_gint32_cubic_1_sse41()
159 sum[3] = _mm_add_epi64 (sum[3], _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta), in inner_product_gint32_cubic_1_sse41()
162 sum[0] = _mm_srli_epi64 (sum[0], PRECISION_S32); in inner_product_gint32_cubic_1_sse41()
163 sum[1] = _mm_srli_epi64 (sum[1], PRECISION_S32); in inner_product_gint32_cubic_1_sse41()
164 sum[2] = _mm_srli_epi64 (sum[2], PRECISION_S32); in inner_product_gint32_cubic_1_sse41()
165 sum[3] = _mm_srli_epi64 (sum[3], PRECISION_S32); in inner_product_gint32_cubic_1_sse41()
166 sum[0] = in inner_product_gint32_cubic_1_sse41()
167 _mm_mul_epi32 (sum[0], _mm_shuffle_epi32 (f, _MM_SHUFFLE (0, 0, 0, 0))); in inner_product_gint32_cubic_1_sse41()
168 sum[1] = in inner_product_gint32_cubic_1_sse41()
169 _mm_mul_epi32 (sum[1], _mm_shuffle_epi32 (f, _MM_SHUFFLE (1, 1, 1, 1))); in inner_product_gint32_cubic_1_sse41()
170 sum[2] = in inner_product_gint32_cubic_1_sse41()
171 _mm_mul_epi32 (sum[2], _mm_shuffle_epi32 (f, _MM_SHUFFLE (2, 2, 2, 2))); in inner_product_gint32_cubic_1_sse41()
172 sum[3] = in inner_product_gint32_cubic_1_sse41()
173 _mm_mul_epi32 (sum[3], _mm_shuffle_epi32 (f, _MM_SHUFFLE (3, 3, 3, 3))); in inner_product_gint32_cubic_1_sse41()
174 sum[0] = _mm_add_epi64 (sum[0], sum[1]); in inner_product_gint32_cubic_1_sse41()
175 sum[2] = _mm_add_epi64 (sum[2], sum[3]); in inner_product_gint32_cubic_1_sse41()
176 sum[0] = _mm_add_epi64 (sum[0], sum[2]); in inner_product_gint32_cubic_1_sse41()
177 sum[0] = _mm_add_epi64 (sum[0], _mm_unpackhi_epi64 (sum[0], sum[0])); in inner_product_gint32_cubic_1_sse41()
178 res = _mm_cvtsi128_si64 (sum[0]); in inner_product_gint32_cubic_1_sse41()