Lines Matching refs:wrapper
111 using TIAcc = wrapper::traits::promote_t<T>; in run_internal()
112 using TAcc = wrapper::traits::promote_t<TIAcc>; in run_internal()
126 auto vsum_row = wrapper::vdup_n(static_cast<TAcc>(0), wrapper::traits::vector_128_tag{}); in run_internal()
139 const auto a0_d8 = wrapper::vloadq(matrix_a + i); in run_internal()
142 const auto tmp_sum0 = wrapper::vaddl(wrapper::vgetlow(a0_d8), wrapper::vgethigh(a0_d8)); in run_internal()
145 vsum_row = wrapper::vadd(vsum_row, wrapper::vpaddl(tmp_sum0)); in run_internal()
156 sum_row += wrapper::vaddv(vsum_row); in run_internal()
158 auto tmp = wrapper::vpadd(wrapper::vgethigh(vsum_row), wrapper::vgetlow(vsum_row)); in run_internal()
159 tmp = wrapper::vpadd(tmp, tmp); in run_internal()
161 sum_row += wrapper::vgetlane(tmp, 0); in run_internal()
236 using TIAcc = wrapper::traits::promote_t<T>; in run_internal()
237 using TAcc = wrapper::traits::promote_t<TIAcc>; in run_internal()
240 …const auto vec_scalar = wrapper::vdup_n(static_cast<TAcc>(_scalar), wrapper::traits::vector_… in run_internal()
269 … typename wrapper::traits::neon_bitvector<TAcc, wrapper::traits::BitWidth::W128>::type sum_col[4] = in run_internal()
271 wrapper::vdup_n(static_cast<TAcc>(0), wrapper::traits::vector_128_tag{}), in run_internal()
272 wrapper::vdup_n(static_cast<TAcc>(0), wrapper::traits::vector_128_tag{}), in run_internal()
273 wrapper::vdup_n(static_cast<TAcc>(0), wrapper::traits::vector_128_tag{}), in run_internal()
274 wrapper::vdup_n(static_cast<TAcc>(0), wrapper::traits::vector_128_tag{}) in run_internal()
288 const auto b0_u8 = wrapper::vloadq(matrix_b + 0 * in_b_stride); in run_internal()
289 const auto b1_u8 = wrapper::vloadq(matrix_b + 1 * in_b_stride); in run_internal()
290 const auto b2_u8 = wrapper::vloadq(matrix_b + 2 * in_b_stride); in run_internal()
291 const auto b3_u8 = wrapper::vloadq(matrix_b + 3 * in_b_stride); in run_internal()
301 …typename wrapper::traits::neon_bitvector<TIAcc, wrapper::traits::BitWidth::W128>::type tmp_sum[2] = in run_internal()
303 wrapper::vdup_n(static_cast<TIAcc>(0), wrapper::traits::vector_128_tag{}), in run_internal()
304 wrapper::vdup_n(static_cast<TIAcc>(0), wrapper::traits::vector_128_tag{}) in run_internal()
307 tmp_sum[0] = wrapper::vaddw(tmp_sum[0], wrapper::vgetlow(b1_u8)); in run_internal()
308 tmp_sum[0] = wrapper::vaddw(tmp_sum[0], wrapper::vgetlow(b0_u8)); in run_internal()
309 tmp_sum[0] = wrapper::vaddw(tmp_sum[0], wrapper::vgetlow(b2_u8)); in run_internal()
310 tmp_sum[0] = wrapper::vaddw(tmp_sum[0], wrapper::vgetlow(b3_u8)); in run_internal()
311 tmp_sum[1] = wrapper::vaddw(tmp_sum[1], wrapper::vgethigh(b0_u8)); in run_internal()
312 tmp_sum[1] = wrapper::vaddw(tmp_sum[1], wrapper::vgethigh(b1_u8)); in run_internal()
313 tmp_sum[1] = wrapper::vaddw(tmp_sum[1], wrapper::vgethigh(b2_u8)); in run_internal()
314 tmp_sum[1] = wrapper::vaddw(tmp_sum[1], wrapper::vgethigh(b3_u8)); in run_internal()
317 sum_col[0] = wrapper::vaddw(sum_col[0], wrapper::vgetlow(tmp_sum[0])); in run_internal()
318 sum_col[1] = wrapper::vaddw(sum_col[1], wrapper::vgethigh(tmp_sum[0])); in run_internal()
319 sum_col[2] = wrapper::vaddw(sum_col[2], wrapper::vgetlow(tmp_sum[1])); in run_internal()
320 sum_col[3] = wrapper::vaddw(sum_col[3], wrapper::vgethigh(tmp_sum[1])); in run_internal()
328 const auto b0_b8 = wrapper::vloadq(matrix_b + 0 * in_b_stride); in run_internal()
331 …const typename wrapper::traits::neon_bitvector<TIAcc, wrapper::traits::BitWidth::W128>::type b0_b1… in run_internal()
333 wrapper::vmovl(wrapper::vgetlow(b0_b8)), in run_internal()
334 wrapper::vmovl(wrapper::vgethigh(b0_b8)) in run_internal()
338 sum_col[0] = wrapper::vaddw(sum_col[0], wrapper::vgetlow(b0_b16[0])); in run_internal()
339 sum_col[1] = wrapper::vaddw(sum_col[1], wrapper::vgethigh(b0_b16[0])); in run_internal()
340 sum_col[2] = wrapper::vaddw(sum_col[2], wrapper::vgetlow(b0_b16[1])); in run_internal()
341 sum_col[3] = wrapper::vaddw(sum_col[3], wrapper::vgethigh(b0_b16[1])); in run_internal()
349 sum_col[0] = wrapper::vmul(sum_col[0], vec_scalar); in run_internal()
350 sum_col[1] = wrapper::vmul(sum_col[1], vec_scalar); in run_internal()
351 sum_col[2] = wrapper::vmul(sum_col[2], vec_scalar); in run_internal()
352 sum_col[3] = wrapper::vmul(sum_col[3], vec_scalar); in run_internal()
358 wrapper::vstore(vector_sum_col + 0, wrapper::vreinterpret(sum_col[0])); in run_internal()
359 wrapper::vstore(vector_sum_col + 4, wrapper::vreinterpret(sum_col[1])); in run_internal()
360 wrapper::vstore(vector_sum_col + 8, wrapper::vreinterpret(sum_col[2])); in run_internal()
361 wrapper::vstore(vector_sum_col + 12, wrapper::vreinterpret(sum_col[3])); in run_internal()