
Searched refs:vxb0 (Results 1 – 6 of 6) sorted by relevance

/external/XNNPACK/src/q8-gemm/
4x4c2-sse2.c
80 const __m128i vxb0 = _mm_sub_epi16(_mm_unpacklo_epi8(vb0, vzero), vb_zero_point); in xnn_q8_gemm_ukernel_4x4c2__sse2() local
83 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_gemm_ukernel_4x4c2__sse2()
85 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_gemm_ukernel_4x4c2__sse2()
87 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_gemm_ukernel_4x4c2__sse2()
89 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_gemm_ukernel_4x4c2__sse2()
146 const __m128i vxb0 = _mm_sub_epi16(_mm_unpacklo_epi8(vb0, vzero), vb_zero_point); in xnn_q8_gemm_ukernel_4x4c2__sse2() local
149 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_gemm_ukernel_4x4c2__sse2()
151 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_gemm_ukernel_4x4c2__sse2()
153 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_gemm_ukernel_4x4c2__sse2()
155 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_gemm_ukernel_4x4c2__sse2()
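
These hits show the 4x4c2 SSE2 microkernel's re-quantization pattern: _mm_unpacklo_epi8 against a zero vector zero-extends eight uint8 weights to int16, _mm_sub_epi16 removes the weight zero point to produce vxb0, and each accumulator row then gets _mm_madd_epi16 against a broadcast pair of activation lanes. The q8-igemm hits further down use the identical construction. Below is a minimal standalone sketch of that step; the array b, the zero point of 128, and the activation vector vxa0 are invented for illustration and are not the kernel's actual inputs.

#include <emmintrin.h>  /* SSE2 intrinsics */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Eight uint8 weights; values are made up for the example. */
  const uint8_t b[16] = {130, 120, 140, 110, 128, 128, 128, 128,
                         0, 0, 0, 0, 0, 0, 0, 0};
  const __m128i vb0 = _mm_loadu_si128((const __m128i*) b);

  const __m128i vzero = _mm_setzero_si128();
  /* Hypothetical weight zero point, broadcast into 16-bit lanes. */
  const __m128i vb_zero_point = _mm_set1_epi16(128);

  /* Zero-extend uint8 -> int16, then subtract the zero point:
     this is the vxb0 the hits above refer to. */
  const __m128i vxb0 =
      _mm_sub_epi16(_mm_unpacklo_epi8(vb0, vzero), vb_zero_point);

  /* Activations already widened to int16 (also invented). */
  const __m128i vxa0 = _mm_set1_epi16(3);

  /* _mm_shuffle_epi32 broadcasts one 32-bit lane (one pair of int16
     activations) across the register; _mm_madd_epi16 then forms four
     int32 dot products of adjacent int16 pairs. */
  __m128i vacc0x0123 = _mm_setzero_si128();
  vacc0x0123 = _mm_add_epi32(
      vacc0x0123,
      _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));

  int32_t out[4];
  _mm_storeu_si128((__m128i*) out, vacc0x0123);
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}
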
2x2-scalar.c
56 const int32_t vxb0 = (int32_t) vb0 - vb_zero_point; in xnn_q8_gemm_ukernel_2x2__scalar() local
59 vacc0x0 += va0 * vxb0; in xnn_q8_gemm_ukernel_2x2__scalar()
61 vacc1x0 += va1 * vxb0; in xnn_q8_gemm_ukernel_2x2__scalar()
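
The scalar microkernel expresses the same idea one element at a time: widen the uint8 weight to int32, subtract the zero point, and multiply-accumulate against the activation. Below is a self-contained sketch of the resulting sum-of-(a - za)(b - zb) accumulation; the loop and all values are invented for illustration, and the activation zero-point handling shown here is a simplification rather than the kernel's exact scheme.

#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Hypothetical quantization parameters and data. */
  const int32_t va_zero_point = 10;
  const int32_t vb_zero_point = 128;
  const uint8_t a[4] = {12, 13, 9, 10};
  const uint8_t b[4] = {130, 126, 128, 131};

  int32_t vacc0x0 = 0;
  for (int k = 0; k < 4; k++) {
    /* Widen to int32 and remove the zero point: the scalar vxb0. */
    const int32_t va0 = (int32_t) a[k] - va_zero_point;
    const int32_t vxb0 = (int32_t) b[k] - vb_zero_point;
    vacc0x0 += va0 * vxb0;
  }
  /* 2*2 + 3*(-2) + (-1)*0 + 0*3 = -2 */
  printf("acc = %d\n", vacc0x0);
  return 0;
}
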
2x4c8-sse2.c
85 const __m128i vxb0 = _mm_sub_epi16(_mm_unpacklo_epi8(vb0, vzero), vb_zero_point); in xnn_q8_gemm_ukernel_2x4c8__sse2() local
94 vacc00 = _mm_add_epi32(vacc00, _mm_madd_epi16(vxa0, vxb0)); in xnn_q8_gemm_ukernel_2x4c8__sse2()
98 vacc10 = _mm_add_epi32(vacc10, _mm_madd_epi16(vxa1, vxb0)); in xnn_q8_gemm_ukernel_2x4c8__sse2()
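
The 2x4c8 variant differs from the c2 kernels above in that each _mm_madd_epi16 consumes a full register of activations against a full register of weights, with no _mm_shuffle_epi32 broadcast; the four int32 lane sums are reduced later in the kernel. A hedged sketch of that single step, with invented values:

#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Eight int16 activations and eight zero-point-corrected weights
     (values invented for the example). */
  const __m128i vxa0 = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
  const __m128i vxb0 = _mm_setr_epi16(1, -1, 2, -2, 3, -3, 4, -4);

  /* No broadcast here: madd multiplies lane-wise and adds adjacent
     int16 products into four int32 partial sums. */
  __m128i vacc00 = _mm_setzero_si128();
  vacc00 = _mm_add_epi32(vacc00, _mm_madd_epi16(vxa0, vxb0));

  int32_t out[4];
  _mm_storeu_si128((__m128i*) out, vacc00);
  /* Partial sums: 1-2=-1, 6-8=-2, 15-18=-3, 28-32=-4. */
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}
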
/external/XNNPACK/src/q8-igemm/
4x4c2-sse2.c
97 const __m128i vxb0 = _mm_sub_epi16(_mm_unpacklo_epi8(vb0, vzero), vb_zero_point); in xnn_q8_igemm_ukernel_4x4c2__sse2() local
98 … _mm_add_epi32(vacc0x0123, _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_igemm_ukernel_4x4c2__sse2()
99 … _mm_add_epi32(vacc1x0123, _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_igemm_ukernel_4x4c2__sse2()
100 … _mm_add_epi32(vacc2x0123, _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_igemm_ukernel_4x4c2__sse2()
101 … _mm_add_epi32(vacc3x0123, _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_igemm_ukernel_4x4c2__sse2()
139 const __m128i vxb0 = _mm_sub_epi16(_mm_unpacklo_epi8(vb0, vzero), vb_zero_point); in xnn_q8_igemm_ukernel_4x4c2__sse2() local
142 … _mm_add_epi32(vacc0x0123, _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_igemm_ukernel_4x4c2__sse2()
143 … _mm_add_epi32(vacc1x0123, _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_igemm_ukernel_4x4c2__sse2()
144 … _mm_add_epi32(vacc2x0123, _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_igemm_ukernel_4x4c2__sse2()
145 … _mm_add_epi32(vacc3x0123, _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_q8_igemm_ukernel_4x4c2__sse2()
2x2-scalar.c
69 const int32_t vxb0 = (int32_t) vb0 - vb_zero_point; in xnn_q8_igemm_ukernel_2x2__scalar() local
72 vacc0x0 += va0 * vxb0; in xnn_q8_igemm_ukernel_2x2__scalar()
74 vacc1x0 += va1 * vxb0; in xnn_q8_igemm_ukernel_2x2__scalar()
/external/XNNPACK/src/q8-vadd/
neon.c
40 const int16x8_t vxb0 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(vb01), vb_zero_point)); in xnn_q8_vadd_ukernel__neon() local
58 vacc0_lo = vmlaq_s32(vacc0_lo, vmovl_s16(vget_low_s16(vxb0)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
62 vacc0_hi = vmlaq_s32(vacc0_hi, vmovl_high_s16(vxb0), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
110 const int16x8_t vxb0 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(vb01), vb_zero_point)); in xnn_q8_vadd_ukernel__neon() local
123 vacc0_lo = vmlaq_s32(vacc0_lo, vmovl_s16(vget_low_s16(vxb0)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
125 vacc0_hi = vmlaq_s32(vacc0_hi, vmovl_s16(vget_high_s16(vxb0)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
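
In the NEON q8 vadd kernel, vsubl_u8 widens uint8 to 16 bits and subtracts the zero point in a single instruction; each int16 half is then widened again to int32 and scaled into the accumulator with vmlaq_s32. Note that the companion of the line-40 hit (source line 62) uses the AArch64-only vmovl_high_s16, while line 125 shows the portable vmovl_s16(vget_high_s16(...)) spelling. Below is a minimal sketch of the pattern; the inputs and multiplier are invented, and it must be compiled with NEON support:

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Eight uint8 inputs and a broadcast zero point (values made up). */
  const uint8_t b[8] = {130, 120, 140, 110, 128, 127, 129, 128};
  const uint8x8_t vb = vld1_u8(b);
  const uint8x8_t vb_zero_point = vdup_n_u8(128);

  /* Widening subtract: uint8 - uint8 -> 16 bits, reinterpreted as
     signed so values below the zero point come out negative. */
  const int16x8_t vxb0 =
      vreinterpretq_s16_u16(vsubl_u8(vb, vb_zero_point));

  /* Widen each half to int32 and multiply-accumulate by a fixed
     per-operand multiplier, as the vadd kernel does. */
  const int32x4_t vb_multiplier = vdupq_n_s32(3);
  int32x4_t vacc0_lo = vdupq_n_s32(0);
  int32x4_t vacc0_hi = vdupq_n_s32(0);
  vacc0_lo = vmlaq_s32(vacc0_lo, vmovl_s16(vget_low_s16(vxb0)), vb_multiplier);
  vacc0_hi = vmlaq_s32(vacc0_hi, vmovl_s16(vget_high_s16(vxb0)), vb_multiplier);

  int32_t lo[4], hi[4];
  vst1q_s32(lo, vacc0_lo);
  vst1q_s32(hi, vacc0_hi);
  printf("%d %d %d %d  %d %d %d %d\n",
         lo[0], lo[1], lo[2], lo[3], hi[0], hi[1], hi[2], hi[3]);
  return 0;
}
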