
Searched refs:vxb3 (Results 1 – 25 of 119) sorted by relevance
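All of the hits below are the same inner-loop step in different microkernel variants: a group of eight int8 weights is widened to int16 as vxb3, multiplied against widened activations, and adjacent 32-bit products are summed into the int32 accumulators. The ld64 variants widen with _mm_cvtepi8_epi16, the ld128 variants interleave value bytes with sign bytes, and the xw variants load weights already stored as int16. A minimal standalone sketch of the SSE4.1 accumulation step follows; it is not XNNPACK code, and the array names and values are made up for illustration.

    /* Sketch only: one 8-wide weight group (like vb3) against one widened
     * activation row.  _mm_madd_epi16 multiplies int16 lanes pairwise and
     * sums adjacent products into four int32 lanes. */
    #include <smmintrin.h>  /* SSE4.1: _mm_cvtepi8_epi16, _mm_extract_epi32 */
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const int8_t a[8] = { 1, -2, 3, -4, 5, -6, 7, -8 };        /* activation bytes (illustrative) */
      const int8_t b[8] = { 10, 11, -12, 13, 14, -15, 16, 17 };  /* weight group, like vb3 (illustrative) */

      /* ld64-style widening: load 8 bytes, sign-extend each to 16 bits. */
      const __m128i vxa0 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) a));
      const __m128i vxb3 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) b));

      /* SSE4.1 kernels accumulate as vacc += madd(vxa, vxb3). */
      __m128i vacc0x3 = _mm_setzero_si128();
      vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3));

      printf("%d %d %d %d\n",
             _mm_extract_epi32(vacc0x3, 0), _mm_extract_epi32(vacc0x3, 1),
             _mm_extract_epi32(vacc0x3, 2), _mm_extract_epi32(vacc0x3, 3));
      return 0;
    }

The xop hits fuse the add into _mm_maddd_epi16(vxa, vxb3, vacc) instead, and the 4x4c2 kernels broadcast two activation bytes per step with _mm_shuffle_epi32(vxa, _MM_SHUFFLE(3, 3, 3, 3)) before the same multiply-add.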


/external/XNNPACK/src/qs8-gemm/gen/
3x4c8-minmax-xop-ld128.c
101 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() local
104 vacc0x3 = _mm_maddd_epi16(vxa0, vxb3, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
106 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
108 vacc2x3 = _mm_maddd_epi16(vxa2, vxb3, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
3x4c8-minmax-sse41-ld128.c
96 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() local
99 vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
101 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
103 vacc2x3 = _mm_add_epi32(vacc2x3, _mm_madd_epi16(vxa2, vxb3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
3x4c8-minmax-xop-ld64.c
106 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() local
108 vacc0x3 = _mm_maddd_epi16(vxa0, vxb3, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
109 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
110 vacc2x3 = _mm_maddd_epi16(vxa2, vxb3, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
3x4c8-xw-minmax-xop.c
102 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() local
104 vacc0x3 = _mm_maddd_epi16(vxa0, vxb3, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
105 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
106 vacc2x3 = _mm_maddd_epi16(vxa2, vxb3, vacc2x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
3x4c8-minmax-sse41-ld64.c
101 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() local
103 vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
104 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
105 vacc2x3 = _mm_add_epi32(vacc2x3, _mm_madd_epi16(vxa2, vxb3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
3x4c8-xw-minmax-sse41.c
97 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() local
99 vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
100 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
101 vacc2x3 = _mm_add_epi32(vacc2x3, _mm_madd_epi16(vxa2, vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
4x4c2-minmax-xop-ld128.c
113 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128() local
125 _mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc0x0123); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
127 _mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc1x0123); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
129 _mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc2x0123); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
131 _mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc3x0123); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
4x4c2-minmax-xop-ld64.c
122 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64() local
125 _mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc0x0123); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
127 _mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc1x0123); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
129 _mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc2x0123); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
131 _mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc3x0123); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
4x4c2-xw-minmax-xop.c
118 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop() local
121 _mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc0x0123); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
123 _mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc1x0123); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
125 _mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc2x0123); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
127 _mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc3x0123); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
4x4c2-xw-minmax-sse41.c
113 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() local
116 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
118 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
120 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
122 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
2x4c8-xw-minmax-xop.c
86 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() local
88 vacc0x3 = _mm_maddd_epi16(vxa0, vxb3, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
89 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
4x4c2-minmax-sse41-ld128.c
108 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128() local
120 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
122 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
124 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
126 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
4x4c2-minmax-sse41-ld64.c
117 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64() local
120 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
122 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
124 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
126 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
2x4c8-minmax-sse41-ld64.c
85 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() local
87 vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
88 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
2x4c8-xw-minmax-sse41.c
81 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41() local
83 vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
84 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
2x4c8-minmax-xop-ld128.c
86 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() local
89 vacc0x3 = _mm_maddd_epi16(vxa0, vxb3, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
91 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
/external/XNNPACK/src/qs8-igemm/gen/
3x4c8-minmax-xop-ld64.c
121 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() local
123 vacc0x3 = _mm_maddd_epi16(vxa0, vxb3, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
124 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
125 vacc2x3 = _mm_maddd_epi16(vxa2, vxb3, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
3x4c8-minmax-xop-ld128.c
116 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() local
119 vacc0x3 = _mm_maddd_epi16(vxa0, vxb3, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
121 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
123 vacc2x3 = _mm_maddd_epi16(vxa2, vxb3, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
3x4c8-minmax-sse41-ld64.c
116 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() local
118 vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
119 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
120 vacc2x3 = _mm_add_epi32(vacc2x3, _mm_madd_epi16(vxa2, vxb3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
3x4c8-minmax-sse41-ld128.c
111 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() local
114 vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
116 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
118 vacc2x3 = _mm_add_epi32(vacc2x3, _mm_madd_epi16(vxa2, vxb3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
4x4c2-minmax-sse41-ld128.c
125 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128() local
137 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
139 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
141 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
143 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
4x4c2-minmax-sse41-ld64.c
134 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64() local
137 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
139 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
141 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
143 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
4x4c2-minmax-xop-ld128.c
130 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128() local
142 _mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc0x0123); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
144 _mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc1x0123); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
146 _mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc2x0123); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
148 _mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc3x0123); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
4x4c2-minmax-xop-ld64.c
139 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64() local
142 _mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc0x0123); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
144 _mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc1x0123); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
146 _mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc2x0123); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
148 _mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3, vacc3x0123); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
2x4c8-minmax-xop-ld64.c
103 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() local
105 vacc0x3 = _mm_maddd_epi16(vxa0, vxb3, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
106 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
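The ld128 hits above build vxb3 without _mm_cvtepi8_epi16: they interleave the high half of a 16-byte weight load with its sign bytes (vsb23). The sign-byte computation itself is not part of these matches; the compare-against-zero below is the conventional way to form it and is an assumption here, not copied from the kernels. A compilable sketch in the same spirit:

    /* Sketch only (assumed vsb23 construction): vsb23 holds 0xFF for negative
     * weight bytes and 0x00 otherwise, so interleaving value bytes with sign
     * bytes yields sign-extended int16 lanes on a little-endian target. */
    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    static inline void widen_weights_ld128(const int8_t w[16], __m128i* vxb2, __m128i* vxb3) {
      const __m128i vb23  = _mm_loadu_si128((const __m128i*) w);         /* 16 int8 weights: groups 2 and 3 */
      const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23);   /* sign bytes (assumed) */
      *vxb2 = _mm_unpacklo_epi8(vb23, vsb23);                            /* low 8 weights as int16 */
      *vxb3 = _mm_unpackhi_epi8(vb23, vsb23);                            /* high 8 weights as int16: vxb3 */
    }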
