Home
last modified time | relevance | path

Searched refs:vout_hi (Results 1 – 12 of 12) sorted by relevance

/external/XNNPACK/src/qs8-igemm/
DMRx8c8-avx2.c.in161 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); local
165 _mm_storeh_pi((__m64*) c3, _mm_castsi128_ps(vout_hi));
169 _mm_storel_epi64((__m128i*) c1, vout_hi);
181 *((uint32_t*) c3) = (uint32_t) _mm_extract_epi32(vout_hi, 2);
185 _mm_storeu_si32(c1, vout_hi);
192 vout_hi = _mm_srli_epi64(vout_hi, 32);
196 *((uint16_t*) c3) = (uint16_t) _mm_extract_epi16(vout_hi, 4);
200 *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0);
207 vout_hi = _mm_srli_epi32(vout_hi, 16);
211 *c3 = (uint8_t) _mm_extract_epi8(vout_hi, 8);
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D2x8c8-xw-minmax-avx2.c155 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2() local
159 _mm_storel_epi64((__m128i*) c1, vout_hi); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
171 _mm_storeu_si32(c1, vout_hi); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
177 vout_hi = _mm_srli_epi64(vout_hi, 32); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
181 *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
187 vout_hi = _mm_srli_epi32(vout_hi, 16); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
191 *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
D2x8c8-minmax-avx2.c159 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2() local
163 _mm_storel_epi64((__m128i*) c1, vout_hi); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
175 _mm_storeu_si32(c1, vout_hi); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
181 vout_hi = _mm_srli_epi64(vout_hi, 32); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
185 *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
191 vout_hi = _mm_srli_epi32(vout_hi, 16); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
195 *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
D3x8c8-minmax-avx2.c193 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2() local
197 _mm_storel_epi64((__m128i*) c1, vout_hi); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
212 _mm_storeu_si32(c1, vout_hi); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
220 vout_hi = _mm_srli_epi64(vout_hi, 32); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
224 *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
232 vout_hi = _mm_srli_epi32(vout_hi, 16); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
236 *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
D3x8c8-xw-minmax-avx2.c189 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2() local
193 _mm_storel_epi64((__m128i*) c1, vout_hi); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
208 _mm_storeu_si32(c1, vout_hi); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
216 vout_hi = _mm_srli_epi64(vout_hi, 32); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
220 *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
228 vout_hi = _mm_srli_epi32(vout_hi, 16); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
232 *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
D1x8c8-xw-minmax-avx2.c124 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2() local
141 vout_hi = _mm_srli_epi64(vout_hi, 32); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
149 vout_hi = _mm_srli_epi32(vout_hi, 16); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
D1x8c8-minmax-avx2.c128 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2() local
145 vout_hi = _mm_srli_epi64(vout_hi, 32); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
153 vout_hi = _mm_srli_epi32(vout_hi, 16); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
/external/XNNPACK/src/qs8-gemm/
DMRx8c8-avx2.c.in162 __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
167 _mm_storel_epi64((__m128i*) c1, vout_hi);
171 _mm_storeh_pi((__m64*) c3, _mm_castsi128_ps(vout_hi));
184 _mm_storeu_si32(c1, vout_hi);
188 *((uint32_t*) c3) = (uint32_t) _mm_extract_epi32(vout_hi, 2);
194 vout_hi = _mm_srli_epi64(vout_hi, 32);
199 *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0);
203 *((uint16_t*) c3) = (uint16_t) _mm_extract_epi16(vout_hi, 4);
209 vout_hi = _mm_srli_epi32(vout_hi, 16);
214 *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0);
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
D2x8c8-minmax-avx2.c174 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2() local
177 _mm_storel_epi64((__m128i*) c1, vout_hi); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
188 _mm_storeu_si32(c1, vout_hi); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
195 vout_hi = _mm_srli_epi64(vout_hi, 32); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
198 *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
205 vout_hi = _mm_srli_epi32(vout_hi, 16); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
208 *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
D3x8c8-minmax-avx2.c210 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2() local
214 _mm_storel_epi64((__m128i*) c1, vout_hi); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
227 _mm_storeu_si32(c1, vout_hi); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
235 vout_hi = _mm_srli_epi64(vout_hi, 32); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
239 *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
247 vout_hi = _mm_srli_epi32(vout_hi, 16); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
251 *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
D1x8c8-minmax-avx2.c141 __m128i vout_hi = _mm256_extracti128_si256(vout, 1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2() local
158 vout_hi = _mm_srli_epi64(vout_hi, 32); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
166 vout_hi = _mm_srli_epi32(vout_hi, 16); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
/external/XNNPACK/src/qu8-dwconv/
Dup8x9-minmax-sse2.c213 …const __m128i vout_hi = _mm_sub_epi32(_mm_sra_epi32(vq31prod_hi0123, vshift), _mm_cmpgt_epi32(vrem… in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() local
216 __m128i vout = _mm_adds_epi16(_mm_packs_epi32(vout_lo, vout_hi), voutput_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
363 …const __m128i vout_hi = _mm_sub_epi32(_mm_sra_epi32(vq31prod_hi0123, vshift), _mm_cmpgt_epi32(vrem… in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() local
366 __m128i vout = _mm_adds_epi16(_mm_packs_epi32(vout_lo, vout_hi), voutput_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()