Home
last modified time | relevance | path

Searched refs:vacc_hi (Results 1 – 12 of 12) sorted by relevance

/external/XNNPACK/src/qu8-vadd/
Dminmax-sse2.c53 …__m128i vacc_hi = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(va_product_lo, va_product_… in xnn_qu8_vadd_minmax_ukernel__sse2() local
56 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vb_product_lo, vb_product_hi)); in xnn_qu8_vadd_minmax_ukernel__sse2()
62 …_mm_add_epi32(_mm_and_si128(vacc_hi, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc_h… in xnn_qu8_vadd_minmax_ukernel__sse2()
65vacc_hi = _mm_sub_epi32(_mm_sra_epi32(vacc_hi, vshift), _mm_cmpgt_epi32(vrem_hi, vremainder_thresh… in xnn_qu8_vadd_minmax_ukernel__sse2()
69 const __m128i vacc = _mm_adds_epi16(_mm_packs_epi32(vacc_lo, vacc_hi), vy_zero_point); in xnn_qu8_vadd_minmax_ukernel__sse2()
95 …__m128i vacc_hi = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(va_product_lo, va_product_… in xnn_qu8_vadd_minmax_ukernel__sse2() local
98 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vb_product_lo, vb_product_hi)); in xnn_qu8_vadd_minmax_ukernel__sse2()
104 …_mm_add_epi32(_mm_and_si128(vacc_hi, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc_h… in xnn_qu8_vadd_minmax_ukernel__sse2()
107vacc_hi = _mm_sub_epi32(_mm_sra_epi32(vacc_hi, vshift), _mm_cmpgt_epi32(vrem_hi, vremainder_thresh… in xnn_qu8_vadd_minmax_ukernel__sse2()
111 const __m128i vacc = _mm_adds_epi16(_mm_packs_epi32(vacc_lo, vacc_hi), vy_zero_point); in xnn_qu8_vadd_minmax_ukernel__sse2()
Dminmax-neon.c161 int32x4_t vacc_hi = vmulq_s32(vmovl_high_s16(vxa), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon() local
163 int32x4_t vacc_hi = vmulq_s32(vmovl_s16(vget_high_s16(vxa)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon() local
168 vacc_hi = vmlaq_s32(vacc_hi, vmovl_high_s16(vxb), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
170 vacc_hi = vmlaq_s32(vacc_hi, vmovl_s16(vget_high_s16(vxb)), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
175 vacc_hi = vsraq_n_s32(vacc_hi, vbicq_s32(vacc_hi, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
178 vacc_hi = vrshlq_s32(vacc_hi, vright_shift); in xnn_qu8_vadd_minmax_ukernel__neon()
182 const int16x8_t vacc = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc_lo), vacc_hi), vy_zero_point); in xnn_qu8_vadd_minmax_ukernel__neon()
184 …const int16x8_t vacc = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc_lo), vqmovn_s32(vacc_hi)), vy_zero_… in xnn_qu8_vadd_minmax_ukernel__neon()
204 int32x4_t vacc_hi = vmulq_s32(vmovl_high_s16(vxa), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon() local
206 int32x4_t vacc_hi = vmulq_s32(vmovl_s16(vget_high_s16(vxa)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon() local
[all …]
/external/XNNPACK/src/qu8-dwconv/
Dup8x9-minmax-sse2.c73 __m128i vacc_hi = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() local
82 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod0_odd, vprod0_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
91 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod1_odd, vprod1_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
100 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod2_odd, vprod2_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
109 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod3_odd, vprod3_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
118 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod4_odd, vprod4_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
127 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod5_odd, vprod5_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
136 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod6_odd, vprod6_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
145 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod7_odd, vprod7_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
154 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod8_odd, vprod8_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
[all …]
Dup8x9-minmax-neon.c144 int32x4_t vacc_hi = vaddq_s32(vaccX0_hi, vaccX1_hi); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() local
147 vacc_hi = vqrdmulhq_s32(vacc_hi, vmultiplier); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
151 vacc_hi = vsraq_n_s32(vacc_hi, vbicq_s32(vacc_hi, vzero_shift_mask), 31); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
154 vacc_hi = vrshlq_s32(vacc_hi, vright_shift); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
157 …const int16x8_t vacc = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc_lo), vacc_hi), voutput_zero_poin… in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
159 …const int16x8_t vacc = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc_lo), vqmovn_s32(vacc_hi)), voutput_… in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
235 int32x4_t vacc_hi = vaddq_s32(vaccX0_hi, vaccX1_hi); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() local
238 vacc_hi = vqrdmulhq_s32(vacc_hi, vmultiplier); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
242 vacc_hi = vsraq_n_s32(vacc_hi, vbicq_s32(vacc_hi, vzero_shift_mask), 31); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
245 vacc_hi = vrshlq_s32(vacc_hi, vright_shift); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
[all …]
/external/XNNPACK/src/qu8-gavgpool/
D7p7x-minmax-neon-c8.c62 const int32x4_t vacc_hi = vaddw_s16(vbias, vget_high_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8() local
65 vst1q_s32(acc, vacc_hi); acc += 4; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
87 const int32x4_t vacc_hi = vld1q_s32(acc + 4); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8() local
99 vst1q_s32(acc, vaddw_s16(vacc_hi, vget_high_s16(vsum))); acc += 4; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
149 int32x4_t vacc_hi = vld1q_s32(acc); acc += 4; in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8() local
160 vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
163 const int32x4_t vneg_mask_hi = vreinterpretq_s32_u32(vcltq_s32(vacc_hi, vmovq_n_s32(0))); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
168 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
169 const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
178 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
[all …]
D7x-minmax-neon-c8.c84 int32x4_t vacc_hi = vaddw_s16(vbias, vget_high_s16(vsum)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8() local
87 const int32x4_t vneg_mask_hi = vreinterpretq_s32_u32(vcltq_s32(vacc_hi, vmovq_n_s32(0))); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
92 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
93 const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
102 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
103 const int64x2_t vproduct67 = vmull_s32(vget_high_s32(vacc_hi), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
118vacc_hi = vuzp1q_s32(vreinterpretq_s32_s64(vscaled_acc45), vreinterpretq_s32_s64(vscaled_acc67)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
120 …const int16x8_t vacc = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc_lo), vacc_hi), voutput_zero_poin… in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
123 vacc_hi = vcombine_s32(vmovn_s64(vscaled_acc45), vmovn_s64(vscaled_acc67)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
125 …const int16x8_t vacc = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc_lo), vqmovn_s32(vacc_hi)), voutput_… in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
[all …]
D7p7x-minmax-sse2-c8.c69 const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() local
72 _mm_store_si128((__m128i*) acc + 1, vacc_hi); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8()
94 __m128i vacc_hi = _mm_load_si128((const __m128i*) acc + 1); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() local
113 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8()
116 _mm_store_si128((__m128i*) acc + 1, vacc_hi); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8()
161 __m128i vacc_hi = _mm_load_si128((const __m128i*) acc + 1); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() local
181 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8()
184 const __m128i vneg_mask_hi = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc_hi); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8()
187 const __m128i vabs_hi0123 = _mm_sub_epi32(_mm_xor_si128(vacc_hi, vneg_mask_hi), vneg_mask_hi); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8()
233 __m128i vacc_hi = _mm_load_si128((const __m128i*) acc + 1); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() local
[all …]
D7x-minmax-sse2-c8.c87 __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() local
90 const __m128i vneg_mask_hi = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc_hi); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
93 const __m128i vabs_hi0123 = _mm_sub_epi32(_mm_xor_si128(vacc_hi, vneg_mask_hi), vneg_mask_hi); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
156 __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() local
159 const __m128i vneg_mask_hi = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc_hi); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
162 const __m128i vabs_hi0123 = _mm_sub_epi32(_mm_xor_si128(vacc_hi, vneg_mask_hi), vneg_mask_hi); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
/external/XNNPACK/src/qu8-avgpool/
D9p8x-minmax-neon-c8.c115 const int32x4_t vacc_hi = vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum))); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local
118 vst1q_s32(b, vacc_hi); b += 4; in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
176 int32x4_t vacc_hi = vld1q_s32(b + 4); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local
188 vacc_hi = vaddw_s16(vacc_hi, vreinterpret_s16_u16(vget_high_u16(vsum))); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
191 vst1q_s32(b, vacc_hi); b += 4; in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
271 int32x4_t vacc_hi = vld1q_s32(b); b += 4; in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local
283 vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
286 const int32x4_t vneg_mask_hi = vreinterpretq_s32_u32(vcltq_s32(vacc_hi, vmovq_n_s32(0))); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
291 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
292 const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
[all …]
D9x-minmax-neon-c8.c139 int32x4_t vacc_hi = vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum))); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8() local
142 const int32x4_t vneg_mask_hi = vreinterpretq_s32_u32(vcltq_s32(vacc_hi, vmovq_n_s32(0))); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
147 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
148 const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
157 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
158 const int64x2_t vproduct67 = vmull_s32(vget_high_s32(vacc_hi), vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
173vacc_hi = vuzp1q_s32(vreinterpretq_s32_s64(vscaled_acc45), vreinterpretq_s32_s64(vscaled_acc67)); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
175 …const int16x8_t vacc = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc_lo), vacc_hi), voutput_zero_poin… in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
178 vacc_hi = vcombine_s32(vmovn_s64(vscaled_acc45), vmovn_s64(vscaled_acc67)); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
180 …const int16x8_t vacc = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc_lo), vqmovn_s32(vacc_hi)), voutput_… in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
[all …]
D9p8x-minmax-sse2-c8.c119 const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
122 _mm_store_si128((__m128i*) b + 1, vacc_hi); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
181 __m128i vacc_hi = _mm_load_si128((const __m128i*) b + 1); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
202 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
205 _mm_store_si128((__m128i*) b + 1, vacc_hi); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
286 __m128i vacc_hi = _mm_load_si128((const __m128i*) b + 1); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
308 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
311 const __m128i vneg_mask_hi = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc_hi); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
314 … const __m128i vabs_hi0123 = _mm_sub_epi32(_mm_xor_si128(vacc_hi, vneg_mask_hi), vneg_mask_hi); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
362 __m128i vacc_hi = _mm_load_si128((const __m128i*) b + 1); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
[all …]
D9x-minmax-sse2-c8.c143 const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() local
146 const __m128i vneg_mask_hi = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc_hi); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8()
149 const __m128i vabs_hi0123 = _mm_sub_epi32(_mm_xor_si128(vacc_hi, vneg_mask_hi), vneg_mask_hi); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8()
218 const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() local
221 const __m128i vneg_mask_hi = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc_hi); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8()
224 const __m128i vabs_hi0123 = _mm_sub_epi32(_mm_xor_si128(vacc_hi, vneg_mask_hi), vneg_mask_hi); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8()