
Searched refs:vacc0x10 (results 1 – 24 of 24), sorted by relevance
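Every match below is one of four stages in the life of the vacc0x10 accumulator: it is seeded with a bias from the packed weights (vld1q_lane_s32), widened int16 products are folded into it inside the K loop (vpadalq_s16), and it is reduced horizontally at the end, either by pairing column accumulators with vpaddq_s32 (AArch64) or by adding its low and high halves via vget_low_s32/vget_high_s32 (the ARMv7-compatible path). The following is a minimal sketch of that pattern for a single output element, not the XNNPACK kernel itself; it assumes an AArch64 toolchain, and the name dot_c8 is illustrative rather than taken from the library.

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

static int32_t dot_c8(const int32_t* w, const int8_t a[8], const int8_t b[8]) {
  /* 1. Seed the accumulator: bias in lane 0, remaining lanes zero
        (the vld1q_lane_s32(w, vmovq_n_s32(0), 0) matches below). */
  int32x4_t vacc = vld1q_lane_s32(w, vmovq_n_s32(0), 0);
  /* 2. Widening multiply: eight int8*int8 products as int16 lanes. */
  const int16x8_t vprod = vmull_s8(vld1_s8(a), vld1_s8(b));
  /* 3. Pairwise add-accumulate into four int32 lanes
        (the vpadalq_s16 matches below). */
  vacc = vpadalq_s16(vacc, vprod);
  /* 4. Horizontal reduction. The AArch64 kernels pair adjacent column
        accumulators with vpaddq_s32; for one column, vaddvq_s32 sums all
        four lanes. The ARMv7 path instead adds vget_low_s32/vget_high_s32
        halves, as in the vpsum0xA matches below. */
  return vaddvq_s32(vacc);
}

int main(void) {
  const int32_t bias = 100;
  const int8_t a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  const int8_t b[8] = {1, 1, 1, 1, 1, 1, 1, 1};
  /* Expect 100 + (1+2+...+8) = 136. */
  printf("%d\n", dot_c8(&bias, a, b));
  return 0;
}

Seeding the bias into lane 0 of an otherwise-zero vector is safe because the final horizontal reduction sums all four lanes, so the bias is counted exactly once.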

/external/XNNPACK/src/qs8-gemm/gen/
1x16c8-minmax-neon-mull-padal.c:54 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
99 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
125 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
149 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
1x16c8-minmax-neon-mlal-padal.c:54 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
127 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
188 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
214 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
238 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
1x16c16-minmax-neon-mlal-padal.c:54 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
115 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
141 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
165 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
2x16c8-minmax-neon-mull-padal.c:60 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
76 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
143 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
180 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
216 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
2x16c8-minmax-neon-mlal-padal.c:60 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
76 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
183 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
282 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
319 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
355 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
2x16c16-minmax-neon-mlal-padal.c:60 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
76 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
170 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
212 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
248 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
3x16c8-minmax-neon-mull-padal.c:66 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
82 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
98 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
187 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
235 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
283 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
3x16c8-minmax-neon-mlal-padal.c:66 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
82 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
98 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
239 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
376 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
424 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
472 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
4x16c8-minmax-neon-mull-padal.c:72 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
88 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
104 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
120 int32x4_t vacc3x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
231 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
290 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
350 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
3x16c16-minmax-neon-mlal-padal.c:66 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
82 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
98 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
225 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
283 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
331 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
4x16c16-minmax-neon-mlal-padal.c:72 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
88 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
104 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
120 int32x4_t vacc3x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
280 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
354 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
414 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mlal-padal.c:72 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
88 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
104 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
120 int32x4_t vacc3x10 = vacc0x10; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
295 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
470 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
529 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
589 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
1x16c8-minmax-neon-mull-padal.c:57 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
110 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
139 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
163 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
1x16c8-minmax-neon-mlal-padal.c:57 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
138 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
199 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
228 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
252 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
1x16c16-minmax-neon-mlal-padal.c:57 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
126 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
155 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
179 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
2x16c8-minmax-neon-mull-padal.c:61 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
77 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
156 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
196 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
232 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
2x16c8-minmax-neon-mlal-padal.c:61 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
77 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
196 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
295 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
335 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
371 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
3x16c8-minmax-neon-mull-padal.c:65 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
81 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
97 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
202 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
253 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
301 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
2x16c16-minmax-neon-mlal-padal.c:61 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
77 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
183 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
228 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
264 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mull-padal.c:69 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
85 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
101 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
117 int32x4_t vacc3x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
248 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
310 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
370 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
3x16c16-minmax-neon-mlal-padal.c:65 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
81 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
97 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
240 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
301 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
349 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
3x16c8-minmax-neon-mlal-padal.c:65 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
81 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
97 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
254 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
391 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
442 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
490 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
4x16c16-minmax-neon-mlal-padal.c:69 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
85 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
101 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
117 int32x4_t vacc3x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
297 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
374 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
434 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mlal-padal.c:69 …int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
85 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
101 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
117 int32x4_t vacc3x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
312 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
487 vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
549 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
609 const int32x2_t vpsum0xA = vadd_s32(vget_low_s32(vacc0x10), vget_high_s32(vacc0x10)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()